Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F151863260
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
715 KB
Referenced Files
None
Subscribers
None
View Options
This file is larger than 256 KB, so syntax highlighting was skipped.
Index: stable/8/sys/amd64/include/xen
===================================================================
--- stable/8/sys/amd64/include/xen (revision 205282)
+++ stable/8/sys/amd64/include/xen (revision 205283)
Property changes on: stable/8/sys/amd64/include/xen
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /head/sys/amd64/include/xen:r203834,205197
Index: stable/8/sys/cddl/contrib/opensolaris
===================================================================
--- stable/8/sys/cddl/contrib/opensolaris (revision 205282)
+++ stable/8/sys/cddl/contrib/opensolaris (revision 205283)
Property changes on: stable/8/sys/cddl/contrib/opensolaris
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /head/sys/cddl/contrib/opensolaris:r203834,205197
Index: stable/8/sys/contrib/dev/acpica
===================================================================
--- stable/8/sys/contrib/dev/acpica (revision 205282)
+++ stable/8/sys/contrib/dev/acpica (revision 205283)
Property changes on: stable/8/sys/contrib/dev/acpica
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /head/sys/contrib/dev/acpica:r203834,205197
Index: stable/8/sys/contrib/pf
===================================================================
--- stable/8/sys/contrib/pf (revision 205282)
+++ stable/8/sys/contrib/pf (revision 205283)
Property changes on: stable/8/sys/contrib/pf
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /head/sys/contrib/pf:r203834,205197
Index: stable/8/sys/dev/cxgb/cxgb_sge.c
===================================================================
--- stable/8/sys/dev/cxgb/cxgb_sge.c (revision 205282)
+++ stable/8/sys/dev/cxgb/cxgb_sge.c (revision 205283)
@@ -1,3837 +1,3839 @@
/**************************************************************************
Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <net/bpf.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <cxgb_include.h>
#include <sys/mvec.h>
/*
 * Driver-wide globals.  NOTE(review): txq_fills and multiq_tx_enable are not
 * referenced in the visible part of this file; their users live elsewhere.
 */
int txq_fills = 0;
int multiq_tx_enable = 1;
extern struct sysctl_oid_list sysctl__hw_cxgb_children;
/*
 * Size of the per-queue mbuf ring.  Defaults to TX_ETH_Q_SIZE and is
 * published as the loader tunable / read-only sysctl hw.cxgb.txq_mr_size.
 */
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
"size of per-queue mbuf ring");
/*
 * When non-zero, check_pkt_coalesce() returns 1 immediately, without
 * looking at the state of the Tx ring.
 */
static int cxgb_tx_coalesce_force = 0;
TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
&cxgb_tx_coalesce_force, 0,
"coalesce small packets into a single work request regardless of ring state");
/*
 * Tx coalescing and reclaim thresholds, all expressed as fractions of
 * TX_ETH_Q_SIZE.  Every replacement list is fully parenthesized so the
 * macros keep their value when used inside a larger expression (a bare
 * "X>>n" would otherwise bind more loosely than a neighbouring "+" or "*").
 */
#define COALESCE_START_DEFAULT	((TX_ETH_Q_SIZE) >> 1)
#define COALESCE_START_MAX	((TX_ETH_Q_SIZE) - ((TX_ETH_Q_SIZE) >> 3))
#define COALESCE_STOP_DEFAULT	((TX_ETH_Q_SIZE) >> 2)
#define COALESCE_STOP_MIN	((TX_ETH_Q_SIZE) >> 5)
#define TX_RECLAIM_DEFAULT	((TX_ETH_Q_SIZE) >> 5)
#define TX_RECLAIM_MAX		((TX_ETH_Q_SIZE) >> 2)
#define TX_RECLAIM_MIN		((TX_ETH_Q_SIZE) >> 6)
/*
 * Tx descriptors in use (txq->in_use) at or above which check_pkt_coalesce()
 * marks a queue as coalescing.  check_pkt_coalesce() caps the value at
 * COALESCE_START_MAX.
 */
static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
&cxgb_tx_coalesce_enable_start);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
&cxgb_tx_coalesce_enable_start, 0,
"coalesce enable threshold");
/*
 * Tx descriptors in use at or below which check_pkt_coalesce() may clear
 * the coalescing mark again (it also requires an empty software ring).
 */
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
&cxgb_tx_coalesce_enable_stop, 0,
"coalesce disable threshold");
/*
 * Tx reclaim threshold.  reclaim_completed_tx() resets it to
 * TX_RECLAIM_DEFAULT whenever it is found outside
 * [TX_RECLAIM_MIN, TX_RECLAIM_MAX].
 */
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
&cxgb_tx_reclaim_threshold, 0,
"tx cleaning minimum threshold");
/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;
int cxgb_ext_freed = 0;
int cxgb_ext_inited = 0;
/*
 * Free-list sizes for the regular and jumbo Rx queues.  Both are computed
 * in t3_sge_prep() and copied into every queue set's parameters.
 */
int fl_q_size = 0;
int jumbo_q_size = 0;
/* Defined elsewhere; read by t3_sge_prep() when sizing the free lists. */
extern int cxgb_use_16k_clusters;
extern int nmbjumbo4;
extern int nmbjumbo9;
extern int nmbjumbo16;
#define USE_GTS 0
/* Rx constants.  NOTE(review): not referenced in the visible part of the file. */
#define SGE_RX_SM_BUF_SIZE 1536
#define SGE_RX_DROP_THRES 16
#define SGE_RX_COPY_THRES 128
/*
 * Period of the Tx buffer reclaim timer. This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 * (hz >> 1 is half of the kernel tick rate, i.e. roughly half a second.)
 */
#define TX_RECLAIM_PERIOD (hz >> 1)
/*
 * Values for sge_txq.flags
 */
enum {
TXQ_RUNNING = 1 << 0, /* fetch engine is running */
TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
};
/* One hardware Tx descriptor: TX_DESC_FLITS 64-bit words. */
struct tx_desc {
uint64_t flit[TX_DESC_FLITS];
} __packed;
/*
 * One free-list (Rx buffer) descriptor.  refill_fl() and recycle_rx_buf()
 * store every field in big-endian form: addr_lo/addr_hi carry the two
 * halves of the buffer's DMA address, len_gen and gen2 the generation bits.
 */
struct rx_desc {
uint32_t addr_lo;
uint32_t len_gen;
uint32_t gen2;
uint32_t addr_hi;
} __packed;
struct rsp_desc { /* response queue descriptor */
struct rss_header rss_hdr;
uint32_t flags;
uint32_t len_cq;
uint8_t imm_data[47]; /* immediate data; copied out by get_imm_packet() */
uint8_t intr_gen;
} __packed;
/*
 * Bits kept in the "flags" member of the software descriptors below.
 * RX_SW_DESC_MAP_CREATED and RX_SW_DESC_INUSE are set by refill_fl();
 * TX_SW_DESC_MAPPED is set by busdma_map_mbufs().
 */
#define RX_SW_DESC_MAP_CREATED (1 << 0)
#define TX_SW_DESC_MAP_CREATED (1 << 1)
#define RX_SW_DESC_INUSE (1 << 3)
#define TX_SW_DESC_MAPPED (1 << 4)
/* Start-of-packet / end-of-packet combinations of a response descriptor. */
#define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
#define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
struct tx_sw_desc { /* SW state per Tx descriptor */
struct mbuf *m;
bus_dmamap_t map; /* DMA map loaded by busdma_map_mbufs() */
int flags; /* TX_SW_DESC_* bits */
};
struct rx_sw_desc { /* SW state per Rx descriptor */
caddr_t rxsd_cl; /* receive cluster handed to the hardware */
struct mbuf *m; /* mbuf allocated together with the cluster */
bus_dmamap_t map;
int flags; /* RX_SW_DESC_* bits */
};
/* Snapshot of Tx queue producer state, filled in by txq_prod(). */
struct txq_state {
unsigned int compl;
unsigned int gen;
unsigned int pidx;
};
/* Result of a bus_dmamap_load() call, filled in by refill_fl_cb(). */
struct refill_fl_cb_arg {
int error;
bus_dma_segment_t seg;
int nseg;
};
/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 *
 * The table is indexed directly by the flit count (see flits_to_desc());
 * entry 0 is a placeholder.  Which variant is compiled depends on
 * SGE_NUM_GENBITS, and the two variants have different lengths.
 */
static uint8_t flit_desc_map[] = {
0,
#if SGE_NUM_GENBITS == 1
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
/* Wrappers around the queue set mutex (qs->lock). */
#define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED)
#define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock)
#define TXQ_LOCK(qs) mtx_lock(&(qs)->lock)
#define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock)
/*
 * Wrappers around the drbr_*() buf-ring helpers; all of them operate on the
 * Ethernet Tx queue's ring (txq[TXQ_ETH].txq_mr) of the given queue set.
 */
#define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
+#define TXQ_RING_NEEDS_ENQUEUE(qs) \
+ drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define TXQ_RING_DEQUEUE_COND(qs, func, arg) \
drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define TXQ_RING_DEQUEUE(qs) \
drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
/* When non-zero, busdma_map_mbufs() prints a message on mapping failure. */
int cxgb_debug = 0;
/* Forward declarations for functions referenced before their definition. */
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);
/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we are now for determining the need for coalescing
 *
 * check_pkt_coalesce - update and report the Tx coalescing state
 * @qs: the queue set whose Ethernet Tx queue is examined
 *
 * Returns 1 unconditionally when the tx_coalesce_force tunable is set.
 * Otherwise it clamps the start/stop tunables, updates this queue set's
 * entry in sc->tunq_fill[] and returns sc->tunq_coalesce.
 *
 * NOTE(review): the value returned is sc->tunq_coalesce rather than the
 * per-queue *fill byte; presumably tunq_coalesce overlays the tunq_fill[]
 * array -- confirm against the adapter structure definition.
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	/*
	 * Both thresholds are writable sysctls, so bring them back into
	 * range on every call.  Each clamp must write the variable it
	 * tested: the stop threshold used to be "fixed" by overwriting
	 * the start threshold, leaving the bad stop value in place.
	 */
	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;

	/*
	 * Once the hardware transmit queue holds at least
	 * cxgb_tx_coalesce_enable_start descriptors we mark it as
	 * coalescing.  We drop back from coalescing when usage falls to
	 * cxgb_tx_coalesce_enable_stop or below, no packets are enqueued
	 * on the software ring and qs->coalescing is clear.  The gap
	 * between the two thresholds provides some hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}
/*
 * set_wr_hdr - store the two 32-bit halves of a work request header
 * @wrp: the header to fill in
 * @wr_hi: value for the "hi" word
 * @wr_lo: value for the "lo" word
 *
 * On LP64 platforms both words are merged into one 64-bit value and stored
 * with a single assignment to wrh_hilo; which word lands in the upper half
 * depends on the byte order.  Elsewhere the words are stored separately,
 * "hi" first, with a write barrier between the two stores.
 */
#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
#if _BYTE_ORDER == _LITTLE_ENDIAN
	const uint64_t combined = ((uint64_t)wr_lo << 32) | wr_hi;
#else
	const uint64_t combined = ((uint64_t)wr_hi << 32) | wr_lo;
#endif

	wrp->wrh_hilo = combined;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	/* "hi" must be written before "lo"; the barrier keeps that order. */
	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif
/* Running totals for the batch of packets being coalesced. */
struct coalesce_info {
	int count;	/* packets accepted so far */
	int nbytes;	/* sum of m_len over the accepted packets */
};

/*
 * coalesce_check - decide whether a packet may join the current batch
 * @m: candidate packet
 * @arg: the batch's struct coalesce_info
 *
 * A packet is always accepted while no bytes have been accumulated.  After
 * that it is accepted only if the batch stays within 10500 bytes, fewer than
 * 7 packets have been accepted, and the packet is a single mbuf.  Returns 1
 * (updating the totals) when accepted and 0 otherwise.
 */
static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *info = arg;

	if (info->nbytes != 0) {
		if (info->nbytes + m->m_len > 10500)
			return (0);
		if (info->count >= 7)
			return (0);
		if (m->m_next != NULL)
			return (0);
	}
	info->count += 1;
	info->nbytes += m->m_len;
	return (1);
}
/*
 * cxgb_dequeue - pull the next packet, or batch of packets, off the Tx ring
 * @qs: the queue set to dequeue from
 *
 * If check_pkt_coalesce() returns 0 a single packet is dequeued.  Otherwise
 * packets are dequeued for as long as coalesce_check() accepts them and are
 * linked together through m_nextpkt.  Returns the first packet, or NULL if
 * nothing was dequeued.
 */
static struct mbuf *
cxgb_dequeue(struct sge_qset *qs)
{
	struct coalesce_info ci;
	struct mbuf *first, *last, *pkt;

	if (check_pkt_coalesce(qs) == 0)
		return TXQ_RING_DEQUEUE(qs);

	first = NULL;
	last = NULL;
	ci.nbytes = 0;
	ci.count = 0;
	for (;;) {
		pkt = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
		if (pkt == NULL)
			break;
		if (first == NULL)
			first = pkt;
		else
			last->m_nextpkt = pkt;
		last = pkt;
	}
	if (ci.count > 7)
		panic("trying to coalesce %d packets in to one WR", ci.count);
	return (first);
}
/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@qs: the queue set that owns the Tx queue
 *	@reclaim_min: do nothing unless at least this many descriptors
 *	    are reclaimable
 *	@queue: index of the Tx queue within @qs->txq[]
 *
 *	Frees the descriptors reported by desc_reclaimable() and adjusts the
 *	queue's cleaned/in_use counters.  The queue set lock is asserted only
 *	after the @reclaim_min check has passed.  Also clears the TXQ_ETH bit
 *	in qs->txq_stopped if it is set.  Returns the number of descriptors
 *	reclaimable, or 0 if that was below @reclaim_min.
 */
static __inline int
reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
{
	struct sge_txq *txq = &qs->txq[queue];
	int ndesc = desc_reclaimable(txq);

	/* An out-of-range tunable is replaced by the default. */
	if (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN ||
	    cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX)
		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;

	if (ndesc < reclaim_min)
		return (0);

	mtx_assert(&qs->lock, MA_OWNED);
	if (ndesc > 0) {
		t3_free_tx_desc(qs, ndesc, queue);
		txq->cleaned += ndesc;
		txq->in_use -= ndesc;
	}
	if (isset(&qs->txq_stopped, TXQ_ETH))
		clrbit(&qs->txq_stopped, TXQ_ETH);

	return (ndesc);
}
/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Returns non-zero when the descriptors in use, not counting those
 *	already processed but not yet cleaned, amount to less than half of
 *	the queue size.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int uncleaned = q->processed - q->cleaned;

	return ((q->in_use - uncleaned) < (q->size >> 1));
}
/**
 * t3_sge_init - initialize SGE
 * @adap: the adapter
 * @p: the SGE parameters (not referenced by this function)
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queue sets here, instead the driver
 * top-level must request those individually. We also do not enable DMA
 * here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
u_int ctrl, ups;
/* ups is hard-wired to 0, so V_USERSPACESIZE() below always receives 0. */
ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
ctrl |= F_EGRGENCTRL;
#endif
/* Chips newer than rev 0 get the one-interrupt bits unless MSI/MSI-X is used. */
if (adap->params.rev > 0) {
if (!(adap->flags & (USING_MSIX | USING_MSI)))
ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
}
t3_write_reg(adap, A_SG_CONTROL, ctrl);
t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
V_LORCQDRBTHRSH(512));
t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
V_TIMEOUT(200 * core_ticks_per_usec(adap)));
/* The high-priority doorbell high threshold depends on the chip revision. */
t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
adap->params.rev < T3_REV_C ? 1000 : 500);
t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Returns the number of flits needed for a scatter/gather list that
 *	can hold @n entries: three flits per pair of entries, plus two flits
 *	for an odd trailing entry.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	unsigned int flits = (3 * n) / 2;

	if (n & 1)
		flits++;
	return (flits);
}
/**
 *	get_imm_packet - copy the immediate data of a response into an mbuf
 *	@sc: the adapter (not referenced)
 *	@resp: the response descriptor containing the packet data
 *	@m: the mbuf that receives the data
 *
 *	Sets the mbuf and packet-header lengths to IMMED_PKT_SIZE, clears the
 *	external-buffer pointer and type, and copies IMMED_PKT_SIZE bytes of
 *	immediate data into the mbuf.  Always returns 0.
 */
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
{
	m->m_pkthdr.len = IMMED_PKT_SIZE;
	m->m_len = m->m_pkthdr.len;

	m->m_ext.ext_type = 0;
	m->m_ext.ext_buf = NULL;

	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
	return (0);
}
/*
 * flits_to_desc - number of Tx descriptors needed for @n flits
 *
 * Plain table lookup in flit_desc_map[]; @n is not range-checked, so the
 * caller must pass a value covered by the table.
 */
static __inline u_int
flits_to_desc(u_int n)
{
	const uint8_t ndesc = flit_desc_map[n];

	return (ndesc);
}
/* Interrupt-cause bits grouped by the kind of SGE error they report. */
#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)

/**
 *	t3_sge_err_intr_handler - SGE async event interrupt handler
 *	@adapter: the adapter
 *
 *	Reads the SGE interrupt cause register, logs an alert for every
 *	error class that is set, writes the cause value back to the
 *	register, and calls t3_fatal_err() if any bit of SGE_FATALERR was
 *	set.
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int cause;

	cause = t3_read_reg(adapter, A_SG_INT_CAUSE);

	if (cause & SGE_PARERR) {
		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
		    cause & SGE_PARERR);
	}
	if (cause & SGE_FRAMINGERR) {
		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
		    cause & SGE_FRAMINGERR);
	}
	if (cause & F_RSPQCREDITOVERFOW) {
		CH_ALERT(adapter, "SGE response queue credit overflow\n");
	}
	if (cause & F_RSPQDISABLED) {
		unsigned int fl_status;

		fl_status = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
		CH_ALERT(adapter,
		    "packet delivered to disabled response queue (0x%x)\n",
		    (fl_status >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, cause);
	if (cause & SGE_FATALERR)
		t3_fatal_err(adapter);
}
/*
 * t3_sge_prep - compute default SGE parameters
 * @adap: the adapter
 * @p: the SGE parameter block to fill in
 *
 * Sizes the regular and jumbo free lists from the system's cluster limits,
 * rounds both down to a power of two, and then fills in the per-queue-set
 * defaults for all SGE_QSETS entries of p->qset.
 */
void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
int i, nqsets;
nqsets = min(SGE_QSETS, mp_ncpus*4);
/* Give each free list at most a third of the clusters per queue set. */
fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
while (!powerof2(fl_q_size))
fl_q_size--;
#if __FreeBSD_version >= 700111
if (cxgb_use_16k_clusters)
jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
else
jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
#else
jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
#endif
while (!powerof2(jumbo_q_size))
jumbo_q_size--;
/* Only a warning: initialization continues with the reduced sizes. */
if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
device_printf(adap->dev,
"Insufficient clusters and/or jumbo buffers.\n");
/* XXX Does ETHER_ALIGN need to be accounted for here? */
p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
for (i = 0; i < SGE_QSETS; ++i) {
struct qset_params *q = p->qset + i;
/* Interrupt coalescing delay: longer with more than two ports. */
if (adap->params.nports > 2) {
q->coalesce_usecs = 50;
} else {
#ifdef INVARIANTS
q->coalesce_usecs = 10;
#else
q->coalesce_usecs = 5;
#endif
}
q->polling = 0;
q->rspq_size = RSPQ_Q_SIZE;
q->fl_size = fl_q_size;
q->jumbo_size = jumbo_q_size;
q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
q->txq_size[TXQ_OFLD] = 1024;
q->txq_size[TXQ_CTRL] = 256;
q->cong_thres = 0;
}
}
/*
 * t3_sge_alloc - create the adapter-wide DMA tags
 * @sc: the adapter
 *
 * Creates the parent tag and, derived from it, the tags for normal Rx
 * buffers, jumbo Rx buffers and Tx frames.  Returns 0 on success or ENOMEM
 * as soon as one tag cannot be created.
 *
 * NOTE(review): tags created before a failure are not destroyed here;
 * presumably the caller runs t3_sge_free() on error -- confirm.
 */
int
t3_sge_alloc(adapter_t *sc)
{
/* The parent tag. */
if (bus_dma_tag_create( NULL, /* parent */
1, 0, /* algnmnt, boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
BUS_SPACE_UNRESTRICTED, /* nsegments */
BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
0, /* flags */
NULL, NULL, /* lock, lockarg */
&sc->parent_dmat)) {
device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
return (ENOMEM);
}
/*
 * DMA tag for normal sized RX frames: one MCLBYTES-aligned segment of
 * MCLBYTES.
 */
if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
return (ENOMEM);
}
/*
 * DMA tag for jumbo sized RX frames: one MJUM16BYTES-aligned segment of
 * MJUM16BYTES.
 */
if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
return (ENOMEM);
}
/*
 * DMA tag for TX frames: up to TX_MAX_SEGS segments, TX_MAX_SIZE in total.
 */
if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
NULL, NULL, &sc->tx_dmat)) {
device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
return (ENOMEM);
}
return (0);
}
/*
 * t3_sge_free - destroy the adapter-wide DMA tags
 * @sc: the adapter
 *
 * Destroys whichever of the Tx, jumbo Rx, Rx and parent tags exist, in that
 * order, so the parent tag goes last.  The tag pointers are left as they
 * were.  Always returns 0.
 */
int
t3_sge_free(struct adapter * sc)
{
	/* Derived tags first ... */
	if (sc->tx_dmat != NULL) {
		bus_dma_tag_destroy(sc->tx_dmat);
	}
	if (sc->rx_jumbo_dmat != NULL) {
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
	}
	if (sc->rx_dmat != NULL) {
		bus_dma_tag_destroy(sc->rx_dmat);
	}

	/* ... then the parent they were created from. */
	if (sc->parent_dmat != NULL) {
		bus_dma_tag_destroy(sc->parent_dmat);
	}

	return (0);
}
/*
 * t3_update_qset_coalesce - apply interrupt coalescing parameters
 * @qs: the queue set to update
 * @p: the queue set parameters
 *
 * Sets the response queue hold-off timer to ten times p->coalesce_usecs
 * (but at least 1) and forces polling off; p->polling is ignored.
 */
void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{
	qs->rspq.polling = 0 /* p->polling */;
	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
}
#if !defined(__i386__) && !defined(__amd64__)
/*
 * refill_fl_cb - bus_dmamap_load() callback used by refill_fl()
 * @arg: the caller's struct refill_fl_cb_arg
 * @segs: the DMA segments of the mapping
 * @nseg: number of entries in @segs
 * @error: load status
 *
 * Records the status, the first segment and the segment count in @arg.
 * Compiled only for architectures other than i386/amd64.
 *
 * NOTE(review): segs[0] is copied even when @error is non-zero; the visible
 * caller checks cb_arg.error before using the segment.
 */
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
struct refill_fl_cb_arg *cb_arg = arg;
cb_arg->error = error;
cb_arg->seg = segs[0];
cb_arg->nseg = nseg;
}
#endif
/**
 *	refill_fl - refill an SGE free-buffer list
 *	@sc: the controller softc
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
 *	The caller must assure that @n does not exceed the queue's capacity.
 *
 *	Refilling stops early when an allocation or DMA mapping step fails.
 *	The doorbell is rung once at the end if at least one buffer was
 *	added.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	struct mbuf *m;
	caddr_t cl;
	int err, count = 0;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * Allocate a receive cluster together with an uninitialized
		 * (MT_NOINIT) mbuf.  For zone_pack both come from a single
		 * m_getcl() call and the cluster is the mbuf's own external
		 * buffer; for any other zone they are two separate
		 * allocations.
		 */
		if (q->zone == zone_pack) {
			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
				break;
			cl = m->m_ext.ext_buf;
		} else {
			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
				break;
			if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
				uma_zfree(q->zone, cl);
				break;
			}
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				/*
				 * Release both allocations.  Only a
				 * separately allocated cluster goes back to
				 * q->zone; a zone_pack cluster belongs to
				 * the mbuf.
				 */
				if (q->zone != zone_pack)
					uma_zfree(q->zone, cl);
				m_free(m);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			/* Same cleanup as for a failed map creation. */
			if (q->zone != zone_pack)
				uma_zfree(q->zone, cl);
			m_free(m);
			goto done;
		}
#else
		/* On x86 the physical address is taken straight from the pmap. */
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->m = m;
		/* Hardware descriptor fields are stored big-endian. */
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		/* Wrap the producer index and flip the generation bit. */
		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
		count++;
	}

done:
	if (count)
		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE free list to clean up
 *
 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
 * this queue should be stopped before calling this function.
 *
 * Walks q->credits entries starting at the consumer index.
 *
 * NOTE(review): the post-decrement in the loop condition leaves q->credits
 * one below zero on exit, and the descriptor flags are not cleared after
 * the DMA map is destroyed -- confirm the queue is not reused afterwards.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
u_int cidx = q->cidx;
while (q->credits--) {
struct rx_sw_desc *d = &q->sdesc[cidx];
if (d->flags & RX_SW_DESC_INUSE) {
bus_dmamap_unload(q->entry_tag, d->map);
bus_dmamap_destroy(q->entry_tag, d->map);
/*
 * The mbuf is initialized with m_init() before it is returned to its
 * zone.  For zone_pack the mbuf is freed as one item; otherwise the mbuf
 * and the cluster are freed separately.
 */
if (q->zone == zone_pack) {
m_init(d->m, zone_pack, MCLBYTES,
M_NOWAIT, MT_DATA, M_EXT);
uma_zfree(zone_pack, d->m);
} else {
m_init(d->m, zone_mbuf, MLEN,
M_NOWAIT, MT_DATA, 0);
uma_zfree(zone_mbuf, d->m);
uma_zfree(q->zone, d->rxsd_cl);
}
}
d->rxsd_cl = NULL;
d->m = NULL;
if (++cidx == q->size)
cidx = 0;
}
}
/*
 * __refill_fl - add up to 16 buffers to a free list
 * @adap: the adapter
 * @fl: the free list
 *
 * Refills with the smaller of 16 and the number of unused slots
 * (fl->size - fl->credits).
 */
static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	int nbufs = min(16U, fl->size - fl->credits);

	refill_fl(adap, fl, nbufs);
}
/*
 * __refill_fl_lt - refill a free list that has fewer than @max unused slots
 * @adap: the adapter
 * @fl: the free list
 * @max: upper bound on the number of buffers to add
 *
 * Does nothing unless the number of unused slots (fl->size - fl->credits)
 * is below @max; in that case refill_fl() is asked for min(@max, unused).
 *
 * NOTE(review): refilling only when *fewer* than @max slots are free is the
 * existing behavior and is preserved as is -- confirm it is intended.
 */
static __inline void
__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
{
	if ((fl->size - fl->credits) >= max)
		return;

	refill_fl(adap, fl, min(max, fl->size - fl->credits));
}
/**
 * recycle_rx_buf - recycle a receive buffer
 * @adap: the adapter
 * @q: the SGE free list
 * @idx: index of buffer to recycle
 *
 * Recycles the specified buffer on the given free list by adding it at
 * the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
struct rx_desc *from = &q->desc[idx];
struct rx_desc *to = &q->desc[q->pidx];
/* Move the software state (cluster, mbuf, map, flags) to the new slot. */
q->sdesc[q->pidx] = q->sdesc[idx];
to->addr_lo = from->addr_lo; // already big endian
to->addr_hi = from->addr_hi; // likewise
/* The address words are written before the generation words. */
wmb(); /* necessary ? */
to->len_gen = htobe32(V_FLD_GEN1(q->gen));
to->gen2 = htobe32(V_FLD_GEN2(q->gen));
q->credits++;
/* Wrap the producer index and flip the generation bit. */
if (++q->pidx == q->size) {
q->pidx = 0;
q->gen ^= 1;
}
/* Ring the free list's doorbell. */
t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
/*
 * alloc_ring_cb - bus_dmamap_load() callback used by alloc_ring()
 * @arg: where to store the bus address; alloc_ring() passes its
 *       bus_addr_t *phys argument
 * @segs: the DMA segments of the mapping
 * @nsegs: number of entries in @segs (alloc_ring()'s tag allows one)
 * @error: load status
 *
 * Stores the bus address of the first segment through @arg.  The store uses
 * the bus_addr_t type of the destination, so the whole object is written
 * regardless of the platform's bus address width or byte order.  Nothing is
 * stored when the load failed, because @segs is not meaningful then.
 */
static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	bus_addr_t *addr = arg;

	if (error != 0)
		return;
	*addr = segs[0].ds_addr;
}
/*
 * alloc_ring - allocate a descriptor ring and its software state
 * @sc: the adapter
 * @nelem: number of ring entries
 * @elem_size: size of one hardware descriptor
 * @sw_size: size of one software descriptor, or 0 for none
 * @phys: receives the ring's bus address (via alloc_ring_cb())
 * @desc: receives the pointer to the zeroed hardware ring
 * @sdesc: receives the pointer to the zeroed software array (if @sw_size)
 * @tag: receives the DMA tag created for the ring
 * @map: receives the DMA map of the ring memory
 * @parent_entry_tag: parent for the per-entry tag, or NULL to skip it
 * @entry_tag: receives the per-entry DMA tag
 *
 * Returns 0 on success and ENOMEM if a tag or the ring memory cannot be
 * allocated.
 *
 * NOTE(review): resources obtained before a failure are not released here
 * and the return value of bus_dmamap_load() is ignored; presumably the
 * caller tears the queue down on error -- confirm.
 */
static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
size_t len = nelem * elem_size;
void *s = NULL;
void *p = NULL;
int err;
/* Ring tag: one page-aligned segment of len bytes. */
if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
BUS_SPACE_MAXADDR_32BIT,
BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
len, 0, NULL, NULL, tag)) != 0) {
device_printf(sc->dev, "Cannot allocate descriptor tag\n");
return (ENOMEM);
}
if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
map)) != 0) {
device_printf(sc->dev, "Cannot allocate descriptor memory\n");
return (ENOMEM);
}
bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
bzero(p, len);
/* desc and sdesc are really pointers to the caller's ring pointers. */
*(void **)desc = p;
if (sw_size) {
len = nelem * sw_size;
s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
*(void **)sdesc = s;
}
if (parent_entry_tag == NULL)
return (0);
/* Per-entry tag: up to TX_MAX_SEGS segments, TX_MAX_SIZE in total. */
if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
NULL, NULL, entry_tag)) != 0) {
device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
return (ENOMEM);
}
return (0);
}
static void
sge_slow_intr_handler(void *arg, int ncount)
{
adapter_t *sc = arg;
t3_slow_intr_handler(sc);
}
/**
 * sge_timer_cb - perform periodic maintenance of an SGE qset
 * @arg: the adapter whose queue sets are maintained
 *
 * Runs periodically from a timer to perform maintenance of an SGE queue
 * set. It performs two tasks:
 *
 * a) Cleans up any completed Tx descriptors that may still be pending.
 * Normal descriptor cleanup happens when new packets are added to a Tx
 * queue so this timer is relatively infrequent and does any cleanup only
 * if the Tx queue has not seen any new packets in a while. We make a
 * best effort attempt to reclaim descriptors, in that we don't wait
 * around if we cannot get a queue's lock (which most likely is because
 * someone else is queueing new packets and so will also handle the clean
 * up). Since control queues use immediate data exclusively we don't
 * bother cleaning them up here.
 *
 * b) Replenishes Rx queues that have run out due to memory shortage.
 * Normally new Rx buffers are added when existing ones are consumed but
 * when out of memory a queue can become empty. We try to add only a few
 * buffers here, the queue will be replenished fully as these new buffers
 * are used up if memory shortage has subsided.
 *
 * c) Return coalesced response queue credits in case a response queue is
 * starved.
 *
 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 * fifo overflows and the FW doesn't implement any recovery scheme yet.
 *
 * The callback itself only detects pending work; a), b) and c) are carried
 * out by the per-port timer_reclaim_task that it enqueues.
 */
static void
sge_timer_cb(void *arg)
{
adapter_t *sc = arg;
/* The reclaim task is only scheduled from here when MSI-X is not in use. */
if ((sc->flags & USING_MSIX) == 0) {
struct port_info *pi;
struct sge_qset *qs;
struct sge_txq *txq;
int i, j;
int reclaim_ofl, refill_rx;
/* No open ports: return without re-arming the timer. */
if (sc->open_device_map == 0)
return;
for (i = 0; i < sc->params.nports; i++) {
pi = &sc->port[i];
for (j = 0; j < pi->nqsets; j++) {
qs = &sc->sge.qs[pi->first_qset + j];
txq = &qs->txq[0];
reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
(qs->fl[1].credits < qs->fl[1].size));
/* One enqueue per port is enough; the task walks all its queue sets. */
if (reclaim_ofl || refill_rx) {
taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
break;
}
}
}
}
/* Task d): ring the tunnel queue doorbell of each port's first queue set. */
if (sc->params.nports > 2) {
int i;
for_each_port(sc, i) {
struct port_info *pi = &sc->port[i];
t3_write_reg(sc, A_SG_KDOORBELL,
F_SELEGRCNTX |
(FW_TUNNEL_SGEEC_START + pi->first_qset));
}
}
/* Re-arm only while the timer still has work to do and a port is open. */
if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
sc->open_device_map != 0)
callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}
/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 *
 * Initializes and arms the SGE maintenance callout (sge_timer_cb, period
 * TX_RECLAIM_PERIOD) and initializes the slow-interrupt task.
 * Always returns 0.
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
return (0);
}
/*
 * t3_sge_reset_adapter - re-arm the SGE maintenance callout
 * @sc: the adapter
 *
 * Schedules sge_timer_cb() to run after TX_RECLAIM_PERIOD.
 * Always returns 0.
 */
int
t3_sge_reset_adapter(adapter_t *sc)
{
callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
return (0);
}
/*
 * t3_sge_init_port - per-port SGE initialization
 * @pi: the port
 *
 * Initializes the port's timer_reclaim_task so that it runs
 * sge_timer_reclaim() with the port as argument.  Always returns 0.
 */
int
t3_sge_init_port(struct port_info *pi)
{
TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
return (0);
}
/**
 *	refill_rspq - replenish an SGE response queue
 *	@sc: the adapter
 *	@q: the response queue to replenish
 *	@credits: how many new responses to make available
 *
 *	Returns @credits to the hardware for the given response queue with a
 *	single write to the credit-return register.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{
	/* mbufs are allocated on demand when a rspq entry is processed. */
	const u_int val = V_RSPQ(q->cntxt_id) | V_CREDITS(credits);

	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, val);
}
/*
 * sge_txq_reclaim_handler - task handler that reclaims Tx descriptors
 * @arg: the queue set
 * @ncount: pending count supplied by the taskqueue (unused)
 *
 * Calls reclaim_completed_tx() with a minimum of 16 descriptors for Tx
 * queue indices 0, 1 and 2 of the queue set.
 */
static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int queue;

	for (queue = 0; queue < 3; queue++) {
		reclaim_completed_tx(qs, 16, queue);
	}
}
/*
 * sge_timer_reclaim - deferred maintenance for the queue sets of one port
 * @arg: the port
 * @ncount: pending count supplied by the taskqueue (unused)
 *
 * For each queue set of the port: reclaims completed offload Tx
 * descriptors, and, if the response queue lock can be taken without
 * blocking, tops up both free lists and returns one response queue credit
 * when the hardware status register flags the queue.
 */
static void
sge_timer_reclaim(void *arg, int ncount)
{
struct port_info *pi = arg;
int i, nqsets = pi->nqsets;
adapter_t *sc = pi->adapter;
struct sge_qset *qs;
struct mtx *lock;
KASSERT((sc->flags & USING_MSIX) == 0,
("can't call timer reclaim for msi-x"));
for (i = 0; i < nqsets; i++) {
qs = &sc->sge.qs[pi->first_qset + i];
reclaim_completed_tx(qs, 16, TXQ_OFLD);
/*
 * NOTE(review): given the KASSERT above, the USING_MSIX arm of this
 * conditional is never taken here, so the lock is always that of
 * queue set 0.
 */
lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
&sc->sge.qs[0].rspq.lock;
/* Best effort: skip this queue set if the lock is contended. */
if (mtx_trylock(lock)) {
/* XXX currently assume that we are *NOT* polling */
uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
if (qs->fl[0].credits < qs->fl[0].size - 16)
__refill_fl(sc, &qs->fl[0]);
if (qs->fl[1].credits < qs->fl[1].size - 16)
__refill_fl(sc, &qs->fl[1]);
/* Status bit set for this queue: hand back one credit and clear the bit. */
if (status & (1 << qs->rspq.cntxt_id)) {
if (qs->rspq.credits) {
refill_rspq(sc, &qs->rspq, 1);
qs->rspq.credits--;
t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1 << qs->rspq.cntxt_id);
}
}
mtx_unlock(lock);
}
}
}
/**
 * init_qset_cntxt - initialize an SGE queue set context info
 * @qs: the queue set
 * @id: the queue set id
 *
 * Initializes the TIDs and context ids for the queues of a queue set,
 * and initializes the send queue of each of the three Tx queues.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{
qs->rspq.cntxt_id = id;
/* Each queue set owns two consecutive free-list contexts. */
qs->fl[0].cntxt_id = 2 * id;
qs->fl[1].cntxt_id = 2 * id + 1;
qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
/* Note: no token is assigned to the offload queue here. */
qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
mbufq_init(&qs->txq[TXQ_ETH].sendq);
mbufq_init(&qs->txq[TXQ_OFLD].sendq);
mbufq_init(&qs->txq[TXQ_CTRL].sendq);
}
/*
 * txq_prod - reserve descriptors on a Tx queue
 * @txq: the Tx queue
 * @ndesc: number of descriptors being consumed
 * @txqs: receives the generation, producer index and completion flag that
 *        apply to the first reserved descriptor
 *
 * Advances the queue's in_use, unacked and pidx counters by @ndesc, wrapping
 * pidx and flipping the generation bit at the end of the ring.
 */
static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
txq->in_use += ndesc;
/*
 * XXX we don't handle stopping of queue
 * presumably start handles this when we bump against the end
 */
txqs->gen = txq->gen;
txq->unacked += ndesc;
/*
 * Bit 5 of the unacked count (value 32) is moved to bit position
 * S_WR_COMPL to form the completion flag, then the count is reduced
 * modulo 32.
 */
txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
txq->unacked &= 31;
txqs->pidx = txq->pidx;
txq->pidx += ndesc;
#ifdef INVARIANTS
/* Sanity check relating the old and new producer index to cidx. */
if (((txqs->pidx > txq->cidx) &&
(txq->pidx < txqs->pidx) &&
(txq->pidx >= txq->cidx)) ||
((txqs->pidx < txq->cidx) &&
(txq->pidx >= txq-> cidx)) ||
((txqs->pidx < txq->cidx) &&
(txq->cidx < txqs->pidx)))
panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
txqs->pidx, txq->pidx, txq->cidx);
#endif
/* Wrap the producer index and flip the generation bit. */
if (txq->pidx >= txq->size) {
txq->pidx -= txq->size;
txq->gen ^= 1;
}
}
/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@m: the packet mbufs
 *	@nsegs: the number of segments
 *
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  A packet of at most PIO_LEN bytes always takes a single
 *	descriptor.  Otherwise the count is derived from the SGL size plus
 *	two header flits, with one more flit for TSO packets when
 *	TSO_SUPPORTED is defined.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int nflits;

	if (m->m_pkthdr.len <= PIO_LEN)
		return (1);

	nflits = 2 + sgl_len(nsegs);
#ifdef TSO_SUPPORTED
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		nflits += 1;
#endif
	return (flits_to_desc(nflits));
}
/*
 * busdma_map_mbufs - DMA-map the mbuf chain *m for transmission
 *
 * Loads *m into txsd->map using txq->entry_tag and fills segs / *nsegs.
 * On success returns 0 and marks txsd with TX_SW_DESC_MAPPED.
 *
 * Failure handling, as implemented below:
 *   - EFBIG on the first pass: the chain is defragmented with m_defrag()
 *     and the load is retried once; if m_defrag() fails the original chain
 *     is freed, *m is set to NULL and ENOBUFS is returned.
 *   - ENOMEM: returned as-is; the chain is NOT freed and *m is untouched.
 *   - any other error (including EFBIG on the second pass): the chain is
 *     freed, *m is set to NULL and the error is returned.
 */
static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
struct mbuf *m0;
int err, pktlen, pass = 0;
bus_dma_tag_t tag = txq->entry_tag;
retry:
err = 0;
m0 = *m;
/* pktlen is only used by the debug printf in the error path. */
pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
/*
 * On i386/amd64 try busdma_map_sg_collapse() first; only if it fails
 * does the dangling "else" fall through to bus_dmamap_load_mbuf_sg().
 * NOTE(review): busdma_map_sg_collapse() receives m (struct mbuf **),
 * while the fallback load uses the m0 captured above -- confirm the
 * collapse routine cannot replace *m on a failing return.
 */
if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
goto done;
} else
#endif
err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
if (err == 0) {
goto done;
}
if (err == EFBIG && pass == 0) {
/* pass guards against retrying more than once. */
pass = 1;
/* Too many segments, try to defrag */
m0 = m_defrag(m0, M_DONTWAIT);
if (m0 == NULL) {
/* m0 is NULL here, so free through *m which still holds the chain. */
m_freem(*m);
*m = NULL;
return (ENOBUFS);
}
*m = m0;
goto retry;
} else if (err == ENOMEM) {
/* The chain is left intact for the caller on ENOMEM. */
return (err);
} if (err) {
if (cxgb_debug)
printf("map failure err=%d pktlen=%d\n", err, pktlen);
m_freem(m0);
*m = NULL;
return (err);
}
done:
#if !defined(__i386__) && !defined(__amd64__)
/* The pre-write sync is compiled only for non-x86 targets. */
bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
txsd->flags |= TX_SW_DESC_MAPPED;
return (0);
}
/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet.
 *	Each sg_ent holds two (length, address) pairs, stored big-endian.
 *	Zero-length segments are skipped.  If an odd number of segments is
 *	written, the unused second slot of the last entry is zeroed.  The
 *	function returns nothing; the caller must size the SGL appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx, filled;

	for (idx = 0, filled = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		/*
		 * Move to the next sg_ent once both slots of the current one
		 * have been written.  This is keyed on the number of entries
		 * actually written rather than on the raw segment index, so
		 * that leading empty segments do not cause the first sg_ent
		 * to be skipped and left uninitialized.
		 */
		if (filled != 0 && idx == 0)
			++sgp;
		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
		filled++;
	}
	/* Odd number of entries written: terminate the half-used sg_ent. */
	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}
/**
* check_ring_tx_db - check and potentially ring a Tx queue's doorbell
* @adap: the adapter
* @q: the Tx queue
*
* Ring the doorbell if a Tx queue is asleep. There is a natural race,
* where the HW is going to sleep just after we checked, however,
* then the interrupt handler will detect the outstanding TX packet
* and ring the doorbell for us.
*
* When GTS is disabled we unconditionally ring the doorbell.
*/
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	/* Already marked running: no doorbell is written. */
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) != 0)
		return;
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
	T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
	    q->cntxt_id);
#endif
#else
	wmb();			/* write descriptors before telling HW */
#endif
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
/*
 * wr_gen2 - store the generation value in the last flit of a descriptor
 * @d: the Tx descriptor
 * @gen: the generation value
 *
 * Compiles to a no-op unless SGE_NUM_GENBITS == 2, in which case @gen is
 * written big-endian into flit[TX_DESC_FLITS - 1] of @d.
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}
/**
* write_wr_hdr_sgl - write a WR header and, optionally, SGL
* @ndesc: number of Tx descriptors spanned by the SGL
* @txd: first Tx descriptor to be written
* @txqs: txq state (generation and producer index)
* @txq: the SGE Tx queue
* @sgl: the SGL
* @flits: number of flits to the start of the SGL in the first descriptor
* @sgl_flits: the SGL size in flits
* @wr_hi: top 32 bits of WR header based on WR type (big endian)
* @wr_lo: low 32 bits of WR header based on WR type (big endian)
*
* Write a work request header and an associated SGL. If the SGL is
* small enough to fit into one Tx descriptor it has already been written
* and we just need to write the WR header. Otherwise we distribute the
* SGL across the number of descriptors it spans.
*/
/*
 * Writes the work request header for a packet and, when the request spans
 * more than one descriptor, copies the SGL piecewise into each descriptor.
 * txqs->pidx and txqs->gen are advanced as descriptors are consumed.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{
struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
/* NOTE(review): txsd is advanced below but never dereferenced here. */
struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
if (__predict_true(ndesc == 1)) {
/* Single descriptor: header carries both SOP and EOP. */
set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
V_WR_SGLSFLT(flits)) | wr_hi,
htonl(V_WR_LEN(flits + sgl_flits) |
V_WR_GEN(txqs->gen)) | wr_lo);
/* XXX gen? */
wr_gen2(txd, txqs->gen);
} else {
/* ogen / wp remember the first descriptor; its wrh_lo is written last. */
unsigned int ogen = txqs->gen;
const uint64_t *fp = (const uint64_t *)sgl;
struct work_request_hdr *wp = wrp;
/* First descriptor gets SOP only; its low word is deferred. */
wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
V_WR_SGLSFLT(flits)) | wr_hi;
while (sgl_flits) {
/* Copy as many SGL flits as fit after the header flits. */
unsigned int avail = WR_FLITS - flits;
if (avail > sgl_flits)
avail = sgl_flits;
memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
sgl_flits -= avail;
ndesc--;
if (!sgl_flits)
break;
fp += avail;
txd++;
txsd++;
/* Wrap to the start of the ring and flip the generation. */
if (++txqs->pidx == txq->size) {
txqs->pidx = 0;
txqs->gen ^= 1;
txd = txq->desc;
txsd = txq->sdesc;
}
/*
* when the head of the mbuf chain
* is freed all clusters will be freed
* with it
*/
/* Continuation descriptor: one header flit, then SGL data. */
wrp = (struct work_request_hdr *)txd;
wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
V_WR_SGLSFLT(1)) | wr_hi;
wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
sgl_flits + 1)) |
V_WR_GEN(txqs->gen)) | wr_lo;
wr_gen2(txd, txqs->gen);
flits = 1;
}
/* Mark the last descriptor written as end-of-packet. */
wrp->wrh_hi |= htonl(F_WR_EOP);
/* Barrier, then complete the first descriptor's header last. */
wmb();
wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
wr_gen2((struct tx_desc *)wp, ogen);
}
}
/*
 * Header bytes assumed for a VLAN-tagged TCP/IPv4 frame:
 * sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp)
 * (the two literal 20s are the IP and TCP header sizes).
 */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
/*
 * GET_VTAG(cntrl, m): when VLAN_SUPPORTED is defined and mbuf m carries
 * M_VLANTAG, OR the VLAN-valid flag and the tag from m_pkthdr.ether_vtag
 * into cntrl.  cntrl must be a modifiable lvalue.  Without VLAN_SUPPORTED
 * the macro expands to nothing.
 */
#ifdef VLAN_SUPPORTED
#define GET_VTAG(cntrl, m) \
do { \
if ((m)->m_flags & M_VLANTAG) \
cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)
#else
#define GET_VTAG(cntrl, m)
#endif
/*
 * t3_encap - build and post the Tx work request(s) for a packet
 * @qs: the queue set; its lock must be held (asserted below)
 * @m: address of the packet (or m_nextpkt-linked batch) to send
 *
 * Three shapes of work request are produced on the TXQ_ETH queue:
 *   - a batch WR when (*m)->m_nextpkt != NULL (one descriptor),
 *   - an LSO WR when tso_info was set,
 *   - a plain CPL_TX_PKT WR otherwise.
 * In the latter two cases packets of at most PIO_LEN bytes are copied
 * into the descriptor as immediate data; larger ones get an SGL.
 *
 * Returns 0 on success, or the error from busdma_map_sg_collapse().
 */
static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
adapter_t *sc;
struct mbuf *m0;
struct sge_txq *txq;
struct txq_state txqs;
struct port_info *pi;
unsigned int ndesc, flits, cntrl, mlen;
int err, nsegs, tso_info = 0;
struct work_request_hdr *wrp;
struct tx_sw_desc *txsd;
struct sg_ent *sgp, *sgl;
uint32_t wr_hi, wr_lo, sgl_flits;
bus_dma_segment_t segs[TX_MAX_SEGS];
struct tx_desc *txd;
pi = qs->port;
sc = pi->adapter;
txq = &qs->txq[TXQ_ETH];
/* Descriptor and software descriptor at the current producer index. */
txd = &txq->desc[txq->pidx];
txsd = &txq->sdesc[txq->pidx];
sgl = txq->txq_sgl;
prefetch(txd);
m0 = *m;
DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
mtx_assert(&qs->lock, MA_OWNED);
cntrl = V_TXPKT_INTF(pi->txpkt_intf);
KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
/*
 * NOTE(review): the TSO detection below is guarded by VLAN_SUPPORTED,
 * not by a TSO-specific symbol -- confirm this is intended.
 */
#ifdef VLAN_SUPPORTED
if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
m0->m_pkthdr.csum_flags & (CSUM_TSO))
tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
if (m0->m_nextpkt != NULL) {
/* Batch: one segment per packet, always a single descriptor. */
busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
ndesc = 1;
mlen = 0;
} else {
if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
&m0, segs, &nsegs))) {
if (cxgb_debug)
printf("failed ... err=%d\n", err);
return (err);
}
mlen = m0->m_pkthdr.len;
ndesc = calc_tx_descs(m0, nsegs);
}
/* Reserve the descriptors; txqs receives the pre-advance gen/pidx. */
txq_prod(txq, ndesc, &txqs);
KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
/* Remember the mbuf so it can be freed when the descriptor is reclaimed. */
txsd->m = m0;
if (m0->m_nextpkt != NULL) {
struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
int i, fidx;
if (nsegs > 7)
panic("trying to coalesce %d packets in to one WR", nsegs);
txq->txq_coalesced += nsegs;
wrp = (struct work_request_hdr *)txd;
/* One header flit plus two flits per batched packet. */
flits = nsegs*2 + 1;
/*
 * NOTE(review): m0 is not advanced along m_nextpkt inside this loop, so
 * the checksum flags and VLAN tag of the first packet are applied to
 * every batch entry -- confirm.
 */
for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
struct cpl_tx_pkt_batch_entry *cbe;
uint64_t flit;
uint32_t *hflit = (uint32_t *)&flit;
int cflags = m0->m_pkthdr.csum_flags;
cntrl = V_TXPKT_INTF(pi->txpkt_intf);
GET_VTAG(cntrl, m0);
cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
if (__predict_false(!(cflags & CSUM_IP)))
cntrl |= F_TXPKT_IPCSUM_DIS;
if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
cntrl |= F_TXPKT_L4CSUM_DIS;
hflit[0] = htonl(cntrl);
hflit[1] = htonl(segs[i].ds_len | 0x80000000);
/* NOTE(review): flit is modified here but only hflit[0]/hflit[1] are stored below. */
flit |= htobe64(1 << 24);
cbe = &cpl_batch->pkt_entry[i];
cbe->cntrl = hflit[0];
cbe->len = hflit[1];
cbe->addr = htobe64(segs[i].ds_addr);
}
wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
V_WR_SGLSFLT(flits)) |
htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
wr_lo = htonl(V_WR_LEN(flits) |
V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
set_wr_hdr(wrp, wr_hi, wr_lo);
wmb();
wr_gen2(txd, txqs.gen);
check_ring_tx_db(sc, txq);
return (0);
} else if (tso_info) {
int min_size = TCPPKTHDRSIZE, eth_type, tagged;
struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
struct ip *ip;
struct tcphdr *tcp;
char *pkthdr;
txd->flit[2] = 0;
GET_VTAG(cntrl, m0);
cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
hdr->cntrl = htonl(cntrl);
hdr->len = htonl(mlen | 0x80000000);
DPRINTF("tso buf len=%d\n", mlen);
tagged = m0->m_flags & M_VLANTAG;
/* Untagged frames do not include the VLAN encapsulation bytes. */
if (!tagged)
min_size -= ETHER_VLAN_ENCAP_LEN;
if (__predict_false(mlen < min_size)) {
printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
m0, mlen, m0->m_pkthdr.tso_segsz,
m0->m_pkthdr.csum_flags, m0->m_flags);
panic("tx tso packet too small");
}
/* Make sure that ether, ip, tcp headers are all in m0 */
/*
 * NOTE(review): m_pullup() may return a different mbuf; txsd->m was
 * assigned from the earlier m0 and *m is not updated here -- confirm.
 */
if (__predict_false(m0->m_len < min_size)) {
m0 = m_pullup(m0, min_size);
if (__predict_false(m0 == NULL)) {
/* XXX panic probably an overreaction */
panic("couldn't fit header into mbuf");
}
}
pkthdr = m0->m_data;
if (tagged) {
eth_type = CPL_ETH_II_VLAN;
ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
ETHER_VLAN_ENCAP_LEN);
} else {
eth_type = CPL_ETH_II;
ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
}
/* The TCP header is located assuming a fixed-size IP header (sizeof(*ip)). */
tcp = (struct tcphdr *)((uint8_t *)ip +
sizeof(*ip));
tso_info |= V_LSO_ETH_TYPE(eth_type) |
V_LSO_IPHDR_WORDS(ip->ip_hl) |
V_LSO_TCPHDR_WORDS(tcp->th_off);
hdr->lso_info = htonl(tso_info);
if (__predict_false(mlen <= PIO_LEN)) {
/* pkt not undersized but fits in PIO_LEN
* Indicates a TSO bug at the higher levels.
*
*/
DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
/* Immediate data: nothing to free at reclaim time. */
txsd->m = NULL;
m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
/* Three header flits precede the copied packet data. */
flits = (mlen + 7) / 8 + 3;
wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
F_WR_SOP | F_WR_EOP | txqs.compl);
wr_lo = htonl(V_WR_LEN(flits) |
V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
wmb();
wr_gen2(txd, txqs.gen);
check_ring_tx_db(sc, txq);
return (0);
}
/* The SGL starts after the three LSO header flits. */
flits = 3;
} else {
struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
GET_VTAG(cntrl, m0);
cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
/* Disable checksum insertion when the stack did not request it. */
if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
cntrl |= F_TXPKT_IPCSUM_DIS;
if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
cntrl |= F_TXPKT_L4CSUM_DIS;
cpl->cntrl = htonl(cntrl);
cpl->len = htonl(mlen | 0x80000000);
if (mlen <= PIO_LEN) {
/* Immediate data: the caller frees the mbuf, not the reclaim path. */
txsd->m = NULL;
m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
/* Two header flits precede the copied packet data. */
flits = (mlen + 7) / 8 + 2;
wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
F_WR_SOP | F_WR_EOP | txqs.compl);
wr_lo = htonl(V_WR_LEN(flits) |
V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
wmb();
wr_gen2(txd, txqs.gen);
check_ring_tx_db(sc, txq);
return (0);
}
/* The SGL starts after the two CPL header flits. */
flits = 2;
}
wrp = (struct work_request_hdr *)txd;
/*
 * With a single descriptor the SGL is built in place after the header;
 * otherwise it is staged in txq->txq_sgl and distributed by
 * write_wr_hdr_sgl().
 */
sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
make_sgl(sgp, segs, nsegs);
sgl_flits = sgl_len(nsegs);
KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
wr_lo = htonl(V_WR_TID(txq->token));
write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
sgl_flits, wr_hi, wr_lo);
check_ring_tx_db(pi->adapter, txq);
return (0);
}
/*
 * Periodic per-queue-set callout: re-evaluates the coalescing state from
 * the Ethernet Tx queue occupancy, opportunistically flushes the queue,
 * and re-arms itself while the interface is running.
 */
void
cxgb_tx_watchdog(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing != 0) {
		/* Leave coalescing once the queue is drained enough and idle. */
		if ((txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
		    TXQ_RING_EMPTY(qs))
			qs->coalescing = 0;
	} else if (txq->in_use >= cxgb_tx_coalesce_enable_start) {
		qs->coalescing = 1;
	}

	/* Only flush if nobody else currently holds the queue lock. */
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_FLUSHING;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_FLUSHING;
		TXQ_UNLOCK(qs);
	}

	/* Re-arm on the same CPU every hz/4 ticks while running. */
	if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
		    qs, txq->txq_watchdog.c_cpu);
}
/*
 * One-shot Tx timer handler: turns coalescing on when at least 1/8 of the
 * Ethernet Tx queue is in use, then tries to push queued packets.
 */
static void
cxgb_tx_timeout(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
		qs->coalescing = 1;

	/* Lock is contended: do nothing this time. */
	if (!TXQ_TRYLOCK(qs))
		return;

	qs->qs_flags |= QS_TIMEOUT;
	cxgb_start_locked(qs);
	qs->qs_flags &= ~QS_TIMEOUT;
	TXQ_UNLOCK(qs);
}
/*
 * cxgb_start_locked - drain the software ring into the Ethernet Tx queue
 * @qs: the queue set; the Tx queue lock must be held (asserted below)
 *
 * Dequeues packets and hands them to t3_encap() until the per-call
 * descriptor budget is used up, the ring is empty, the interface stops
 * running, or the link goes down.  If packets remain afterwards the Tx
 * timer is armed to try again.
 */
static void
cxgb_start_locked(struct sge_qset *qs)
{
struct mbuf *m_head = NULL;
struct sge_txq *txq = &qs->txq[TXQ_ETH];
int avail, txmax;
int in_use_init = txq->in_use;
struct port_info *pi = qs->port;
struct ifnet *ifp = pi->ifp;
/* Budget: at most TX_START_MAX_DESC, keeping 4 descriptors spare. */
avail = txq->size - txq->in_use - 4;
txmax = min(TX_START_MAX_DESC, avail);
/* When called from the watchdog or timer, reclaim unconditionally first. */
if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
reclaim_completed_tx(qs, 0, TXQ_ETH);
/* Link down: flush the software ring instead of transmitting. */
if (!pi->link_config.link_ok) {
TXQ_RING_FLUSH(qs);
return;
}
TXQ_LOCK_ASSERT(qs);
while ((txq->in_use - in_use_init < txmax) &&
!TXQ_RING_EMPTY(qs) &&
(ifp->if_drv_flags & IFF_DRV_RUNNING) &&
pi->link_config.link_ok) {
reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
if ((m_head = cxgb_dequeue(qs)) == NULL)
break;
/*
* Encapsulation can modify our pointer, and or make it
* NULL on failure. In that event, we can't requeue.
*/
if (t3_encap(qs, &m_head) || m_head == NULL)
break;
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(ifp, m_head);
/*
* We sent via PIO, no longer need a copy
*/
if (m_head->m_nextpkt == NULL &&
m_head->m_pkthdr.len <= PIO_LEN)
m_freem(m_head);
/* Ownership has passed on (or the mbuf was freed); forget it. */
m_head = NULL;
}
/* Packets left over: schedule the Tx timer for the next tick. */
if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
pi->link_config.link_ok)
callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
qs, txq->txq_timer.c_cpu);
/* Non-NULL only when t3_encap() failed and left the mbuf with us. */
if (m_head != NULL)
m_freem(m_head);
}
/*
 * cxgb_transmit_locked - transmit or enqueue one packet
 * @ifp: the interface
 * @qs: the queue set; the Tx queue lock must be held (asserted below)
 * @m: the packet
 *
 * Sends @m directly through t3_encap() when possible, otherwise places it
 * on the buf_ring.  Afterwards completed descriptors are reclaimed and any
 * backlog is either started immediately or deferred to the Tx timer.
 *
 * Returns 0, or the error from drbr_enqueue().
 */
static int
cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
{
struct port_info *pi = qs->port;
struct sge_txq *txq = &qs->txq[TXQ_ETH];
struct buf_ring *br = txq->txq_mr;
int error, avail;
avail = txq->size - txq->in_use;
TXQ_LOCK_ASSERT(qs);
/*
* We can only do a direct transmit if the following are true:
* - we aren't coalescing (ring < 3/4 full)
* - the link is up -- checked in caller
* - there are no packets enqueued already
* - there is space in hardware transmit queue
*/
/*
 * NOTE(review): the two condition lines below still carry unified-diff
 * '-' / '+' markers from the change this text was taken from; the '+'
 * line is the post-change form.
 */
if (check_pkt_coalesce(qs) == 0 &&
- TXQ_RING_EMPTY(qs) && avail > 4) {
+ !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > 4) {
if (t3_encap(qs, &m)) {
/* Encapsulation failed; if the mbuf survived, queue it for later. */
if (m != NULL &&
(error = drbr_enqueue(ifp, br, m)) != 0)
return (error);
} else {
/*
* We've bypassed the buf ring so we need to update
* the stats directly
*/
txq->txq_direct_packets++;
txq->txq_direct_bytes += m->m_pkthdr.len;
/*
** Send a copy of the frame to the BPF
** listener and set the watchdog on.
*/
ETHER_BPF_MTAP(ifp, m);
/*
* We sent via PIO, no longer need a copy
*/
if (m->m_pkthdr.len <= PIO_LEN)
m_freem(m);
}
} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
return (error);
reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
/*
 * Start now if the link is up and either coalescing is off or at
 * least 7 packets are waiting; otherwise make sure the timer is armed.
 */
if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
(!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
cxgb_start_locked(qs);
else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
qs, txq->txq_timer.c_cpu);
return (0);
}
/*
 * Interface transmit entry point.  Picks a queue set (by flow id when the
 * mbuf carries one), then either transmits under the queue lock or, if the
 * lock is busy, leaves the packet on that queue set's buf_ring.
 */
int
cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	int error, qidx;

	/* Interface down or no link: silently drop the packet. */
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    !pi->link_config.link_ok) {
		m_freem(m);
		return (0);
	}

	if (m->m_flags & M_FLOWID)
		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
	else
		qidx = pi->first_qset;
	qs = &pi->adapter->sge.qs[qidx];

	if (!TXQ_TRYLOCK(qs)) {
		/* Someone else holds the lock; just enqueue. */
		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
		return (error);
	}

	/* XXX running */
	error = cxgb_transmit_locked(ifp, qs, m);
	TXQ_UNLOCK(qs);
	return (error);
}
/*
 * Start transmission on the port's first queue set, provided the link
 * is up.
 */
void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;

	if (pi->link_config.link_ok) {
		qs = &pi->adapter->sge.qs[pi->first_qset];
		TXQ_LOCK(qs);
		cxgb_start_locked(qs);
		TXQ_UNLOCK(qs);
	}
}
/*
 * Queue flush hook.  Intended to flush any enqueued mbufs in the
 * buf_rings and in the transmit queues; currently a no-op.
 */
void
cxgb_qflush(struct ifnet *ifp)
{
}
/**
* write_imm - write a packet into a Tx descriptor as immediate data
* @d: the Tx descriptor to write
* @m: the packet
* @len: the length of packet data to write as immediate data
* @gen: the generation bit value to write
*
* Writes a packet as immediate data into a Tx descriptor. The packet
* contains a work request at its beginning. We must write the packet
* carefully so the SGE doesn't read accidentally before it's written in
* its entirety.
*/
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
unsigned int len, unsigned int gen)
{
struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
struct work_request_hdr *to = (struct work_request_hdr *)d;
uint32_t wr_hi, wr_lo;
if (len > WR_LEN)
panic("len too big %d\n", len);
if (len < sizeof(*from))
panic("len too small %d", len);
memcpy(&to[1], &from[1], len - sizeof(*from));
wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
V_WR_BCNTLFLT(len & 7));
wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
V_WR_LEN((len + 7) / 8));
set_wr_hdr(to, wr_hi, wr_lo);
wmb();
wr_gen2(d, gen);
/*
* This check is a hack we should really fix the logic so
* that this can't happen
*/
if (m->m_type != MT_DONTFREE)
m_freem(m);
}
/**
* check_desc_avail - check descriptor availability on a send queue
* @adap: the adapter
* @q: the TX queue
* @m: the packet needing the descriptors
* @ndesc: the number of Tx descriptors needed
* @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
*
* Checks if the requested number of Tx descriptors is available on an
* SGE send queue. If the queue is already suspended or not enough
* descriptors are available the packet is queued for later transmission.
* Must be called with the Tx queue locked.
*
* Returns 0 if enough descriptors are available, 1 if there aren't
* enough descriptors and the packet has been queued, and 2 if the caller
* needs to retry because there weren't enough descriptors at the
* beginning of the call but some freed up in the mean time.
*/
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
	struct sge_qset *qs;

	/*
	 * XXX We currently only use this for checking the control queue
	 * the control queue is only used for binding qsets which happens
	 * at init time so we are guaranteed enough descriptors
	 */

	/* Packets are already waiting: keep ordering by queueing behind them. */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
		mbufq_tail(&q->sendq, m);
		return 1;
	}

	if (__predict_false(q->size - q->in_use < ndesc)) {
		qs = txq_to_qset(q, qid);

		/* Mark the queue stopped, then re-check in case space freed up. */
		setbit(&qs->txq_stopped, qid);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		mbufq_tail(&q->sendq, m);
		return 1;
	}

	return 0;
}
/**
* reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
* @q: the SGE control Tx queue
*
* This is a variant of reclaim_completed_tx() that is used for Tx queues
* that send only immediate data (presently just the control queues) and
* thus do not have any mbufs
*/
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	/* Descriptors processed since the last cleanup. */
	const unsigned int done = q->processed - q->cleaned;

	q->cleaned += done;
	q->in_use -= done;
}
/*
 * Returns non-zero when both the first mbuf and the whole packet are no
 * longer than WR_LEN, i.e. the packet can be sent as immediate data.
 */
static __inline int
immediate(const struct mbuf *m)
{
	if (!(m->m_len <= WR_LEN))
		return 0;
	return (m->m_pkthdr.len <= WR_LEN);
}
/**
* ctrl_xmit - send a packet through an SGE control Tx queue
* @adap: the adapter
* @qs: the queue set whose control queue is used
* @m: the packet
*
* Send a packet through an SGE control Tx queue. Packets sent through
* a control queue must fit entirely as immediate data in a single Tx
* descriptor and have no page fragments.
*/
static int
ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
	int ret;

	/* Anything that cannot go as immediate data is dropped. */
	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wrh_lo = htonl(V_WR_TID(q->token));

	TXQ_LOCK(qs);
	for (;;) {
		reclaim_completed_tx_imm(q);
		ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
		if (ret == 0)
			break;
		if (ret == 1) {
			/* The packet was queued on sendq for a later restart. */
			TXQ_UNLOCK(qs);
			return (ENOSPC);
		}
		/* Any other value: descriptors freed up meanwhile, retry. */
	}

	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	TXQ_UNLOCK(qs);

	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}
/**
* restart_ctrlq - restart a suspended control queue
* @qs: the queue set containing the control queue
*
* Resumes transmission on a suspended Tx control queue.
*/
static void
restart_ctrlq(void *data, int npending)
{
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;
	struct mbuf *m;

	TXQ_LOCK(qs);
	for (;;) {
		reclaim_completed_tx_imm(q);

		/* Push deferred packets while descriptors remain. */
		while (q->in_use < q->size &&
		    (m = mbufq_dequeue(&q->sendq)) != NULL) {
			write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
			if (++q->pidx >= q->size) {
				q->pidx = 0;
				q->gen ^= 1;
			}
			q->in_use++;
		}

		if (mbufq_empty(&q->sendq))
			break;

		/* Still backlogged: stop the queue, unless space just freed up. */
		setbit(&qs->txq_stopped, TXQ_CTRL);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			continue;
		q->stops++;
		break;
	}
	TXQ_UNLOCK(qs);

	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
/*
* Send a management message through control queue 0
*/
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	/* Management messages always use queue set 0. */
	struct sge_qset *qs0 = &adap->sge.qs[0];

	return (ctrl_xmit(adap, qs0, m));
}
/**
* t3_free_qset - free the resources of an SGE queue set
* @sc: the controller owning the queue set
* @q: the queue set
*
* Release the HW and SW resources associated with an SGE queue set, such
* as HW contexts, packet buffers, and descriptor rings. Traffic to the
* queue set must be quiesced prior to calling this.
*/
/*
 * Tears down one queue set: software rings, free lists, Tx queues, the
 * response queue and (if compiled in) LRO state, then zeroes the
 * structure.  q->lock is unlocked and destroyed in here, so the caller is
 * expected to enter with it held.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
int i;
/* Reclaim whatever has completed on the Ethernet Tx queue first. */
reclaim_completed_tx(q, 0, TXQ_ETH);
/* Software transmit rings and interface queues. */
for (i = 0; i < SGE_TXQ_PER_SET; i++) {
if (q->txq[i].txq_mr != NULL)
buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
if (q->txq[i].txq_ifq != NULL) {
ifq_delete(q->txq[i].txq_ifq);
free(q->txq[i].txq_ifq, M_DEVBUF);
}
}
/* Free lists: disable the context, then release DMA and software state. */
for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
if (q->fl[i].desc) {
mtx_lock_spin(&sc->sge.reg_lock);
t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
mtx_unlock_spin(&sc->sge.reg_lock);
bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
q->fl[i].desc_map);
bus_dma_tag_destroy(q->fl[i].desc_tag);
bus_dma_tag_destroy(q->fl[i].entry_tag);
}
if (q->fl[i].sdesc) {
free_rx_bufs(sc, &q->fl[i]);
free(q->fl[i].sdesc, M_DEVBUF);
}
}
/* Drop and destroy the queue set lock before the Tx rings go away. */
mtx_unlock(&q->lock);
MTX_DESTROY(&q->lock);
/* Tx queues: disable the egress context, then release DMA and software state. */
for (i = 0; i < SGE_TXQ_PER_SET; i++) {
if (q->txq[i].desc) {
mtx_lock_spin(&sc->sge.reg_lock);
t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
mtx_unlock_spin(&sc->sge.reg_lock);
bus_dmamap_unload(q->txq[i].desc_tag,
q->txq[i].desc_map);
bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
q->txq[i].desc_map);
bus_dma_tag_destroy(q->txq[i].desc_tag);
bus_dma_tag_destroy(q->txq[i].entry_tag);
}
if (q->txq[i].sdesc) {
free(q->txq[i].sdesc, M_DEVBUF);
}
}
/* Response queue. */
if (q->rspq.desc) {
mtx_lock_spin(&sc->sge.reg_lock);
t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
mtx_unlock_spin(&sc->sge.reg_lock);
bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
q->rspq.desc_map);
bus_dma_tag_destroy(q->rspq.desc_tag);
MTX_DESTROY(&q->rspq.lock);
}
#ifdef LRO_SUPPORTED
tcp_lro_free(&q->lro.ctrl);
#endif
/* Leave the structure zeroed so stale pointers cannot be reused. */
bzero(q, sizeof(*q));
}
/**
* t3_free_sge_resources - free SGE resources
* @sc: the adapter softc
*
* Frees resources used by the SGE queue sets.
*/
void
t3_free_sge_resources(adapter_t *sc)
{
	int i, nqsets = 0;

	/* Total number of queue sets across all ports. */
	for (i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		/* t3_free_qset() releases and destroys the lock taken here. */
		TXQ_LOCK(qs);
		t3_free_qset(sc, qs);
	}
}
/**
* t3_sge_start - enable SGE
* @sc: the controller softc
*
* Enables the SGE for DMAs. This is the last step in starting packet
* transfers.
*/
void
t3_sge_start(adapter_t *sc)
{
	/* Set the global-enable bit in the SGE control register. */
	t3_set_reg_field(sc, A_SG_CONTROL,
	    F_GLOBALENABLE, F_GLOBALENABLE);
}
/**
* t3_sge_stop - disable SGE operation
* @sc: the adapter
*
* Disables the DMA engine. This can be called in emergencies (e.g.,
* from error interrupts) or from normal process context. In the latter
* case it also disables any pending queue restart tasklets. Note that
* if it is called in interrupt context it cannot disable the restart
* tasklets as it cannot wait, however the tasklets will have no effect
* since the doorbells are disabled and the driver will call this again
* later from process context, at which time the tasklets will be stopped
* if they are still running.
*/
void
t3_sge_stop(adapter_t *sc)
{
	int i, nqsets = 0;

	/* Clear the global-enable bit in the SGE control register. */
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	/* No task queue: nothing further to do. */
	if (sc->tq == NULL)
		return;

	for (i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;
#ifdef notyet
	/*
	 *
	 * XXX
	 */
	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
#endif
}
/**
 *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
 *	@qs: the queue set owning the Tx queue
 *	@reclaimable: the number of descriptors to reclaim
 *	@queue: index of the Tx queue within the queue set
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers, starting at the queue's consumer index and advancing it.
 *	Descriptors without an attached mbuf are counted in txq_skipped.
 *	Called with the queue set lock held (asserted).
 */
void
t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
{
	struct tx_sw_desc *txsd;
	unsigned int cidx, mask;
	struct sge_txq *q = &qs->txq[queue];

	cidx = q->cidx;
	/* Used only to wrap the prefetch look-ahead indices. */
	mask = q->size - 1;
	txsd = &q->sdesc[cidx];

#ifdef T3_TRACE
	/*
	 * Trace after cidx has been loaded, and reach the adapter through
	 * the queue set: this function has no adapter pointer of its own.
	 */
	T3_TRACE2(qs->port->adapter->tb[q->cntxt_id & 7],
	    "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
#endif
	mtx_assert(&qs->lock, MA_OWNED);
	while (reclaimable--) {
		prefetch(q->sdesc[(cidx + 1) & mask].m);
		prefetch(q->sdesc[(cidx + 2) & mask].m);

		if (txsd->m != NULL) {
			/* Undo the DMA mapping before freeing the mbufs. */
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_list(txsd->m);
			txsd->m = NULL;
		} else
			q->txq_skipped++;

		/* Advance, wrapping at the end of the ring. */
		++txsd;
		if (++cidx == q->size) {
			cidx = 0;
			txsd = q->sdesc;
		}
	}
	q->cidx = cidx;
}
/**
* is_new_response - check if a response is newly written
* @r: the response descriptor
* @q: the response queue
*
* Returns true if a response descriptor contains a yet unprocessed
* response.
*/
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	/* New when the descriptor's generation bit matches the queue's. */
	const int fresh = ((r->intr_gen & F_RSPD_GEN2) == q->gen);

	return fresh;
}
/* The GTS flag bits for Tx queues 0 and 1 in a response descriptor. */
#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
/* The GTS bits plus the full credit-return fields of Tx queues 0, 1 and 2. */
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
/**
* write_ofld_wr - write an offload work request
* @adap: the adapter
* @m: the packet to send
* @q: the Tx queue
* @pidx: index of the first Tx descriptor to write
* @gen: the generation value to use
* @ndesc: number of descriptors the packet will occupy
*
* Write an offload work request to send the supplied packet. The packet
* data already carry the work request with most fields populated.
*/
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m,
    struct sge_txq *q, unsigned int pidx,
    unsigned int gen, unsigned int ndesc,
    bus_dma_segment_t *segs, unsigned int nsegs)
{
	struct tx_desc *d = &q->desc[pidx];
	struct sg_ent sgl[TX_MAX_SEGS / 2 + 1];
	struct sg_ent *sgp;
	struct work_request_hdr *from;
	struct txq_state txqs;
	unsigned int flits;

	/* No segments and small enough: send everything as immediate data. */
	if (immediate(m) && nsegs == 0) {
		write_imm(d, m, m->m_len, gen);
		return;
	}

	/* Only TX_DATA builds SGLs */

	/* Copy everything after the WR header; the header is written later. */
	from = mtod(m, struct work_request_hdr *);
	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
	flits = m->m_len / 8;

	/* Build the SGL in place for one descriptor, else in the local array. */
	if (ndesc == 1)
		sgp = (struct sg_ent *)&d->flit[flits];
	else
		sgp = sgl;
	make_sgl(sgp, segs, nsegs);

	txqs.gen = gen;
	txqs.pidx = pidx;
	txqs.compl = 0;

	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_len(nsegs),
	    from->wrh_hi, from->wrh_lo);
}
/**
* calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
* @m: the packet
*
* Returns the number of Tx descriptors needed for the given offload
* packet. These packets are already fully constructed.
*/
static __inline unsigned int
calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
{
	unsigned int hdr_flits;
	int ndescs;

	/* packet fits as immediate data */
	if (m->m_len <= WR_LEN && nsegs == 0)
		return (1);

	/*
	 * This needs to be re-visited for TOE
	 */

	/* Header flits from the first mbuf, plus the SGL for the segments. */
	hdr_flits = m->m_len / 8;
	ndescs = flits_to_desc(hdr_flits + sgl_len(nsegs));
	return (ndescs);
}
/**
* ofld_xmit - send a packet through an offload queue
* @adap: the adapter
* @qs: the queue set whose offload queue is used
* @m: the packet
*
* Send an offload packet through an SGE offload queue.
*/
static int
ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
	struct tx_sw_desc *stx;
	unsigned int ndesc, pidx, gen;
	int ret, nsegs;

	/* Work out the descriptor count and translate the packet's SGL. */
	nsegs = m_get_sgllen(m);
	vsegs = m_get_sgl(m);
	ndesc = calc_tx_descs_ofld(m, nsegs);
	busdma_map_sgl(vsegs, segs, nsegs);
	stx = &q->sdesc[q->pidx];

	TXQ_LOCK(qs);
	for (;;) {
		reclaim_completed_tx(qs, 16, TXQ_OFLD);
		ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
		if (ret == 0)
			break;
		if (ret == 1) {
			/* Queued on sendq; record ndesc for the restart path. */
			printf("no ofld desc avail\n");
			m_set_priority(m, ndesc);	/* save for restart */
			TXQ_UNLOCK(qs);
			return (EINTR);
		}
		/* Otherwise space freed up in the meantime: try again. */
	}

	/* Claim ndesc descriptors starting at the current producer index. */
	gen = q->gen;
	pidx = q->pidx;
	q->in_use += ndesc;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}
#ifdef T3_TRACE
	T3_TRACE5(adap->tb[q->cntxt_id & 7],
	    "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
	    ndesc, pidx, skb->len, skb->len - skb->data_len,
	    skb_shinfo(skb)->nr_frags);
#endif
	TXQ_UNLOCK(qs);

	/* The descriptors are filled in after the lock has been dropped. */
	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
	check_ring_tx_db(adap, q);
	return (0);
}
/**
* restart_offloadq - restart a suspended offload queue
* @qs: the queue set containing the offload queue
*
* Resumes transmission on a suspended Tx offload queue.
*/
/*
 * Task handler that drains the offload queue's deferred-packet list
 * (sendq) into the hardware ring.  @data is the queue set; @npending is
 * not used.  Each packet's descriptor count was saved in its priority
 * field by ofld_xmit().
 */
static void
restart_offloadq(void *data, int npending)
{
struct mbuf *m;
struct sge_qset *qs = data;
struct sge_txq *q = &qs->txq[TXQ_OFLD];
adapter_t *adap = qs->port->adapter;
bus_dma_segment_t segs[TX_MAX_SEGS];
/*
 * NOTE(review): stx is computed once, before the lock is taken, and is
 * reused for every packet in the loop below -- confirm this is intended.
 */
struct tx_sw_desc *stx = &q->sdesc[q->pidx];
int nsegs, cleaned;
TXQ_LOCK(qs);
/* cleaned is assigned but not otherwise used in this function. */
again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
while ((m = mbufq_peek(&q->sendq)) != NULL) {
unsigned int gen, pidx;
unsigned int ndesc = m_get_priority(m);
/* Not enough room: stop the queue unless space has just freed up. */
if (__predict_false(q->size - q->in_use < ndesc)) {
setbit(&qs->txq_stopped, TXQ_OFLD);
if (should_restart_tx(q) &&
test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
goto again;
q->stops++;
break;
}
/* Claim ndesc descriptors at the current producer index. */
gen = q->gen;
q->in_use += ndesc;
pidx = q->pidx;
q->pidx += ndesc;
if (q->pidx >= q->size) {
q->pidx -= q->size;
q->gen ^= 1;
}
(void)mbufq_dequeue(&q->sendq);
/* NOTE(review): the return value of busdma_map_mbufs() is ignored here. */
busdma_map_mbufs(&m, q, stx, segs, &nsegs);
/* The lock is dropped while the descriptors are written. */
TXQ_UNLOCK(qs);
write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
TXQ_LOCK(qs);
}
#if USE_GTS
set_bit(TXQ_RUNNING, &q->flags);
set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
TXQ_UNLOCK(qs);
/* Barrier, then ring the doorbell for this queue's egress context. */
wmb();
t3_write_reg(adap, A_SG_KDOORBELL,
F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
/**
* queue_set - return the queue set a packet should use
* @m: the packet
*
* Maps a packet to the SGE queue set it should use. The desired queue
* set is carried in bits 1-3 in the packet's priority.
*/
static __inline int
queue_set(const struct mbuf *m)
{
	/* Drop bit 0 (the control flag); the rest is the queue set index. */
	return (m_get_priority(m) >> 1);
}
/**
 * is_ctrl_pkt - tell control packets apart from regular offload packets
 * @m: the packet
 *
 * Bit 0 of the packet's priority selects the Tx queue type. Returns 1
 * when the bit is set (use the CTRL queue) and 0 otherwise (use the OFLD
 * queue).
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{

	return ((m_get_priority(m) & 1) != 0);
}
/**
 * t3_offload_tx - transmit an offload packet
 * @tdev: the offload device the packet is sent through
 * @m: the packet
 *
 * The packet priority selects the Tx queue: queue_set() picks the queue
 * set from it and is_ctrl_pkt() decides between the control and the
 * regular offload transmit path. Returns whatever the chosen transmit
 * routine returns.
 */
int
t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
	adapter_t *sc = tdev2adap(tdev);
	struct sge_qset *qset = &sc->sge.qs[queue_set(m)];

	/* Control packets are the uncommon case. */
	return (__predict_false(is_ctrl_pkt(m)) ?
	    ctrl_xmit(sc, qset, m) : ofld_xmit(sc, qset, m));
}
/**
 * deliver_partial_bundle - hand a (partial) bundle of Rx offload packets up
 * @tdev: the offload device that receives the packets
 * @q: the SGE response queue that assembled the bundle
 * @mbufs: the gathered packets
 * @n: how many packets are in the bundle
 *
 * Does nothing for an empty bundle; otherwise counts the bundle on the
 * response queue and passes the packets to cxgb_ofld_recv().
 */
static __inline void
deliver_partial_bundle(struct t3cdev *tdev, struct sge_rspq *q,
    struct mbuf *mbufs[], int n)
{

	if (n == 0)
		return;

	q->offload_bundles++;
	cxgb_ofld_recv(tdev, mbufs, n);
}
/*
 * Append an Rx offload packet to the gather array. Once RX_BUNDLE_SIZE
 * packets have been collected the whole bundle is delivered through
 * cxgb_ofld_recv() and the array starts over. Returns the gather index
 * the caller should pass in for the next packet.
 */
static __inline int
rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, struct mbuf *m,
    struct mbuf *rx_gather[], unsigned int gather_idx)
{

	rq->offload_pkts++;
	m->m_pkthdr.header = mtod(m, void *);

	rx_gather[gather_idx] = m;
	gather_idx++;
	if (gather_idx != RX_BUNDLE_SIZE)
		return (gather_idx);

	/* Bundle is full: deliver it and restart from slot 0. */
	cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
	gather_idx = 0;
	rq->offload_bundles++;
	return (gather_idx);
}
static void
restart_tx(struct sge_qset *qs)
{
struct adapter *sc = qs->port->adapter;
if (isset(&qs->txq_stopped, TXQ_OFLD) &&
should_restart_tx(&qs->txq[TXQ_OFLD]) &&
test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
qs->txq[TXQ_OFLD].restarts++;
DPRINTF("restarting TXQ_OFLD\n");
taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
}
DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
qs->txq[TXQ_CTRL].in_use);
if (isset(&qs->txq_stopped, TXQ_CTRL) &&
should_restart_tx(&qs->txq[TXQ_CTRL]) &&
test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
qs->txq[TXQ_CTRL].restarts++;
DPRINTF("restarting TXQ_CTRL\n");
taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
}
}
/**
 * t3_sge_alloc_qset - initialize an SGE queue set
 * @sc: the controller softc
 * @id: the queue set id
 * @nports: how many Ethernet ports will be using this queue set
 * @irq_vec_idx: the IRQ vector index for response queue interrupts
 * @p: configuration parameters for this queue set
 * @ntxq: number of Tx queues for the queue set
 * @pi: port info for queue set
 *
 * Allocate resources and initialize an SGE queue set. A queue set
 * comprises a response queue, two Rx free-buffer queues, and up to 3
 * Tx queues. The Tx queues are assigned roles in the order Ethernet
 * queue, offload queue, and control queue.
 *
 * Returns 0 on success. On any failure the partially built queue set is
 * released through t3_free_qset() and a non-zero error is returned;
 * allocation failures of the per-queue buf_ring / ifq report ENOMEM.
 */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
    const struct qset_params *p, int ntxq, struct port_info *pi)
{
	struct sge_qset *q = &sc->sge.qs[id];
	int i, ret = 0;

	MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
	q->port = pi;

	/* Software-side state for every Tx queue slot of the set. */
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
		    M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
			device_printf(sc->dev, "failed to allocate mbuf ring\n");
			/* Without this the error path would return 0. */
			ret = ENOMEM;
			goto err;
		}
		if ((q->txq[i].txq_ifq =
		    malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO))
		    == NULL) {
			device_printf(sc->dev, "failed to allocate ifq\n");
			/* Without this the error path would return 0. */
			ret = ENOMEM;
			goto err;
		}
		ifq_init(q->txq[i].txq_ifq, pi->ifp);
		callout_init(&q->txq[i].txq_timer, 1);
		callout_init(&q->txq[i].txq_watchdog, 1);
		q->txq[i].txq_timer.c_cpu = id % mp_ncpus;
		q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus;
	}
	init_qset_cntxt(q, id);
	q->idx = id;

	/* Descriptor rings: two free lists, the response queue, the Tx queues. */
	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
	    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
	    &q->fl[0].desc, &q->fl[0].sdesc,
	    &q->fl[0].desc_tag, &q->fl[0].desc_map,
	    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
		printf("error %d from alloc ring fl0\n", ret);
		goto err;
	}
	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
	    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
	    &q->fl[1].desc, &q->fl[1].sdesc,
	    &q->fl[1].desc_tag, &q->fl[1].desc_map,
	    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
		printf("error %d from alloc ring fl1\n", ret);
		goto err;
	}
	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
	    &q->rspq.phys_addr, &q->rspq.desc, NULL,
	    &q->rspq.desc_tag, &q->rspq.desc_map,
	    NULL, NULL)) != 0) {
		printf("error %d from alloc ring rspq\n", ret);
		goto err;
	}
	for (i = 0; i < ntxq; ++i) {
		/* The control queue gets no software descriptors. */
		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);

		if ((ret = alloc_ring(sc, p->txq_size[i],
		    sizeof(struct tx_desc), sz,
		    &q->txq[i].phys_addr, &q->txq[i].desc,
		    &q->txq[i].sdesc, &q->txq[i].desc_tag,
		    &q->txq[i].desc_map,
		    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
			printf("error %d from alloc ring tx %i\n", ret, i);
			goto err;
		}
		mbufq_init(&q->txq[i].sendq);
		q->txq[i].gen = 1;
		q->txq[i].size = p->txq_size[i];
	}

	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);

	q->fl[0].gen = q->fl[1].gen = 1;
	q->fl[0].size = p->fl_size;
	q->fl[1].size = p->jumbo_size;

	q->rspq.gen = 1;
	q->rspq.cidx = 0;
	q->rspq.size = p->rspq_size;

	q->txq[TXQ_ETH].stop_thres = nports *
	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);

	/* Free list 0 uses packet-zone clusters, free list 1 jumbo clusters. */
	q->fl[0].buf_size = MCLBYTES;
	q->fl[0].zone = zone_pack;
	q->fl[0].type = EXT_PACKET;
#if __FreeBSD_version > 800000
	if (cxgb_use_16k_clusters) {
		q->fl[1].buf_size = MJUM16BYTES;
		q->fl[1].zone = zone_jumbo16;
		q->fl[1].type = EXT_JUMBO16;
	} else {
		q->fl[1].buf_size = MJUM9BYTES;
		q->fl[1].zone = zone_jumbo9;
		q->fl[1].type = EXT_JUMBO9;
	}
#else
	q->fl[1].buf_size = MJUMPAGESIZE;
	q->fl[1].zone = zone_jumbop;
	q->fl[1].type = EXT_JUMBOP;
#endif

#ifdef LRO_SUPPORTED
	/* Allocate and setup the lro_ctrl structure */
	q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
	ret = tcp_lro_init(&q->lro.ctrl);
	if (ret) {
		printf("error %d from tcp_lro_init\n", ret);
		goto err;
	}
	q->lro.ctrl.ifp = pi->ifp;
#endif

	/*
	 * Program the hardware contexts under the SGE register lock. The
	 * t3_sge_init_* results are negated before being stored in ret.
	 */
	mtx_lock_spin(&sc->sge.reg_lock);
	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
	    q->rspq.phys_addr, q->rspq.size,
	    q->fl[0].buf_size, 1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_rspcntxt\n", ret);
		goto err_unlock;
	}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
		    q->fl[i].phys_addr, q->fl[i].size,
		    q->fl[i].buf_size, p->cong_thres, 1,
		    0);
		if (ret) {
			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
			goto err_unlock;
		}
	}

	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
	    SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
	    q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
	    1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_ecntxt\n", ret);
		goto err_unlock;
	}

	if (ntxq > 1) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
		    USE_GTS, SGE_CNTXT_OFLD, id,
		    q->txq[TXQ_OFLD].phys_addr,
		    q->txq[TXQ_OFLD].size, 0, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	if (ntxq > 2) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
		    SGE_CNTXT_CTRL, id,
		    q->txq[TXQ_CTRL].phys_addr,
		    q->txq[TXQ_CTRL].size,
		    q->txq[TXQ_CTRL].token, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
	    device_get_unit(sc->dev), irq_vec_idx);
	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);

	mtx_unlock_spin(&sc->sge.reg_lock);
	t3_update_qset_coalesce(q, p);
	q->port = pi;

	/* Populate the free lists and response queue, then set the holdoff timer. */
	refill_fl(sc, &q->fl[0], q->fl[0].size);
	refill_fl(sc, &q->fl[1], q->fl[1].size);
	refill_rspq(sc, &q->rspq, q->rspq.size - 1);

	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
	    V_NEWTIMER(q->rspq.holdoff_tmr));

	return (0);

err_unlock:
	mtx_unlock_spin(&sc->sge.reg_lock);
err:
	/* presumably t3_free_qset() expects the qset lock held -- TODO confirm */
	TXQ_LOCK(q);
	t3_free_qset(sc, q);

	return (ret);
}
/*
* Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
* ethernet data. Hardware assistance with various checksums and any vlan tag
* will also be taken into account here.
*/
void
t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
struct ifnet *ifp = pi->ifp;
DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
cpl->csum_valid && cpl->csum == 0xffff) {
m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
m->m_pkthdr.csum_data = 0xffff;
}
/*
* XXX need to add VLAN support for 6.x
*/
#ifdef VLAN_SUPPORTED
if (__predict_false(cpl->vlan_valid)) {
m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
m->m_flags |= M_VLANTAG;
}
#endif
m->m_pkthdr.rcvif = ifp;
m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
/*
* adjust after conversion to mbuf chain
*/
m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
m->m_len -= (sizeof(*cpl) + ethpad);
m->m_data += (sizeof(*cpl) + ethpad);
}
/**
 * get_packet - return the next ingress packet buffer from a free list
 * @adap: the adapter that received the packet
 * @drop_thres: # of remaining buffers before we start dropping packets
 *              (not referenced by this implementation)
 * @qs: the qset that the SGE free list holding the packet belongs to
 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 * @r: response descriptor
 *
 * Get the next buffer from a free list and link it into the mbuf chain
 * tracked by @mh. If recycling is enabled and the packet is small and
 * complete in one buffer, the data is copied into a fresh mbuf and the
 * original buffer is recycled; otherwise the original buffer itself is
 * used. If the copy cannot be allocated the original buffer is used as
 * well.
 *
 * Returns 1 when the descriptor carries the end of a packet, 0 otherwise.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r)
{
	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	int mask, cidx = fl->cidx;
	struct rx_sw_desc *sd = &fl->sdesc[cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = M_EXT;
	uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
	caddr_t cl;
	struct mbuf *m;
	int ret = 0;

	/* Warm the cache for the next two free-list entries. */
	mask = fl->size - 1;
	prefetch(fl->sdesc[(cidx + 1) & mask].m);
	prefetch(fl->sdesc[(cidx + 2) & mask].m);
	prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
	prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
	    sopeop == RSPQ_SOP_EOP) {
		/* Small single-buffer packet: copy it and recycle the buffer. */
		if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
			goto skip_recycle;
		cl = mtod(m, void *);
		memcpy(cl, sd->rxsd_cl, len);
		recycle_rx_buf(adap, fl, fl->cidx);
		m->m_pkthdr.len = m->m_len = len;
		m->m_flags = 0;
		mh->mh_head = mh->mh_tail = m;
		ret = 1;
		goto done;
	} else {
	skip_recycle:
		bus_dmamap_unload(fl->entry_tag, sd->map);
		cl = sd->rxsd_cl;
		m = sd->m;

		/* Only the first buffer of a packet carries a packet header. */
		if ((sopeop == RSPQ_SOP_EOP) ||
		    (sopeop == RSPQ_SOP))
			flags |= M_PKTHDR;
		m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
		if (fl->zone == zone_pack) {
			/*
			 * restore clobbered data pointer
			 */
			m->m_data = m->m_ext.ext_buf;
		} else {
			m_cljset(m, cl, fl->type);
		}
		m->m_len = len;
	}
	switch(sopeop) {
	case RSPQ_SOP_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_SOP:
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		break;
	case RSPQ_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_NSOP_NEOP:
		if (mh->mh_tail == NULL) {
			/*
			 * NOTE(review): ret may still be 1 here (RSPQ_EOP)
			 * although nothing was added to the chain -- confirm
			 * the caller copes with that.
			 */
			log(LOG_ERR, "discarding intermediate descriptor entry\n");
			m_freem(m);
			/* m is gone; make sure the debug print below skips it. */
			m = NULL;
			break;
		}
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		break;
	}
	if (cxgb_debug && m != NULL)
		printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
done:
	/* Advance the free-list consumer index, wrapping at the ring size. */
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}
/**
 * handle_rsp_cntrl_info - process the control bits of an SGE response
 * @qs: the queue set the response belongs to
 * @flags: the response control flags
 *
 * Picks up GTS indications (when USE_GTS is enabled) and the completion
 * credits carried in @flags, and adds the credits to the 'processed'
 * counter of the matching Tx queue. The hardware already coalesces
 * credits, so no additional software coalescing happens here.
 */
static __inline void
handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
{
	struct sge_txq *ethq = &qs->txq[TXQ_ETH];
	struct sge_txq *ctrlq = &qs->txq[TXQ_CTRL];
	struct sge_txq *ofldq = &qs->txq[TXQ_OFLD];
	unsigned int ncredits;

#if USE_GTS
	if (flags & F_RSPD_TXQ0_GTS)
		clear_bit(TXQ_RUNNING, &ethq->flags);
#endif
	/* Tx queue 0 credits belong to the Ethernet queue. */
	ncredits = G_RSPD_TXQ0_CR(flags);
	if (ncredits != 0)
		ethq->processed += ncredits;

	/* Tx queue 2 credits belong to the control queue. */
	ncredits = G_RSPD_TXQ2_CR(flags);
	if (ncredits != 0)
		ctrlq->processed += ncredits;

#if USE_GTS
	if (flags & F_RSPD_TXQ1_GTS)
		clear_bit(TXQ_RUNNING, &ofldq->flags);
#endif
	/* Tx queue 1 credits belong to the offload queue. */
	ncredits = G_RSPD_TXQ1_CR(flags);
	if (ncredits != 0)
		ofldq->processed += ncredits;
}
/*
 * Placeholder invoked by process_responses() when a response carried GTS
 * bits. It currently performs no work.
 */
static void
check_ring_db(adapter_t *adap, struct sge_qset *qs, unsigned int sleeping)
{
	/* Intentionally empty. */
}
/**
 * process_responses - process responses from an SGE response queue
 * @adap: the adapter
 * @qs: the queue set to which the response queue belongs
 * @budget: how many responses can be processed in this round
 *
 * Process responses from an SGE response queue up to the supplied budget.
 * Responses include received packets as well as credits and other events
 * for the queues that belong to the response queue's queue set.
 * A negative budget is effectively unlimited.
 *
 * Additionally choose the interrupt holdoff time for the next interrupt
 * on this queue. If the system is under memory shortage use a fairly
 * long delay to help recovery.
 *
 * Returns the number of responses consumed from the budget.
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
struct sge_rspq *rspq = &qs->rspq;
struct rsp_desc *r = &rspq->desc[rspq->cidx];
int budget_left = budget;
unsigned int sleeping = 0;
#ifdef LRO_SUPPORTED
int lro_enabled = qs->lro.enabled;
int skip_lro;
struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
#endif
/* Offload packets are gathered here and delivered in bundles. */
struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
int ngathered = 0;
#ifdef DEBUG
static int last_holdoff = 0;
if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
printf("next_holdoff=%d\n", rspq->holdoff_tmr);
last_holdoff = rspq->holdoff_tmr;
}
#endif
/* Default holdoff; overridden below if an mbuf allocation fails. */
rspq->next_holdoff = rspq->holdoff_tmr;
while (__predict_true(budget_left && is_new_response(r, rspq))) {
int eth, eop = 0, ethpad = 0;
uint32_t flags = ntohl(r->flags);
/* The first 32 bits of the descriptor, kept in the descriptor's byte order. */
uint32_t rss_csum = *(const uint32_t *)r;
uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
eth = (r->rss_hdr.opcode == CPL_RX_PKT);
if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
/* Asynchronous notification: copy the descriptor into an mbuf. */
struct mbuf *m;
if (cxgb_debug)
printf("async notification\n");
/*
 * NOTE(review): when rspq_mh.mh_head is already set, the notification
 * is written into a separate mbuf 'm', while the delivery code below
 * hands up rspq_mh.mh_head -- 'm' looks unreferenced afterwards; confirm.
 */
if (rspq->rspq_mh.mh_head == NULL) {
rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
m = rspq->rspq_mh.mh_head;
} else {
m = m_gethdr(M_DONTWAIT, MT_DATA);
}
if (m == NULL)
goto no_mem;
memcpy(mtod(m, char *), r, AN_PKT_SIZE);
m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
*mtod(m, char *) = CPL_ASYNC_NOTIF;
rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
eop = 1;
rspq->async_notif++;
goto skip;
} else if (flags & F_RSPD_IMM_DATA_VALID) {
/* The packet data is carried inside the response descriptor itself. */
struct mbuf *m = NULL;
DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
r->rss_hdr.opcode, rspq->cidx);
/*
 * NOTE(review): in the else arm 'm' is allocated but not referenced
 * again in this function -- looks like a leak; confirm.
 */
if (rspq->rspq_mh.mh_head == NULL)
rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
else
m = m_gethdr(M_DONTWAIT, MT_DATA);
if (rspq->rspq_mh.mh_head == NULL && m == NULL) {
/*
 * Out of mbufs (also the target of the async-notification path above):
 * ask for a long interrupt holdoff and stop processing without
 * consuming this descriptor.
 */
no_mem:
rspq->next_holdoff = NOMEM_INTR_DELAY;
budget_left--;
break;
}
get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
eop = 1;
rspq->imm_data++;
} else if (r->len_cq) {
/* The packet data lives in a free-list buffer. */
int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
if (eop) {
/* Tag the completed packet with the RSS hash as its flow id. */
rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
}
ethpad = 2;
} else {
/* No data at all: the response only carries control information. */
rspq->pure_rsps++;
}
skip:
if (flags & RSPD_CTRL_MASK) {
sleeping |= flags & RSPD_GTS_MASK;
handle_rsp_cntrl_info(qs, flags);
}
/* Advance to the next descriptor; flip the generation on wrap-around. */
r++;
if (__predict_false(++rspq->cidx == rspq->size)) {
rspq->cidx = 0;
rspq->gen ^= 1;
r = rspq->desc;
}
/* Hand consumed entries back once a quarter of the queue has accumulated. */
if (++rspq->credits >= (rspq->size / 4)) {
refill_rspq(adap, rspq, rspq->credits);
rspq->credits = 0;
}
if (!eth && eop) {
/* Complete non-Ethernet packet: queue it for the offload device. */
rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
/*
 * XXX size mismatch
 */
m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
ngathered = rx_offload(&adap->tdev, rspq,
rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
rspq->rspq_mh.mh_head = NULL;
DPRINTF("received offload packet\n");
} else if (eth && eop) {
/* Complete Ethernet packet: strip the CPL header and pass it up. */
struct mbuf *m = rspq->rspq_mh.mh_head;
t3_rx_eth(adap, rspq, m, ethpad);
#ifdef LRO_SUPPORTED
/*
 * The T304 sends incoming packets on any qset. If LRO
 * is also enabled, we could end up sending packet up
 * lro_ctrl->ifp's input. That is incorrect.
 *
 * The mbuf's rcvif was derived from the cpl header and
 * is accurate. Skip LRO and just use that.
 */
skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
(tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
/* successfully queue'd for LRO */
} else
#endif
{
/*
 * LRO not enabled, packet unsuitable for LRO,
 * or unable to queue. Pass it up right now in
 * either case.
 */
struct ifnet *ifp = m->m_pkthdr.rcvif;
(*ifp->if_input)(ifp, m);
}
rspq->rspq_mh.mh_head = NULL;
}
/* Top up both free lists as we go. */
__refill_fl_lt(adap, &qs->fl[0], 32);
__refill_fl_lt(adap, &qs->fl[1], 32);
--budget_left;
}
/* Deliver whatever offload packets are still gathered. */
deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
#ifdef LRO_SUPPORTED
/* Flush LRO */
while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
tcp_lro_flush(lro_ctrl, queued);
}
#endif
if (sleeping)
check_ring_db(adap, qs, sleeping);
mb(); /* commit Tx queue processed updates */
/* A value above 1 means a stop bit other than bit 0 is set. */
if (__predict_false(qs->txq_stopped > 1))
restart_tx(qs);
__refill_fl_lt(adap, &qs->fl[0], 512);
__refill_fl_lt(adap, &qs->fl[1], 512);
/* Report how much of the budget was used. */
budget -= budget_left;
return (budget);
}
/*
 * Process every pending response on a response queue (unlimited budget)
 * and then write the GTS register with the next holdoff timer and the
 * updated consumer index. Returns the number of responses processed.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	static int last_holdoff = 0;
	int nprocessed;

	nprocessed = process_responses(adap, rspq_to_qset(rq), -1);

	/* With debugging on, log the holdoff value whenever it changes. */
	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}

	t3_write_reg(adap, A_SG_GTS,
	    V_RSPQ(rq->cntxt_id) | V_NEWTIMER(rq->next_holdoff) |
	    V_NEWINDEX(rq->cidx));

	return (nprocessed);
}
/*
* Interrupt handler for legacy INTx interrupts for T3B-based cards.
* Handles data events from SGE response queues as well as error and other
* async events as they all use the same interrupt pin. We use one SGE
* response queue per port in this mode and protect all response queues with
* queue 0's lock.
*/
void
t3b_intr(void *data)
{
uint32_t i, map;
adapter_t *adap = data;
struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
t3_write_reg(adap, A_PL_CLI, 0);
map = t3_read_reg(adap, A_SG_DATA_INTR);
if (!map)
return;
if (__predict_false(map & F_ERRINTR))
taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
mtx_lock(&q0->lock);
for_each_port(adap, i)
if (map & (1 << i))
process_responses_gts(adap, &adap->sge.qs[i].rspq);
mtx_unlock(&q0->lock);
}
/*
* The MSI interrupt handler. This needs to handle data events from SGE
* response queues as well as error and other async events as they all use
* the same MSI vector. We use one SGE response queue per port in this mode
* and protect all response queues with queue 0's lock.
*/
void
t3_intr_msi(void *data)
{
adapter_t *adap = data;
struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
int i, new_packets = 0;
mtx_lock(&q0->lock);
for_each_port(adap, i)
if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
new_packets = 1;
mtx_unlock(&q0->lock);
if (new_packets == 0)
taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
}
/*
 * MSI-X interrupt handler for one queue set. Processes the queue set's
 * response queue and counts the interrupt as unhandled when there was
 * nothing to process.
 */
void
t3_intr_msix(void *data)
{
	struct sge_qset *qset = data;
	struct sge_rspq *rq = &qset->rspq;
	adapter_t *sc = qset->port->adapter;
	int nprocessed;

	nprocessed = process_responses_gts(sc, rq);
	if (nprocessed == 0)
		rq->unhandled_irqs++;
}
/*
 * Base size in bytes of the sbuf used by the queue dump sysctls. The
 * expansion is parenthesized so it stays a single operand in any expression.
 */
#define QDUMP_SBUF_SIZE (32 * 400)
static int
t3_dump_rspq(SYSCTL_HANDLER_ARGS)
{
struct sge_rspq *rspq;
struct sge_qset *qs;
int i, err, dump_end, idx;
static int multiplier = 1;
struct sbuf *sb;
struct rsp_desc *rspd;
uint32_t data[4];
rspq = arg1;
qs = rspq_to_qset(rspq);
if (rspq->rspq_dump_count == 0)
return (0);
if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
log(LOG_WARNING,
"dump count is too large %d\n", rspq->rspq_dump_count);
rspq->rspq_dump_count = 0;
return (EINVAL);
}
if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
log(LOG_WARNING,
"dump start of %d is greater than queue size\n",
rspq->rspq_dump_start);
rspq->rspq_dump_start = 0;
return (EINVAL);
}
err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
if (err)
return (err);
retry_sbufops:
sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
(data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
(rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
for (i = rspq->rspq_dump_start; i < dump_end; i++) {
idx = i & (RSPQ_Q_SIZE-1);
rspd = &rspq->desc[idx];
sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
be32toh(rspd->len_cq), rspd->intr_gen);
}
if (sbuf_overflowed(sb)) {
sbuf_delete(sb);
multiplier++;
goto retry_sbufops;
}
sbuf_finish(sb);
err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
sbuf_delete(sb);
return (err);
}
/*
 * Sysctl handler that renders part of an Ethernet Tx queue as text.
 *
 * arg1 is the struct sge_txq to dump. The window is selected by the
 * queue's txq_dump_start / txq_dump_count fields; a count of zero
 * produces no output. The queue's egress context is read and printed
 * first, followed by the work request header and remaining words of each
 * descriptor in the window. If the fixed-length sbuf overflows, it is
 * rebuilt with a larger size.
 */
static int
t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
{
	struct sge_txq *txq;
	struct sge_qset *qs;
	int i, j, err, dump_end;
	static int multiplier = 1;
	struct sbuf *sb;
	struct tx_desc *txd;
	uint32_t *WR, wr_hi, wr_lo, gen;
	uint32_t data[4];

	txq = arg1;
	qs = txq_to_qset(txq, TXQ_ETH);
	if (txq->txq_dump_count == 0) {
		return (0);
	}
	/* Reject windows that cannot fit the queue; reset the offending knob. */
	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
		log(LOG_WARNING,
		    "dump count is too large %d\n", txq->txq_dump_count);
		txq->txq_dump_count = 1;
		return (EINVAL);
	}
	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
		log(LOG_WARNING,
		    "dump start of %d is greater than queue size\n",
		    txq->txq_dump_start);
		txq->txq_dump_start = 0;
		return (EINVAL);
	}
	/*
	 * Read the egress context that belongs to this Tx queue. The context
	 * is set up in t3_sge_alloc_qset() with the Tx queue's own cntxt_id,
	 * so that id (not the response queue's) has to be used here.
	 */
	err = t3_sge_read_ecntxt(qs->port->adapter, txq->cntxt_id, data);
	if (err)
		return (err);

retry_sbufops:
	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);

	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
	sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
	    txq->txq_dump_start,
	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));

	dump_end = txq->txq_dump_start + txq->txq_dump_count;
	for (i = txq->txq_dump_start; i < dump_end; i++) {
		/* Wrap the index around the end of the ring. */
		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
		WR = (uint32_t *)txd->flit;
		wr_hi = ntohl(WR[0]);
		wr_lo = ntohl(WR[1]);
		gen = G_WR_GEN(wr_lo);

		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
		    wr_hi, wr_lo, gen);
		/* Words after the two header words, four per output line. */
		for (j = 2; j < 30; j += 4)
			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
	}
	if (sbuf_overflowed(sb)) {
		/* Output did not fit: retry with a larger buffer. */
		sbuf_delete(sb);
		multiplier++;
		goto retry_sbufops;
	}
	sbuf_finish(sb);
	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (err);
}
static int
t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
{
struct sge_txq *txq;
struct sge_qset *qs;
int i, j, err, dump_end;
static int multiplier = 1;
struct sbuf *sb;
struct tx_desc *txd;
uint32_t *WR, wr_hi, wr_lo, gen;
txq = arg1;
qs = txq_to_qset(txq, TXQ_CTRL);
if (txq->txq_dump_count == 0) {
return (0);
}
if (txq->txq_dump_count > 256) {
log(LOG_WARNING,
"dump count is too large %d\n", txq->txq_dump_count);
txq->txq_dump_count = 1;
return (EINVAL);
}
if (txq->txq_dump_start > 255) {
log(LOG_WARNING,
"dump start of %d is greater than queue size\n",
txq->txq_dump_start);
txq->txq_dump_start = 0;
return (EINVAL);
}
retry_sbufops:
sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
txq->txq_dump_start,
(txq->txq_dump_start + txq->txq_dump_count) & 255);
dump_end = txq->txq_dump_start + txq->txq_dump_count;
for (i = txq->txq_dump_start; i < dump_end; i++) {
txd = &txq->desc[i & (255)];
WR = (uint32_t *)txd->flit;
wr_hi = ntohl(WR[0]);
wr_lo = ntohl(WR[1]);
gen = G_WR_GEN(wr_lo);
sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
wr_hi, wr_lo, gen);
for (j = 2; j < 30; j += 4)
sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
}
if (sbuf_overflowed(sb)) {
sbuf_delete(sb);
multiplier++;
goto retry_sbufops;
}
sbuf_finish(sb);
err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
sbuf_delete(sb);
return (err);
}
/*
 * Sysctl handler for the adapter-wide interrupt coalescing timer (in
 * microseconds).
 *
 * arg1 is the adapter softc. The current value is taken from queue set 0.
 * When the value handed back by sysctl_handle_int() differs, it is
 * clamped to a minimum of 1 and applied to every queue set of every
 * port: the stored parameters are updated, the holdoff timer is
 * recomputed and the new timer is written to the GTS register.
 *
 * Returns ENXIO until the adapter is fully initialized, the error from
 * sysctl_handle_int() if any, and 0 otherwise.
 */
static int
t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc = arg1;
	struct qset_params *qsp = &sc->params.sge.qset[0];
	int coalesce_usecs;
	struct sge_qset *qs;
	int i, j, err, nqsets = 0;
	struct mtx *lock;

	if ((sc->flags & FULL_INIT_DONE) == 0)
		return (ENXIO);

	coalesce_usecs = qsp->coalesce_usecs;
	err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
	if (err != 0) {
		return (err);
	}
	/* Nothing to do when the value did not change (this includes reads). */
	if (coalesce_usecs == qsp->coalesce_usecs)
		return (0);

	/* Count the queue sets in use across all ports. */
	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	/*
	 * Clamp to at least 1us. imax() compares as signed int, so zero and
	 * negative requests are raised to 1; the unsigned max() would let
	 * negative values through unchanged.
	 */
	coalesce_usecs = imax(1, coalesce_usecs);

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		qsp = &sc->params.sge.qset[i];
		qsp->coalesce_usecs = coalesce_usecs;

		/* Without MSI-X all response queues share queue 0's lock. */
		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		mtx_lock(lock);
		t3_update_qset_coalesce(qs, qsp);
		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
		    V_NEWTIMER(qs->rspq.holdoff_tmr));
		mtx_unlock(lock);
	}

	return (0);
}
/*
 * Register the adapter-level sysctl nodes under the device's sysctl tree:
 * firmware version, hardware revision, port types, the debug switch and
 * two Tx counters.
 */
void
t3_add_attach_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *sctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *top =
	    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	/* random information */
	SYSCTL_ADD_STRING(sctx, top, OID_AUTO, "firmware_version",
	    CTLFLAG_RD, &sc->fw_version, 0, "firmware version");
	SYSCTL_ADD_INT(sctx, top, OID_AUTO, "hw_revision",
	    CTLFLAG_RD, &sc->params.rev, 0, "chip model");
	SYSCTL_ADD_STRING(sctx, top, OID_AUTO, "port_types",
	    CTLFLAG_RD, &sc->port_types, 0, "type of ports");

	/* The only writable knob here: toggles verbose debug output. */
	SYSCTL_ADD_INT(sctx, top, OID_AUTO, "enable_debug",
	    CTLFLAG_RW, &cxgb_debug, 0, "enable verbose debugging output");

	SYSCTL_ADD_QUAD(sctx, top, OID_AUTO, "tunq_coalesce",
	    CTLFLAG_RD, &sc->tunq_coalesce, "#tunneled packets freed");
	SYSCTL_ADD_INT(sctx, top, OID_AUTO, "txq_overrun",
	    CTLFLAG_RD, &txq_fills, 0, "#times txq overrun");
}
/* Name of the per-queue-set sysctl node for the response queue. */
static const char *rspq_name = "rspq";

/* Names of the per-queue-set sysctl nodes for the Tx queues. */
static const char *txq_names[] = {
	"txq_eth", "txq_ofld", "txq_ctrl",
};
static int
sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
{
struct port_info *p = arg1;
uint64_t *parg;
if (!p)
return (EINVAL);
parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
PORT_LOCK(p);
t3_mac_update_stats(&p->mac);
PORT_UNLOCK(p);
return (sysctl_handle_quad(oidp, parg, 0, req));
}
void
t3_add_configured_sysctls(adapter_t *sc)
{
struct sysctl_ctx_list *ctx;
struct sysctl_oid_list *children;
int i, j;
ctx = device_get_sysctl_ctx(sc->dev);
children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
"intr_coal",
CTLTYPE_INT|CTLFLAG_RW, sc,
0, t3_set_coalesce_usecs,
"I", "interrupt coalescing timer (us)");
for (i = 0; i < sc->params.nports; i++) {
struct port_info *pi = &sc->port[i];
struct sysctl_oid *poid;
struct sysctl_oid_list *poidlist;
struct mac_stats *mstats = &pi->mac.stats;
snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
poidlist = SYSCTL_CHILDREN(poid);
SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
"nqsets", CTLFLAG_RD, &pi->nqsets,
0, "#queue sets");
for (j = 0; j < pi->nqsets; j++) {
struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
*ctrlqpoid, *lropoid;
struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
*txqpoidlist, *ctrlqpoidlist,
*lropoidlist;
struct sge_txq *txq = &qs->txq[TXQ_ETH];
snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
qspoidlist = SYSCTL_CHILDREN(qspoid);
SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
CTLFLAG_RD, &qs->fl[0].empty, 0,
"freelist #0 empty");
SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
CTLFLAG_RD, &qs->fl[1].empty, 0,
"freelist #1 empty");
rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
txqpoidlist = SYSCTL_CHILDREN(txqpoid);
ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
"lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
lropoidlist = SYSCTL_CHILDREN(lropoid);
SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
CTLFLAG_RD, &qs->rspq.size,
0, "#entries in response queue");
SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
CTLFLAG_RD, &qs->rspq.cidx,
0, "consumer index");
SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
CTLFLAG_RD, &qs->rspq.credits,
0, "#credits");
SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
CTLFLAG_RD, &qs->rspq.phys_addr,
"physical_address_of the queue");
SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
CTLFLAG_RW, &qs->rspq.rspq_dump_start,
0, "start rspq dump entry");
SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
CTLFLAG_RW, &qs->rspq.rspq_dump_count,
0, "#rspq entries to dump");
SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
0, t3_dump_rspq, "A", "dump of the response queue");
SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
0, "#tunneled packets dropped");
SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
0, "#tunneled packets waiting to be sent");
#if 0
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
0, "#tunneled packets queue producer index");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
0, "#tunneled packets queue consumer index");
#endif
SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
0, "#tunneled packets processed by the card");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
CTLFLAG_RD, &txq->cleaned,
0, "#tunneled packets cleaned");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
CTLFLAG_RD, &txq->in_use,
0, "#tunneled packet slots in use");
SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
CTLFLAG_RD, &txq->txq_frees,
"#tunneled packets freed");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
CTLFLAG_RD, &txq->txq_skipped,
0, "#tunneled packet descriptors skipped");
SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
CTLFLAG_RD, &txq->txq_coalesced,
"#tunneled packets coalesced");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
CTLFLAG_RD, &txq->txq_enqueued,
0, "#tunneled packets enqueued to hardware");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
CTLFLAG_RD, &qs->txq_stopped,
0, "tx queues stopped");
SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
CTLFLAG_RD, &txq->phys_addr,
"physical_address_of the queue");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
0, "txq generation");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
CTLFLAG_RD, &txq->cidx,
0, "hardware queue cidx");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
CTLFLAG_RD, &txq->pidx,
0, "hardware queue pidx");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
0, "txq start idx for dump");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
0, "txq #entries to dump");
SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
0, t3_dump_txq_eth, "A", "dump of the transmit queue");
SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
0, "ctrlq start idx for dump");
SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
0, "ctrl #entries to dump");
SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
#ifdef LRO_SUPPORTED
SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
#endif
}
/* Now add a node for mac stats. */
poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
CTLFLAG_RD, NULL, "MAC statistics");
poidlist = SYSCTL_CHILDREN(poid);
/*
* We (ab)use the length argument (arg2) to pass on the offset
* of the data that we are interested in. This is only required
* for the quad counters that are updated from the hardware (we
* make sure that we return the latest value).
* sysctl_handle_macstat first updates *all* the counters from
* the hardware, and then returns the latest value of the
* requested counter. Best would be to update only the
* requested counter from hardware, but t3_mac_update_stats()
* hides all the register details and we don't want to dive into
* all that here.
*/
#define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
(CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
sysctl_handle_macstat, "QU", 0)
CXGB_SYSCTL_ADD_QUAD(tx_octets);
CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
CXGB_SYSCTL_ADD_QUAD(tx_frames);
CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
CXGB_SYSCTL_ADD_QUAD(tx_pause);
CXGB_SYSCTL_ADD_QUAD(tx_deferred);
CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
CXGB_SYSCTL_ADD_QUAD(tx_underrun);
CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
CXGB_SYSCTL_ADD_QUAD(rx_octets);
CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
CXGB_SYSCTL_ADD_QUAD(rx_frames);
CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
CXGB_SYSCTL_ADD_QUAD(rx_pause);
CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
CXGB_SYSCTL_ADD_QUAD(rx_runt);
CXGB_SYSCTL_ADD_QUAD(rx_jabber);
CXGB_SYSCTL_ADD_QUAD(rx_short);
CXGB_SYSCTL_ADD_QUAD(rx_too_long);
CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
#undef CXGB_SYSCTL_ADD_QUAD
#define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
CTLFLAG_RD, &mstats->a, 0)
CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
CXGB_SYSCTL_ADD_ULONG(num_toggled);
CXGB_SYSCTL_ADD_ULONG(num_resets);
CXGB_SYSCTL_ADD_ULONG(link_faults);
#undef CXGB_SYSCTL_ADD_ULONG
}
}
/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Copies one HW descriptor of an SGE queue into @data.  No length is
 *	passed in, so @data must have room for the descriptor being copied.
 *
 *	Returns the size of the descriptor in bytes on success, or -EINVAL
 *	when @qnum is out of range, the selected ring has no descriptor
 *	memory, or @idx lies beyond the end of the ring.
 *
 *	Every failure is reported as a negative value: the success value is a
 *	positive byte count, so a positive EINVAL could not be told apart
 *	from a descriptor size.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (-EINVAL);

	/* Queues 0..2 are the Tx queues. */
	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (-EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	/* Queue 3 is the response queue. */
	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (-EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	/* Queues 4..5 map onto free lists 0..1. */
	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (-EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
Index: stable/8/sys/dev/e1000/if_em.c
===================================================================
--- stable/8/sys/dev/e1000/if_em.c (revision 205282)
+++ stable/8/sys/dev/e1000/if_em.c (revision 205283)
@@ -1,5393 +1,5393 @@
/******************************************************************************
Copyright (c) 2001-2009, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*$FreeBSD$*/
#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#if __FreeBSD_version >= 700029
#include <sys/eventhandler.h>
#endif
#include <machine/bus.h>
#include <machine/resource.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"
/*********************************************************************
 *  Set this to one to display debug statistics.
 *  Defined without `static`, so the symbol has external linkage.
 *********************************************************************/
int em_display_debug_stats = 0;
/*********************************************************************
 *  Driver version:
 *  em_probe() appends this string to the branding string when it
 *  builds the device description.
 *********************************************************************/
char em_driver_version[] = "6.9.14";
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on.
 *  Last field stores an index into em_strings.
 *  Last entry must be all 0s: em_probe() stops scanning at the first
 *  entry whose vendor ID is 0.
 *
 *  A SubVendor ID or SubDevice ID of PCI_ANY_ID is treated by
 *  em_probe() as a wildcard that matches any value.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/
static em_vendor_info_t em_vendor_info_array[] =
{
/* Intel(R) PRO/1000 Network Connection */
{ 0x8086, E1000_DEV_ID_82540EM, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82540EM_LOM, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82540EP, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82540EP_LOM, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82540EP_LP, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82541EI, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82541ER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82541ER_LOM, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82541EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82541GI, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82541GI_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82541GI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82542, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82543GC_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82543GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82544EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82544EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82544GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82544GC_LOM, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82545EM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82545EM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82545GM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82545GM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82545GM_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82546EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82546EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82546GB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82546GB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82546GB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82546GB_PCIE, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82547EI, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82547EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82547GI, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
/* required last entry */
{ 0, 0, 0, 0, 0}
};
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *
 *  Indexed by the last ("String Index") field of each
 *  em_vendor_info_array entry; every entry currently uses index 0.
 *********************************************************************/
static char *em_strings[] = {
"Intel(R) PRO/1000 Network Connection"
};
/*********************************************************************
 *  Function prototypes
 *
 *  Forward declarations for the file-local (static) routines of the
 *  driver.  Several groups are only declared for particular
 *  __FreeBSD_version ranges or build options; the guards are noted
 *  inline below.
 *********************************************************************/
/* Device interface entry points (wired up in em_methods). */
static int em_probe(device_t);
static int em_attach(device_t);
static int em_detach(device_t);
static int em_shutdown(device_t);
static int em_suspend(device_t);
static int em_resume(device_t);
/* Transmit start, ioctl, init/stop and media handling. */
static void em_start(struct ifnet *);
static void em_start_locked(struct ifnet *ifp);
/* Only declared when __FreeBSD_version >= 800000. */
#if __FreeBSD_version >= 800000
static int em_mq_start(struct ifnet *, struct mbuf *);
static int em_mq_start_locked(struct ifnet *, struct mbuf *);
static void em_qflush(struct ifnet *);
#endif
static int em_ioctl(struct ifnet *, u_long, caddr_t);
static void em_watchdog(struct adapter *);
static void em_init(void *);
static void em_init_locked(struct adapter *);
static void em_stop(void *);
static void em_media_status(struct ifnet *, struct ifmediareq *);
static int em_media_change(struct ifnet *);
/* Hardware identification, resource allocation and ring setup. */
static void em_identify_hardware(struct adapter *);
static int em_allocate_pci_resources(struct adapter *);
static int em_allocate_legacy(struct adapter *adapter);
static int em_allocate_msix(struct adapter *adapter);
static int em_setup_msix(struct adapter *);
static void em_free_pci_resources(struct adapter *);
static void em_local_timer(void *);
static int em_hardware_init(struct adapter *);
static void em_setup_interface(device_t, struct adapter *);
static void em_setup_transmit_structures(struct adapter *);
static void em_initialize_transmit_unit(struct adapter *);
static int em_setup_receive_structures(struct adapter *);
static void em_initialize_receive_unit(struct adapter *);
static void em_enable_intr(struct adapter *);
static void em_disable_intr(struct adapter *);
static void em_free_transmit_structures(struct adapter *);
static void em_free_receive_structures(struct adapter *);
static void em_update_stats_counters(struct adapter *);
static void em_txeof(struct adapter *);
static void em_tx_purge(struct adapter *);
static int em_allocate_receive_structures(struct adapter *);
static int em_allocate_transmit_structures(struct adapter *);
static int em_rxeof(struct adapter *, int);
/* Only declared when __NO_STRICT_ALIGNMENT is not defined. */
#ifndef __NO_STRICT_ALIGNMENT
static int em_fixup_rx(struct adapter *);
#endif
static void em_receive_checksum(struct adapter *, struct e1000_rx_desc *,
struct mbuf *);
static void em_transmit_checksum_setup(struct adapter *, struct mbuf *,
u32 *, u32 *);
#if __FreeBSD_version >= 700000
static bool em_tso_setup(struct adapter *, struct mbuf *,
u32 *, u32 *);
#endif /* FreeBSD_version >= 700000 */
static void em_set_promisc(struct adapter *);
static void em_disable_promisc(struct adapter *);
static void em_set_multi(struct adapter *);
static void em_print_hw_stats(struct adapter *);
static void em_update_link_status(struct adapter *);
static int em_get_buf(struct adapter *, int);
/* VLAN hooks; only declared when __FreeBSD_version >= 700029. */
#if __FreeBSD_version >= 700029
static void em_register_vlan(void *, struct ifnet *, u16);
static void em_unregister_vlan(void *, struct ifnet *, u16);
static void em_setup_vlan_hw_support(struct adapter *);
#endif
static int em_xmit(struct adapter *, struct mbuf **);
static void em_smartspeed(struct adapter *);
static int em_82547_fifo_workaround(struct adapter *, int);
static void em_82547_update_fifo_head(struct adapter *, int);
static int em_82547_tx_fifo_reset(struct adapter *);
static void em_82547_move_tail(void *);
static int em_dma_malloc(struct adapter *, bus_size_t,
struct em_dma_alloc *, int);
static void em_dma_free(struct adapter *, struct em_dma_alloc *);
static void em_print_debug_info(struct adapter *);
static void em_print_nvm_info(struct adapter *);
static int em_is_valid_ether_addr(u8 *);
/* sysctl handlers and helpers. */
static int em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static u32 em_fill_descriptors (bus_addr_t address, u32 length,
PDESC_ARRAY desc_array);
static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void em_add_int_delay_sysctl(struct adapter *, const char *,
const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void em_init_manageability(struct adapter *);
static void em_release_manageability(struct adapter *);
static void em_get_hw_control(struct adapter *);
static void em_release_hw_control(struct adapter *);
static void em_enable_wakeup(device_t);
/*
 * Interrupt handlers.  With EM_LEGACY_IRQ defined only em_intr() is
 * declared; otherwise the fast-IRQ handler (whose return type depends
 * on __FreeBSD_version), the MSIX handlers and the em_handle_*
 * routines (context/pending signature) are declared.
 */
#ifdef EM_LEGACY_IRQ
static void em_intr(void *);
#else /* FAST IRQ */
#if __FreeBSD_version < 700000
static void em_irq_fast(void *);
#else
static int em_irq_fast(void *);
#endif
/* MSIX handlers */
static void em_msix_tx(void *);
static void em_msix_rx(void *);
static void em_msix_link(void *);
static void em_handle_rx(void *context, int pending);
static void em_handle_tx(void *context, int pending);
static void em_handle_rxtx(void *context, int pending);
static void em_handle_link(void *context, int pending);
static void em_add_rx_process_limit(struct adapter *, const char *,
const char *, int *, int);
#endif /* ~EM_LEGACY_IRQ */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* POLLING */
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *
 *  Maps the device_* methods onto this driver's handlers.  The table
 *  ends with a {0, 0} entry.
 *********************************************************************/
static device_method_t em_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, em_probe),
DEVMETHOD(device_attach, em_attach),
DEVMETHOD(device_detach, em_detach),
DEVMETHOD(device_shutdown, em_shutdown),
DEVMETHOD(device_suspend, em_suspend),
DEVMETHOD(device_resume, em_resume),
{0, 0}
};
/*
 * Driver description: name "em", the method table above, and the
 * per-device softc size (struct adapter, which em_attach() fetches
 * with device_get_softc()).
 */
static driver_t em_driver = {
"em", em_methods, sizeof(struct adapter),
};
static devclass_t em_devclass;
/* Register "em" on the pci bus and declare its pci and ether dependencies. */
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
/*********************************************************************
 *  Tunable default values.
 *
 *  Each TUNABLE_INT below binds a "hw.em.*" name to one of the static
 *  defaults declared here.
 *********************************************************************/
/*
 * Conversions between interrupt-delay ticks and microseconds.  The
 * arithmetic treats one tick as 1.024 usecs (1024/1000) and rounds to
 * the nearest integer in both directions.
 */
#define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
/* NOTE(review): presumably a header length used by the TSO path -- confirm at its use sites. */
#define M_TSO_LEN 66
/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO 0
#endif
/* Interrupt-delay defaults, expressed in usecs (converted from the EM_* tick constants). */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
/* Requested RX/TX descriptor counts; em_attach() validates them and falls back to the defaults. */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;
/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
/* Local switch for MSI/MSIX */
static int em_enable_msi = TRUE;
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
TUNABLE_INT("hw.em.enable_msi", &em_enable_msi);
/* The RX process limit only exists when EM_LEGACY_IRQ is not defined. */
#ifndef EM_LEGACY_IRQ
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
/*
** Shadow VFTA table, this is needed because
** the real vlan filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;
/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  The device's vendor, device, subvendor and subdevice IDs are
 *  compared against em_vendor_info_array; PCI_ANY_ID in a table
 *  entry's subvendor or subdevice field matches any value.  On a
 *  match the device description is set to the branding string
 *  followed by the driver version.
 *
 *  return BUS_PROBE_DEFAULT on success, ENXIO when the device is not
 *  one this driver supports
 *********************************************************************/
static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	/* Reject foreign vendors before reading the remaining IDs. */
	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	/* The table is terminated by an all-zero entry. */
	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			/*
			 * Bound the write to the stack buffer so that a
			 * longer branding or version string can never
			 * overrun adapter_name.
			 */
			snprintf(adapter_name, sizeof(adapter_name), "%s %s",
			    em_strings[ent->index],
			    em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}
/*********************************************************************
* Device initialization routine
*
* The attach entry point is called when the driver is being loaded.
* This routine identifies the type of hardware, allocates all resources
* and initializes the hardware.
*
* return 0 on success, positive on failure
*********************************************************************/
static int
em_attach(device_t dev)
{
struct adapter *adapter;
int tsize, rsize;
int error = 0;
u16 eeprom_data, device_id;
INIT_DEBUGOUT("em_attach: begin");
adapter = device_get_softc(dev);
adapter->dev = adapter->osdep.dev = dev;
EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
EM_TX_LOCK_INIT(adapter, device_get_nameunit(dev));
EM_RX_LOCK_INIT(adapter, device_get_nameunit(dev));
/* SYSCTL stuff */
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
em_sysctl_debug_info, "I", "Debug Information");
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
em_sysctl_stats, "I", "Statistics");
callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
callout_init_mtx(&adapter->tx_fifo_timer, &adapter->tx_mtx, 0);
/* Determine hardware and mac info */
em_identify_hardware(adapter);
/* Setup PCI resources */
if (em_allocate_pci_resources(adapter)) {
device_printf(dev, "Allocation of PCI resources failed\n");
error = ENXIO;
goto err_pci;
}
/*
** For ICH8 and family we need to
** map the flash memory, and this
** must happen after the MAC is
** identified
*/
if ((adapter->hw.mac.type == e1000_ich8lan) ||
(adapter->hw.mac.type == e1000_ich9lan) ||
(adapter->hw.mac.type == e1000_ich10lan)) {
int rid = EM_BAR_TYPE_FLASH;
adapter->flash = bus_alloc_resource_any(dev,
SYS_RES_MEMORY, &rid, RF_ACTIVE);
if (adapter->flash == NULL) {
device_printf(dev, "Mapping of Flash failed\n");
error = ENXIO;
goto err_pci;
}
/* This is used in the shared code */
adapter->hw.flash_address = (u8 *)adapter->flash;
adapter->osdep.flash_bus_space_tag =
rman_get_bustag(adapter->flash);
adapter->osdep.flash_bus_space_handle =
rman_get_bushandle(adapter->flash);
}
/* Do Shared Code initialization */
if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
device_printf(dev, "Setup of Shared code failed\n");
error = ENXIO;
goto err_pci;
}
e1000_get_bus_info(&adapter->hw);
/* Set up some sysctls for the tunable interrupt delays */
em_add_int_delay_sysctl(adapter, "rx_int_delay",
"receive interrupt delay in usecs", &adapter->rx_int_delay,
E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
em_add_int_delay_sysctl(adapter, "tx_int_delay",
"transmit interrupt delay in usecs", &adapter->tx_int_delay,
E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
if (adapter->hw.mac.type >= e1000_82540) {
em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
"receive interrupt delay limit in usecs",
&adapter->rx_abs_int_delay,
E1000_REGISTER(&adapter->hw, E1000_RADV),
em_rx_abs_int_delay_dflt);
em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
"transmit interrupt delay limit in usecs",
&adapter->tx_abs_int_delay,
E1000_REGISTER(&adapter->hw, E1000_TADV),
em_tx_abs_int_delay_dflt);
}
#ifndef EM_LEGACY_IRQ
/* Sysctls for limiting the amount of work done in the taskqueue */
em_add_rx_process_limit(adapter, "rx_processing_limit",
"max number of rx packets to process", &adapter->rx_process_limit,
em_rx_process_limit);
#endif
/*
* Validate number of transmit and receive descriptors. It
* must not exceed hardware maximum, and must be multiple
* of E1000_DBA_ALIGN.
*/
if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
(adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) ||
(adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) ||
(em_txd < EM_MIN_TXD)) {
device_printf(dev, "Using %d TX descriptors instead of %d!\n",
EM_DEFAULT_TXD, em_txd);
adapter->num_tx_desc = EM_DEFAULT_TXD;
} else
adapter->num_tx_desc = em_txd;
if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
(adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) ||
(adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) ||
(em_rxd < EM_MIN_RXD)) {
device_printf(dev, "Using %d RX descriptors instead of %d!\n",
EM_DEFAULT_RXD, em_rxd);
adapter->num_rx_desc = EM_DEFAULT_RXD;
} else
adapter->num_rx_desc = em_rxd;
adapter->hw.mac.autoneg = DO_AUTO_NEG;
adapter->hw.phy.autoneg_wait_to_complete = FALSE;
adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
adapter->rx_buffer_len = 2048;
e1000_init_script_state_82541(&adapter->hw, TRUE);
e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);
/* Copper options */
if (adapter->hw.phy.media_type == e1000_media_type_copper) {
adapter->hw.phy.mdix = AUTO_ALL_MODES;
adapter->hw.phy.disable_polarity_correction = FALSE;
adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
}
/*
* Set the frame limits assuming
* standard ethernet sized frames.
*/
adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
/*
* This controls when hardware reports transmit completion
* status.
*/
adapter->hw.mac.report_tx_early = 1;
tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
EM_DBA_ALIGN);
/* Allocate Transmit Descriptor ring */
if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
device_printf(dev, "Unable to allocate tx_desc memory\n");
error = ENOMEM;
goto err_tx_desc;
}
adapter->tx_desc_base =
(struct e1000_tx_desc *)adapter->txdma.dma_vaddr;
rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
EM_DBA_ALIGN);
/* Allocate Receive Descriptor ring */
if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
device_printf(dev, "Unable to allocate rx_desc memory\n");
error = ENOMEM;
goto err_rx_desc;
}
adapter->rx_desc_base =
(struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;
/*
** Start from a known state, this is
** important in reading the nvm and
** mac from that.
*/
e1000_reset_hw(&adapter->hw);
/* Make sure we have a good EEPROM before we read from it */
if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
/*
** Some PCI-E parts fail the first check due to
** the link being in sleep state, call it again,
** if it fails a second time its a real issue.
*/
if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
device_printf(dev,
"The EEPROM Checksum Is Not Valid\n");
error = EIO;
goto err_hw_init;
}
}
/* Copy the permanent MAC address out of the EEPROM */
if (e1000_read_mac_addr(&adapter->hw) < 0) {
device_printf(dev, "EEPROM read error while reading MAC"
" address\n");
error = EIO;
goto err_hw_init;
}
if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
device_printf(dev, "Invalid MAC address\n");
error = EIO;
goto err_hw_init;
}
/* Initialize the hardware */
if (em_hardware_init(adapter)) {
device_printf(dev, "Unable to initialize the hardware\n");
error = EIO;
goto err_hw_init;
}
/* Allocate transmit descriptors and buffers */
if (em_allocate_transmit_structures(adapter)) {
device_printf(dev, "Could not setup transmit structures\n");
error = ENOMEM;
goto err_tx_struct;
}
/* Allocate receive descriptors and buffers */
if (em_allocate_receive_structures(adapter)) {
device_printf(dev, "Could not setup receive structures\n");
error = ENOMEM;
goto err_rx_struct;
}
/*
** Do interrupt configuration
*/
if (adapter->msi > 1) /* Do MSI/X */
error = em_allocate_msix(adapter);
else /* MSI or Legacy */
error = em_allocate_legacy(adapter);
if (error)
goto err_rx_struct;
/* Setup OS specific network interface */
em_setup_interface(dev, adapter);
/* Initialize statistics */
em_update_stats_counters(adapter);
adapter->hw.mac.get_link_status = 1;
em_update_link_status(adapter);
/* Indicate SOL/IDER usage */
if (e1000_check_reset_block(&adapter->hw))
device_printf(dev,
"PHY reset is blocked due to SOL/IDER session.\n");
/* Determine if we have to control management hardware */
adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
/*
* Setup Wake-on-Lan
*/
switch (adapter->hw.mac.type) {
case e1000_82542:
case e1000_82543:
break;
case e1000_82546:
case e1000_82546_rev_3:
case e1000_82571:
case e1000_80003es2lan:
if (adapter->hw.bus.func == 1)
e1000_read_nvm(&adapter->hw,
NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
else
e1000_read_nvm(&adapter->hw,
NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
eeprom_data &= EM_EEPROM_APME;
break;
default:
/* APME bit in EEPROM is mapped to WUC.APME */
eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) &
E1000_WUC_APME;
break;
}
if (eeprom_data)
adapter->wol = E1000_WUFC_MAG;
/*
* We have the eeprom settings, now apply the special cases
* where the eeprom may be wrong or the board won't support
* wake on lan on a particular port
*/
device_id = pci_get_device(dev);
switch (device_id) {
case E1000_DEV_ID_82546GB_PCIE:
adapter->wol = 0;
break;
case E1000_DEV_ID_82546EB_FIBER:
case E1000_DEV_ID_82546GB_FIBER:
case E1000_DEV_ID_82571EB_FIBER:
/* Wake events only supported on port A for dual fiber
* regardless of eeprom setting */
if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
E1000_STATUS_FUNC_1)
adapter->wol = 0;
break;
case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
case E1000_DEV_ID_82571EB_QUAD_COPPER:
case E1000_DEV_ID_82571EB_QUAD_FIBER:
case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
/* if quad port adapter, disable WoL on all but port A */
if (global_quad_port_a != 0)
adapter->wol = 0;
/* Reset for multiple quad port adapters */
if (++global_quad_port_a == 4)
global_quad_port_a = 0;
break;
}
/* Do we need workaround for 82544 PCI-X adapter? */
if (adapter->hw.bus.type == e1000_bus_type_pcix &&
adapter->hw.mac.type == e1000_82544)
adapter->pcix_82544 = TRUE;
else
adapter->pcix_82544 = FALSE;
#if __FreeBSD_version >= 700029
/* Register for VLAN events */
adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
#endif
/* Tell the stack that the interface is not active */
adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
INIT_DEBUGOUT("em_attach: end");
return (0);
err_rx_struct:
em_free_transmit_structures(adapter);
err_tx_struct:
err_hw_init:
em_release_hw_control(adapter);
em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
em_free_pci_resources(adapter);
EM_TX_LOCK_DESTROY(adapter);
EM_RX_LOCK_DESTROY(adapter);
EM_CORE_LOCK_DESTROY(adapter);
return (error);
}
/*********************************************************************
* Device removal routine
*
* The detach entry point is called when the driver is being removed.
* This routine stops the adapter and deallocates all the resources
* that were allocated for driver operation.
*
* Detach is refused with EBUSY while a VLAN is still configured on
* the interface; every other path through this routine returns 0.
*
* return 0 on success, positive on failure
*********************************************************************/
static int
em_detach(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
struct ifnet *ifp = adapter->ifp;
INIT_DEBUGOUT("em_detach: begin");
/* Make sure VLANS are not using driver */
#if __FreeBSD_version >= 700000
if (adapter->ifp->if_vlantrunk != NULL) {
#else
if (adapter->ifp->if_nvlans != 0) {
#endif
device_printf(dev,"Vlan in use, detach first\n");
return (EBUSY);
}
#ifdef DEVICE_POLLING
/* Leave polling mode before the interface is torn down */
if (ifp->if_capenable & IFCAP_POLLING)
ether_poll_deregister(ifp);
#endif
/* Stop the hardware with both the core and the TX lock held */
EM_CORE_LOCK(adapter);
EM_TX_LOCK(adapter);
/* em_ioctl() tests this flag and returns early once it is set */
adapter->in_detach = 1;
em_stop(adapter);
e1000_phy_hw_reset(&adapter->hw);
em_release_manageability(adapter);
/*
* On these MAC types hardware control is released only when
* e1000_check_mng_mode() returns non-zero.
*/
if (((adapter->hw.mac.type == e1000_82573) ||
(adapter->hw.mac.type == e1000_82583) ||
(adapter->hw.mac.type == e1000_ich8lan) ||
(adapter->hw.mac.type == e1000_ich10lan) ||
(adapter->hw.mac.type == e1000_ich9lan)) &&
e1000_check_mng_mode(&adapter->hw))
em_release_hw_control(adapter);
/* A non-zero wol value means a wake-up filter was selected: arm it */
if (adapter->wol) {
E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
em_enable_wakeup(dev);
}
EM_TX_UNLOCK(adapter);
EM_CORE_UNLOCK(adapter);
#if __FreeBSD_version >= 700029
/* Unregister VLAN events */
if (adapter->vlan_attach != NULL)
EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
if (adapter->vlan_detach != NULL)
EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
#endif
/* Detach from the network stack, then drain the driver's callouts */
ether_ifdetach(adapter->ifp);
callout_drain(&adapter->timer);
callout_drain(&adapter->tx_fifo_timer);
em_free_pci_resources(adapter);
bus_generic_detach(dev);
if_free(ifp);
/* Release the software transmit/receive state */
em_free_transmit_structures(adapter);
em_free_receive_structures(adapter);
/* Free Transmit Descriptor ring */
if (adapter->tx_desc_base) {
em_dma_free(adapter, &adapter->txdma);
adapter->tx_desc_base = NULL;
}
/* Free Receive Descriptor ring */
if (adapter->rx_desc_base) {
em_dma_free(adapter, &adapter->rxdma);
adapter->rx_desc_base = NULL;
}
/* The locks are destroyed last, after everything that used them */
EM_TX_LOCK_DESTROY(adapter);
EM_RX_LOCK_DESTROY(adapter);
EM_CORE_LOCK_DESTROY(adapter);
return (0);
}
/*********************************************************************
 *
 *  Shutdown entry point
 *
 *  Shutting down is handled exactly like a suspend: the work is
 *  delegated to em_suspend() and its result is passed back.
 *
 **********************************************************************/
static int
em_shutdown(device_t dev)
{
	int	rc;

	rc = em_suspend(dev);
	return (rc);
}
/*
 * Suspend/resume device methods.
 *
 * em_suspend stops the adapter, releases manageability and (on the
 * MAC types listed below, when e1000_check_mng_mode() returns
 * non-zero) hardware control, arms wake-up if a wake filter is set,
 * and finally hands off to bus_generic_suspend().
 */
static int
em_suspend(device_t dev)
{
	struct adapter	*sc = device_get_softc(dev);

	EM_CORE_LOCK(sc);

	/* em_stop() is run with the TX lock held as well */
	EM_TX_LOCK(sc);
	em_stop(sc);
	EM_TX_UNLOCK(sc);

	em_release_manageability(sc);

	switch (sc->hw.mac.type) {
	case e1000_82573:
	case e1000_82583:
	case e1000_ich8lan:
	case e1000_ich9lan:
	case e1000_ich10lan:
		if (e1000_check_mng_mode(&sc->hw))
			em_release_hw_control(sc);
		break;
	default:
		break;
	}

	/* Program the wake-up registers when a wake filter is selected */
	if (sc->wol != 0) {
		E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
		em_enable_wakeup(dev);
	}

	EM_CORE_UNLOCK(sc);

	return (bus_generic_suspend(dev));
}
/*
 * Resume: re-initialize the adapter and its manageability setup under
 * the core lock, restart transmission, then let the generic bus code
 * finish the resume.
 */
static int
em_resume(device_t dev)
{
	struct adapter	*sc = device_get_softc(dev);

	EM_CORE_LOCK(sc);
	em_init_locked(sc);
	em_init_manageability(sc);
	EM_CORE_UNLOCK(sc);

	/* Kick the transmit path for anything queued meanwhile */
	em_start(sc->ifp);

	return (bus_generic_resume(dev));
}
/*********************************************************************
* Transmit entry point
*
* em_start is called by the stack to initiate a transmit.
* The driver will remain in this routine as long as there are
* packets to transmit and transmit resources are available.
* In case resources are not available stack is notified and
* the packet is requeued.
*
* em_mq_start_locked is the buf_ring based variant and must be
* entered with the TX lock held. A NULL mbuf means "just drain the
* ring". The return value is the last error from em_xmit() or
* drbr_enqueue(), or E1000_SUCCESS.
**********************************************************************/
#if __FreeBSD_version >= 800000
static int
em_mq_start_locked(struct ifnet *ifp, struct mbuf *m)
{
struct adapter *adapter = ifp->if_softc;
struct mbuf *next;
int error = E1000_SUCCESS;
EM_TX_LOCK_ASSERT(adapter);
/* To allow being called from a tasklet */
if (m == NULL)
goto process;
/*
* Interface not running, marked OACTIVE, or link down:
* just park the packet on the ring and report the enqueue result.
*/
if (((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING)
|| (!adapter->link_active)) {
error = drbr_enqueue(ifp, adapter->br, m);
return (error);
- } else if (drbr_empty(ifp, adapter->br) &&
+ } else if (!drbr_needs_enqueue(ifp, adapter->br) &&
(adapter->num_tx_desc_avail > EM_TX_OP_THRESHOLD)) {
/* Try to transmit directly, bypassing the ring */
if ((error = em_xmit(adapter, &m)) != 0) {
/* em_xmit() may have consumed the mbuf; requeue only if it survived */
if (m != NULL)
error = drbr_enqueue(ifp, adapter->br, m);
return (error);
} else {
/*
* We've bypassed the buf ring so we need to update
* ifp directly
*/
drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
/*
** Send a copy of the frame to the BPF
** listener and set the watchdog on.
*/
ETHER_BPF_MTAP(ifp, m);
adapter->watchdog_timer = EM_TX_TIMEOUT;
}
} else if ((error = drbr_enqueue(ifp, adapter->br, m)) != 0)
return (error);
process:
if (drbr_empty(ifp, adapter->br))
return(error);
/* Process the queue */
while (TRUE) {
/* Stop draining as soon as the interface is no longer running */
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
next = drbr_dequeue(ifp, adapter->br);
if (next == NULL)
break;
if ((error = em_xmit(adapter, &next)) != 0) {
/* Put the packet back if em_xmit() left it intact */
if (next != NULL)
error = drbr_enqueue(ifp, adapter->br, next);
break;
}
drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
ETHER_BPF_MTAP(ifp, next);
/* Set the watchdog */
adapter->watchdog_timer = EM_TX_TIMEOUT;
}
/* Running low on descriptors: mark the interface as output-active */
if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD)
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
return (error);
}
/*
** Multiqueue capable stack interface, this is not
** yet truly multiqueue, but that is coming...
**
** Takes ownership of the mbuf on every path: it is either handed
** to em_mq_start_locked() or placed on the buf ring. Returns the
** error from whichever of those was used.
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	int		error = 0;

	if (EM_TX_TRYLOCK(adapter)) {
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			error = em_mq_start_locked(ifp, m);
		else
			/*
			 * Not running: queue the packet rather than
			 * dropping the reference, which would leak it.
			 */
			error = drbr_enqueue(ifp, adapter->br, m);
		EM_TX_UNLOCK(adapter);
	} else
		/* TX lock is busy: leave the packet for the lock holder */
		error = drbr_enqueue(ifp, adapter->br, m);
	return (error);
}
/*
 * Flush routine: free every mbuf still sitting on the driver's buf
 * ring, then let if_qflush() flush the interface queue. Runs under
 * the TX lock.
 */
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*sc = (struct adapter *)ifp->if_softc;
	struct mbuf	*pkt;

	EM_TX_LOCK(sc);
	for (;;) {
		pkt = buf_ring_dequeue_sc(sc->br);
		if (pkt == NULL)
			break;
		m_freem(pkt);
	}
	if_qflush(ifp);
	EM_TX_UNLOCK(sc);
}
#endif /* FreeBSD_version */
/*
 * Drain the interface send queue into the hardware. Must be entered
 * with the TX lock held. Does nothing unless the interface is running,
 * not marked OACTIVE, and the link is up.
 */
static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*sc = ifp->if_softc;
	struct mbuf	*pkt;

	EM_TX_LOCK_ASSERT(sc);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || !sc->link_active)
		return;

	for (;;) {
		if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			break;
		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
		if (pkt == NULL)
			break;
		/*
		 * Encapsulation can replace the mbuf pointer, or set it
		 * to NULL on failure; only a surviving mbuf can be put
		 * back at the head of the queue.
		 */
		if (em_xmit(sc, &pkt) != 0) {
			if (pkt != NULL) {
				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
				IFQ_DRV_PREPEND(&ifp->if_snd, pkt);
			}
			break;
		}

		/* Hand a copy of the frame to any BPF listener */
		ETHER_BPF_MTAP(ifp, pkt);

		/* Arm the watchdog in case the hardware stalls */
		sc->watchdog_timer = EM_TX_TIMEOUT;
	}

	if (sc->num_tx_desc_avail <= EM_TX_OP_THRESHOLD)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
}
/*
 * Stack-facing start routine: take the TX lock and, when the interface
 * is running, let em_start_locked() push out the send queue.
 */
static void
em_start(struct ifnet *ifp)
{
	struct adapter	*sc = ifp->if_softc;

	EM_TX_LOCK(sc);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
		em_start_locked(ifp);
	EM_TX_UNLOCK(sc);
}
/*********************************************************************
* Ioctl entry point
*
* em_ioctl is called when the user wants to configure the
* interface.
*
* Handled here: SIOCSIFADDR, SIOCSIFMTU, SIOCSIFFLAGS,
* SIOC(ADD|DEL)MULTI, SIOC(S|G)IFMEDIA and SIOCSIFCAP. Every other
* command is passed to ether_ioctl().
*
* return 0 on success, positive on failure
**********************************************************************/
static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
struct adapter *adapter = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
struct ifaddr *ifa = (struct ifaddr *)data;
#endif
int error = 0;
/* While a detach is in progress every request is ignored (returns 0) */
if (adapter->in_detach)
return (error);
switch (command) {
case SIOCSIFADDR:
#ifdef INET
if (ifa->ifa_addr->sa_family == AF_INET) {
/*
* XXX
* Since resetting hardware takes a very long time
* and results in link renegotiation we only
* initialize the hardware only when it is absolutely
* required.
*/
ifp->if_flags |= IFF_UP;
if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
EM_CORE_LOCK(adapter);
em_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
}
if (!(ifp->if_flags & IFF_NOARP))
arp_ifinit(ifp, ifa);
} else
#endif
/* Non-INET addresses (or kernels without INET) use the generic path */
error = ether_ioctl(ifp, command, data);
break;
case SIOCSIFMTU:
{
int max_frame_size;
u16 eeprom_data = 0;
IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
EM_CORE_LOCK(adapter);
/* Pick the largest frame size accepted for this MAC type */
switch (adapter->hw.mac.type) {
case e1000_82573:
/*
* 82573 only supports jumbo frames
* if ASPM is disabled.
*/
e1000_read_nvm(&adapter->hw,
NVM_INIT_3GIO_3, 1, &eeprom_data);
if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
max_frame_size = ETHER_MAX_LEN;
break;
}
/* Allow Jumbo frames - fall thru */
case e1000_82571:
case e1000_82572:
case e1000_ich9lan:
case e1000_ich10lan:
case e1000_82574:
case e1000_80003es2lan: /* Limit Jumbo Frame size */
max_frame_size = 9234;
break;
/* Adapters that do not support jumbo frames */
case e1000_82542:
case e1000_82583:
case e1000_ich8lan:
max_frame_size = ETHER_MAX_LEN;
break;
default:
max_frame_size = MAX_JUMBO_FRAME_SIZE;
}
/* Reject an MTU that would not fit once header and CRC are added */
if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
ETHER_CRC_LEN) {
EM_CORE_UNLOCK(adapter);
error = EINVAL;
break;
}
ifp->if_mtu = ifr->ifr_mtu;
adapter->max_frame_size =
ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
/* Re-initialize so the new frame size takes effect */
em_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
break;
}
case SIOCSIFFLAGS:
IOCTL_DEBUGOUT("ioctl rcv'd:\
SIOCSIFFLAGS (Set Interface Flags)");
EM_CORE_LOCK(adapter);
if (ifp->if_flags & IFF_UP) {
if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
/* Already running: only react to PROMISC/ALLMULTI changes */
if ((ifp->if_flags ^ adapter->if_flags) &
(IFF_PROMISC | IFF_ALLMULTI)) {
em_disable_promisc(adapter);
em_set_promisc(adapter);
}
} else
em_init_locked(adapter);
} else
/* Interface marked down: stop it if it is still running */
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
EM_TX_LOCK(adapter);
em_stop(adapter);
EM_TX_UNLOCK(adapter);
}
/* Remember the flags so the next call can detect what changed */
adapter->if_flags = ifp->if_flags;
EM_CORE_UNLOCK(adapter);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
EM_CORE_LOCK(adapter);
/* Interrupts are masked while the multicast setup is rewritten */
em_disable_intr(adapter);
em_set_multi(adapter);
if (adapter->hw.mac.type == e1000_82542 &&
adapter->hw.revision_id == E1000_REVISION_2) {
em_initialize_receive_unit(adapter);
}
#ifdef DEVICE_POLLING
/* In polling mode interrupts stay disabled */
if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
em_enable_intr(adapter);
EM_CORE_UNLOCK(adapter);
}
break;
case SIOCSIFMEDIA:
/* Check SOL/IDER usage */
EM_CORE_LOCK(adapter);
if (e1000_check_reset_block(&adapter->hw)) {
EM_CORE_UNLOCK(adapter);
device_printf(adapter->dev, "Media change is"
" blocked due to SOL/IDER session.\n");
break;
}
EM_CORE_UNLOCK(adapter);
/* FALLTHROUGH: the set request is carried out by ifmedia_ioctl() below */
case SIOCGIFMEDIA:
IOCTL_DEBUGOUT("ioctl rcv'd: \
SIOCxIFMEDIA (Get/Set Interface Media)");
error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
break;
case SIOCSIFCAP:
{
int mask, reinit;
IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
reinit = 0;
/* mask holds the capability bits that differ from the current state */
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
if (mask & IFCAP_POLLING) {
if (ifr->ifr_reqcap & IFCAP_POLLING) {
error = ether_poll_register(em_poll, ifp);
if (error)
return (error);
EM_CORE_LOCK(adapter);
em_disable_intr(adapter);
ifp->if_capenable |= IFCAP_POLLING;
EM_CORE_UNLOCK(adapter);
} else {
error = ether_poll_deregister(ifp);
/* Enable interrupt even in error case */
EM_CORE_LOCK(adapter);
em_enable_intr(adapter);
ifp->if_capenable &= ~IFCAP_POLLING;
EM_CORE_UNLOCK(adapter);
}
}
#endif
/* Each of the toggles below requests a re-init of a running interface */
if (mask & IFCAP_HWCSUM) {
ifp->if_capenable ^= IFCAP_HWCSUM;
reinit = 1;
}
#if __FreeBSD_version >= 700000
if (mask & IFCAP_TSO4) {
ifp->if_capenable ^= IFCAP_TSO4;
reinit = 1;
}
#endif
if (mask & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
reinit = 1;
}
if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
em_init(adapter);
#if __FreeBSD_version >= 700000
VLAN_CAPABILITIES(ifp);
#endif
break;
}
default:
error = ether_ioctl(ifp, command, data);
break;
}
return (error);
}
/*********************************************************************
* Watchdog timer:
*
* This routine is called from the local timer every second.
* As long as transmit descriptors are being cleaned the value
* is non-zero and we do nothing. Reaching 0 indicates a tx hang
* and we then reset the device.
*
**********************************************************************/
static void
em_watchdog(struct adapter *adapter)
{
EM_CORE_LOCK_ASSERT(adapter);
/*
** The timer is set to 5 every time start queues a packet.
** Then txeof keeps resetting it as long as it cleans at
** least one descriptor.
** Finally, anytime all descriptors are clean the timer is
** set to 0.
*/
EM_TX_LOCK(adapter);
if ((adapter->watchdog_timer == 0) || (--adapter->watchdog_timer)) {
EM_TX_UNLOCK(adapter);
return;
}
/* If we are in this routine because of pause frames, then
* don't reset the hardware.
*/
if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
E1000_STATUS_TXOFF) {
adapter->watchdog_timer = EM_TX_TIMEOUT;
EM_TX_UNLOCK(adapter);
return;
}
if (e1000_check_for_link(&adapter->hw) == 0)
device_printf(adapter->dev, "watchdog timeout -- resetting\n");
adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
adapter->watchdog_events++;
EM_TX_UNLOCK(adapter);
em_init_locked(adapter);
}
/*********************************************************************
* Init entry point
*
* This routine is used in two ways. It is used by the stack as
* init entry point in network interface structure. It is also used
* by the driver as a hw/sw initialization routine to get to a
* consistent state.
*
* Must be called with the core lock held. The routine returns
* nothing: a failure is reported with device_printf() and the
* routine returns early, before IFF_DRV_RUNNING is set.
**********************************************************************/
static void
em_init_locked(struct adapter *adapter)
{
struct ifnet *ifp = adapter->ifp;
device_t dev = adapter->dev;
u32 pba;
INIT_DEBUGOUT("em_init: begin");
EM_CORE_LOCK_ASSERT(adapter);
/* Always start from a stopped adapter */
EM_TX_LOCK(adapter);
em_stop(adapter);
EM_TX_UNLOCK(adapter);
/*
* Packet Buffer Allocation (PBA)
* Writing PBA sets the receive portion of the buffer
* the remainder is used for the transmit buffer.
*
* Devices before the 82547 had a Packet Buffer of 64K.
* Default allocation: PBA=48K for Rx, leaving 16K for Tx.
* After the 82547 the buffer was reduced to 40K.
* Default allocation: PBA=30K for Rx, leaving 10K for Tx.
* Note: default does not leave enough room for Jumbo Frame >10k.
*/
switch (adapter->hw.mac.type) {
case e1000_82547:
case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
if (adapter->max_frame_size > 8192)
pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
else
pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
/* The 82547 also tracks its TX FIFO: reset that bookkeeping */
adapter->tx_fifo_head = 0;
adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
adapter->tx_fifo_size =
(E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
break;
/* Total Packet Buffer on these is 48K */
case e1000_82571:
case e1000_82572:
case e1000_80003es2lan:
pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
break;
case e1000_82573: /* 82573: Total Packet Buffer is 32K */
pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
break;
case e1000_82574:
case e1000_82583:
pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
break;
case e1000_ich9lan:
case e1000_ich10lan:
case e1000_ich8lan:
pba = E1000_PBA_8K;
break;
default:
/* Devices before 82547 had a Packet Buffer of 64K. */
if (adapter->max_frame_size > 8192)
pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
else
pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
}
INIT_DEBUGOUT1("em_init: pba=%dK",pba);
E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
/* Get the latest mac address, User can use a LAA */
bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
ETHER_ADDR_LEN);
/* Put the address into the Receive Address Array */
e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
/*
* With the 82571 adapter, RAR[0] may be overwritten
* when the other port is reset, we make a duplicate
* in RAR[14] for that eventuality, this assures
* the interface continues to function.
*/
if (adapter->hw.mac.type == e1000_82571) {
e1000_set_laa_state_82571(&adapter->hw, TRUE);
e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
E1000_RAR_ENTRIES - 1);
}
/* Initialize the hardware */
if (em_hardware_init(adapter)) {
device_printf(dev, "Unable to initialize the hardware\n");
return;
}
em_update_link_status(adapter);
/* Setup VLAN support, basic and offload if available */
E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
#if __FreeBSD_version < 700029
if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
u32 ctrl;
ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
ctrl |= E1000_CTRL_VME;
E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
}
#else
/* Use real VLAN Filter support */
em_setup_vlan_hw_support(adapter);
#endif
/* Set hardware offload abilities */
ifp->if_hwassist = 0;
if (adapter->hw.mac.type >= e1000_82543) {
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 700000
if (ifp->if_capenable & IFCAP_TSO4)
ifp->if_hwassist |= CSUM_TSO;
#endif
}
/* Configure for OS presence */
em_init_manageability(adapter);
/* Prepare transmit descriptors and buffers */
em_setup_transmit_structures(adapter);
em_initialize_transmit_unit(adapter);
/* Setup Multicast table */
em_set_multi(adapter);
/* Prepare receive descriptors and buffers */
if (em_setup_receive_structures(adapter)) {
device_printf(dev, "Could not setup receive structures\n");
/* Undo the partial bring-up before giving up */
EM_TX_LOCK(adapter);
em_stop(adapter);
EM_TX_UNLOCK(adapter);
return;
}
em_initialize_receive_unit(adapter);
/* Don't lose promiscuous settings */
em_set_promisc(adapter);
/* Mark the interface running and ready to accept output */
ifp->if_drv_flags |= IFF_DRV_RUNNING;
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
/* Schedule em_local_timer() to fire after hz ticks */
callout_reset(&adapter->timer, hz, em_local_timer, adapter);
e1000_clear_hw_cntrs_base_generic(&adapter->hw);
/* MSI/X configuration for 82574 */
if (adapter->hw.mac.type == e1000_82574) {
int tmp;
tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
tmp |= E1000_CTRL_EXT_PBA_CLR;
E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
/*
** Set the IVAR - interrupt vector routing.
** Each nibble represents a vector, high bit
** is enable, other 3 bits are the MSIX table
** entry, we map RXQ0 to 0, TXQ0 to 1, and
** Link (other) to 2, hence the magic number.
*/
E1000_WRITE_REG(&adapter->hw, E1000_IVAR, 0x800A0908);
}
#ifdef DEVICE_POLLING
/*
* Only enable interrupts if we are not polling, make sure
* they are off otherwise.
*/
if (ifp->if_capenable & IFCAP_POLLING)
em_disable_intr(adapter);
else
#endif /* DEVICE_POLLING */
em_enable_intr(adapter);
/* Don't reset the phy next time init gets called */
adapter->hw.phy.reset_disable = TRUE;
}
/*
 * Unlocked init wrapper: arg is the adapter softc. Takes the core
 * lock around em_init_locked().
 */
static void
em_init(void *arg)
{
	struct adapter	*sc = (struct adapter *)arg;

	EM_CORE_LOCK(sc);
	em_init_locked(sc);
	EM_CORE_UNLOCK(sc);
}
#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *  Handler registered through ether_poll_register(). On
 *  POLL_AND_CHECK_STATUS the interrupt cause register is read and a
 *  link change is processed; afterwards received frames are handled
 *  (bounded by count), completed transmits are cleaned and any
 *  pending output is restarted.
 *
 *  Returns the value reported by em_rxeof(), or 0 when the interface
 *  is not running.
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		reg_icr, rx_done = 0;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			/* Hold the local timer off while link state is refreshed */
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	rx_done = em_rxeof(adapter, count);

	EM_TX_LOCK(adapter);
	em_txeof(adapter);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, adapter->br))
		em_mq_start_locked(ifp, NULL);
#else
	/* The interface send queue member is if_snd, not snd */
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
#endif
	EM_TX_UNLOCK(adapter);
	return (rx_done);
}
#endif /* DEVICE_POLLING */
#ifdef EM_LEGACY_IRQ
/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *  Runs entirely under the core lock. Link events are handled on
 *  their own; otherwise transmit completions and received frames
 *  are processed and output is restarted if packets are waiting.
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*sc = arg;
	struct ifnet	*ifp = sc->ifp;
	u32		icr;

	/* Nothing to do here while the interface is in polling mode */
	if ((ifp->if_capenable & IFCAP_POLLING) != 0)
		return;

	EM_CORE_LOCK(sc);
	icr = E1000_READ_REG(&sc->hw, E1000_ICR);
	if ((icr & E1000_ICR_RXO) != 0)
		sc->rx_overruns++;

	/* All-ones or zero cause register: bail out */
	if (icr == 0xffffffff || icr == 0)
		goto done;
	/* From the 82571 on, INT_ASSERTED must be set for it to be ours */
	if (sc->hw.mac.type >= e1000_82571 &&
	    (icr & E1000_ICR_INT_ASSERTED) == 0)
		goto done;
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	if ((icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) != 0) {
		callout_stop(&sc->timer);
		sc->hw.mac.get_link_status = 1;
		em_update_link_status(sc);
		/* Deal with TX cruft when link lost */
		em_tx_purge(sc);
		callout_reset(&sc->timer, hz, em_local_timer, sc);
		goto done;
	}

	EM_TX_LOCK(sc);
	em_txeof(sc);
	em_rxeof(sc, -1);
	em_txeof(sc);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_TX_UNLOCK(sc);

done:
	EM_CORE_UNLOCK(sc);
}
#else /* EM_FAST_IRQ, then fast interrupt routines only */
/*
 * Link task: refresh the link state under the core lock, with the
 * local timer stopped for the duration and re-armed afterwards.
 * Does nothing when the interface is not running.
 */
static void
em_handle_link(void *context, int pending)
{
	struct adapter	*sc = context;
	struct ifnet	*ifp = sc->ifp;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	EM_CORE_LOCK(sc);
	callout_stop(&sc->timer);
	em_update_link_status(sc);
	/* Deal with TX cruft when link lost */
	em_tx_purge(sc);
	callout_reset(&sc->timer, hz, em_local_timer, sc);
	EM_CORE_UNLOCK(sc);
}
/*
 * Combined RX/TX handler, used by Legacy and MSI
 *
 * Taskqueue handler (context is the adapter softc). While the
 * interface is running it processes received frames up to
 * rx_process_limit, cleans completed transmits and restarts output
 * if packets are waiting. Interrupts are re-enabled on the way out
 * whether or not the interface is running.
 */
static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		/* A non-zero return from em_rxeof() reschedules this task */
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_TX_LOCK(adapter);
		em_txeof(adapter);
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, adapter->br))
			em_mq_start_locked(ifp, NULL);
#else
		/* The interface send queue member is if_snd, not snd */
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
#endif
		EM_TX_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
}
/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *  Decides whether the interrupt is ours, masks further interrupts
 *  and defers the real work to the rxtx (and, on a link event, the
 *  link) task.
 *
 *********************************************************************/
#if __FreeBSD_version < 700000
#define FILTER_STRAY
#define FILTER_HANDLED
static void
#else
static int
#endif
em_irq_fast(void *arg)
{
	struct adapter	*sc = arg;
	u32		icr;

	icr = E1000_READ_REG(&sc->hw, E1000_ICR);

	/* All ones: hot eject?  Zero: definitely not our interrupt. */
	if (icr == 0xffffffff || icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if ((icr & E1000_ICR_INT_ASSERTED) == 0 &&
	    sc->hw.mac.type >= e1000_82571)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(sc);
	taskqueue_enqueue(sc->tq, &sc->rxtx_task);

	/* Link status change */
	if ((icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) != 0) {
		sc->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &sc->link_task);
	}

	if ((icr & E1000_ICR_RXO) != 0)
		sc->rx_overruns++;

	return FILTER_HANDLED;
}
/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
#define EM_MSIX_TX 0x00040000
#define EM_MSIX_RX 0x00010000
#define EM_MSIX_LINK 0x00100000

/*
 * MSIX TX interrupt: count it, clean completed transmits under the
 * TX lock, queue the tx task, then write EM_MSIX_TX to IMS.
 */
static void
em_msix_tx(void *arg)
{
	struct adapter	*sc = arg;
	struct ifnet	*ifp = sc->ifp;

	++sc->tx_irq;
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
		EM_TX_LOCK(sc);
		em_txeof(sc);
		EM_TX_UNLOCK(sc);
		taskqueue_enqueue(sc->tq, &sc->tx_task);
	}

	/* Reenable this interrupt */
	E1000_WRITE_REG(&sc->hw, E1000_IMS, EM_MSIX_TX);
}
/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 *  Counts the interrupt and, while the interface is running,
 *  processes up to rx_process_limit frames; a non-zero result from
 *  em_rxeof() queues the rx task.
 *
 **********************************************************************/
static void
em_msix_rx(void *arg)
{
	struct adapter	*sc = arg;
	struct ifnet	*ifp = sc->ifp;

	++sc->rx_irq;
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
		if (em_rxeof(sc, sc->rx_process_limit) != 0)
			taskqueue_enqueue(sc->tq, &sc->rx_task);
	}

	/* Reenable this interrupt */
	E1000_WRITE_REG(&sc->hw, E1000_IMS, EM_MSIX_RX);
}
/*********************************************************************
*
* MSIX Link Fast Interrupt Service routine
*
**********************************************************************/
static void
em_msix_link(void *arg)
{
struct adapter *adapter = arg;
u32 reg_icr;
++adapter->link_irq;
reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
adapter->hw.mac.get_link_status = 1;
taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
}
E1000_WRITE_REG(&adapter->hw, E1000_IMS,
EM_MSIX_LINK | E1000_IMS_LSC);
return;
}
/*
 * Deferred RX task.  `context` is the struct adapter pointer; `pending`
 * is not used.  Runs one bounded receive pass and requeues itself when
 * em_rxeof() returns non-zero.
 */
static void
em_handle_rx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
		taskqueue_enqueue(adapter->tq, &adapter->rx_task);
}
/*
 * Deferred TX task.  `context` is the struct adapter pointer; `pending`
 * is not used.
 *
 * When the interface is running and the TX lock can be taken without
 * blocking, reclaims completed descriptors and restarts transmission if
 * packets are still queued.  If the lock is contended the task simply
 * returns.
 */
static void
em_handle_tx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	if (!EM_TX_TRYLOCK(adapter))
		return;

	em_txeof(adapter);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, adapter->br))
		em_mq_start_locked(ifp, NULL);
#else
	/* The interface send queue member of struct ifnet is if_snd. */
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
#endif
	EM_TX_UNLOCK(adapter);
}
#endif /* EM_FAST_IRQ */
/*********************************************************************
*
* Media Ioctl callback
*
* This routine is called whenever the user queries the status of
* the interface using ifconfig.
*
**********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
struct adapter *adapter = ifp->if_softc;
u_char fiber_type = IFM_1000_SX;
INIT_DEBUGOUT("em_media_status: begin");
EM_CORE_LOCK(adapter);
em_update_link_status(adapter);
ifmr->ifm_status = IFM_AVALID;
ifmr->ifm_active = IFM_ETHER;
if (!adapter->link_active) {
EM_CORE_UNLOCK(adapter);
return;
}
ifmr->ifm_status |= IFM_ACTIVE;
if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
(adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
if (adapter->hw.mac.type == e1000_82545)
fiber_type = IFM_1000_LX;
ifmr->ifm_active |= fiber_type | IFM_FDX;
} else {
switch (adapter->link_speed) {
case 10:
ifmr->ifm_active |= IFM_10_T;
break;
case 100:
ifmr->ifm_active |= IFM_100_TX;
break;
case 1000:
ifmr->ifm_active |= IFM_1000_T;
break;
}
if (adapter->link_duplex == FULL_DUPLEX)
ifmr->ifm_active |= IFM_FDX;
else
ifmr->ifm_active |= IFM_HDX;
}
EM_CORE_UNLOCK(adapter);
}
/*********************************************************************
*
* Media Ioctl callback
*
* This routine is called when the user changes speed/duplex using
* the media/mediaopt options with ifconfig.
*
**********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
struct adapter *adapter = ifp->if_softc;
struct ifmedia *ifm = &adapter->media;
INIT_DEBUGOUT("em_media_change: begin");
if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
return (EINVAL);
EM_CORE_LOCK(adapter);
switch (IFM_SUBTYPE(ifm->ifm_media)) {
case IFM_AUTO:
adapter->hw.mac.autoneg = DO_AUTO_NEG;
adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
break;
case IFM_1000_LX:
case IFM_1000_SX:
case IFM_1000_T:
adapter->hw.mac.autoneg = DO_AUTO_NEG;
adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
break;
case IFM_100_TX:
adapter->hw.mac.autoneg = FALSE;
adapter->hw.phy.autoneg_advertised = 0;
if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
else
adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
break;
case IFM_10_T:
adapter->hw.mac.autoneg = FALSE;
adapter->hw.phy.autoneg_advertised = 0;
if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
else
adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
break;
default:
device_printf(adapter->dev, "Unsupported media type\n");
}
/* As the speed/duplex settings my have changed we need to
* reset the PHY.
*/
adapter->hw.phy.reset_disable = FALSE;
em_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
return (0);
}
/*********************************************************************
*
* This routine maps the mbufs to tx descriptors.
*
* return 0 on success, positive on failure
**********************************************************************/
/*
 * Map one packet onto transmit descriptors and hand it to the hardware.
 *
 * adapter  - driver softc.
 * m_headp  - in/out pointer to the mbuf chain.  It may be replaced (by
 *            m_pullup() or m_defrag()) and is set to NULL on the paths
 *            where the chain has been freed.
 *
 * Returns 0 on success.  On failure returns ENOBUFS (no descriptors,
 * mbuf allocation failure), ENXIO (TSO setup failed) or the error from
 * bus_dmamap_load_mbuf_sg().  Whenever the function fails after the DMA
 * map was loaded, the map is unloaded again before returning.
 */
static int
em_xmit(struct adapter *adapter, struct mbuf **m_headp)
{
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;
#if __FreeBSD_version < 700000
	struct m_tag		*mtag;
#endif
	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

#if __FreeBSD_version >= 700000
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
#else
	do_tso = 0;
#endif

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		/* Now do we at least have a minimal? */
		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * TSO workaround:
	 *  If an mbuf is only header we need
	 *     to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		return (error);
	}

	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
#if __FreeBSD_version >= 700000
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		error = em_tso_setup(adapter, m_head, &txd_upper, &txd_lower);
		if (error != TRUE) {
			/*
			 * The map was loaded above and is reused for the
			 * next packet at this index; release it so it is
			 * not loaded twice.
			 */
			bus_dmamap_unload(adapter->txtag, map);
			return (ENXIO); /* something foobar */
		}
		/* we need to make a final sentinel transmit desc */
		tso_desc = TRUE;
	} else
#endif
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
		em_transmit_checksum_setup(adapter, m_head,
		    &txd_upper, &txd_lower);

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		/* If adapter is 82544 and on PCIX bus */
		if(adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			u32		array_elements, counter;
			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				ctxd = &adapter->tx_desc_base[i];
				ctxd->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				ctxd->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower | (u16)
				    desc_array.descriptor[counter].length));
				ctxd->upper.data =
				    htole32((txd_upper));
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
				tx_buffer->m_head = NULL;
				tx_buffer->next_eop = -1;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			ctxd = &adapter->tx_desc_base[i];
			seg_addr = segs[j].ds_addr;
			seg_len  = segs[j].ds_len;
			/*
			** TSO Workaround:
			** If this is the last descriptor, we want to
			** split it so we have a small final sentinel
			*/
			if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
				seg_len -= 4;
				ctxd->buffer_addr = htole64(seg_addr);
				ctxd->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				ctxd->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				ctxd = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				ctxd->buffer_addr =
				    htole64(seg_addr + seg_len);
				ctxd->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				ctxd->upper.data =
				    htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				ctxd->buffer_addr = htole64(seg_addr);
				ctxd->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				ctxd->upper.data =
				    htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
			tx_buffer->next_eop = -1;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	/*
	** Handle VLAN tag, this is the
	** biggest difference between
	** 6.x and 7
	*/
#if __FreeBSD_version < 700000
	/* Find out if we are in vlan mode. */
	mtag = VLAN_OUTPUT_TAG(ifp, m_head);
	if (mtag != NULL) {
		ctxd->upper.fields.special =
		    htole16(VLAN_TAG_VALUE(mtag));
#else /* FreeBSD 7 */
	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		ctxd->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);
#endif
		/* Tell hardware to add tag */
		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	/*
	 * The mbuf is recorded on the last buffer used; the loaded map
	 * is moved there too and that buffer's spare map is handed back
	 * to the first buffer.
	 */
	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac.type == e1000_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), i);
		if (adapter->hw.mac.type == e1000_82547)
			em_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}
/*********************************************************************
*
* 82547 workaround to avoid controller hang in half-duplex environment.
* The workaround is to avoid queuing a large packet that would span
* the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
* in this case. We do that only when FIFO is quiescent.
*
**********************************************************************/
/*
 * Advance the hardware TX tail toward the software tail one packet at a
 * time.  `arg` is the struct adapter pointer; the TX lock must be held.
 *
 * Descriptor lengths are summed until an EOP descriptor is seen.  If the
 * FIFO workaround reports that the packet cannot be queued yet, a
 * one-tick callout is armed to retry and the walk stops; otherwise the
 * tail register and the software FIFO head are updated.
 */
static void
em_82547_move_tail(void *arg)
{
	struct adapter		*adapter = arg;
	struct e1000_tx_desc	*tx_desc;
	u16			hw_tdt, sw_tdt, length = 0;
	bool			eop = 0;

	EM_TX_LOCK_ASSERT(adapter);

	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT(0));
	sw_tdt = adapter->next_avail_tx_desc;

	for (; hw_tdt != sw_tdt;) {
		tx_desc = &adapter->tx_desc_base[hw_tdt];
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;

		/* Step to the next ring slot, wrapping at the end. */
		hw_tdt++;
		if (hw_tdt == adapter->num_tx_desc)
			hw_tdt = 0;

		if (!eop)
			continue;

		if (em_82547_fifo_workaround(adapter, length)) {
			adapter->tx_fifo_wrk_cnt++;
			callout_reset(&adapter->tx_fifo_timer, 1,
			    em_82547_move_tail, adapter);
			break;
		}

		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), hw_tdt);
		em_82547_update_fifo_head(adapter, length);
		length = 0;
	}
}
/*
 * Decide whether a packet of `len` bytes may be queued right now.
 *
 * Returns 1 when, in half duplex, the padded packet would reach past the
 * space left in the TX FIFO by at least EM_82547_PKT_THRESH and the FIFO
 * could not be reset; returns 0 in every other case.
 */
static int
em_82547_fifo_workaround(struct adapter *adapter, int len)
{
	int fifo_space, fifo_pkt_len;

	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);

	if (adapter->link_duplex != HALF_DUPLEX)
		return (0);

	fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
	if (fifo_pkt_len < (EM_82547_PKT_THRESH + fifo_space))
		return (0);

	/* A successful FIFO reset makes room; otherwise ask to retry. */
	return (em_82547_tx_fifo_reset(adapter) ? 0 : 1);
}
/*
 * Account for a packet of `len` bytes in the software copy of the TX
 * FIFO head, wrapping around at tx_fifo_size.
 */
static void
em_82547_update_fifo_head(struct adapter *adapter, int len)
{
	int fifo_pkt_len;

	/* Padding to EM_FIFO_HDR keeps the head aligned (16 bytes). */
	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);

	adapter->tx_fifo_head += fifo_pkt_len;
	if (adapter->tx_fifo_head >= adapter->tx_fifo_size)
		adapter->tx_fifo_head -= adapter->tx_fifo_size;
}
/*
 * Reset the TX FIFO pointers if the transmitter is idle.
 *
 * Returns FALSE without touching the hardware when any head/tail pair
 * differs or the FIFO packet count is non-zero.  Otherwise the TX unit
 * is disabled, all four FIFO pointers are rewritten with tx_head_addr,
 * the TX unit is re-enabled, and TRUE is returned.
 */
static int
em_82547_tx_fifo_reset(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	u32		tctl;

	/* Descriptor ring must be drained. */
	if (E1000_READ_REG(hw, E1000_TDT(0)) !=
	    E1000_READ_REG(hw, E1000_TDH(0)))
		return (FALSE);
	/* FIFO head and tail must match. */
	if (E1000_READ_REG(hw, E1000_TDFT) !=
	    E1000_READ_REG(hw, E1000_TDFH))
		return (FALSE);
	/* So must the saved head and tail. */
	if (E1000_READ_REG(hw, E1000_TDFTS) !=
	    E1000_READ_REG(hw, E1000_TDFHS))
		return (FALSE);
	/* And no packets may remain in the FIFO. */
	if (E1000_READ_REG(hw, E1000_TDFPC) != 0)
		return (FALSE);

	/* Disable TX unit */
	tctl = E1000_READ_REG(hw, E1000_TCTL);
	E1000_WRITE_REG(hw, E1000_TCTL, tctl & ~E1000_TCTL_EN);

	/* Reset FIFO pointers */
	E1000_WRITE_REG(hw, E1000_TDFT, adapter->tx_head_addr);
	E1000_WRITE_REG(hw, E1000_TDFH, adapter->tx_head_addr);
	E1000_WRITE_REG(hw, E1000_TDFTS, adapter->tx_head_addr);
	E1000_WRITE_REG(hw, E1000_TDFHS, adapter->tx_head_addr);

	/* Re-enable TX unit */
	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
	E1000_WRITE_FLUSH(hw);

	adapter->tx_fifo_head = 0;
	adapter->tx_fifo_reset_cnt++;

	return (TRUE);
}
/*
 * Program the receive control register from the interface flags.
 *
 * IFF_PROMISC sets UPE and MPE (plus SBP when em_debug_sbp is set);
 * otherwise IFF_ALLMULTI sets MPE and clears UPE.  When neither flag is
 * set the register is read but not written.
 */
static void
em_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u32		rctl;

	rctl = E1000_READ_REG(hw, E1000_RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		/* Turn this on if you want to see bad packets */
		if (em_debug_sbp)
			rctl |= E1000_RCTL_SBP;
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		rctl |= E1000_RCTL_MPE;
		rctl &= ~E1000_RCTL_UPE;
	} else {
		return;
	}

	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
}
/*
 * Clear the UPE, MPE and SBP bits in the receive control register.
 */
static void
em_disable_promisc(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	u32		rctl;

	rctl = E1000_READ_REG(hw, E1000_RCTL);
	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_SBP);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
}
/*********************************************************************
* Multicast Update
*
* This routine is called whenever multicast address list is updated.
*
**********************************************************************/
/*
 * Rebuild the hardware multicast filter from the interface's multicast
 * address list.
 *
 * A temporary array is allocated, filled with up to
 * MAX_NUM_MULTICAST_ADDRESSES link-layer addresses and handed to
 * e1000_update_mc_addr_list().  If the list reaches that limit, the MPE
 * bit is set in RCTL instead.  The function panics if the temporary
 * array cannot be allocated.
 */
static void
em_set_multi(struct adapter *adapter)
{
struct ifnet *ifp = adapter->ifp;
struct ifmultiaddr *ifma;
u32 reg_rctl = 0;
u8 *mta; /* Multicast array memory */
int mcnt = 0;
IOCTL_DEBUGOUT("em_set_multi: begin");
/*
 * 82542 rev 2 only: clear MWI (if it was enabled) and hold the
 * receiver in reset while the filter is updated.  The matching
 * undo sequence is at the end of the function.
 */
if (adapter->hw.mac.type == e1000_82542 &&
adapter->hw.revision_id == E1000_REVISION_2) {
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
e1000_pci_clear_mwi(&adapter->hw);
reg_rctl |= E1000_RCTL_RST;
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
msec_delay(5);
}
/* Allocate temporary memory to setup array */
mta = malloc(sizeof(u8) *
(ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
M_DEVBUF, M_NOWAIT | M_ZERO);
if (mta == NULL)
panic("em_set_multi memory failure\n");
/* Copy the AF_LINK entries into the array under the maddr read lock. */
if_maddr_rlock(ifp);
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
/* Stop once the array is full. */
if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
break;
bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
&mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
mcnt++;
}
if_maddr_runlock(ifp);
/*
 * A full array means the list may not fit; set MPE in RCTL rather
 * than programming individual addresses.
 */
if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
reg_rctl |= E1000_RCTL_MPE;
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
} else
e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
/* 82542 rev 2 only: take the receiver out of reset and restore MWI. */
if (adapter->hw.mac.type == e1000_82542 &&
adapter->hw.revision_id == E1000_REVISION_2) {
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
reg_rctl &= ~E1000_RCTL_RST;
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
msec_delay(5);
if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
e1000_pci_set_mwi(&adapter->hw);
}
free(mta, M_DEVBUF);
}
/*********************************************************************
* Timer routine
*
* This routine checks for link status and updates statistics.
*
**********************************************************************/
/*
 * Periodic housekeeping callout.  `arg` is the struct adapter pointer.
 * Asserts that the core lock is held and re-arms itself to run again
 * after `hz` ticks.
 */
static void
em_local_timer(void *arg)
{
struct adapter *adapter = arg;
struct ifnet *ifp = adapter->ifp;
EM_CORE_LOCK_ASSERT(adapter);
/* Queue the combined RX/TX task on the adapter's taskqueue. */
taskqueue_enqueue(adapter->tq,
&adapter->rxtx_task);
/* Refresh link state and the statistics counters. */
em_update_link_status(adapter);
em_update_stats_counters(adapter);
/* Reset LAA into RAR[0] on 82571 */
if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
/* Optionally dump hardware statistics while the interface is running. */
if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
em_print_hw_stats(adapter);
em_smartspeed(adapter);
/*
* Each second we check the watchdog to
* protect against hardware hangs.
*/
em_watchdog(adapter);
/* Schedule the next run. */
callout_reset(&adapter->timer, hz, em_local_timer, adapter);
}
/*
 * Determine the current link state and act on up/down transitions.
 *
 * The link indication is obtained per media type.  On a down-to-up
 * transition the speed and duplex are read, the interface baudrate is
 * set and the stack is notified; on an up-to-down transition the cached
 * speed/duplex are cleared, the watchdog is disabled and the stack is
 * notified.  Nothing happens when the state is unchanged.
 */
static void
em_update_link_status(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		link_check = 0;

	/* Get the cached link value or read phy for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (!hw->mac.get_link_status) {
			link_check = TRUE;
			break;
		}
		/* Do the work to read phy */
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		if (link_check) /* ESB2 fix */
			e1000_cfg_on_link_up(hw);
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
		    E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = hw->mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now check for a transition */
	if (link_check && (adapter->link_active == 0)) {
		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
		    &adapter->link_duplex);

		/* Check if we must disable SPEED_MODE bit on PCI-E */
		if ((adapter->link_speed != SPEED_1000) &&
		    ((hw->mac.type == e1000_82571) ||
		    (hw->mac.type == e1000_82572))) {
			int tarc0;

			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
			tarc0 &= ~SPEED_MODE_BIT;
			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
		}

		if (bootverbose) {
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		}

		adapter->link_active = 1;
		adapter->smartspeed = 0;
		ifp->if_baudrate = adapter->link_speed * 1000000;
		if_link_state_change(ifp, LINK_STATE_UP);
		return;
	}

	if (!link_check && (adapter->link_active == 1)) {
		adapter->link_speed = 0;
		ifp->if_baudrate = 0;
		adapter->link_duplex = 0;

		if (bootverbose)
			device_printf(dev, "Link is Down\n");

		adapter->link_active = 0;
		/* Link down, disable watchdog */
		adapter->watchdog_timer = FALSE;
		if_link_state_change(ifp, LINK_STATE_DOWN);
	}
}
/*********************************************************************
*
* This routine disables all traffic on the adapter by issuing a
* global reset on the MAC and deallocates TX/RX buffers.
*
* This routine should always be called with BOTH the CORE
* and TX locks.
**********************************************************************/
/*
 * Stop the adapter.  `arg` is the struct adapter pointer.  Both the
 * core lock and the TX lock are asserted on entry.
 */
static void
em_stop(void *arg)
{
struct adapter *adapter = arg;
struct ifnet *ifp = adapter->ifp;
EM_CORE_LOCK_ASSERT(adapter);
EM_TX_LOCK_ASSERT(adapter);
INIT_DEBUGOUT("em_stop: begin");
/* Mask device interrupts and cancel both driver callouts. */
em_disable_intr(adapter);
callout_stop(&adapter->timer);
callout_stop(&adapter->tx_fifo_timer);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
/* Reset the MAC. */
e1000_reset_hw(&adapter->hw);
/* On 82544 and later, also zero the WUC register. */
if (adapter->hw.mac.type >= e1000_82544)
E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
}
/*********************************************************************
*
* Determine hardware revision.
*
**********************************************************************/
static void
em_identify_hardware(struct adapter *adapter)
{
device_t dev = adapter->dev;
/* Make sure our PCI config space has the necessary stuff set */
adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
(adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
device_printf(dev, "Memory Access and/or Bus Master bits "
"were not set!\n");
adapter->hw.bus.pci_cmd_word |=
(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
pci_write_config(dev, PCIR_COMMAND,
adapter->hw.bus.pci_cmd_word, 2);
}
/* Save off the information about this board */
adapter->hw.vendor_id = pci_get_vendor(dev);
adapter->hw.device_id = pci_get_device(dev);
adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
adapter->hw.subsystem_vendor_id =
pci_read_config(dev, PCIR_SUBVEND_0, 2);
adapter->hw.subsystem_device_id =
pci_read_config(dev, PCIR_SUBDEV_0, 2);
/* Do Shared Code Init and Setup */
if (e1000_set_mac_type(&adapter->hw)) {
device_printf(dev, "Setup init failure\n");
return;
}
}
static int
em_allocate_pci_resources(struct adapter *adapter)
{
device_t dev = adapter->dev;
int val, rid, error = E1000_SUCCESS;
rid = PCIR_BAR(0);
adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
&rid, RF_ACTIVE);
if (adapter->memory == NULL) {
device_printf(dev, "Unable to allocate bus resource: memory\n");
return (ENXIO);
}
adapter->osdep.mem_bus_space_tag =
rman_get_bustag(adapter->memory);
adapter->osdep.mem_bus_space_handle =
rman_get_bushandle(adapter->memory);
adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
/* Only older adapters use IO mapping */
if ((adapter->hw.mac.type > e1000_82543) &&
(adapter->hw.mac.type < e1000_82571)) {
/* Figure our where our IO BAR is ? */
for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
val = pci_read_config(dev, rid, 4);
if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
adapter->io_rid = rid;
break;
}
rid += 4;
/* check for 64bit BAR */
if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
rid += 4;
}
if (rid >= PCIR_CIS) {
device_printf(dev, "Unable to locate IO BAR\n");
return (ENXIO);
}
adapter->ioport = bus_alloc_resource_any(dev,
SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
if (adapter->ioport == NULL) {
device_printf(dev, "Unable to allocate bus resource: "
"ioport\n");
return (ENXIO);
}
adapter->hw.io_base = 0;
adapter->osdep.io_bus_space_tag =
rman_get_bustag(adapter->ioport);
adapter->osdep.io_bus_space_handle =
rman_get_bushandle(adapter->ioport);
}
/*
** Init the resource arrays
** used by MSIX setup
*/
for (int i = 0; i < 3; i++) {
adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
adapter->tag[i] = NULL;
adapter->res[i] = NULL;
}
/*
* Setup MSI/X or MSI if PCI Express
*/
if (em_enable_msi)
adapter->msi = em_setup_msix(adapter);
adapter->hw.back = &adapter->osdep;
return (error);
}
/*********************************************************************
*
* Setup the Legacy or MSI Interrupt handler
*
**********************************************************************/
int
em_allocate_legacy(struct adapter *adapter)
{
device_t dev = adapter->dev;
int error;
/* Manually turn off all interrupts */
E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* Legacy RID is 0 */
if (adapter->msi == 0)
adapter->rid[0] = 0;
/* We allocate a single interrupt resource */
adapter->res[0] = bus_alloc_resource_any(dev,
SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
if (adapter->res[0] == NULL) {
device_printf(dev, "Unable to allocate bus resource: "
"interrupt\n");
return (ENXIO);
}
#ifdef EM_LEGACY_IRQ
/* We do Legacy setup */
if ((error = bus_setup_intr(dev, adapter->res[0],
#if __FreeBSD_version > 700000
INTR_TYPE_NET | INTR_MPSAFE, NULL, em_intr, adapter,
#else /* 6.X */
INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
#endif
&adapter->tag[0])) != 0) {
device_printf(dev, "Failed to register interrupt handler");
return (error);
}
#else /* FAST_IRQ */
/*
* Try allocating a fast interrupt and the associated deferred
* processing contexts.
*/
TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
taskqueue_thread_enqueue, &adapter->tq);
taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
device_get_nameunit(adapter->dev));
#if __FreeBSD_version < 700000
if ((error = bus_setup_intr(dev, adapter->res[0],
INTR_TYPE_NET | INTR_FAST, em_irq_fast, adapter,
#else
if ((error = bus_setup_intr(dev, adapter->res[0],
INTR_TYPE_NET, em_irq_fast, NULL, adapter,
#endif
&adapter->tag[0])) != 0) {
device_printf(dev, "Failed to register fast interrupt "
"handler: %d\n", error);
taskqueue_free(adapter->tq);
adapter->tq = NULL;
return (error);
}
#endif /* EM_LEGACY_IRQ */
return (0);
}
/*********************************************************************
*
* Setup the MSIX Interrupt handlers
* This is not really Multiqueue, rather
* it's just multiple interrupt vectors.
*
**********************************************************************/
int
em_allocate_msix(struct adapter *adapter)
{
device_t dev = adapter->dev;
int error;
/* Make sure all interrupts are disabled */
E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* First get the resources */
for (int i = 0; i < adapter->msi; i++) {
adapter->res[i] = bus_alloc_resource_any(dev,
SYS_RES_IRQ, &adapter->rid[i], RF_ACTIVE);
if (adapter->res[i] == NULL) {
device_printf(dev,
"Unable to allocate bus resource: "
"MSIX Interrupt\n");
return (ENXIO);
}
}
/*
* Now allocate deferred processing contexts.
*/
TASK_INIT(&adapter->rx_task, 0, em_handle_rx, adapter);
TASK_INIT(&adapter->tx_task, 0, em_handle_tx, adapter);
/*
* Handle compatibility for msi case for deferral due to
* trylock failure
*/
TASK_INIT(&adapter->rxtx_task, 0, em_handle_tx, adapter);
TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
taskqueue_thread_enqueue, &adapter->tq);
taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
device_get_nameunit(adapter->dev));
/*
* And setup the interrupt handlers
*/
/* First slot to RX */
if ((error = bus_setup_intr(dev, adapter->res[0],
#if __FreeBSD_version > 700000
INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, adapter,
#else /* 6.X */
INTR_TYPE_NET | INTR_MPSAFE, em_msix_rx, adapter,
#endif
&adapter->tag[0])) != 0) {
device_printf(dev, "Failed to register RX handler");
return (error);
}
/* Next TX */
if ((error = bus_setup_intr(dev, adapter->res[1],
#if __FreeBSD_version > 700000
INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, adapter,
#else /* 6.X */
INTR_TYPE_NET | INTR_MPSAFE, em_msix_tx, adapter,
#endif
&adapter->tag[1])) != 0) {
device_printf(dev, "Failed to register TX handler");
return (error);
}
/* And Link */
if ((error = bus_setup_intr(dev, adapter->res[2],
#if __FreeBSD_version > 700000
INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_link, adapter,
#else /* 6.X */
INTR_TYPE_NET | INTR_MPSAFE, em_msix_link, adapter,
#endif
&adapter->tag[2])) != 0) {
device_printf(dev, "Failed to register TX handler");
return (error);
}
return (0);
}
/*
 * Release every PCI resource held by the adapter.
 *
 * Interrupt handlers are torn down and their IRQ resources released
 * first, then the MSI/MSIX allocation (only if one was made), and
 * finally the MSIX table, register, flash and IO port mappings.
 * Resources that were never allocated are skipped.
 */
static void
em_free_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		nvec;

	/*
	 * Legacy interrupts use slot 0 only, so walk at least one slot.
	 * adapter->msi itself is left untouched so that it still tells
	 * whether MSI/MSIX messages were actually allocated.
	 */
	nvec = (adapter->msi != 0) ? adapter->msi : 1;

	/*
	 * First release all the interrupt resources:
	 *      notice that since these are just kept
	 *      in an array we can do the same logic
	 *      whether its MSIX or just legacy.
	 */
	for (int i = 0; i < nvec; i++) {
		if (adapter->tag[i] != NULL) {
			bus_teardown_intr(dev, adapter->res[i],
			    adapter->tag[i]);
			adapter->tag[i] = NULL;
		}
		if (adapter->res[i] != NULL) {
			bus_release_resource(dev, SYS_RES_IRQ,
			    adapter->rid[i], adapter->res[i]);
			/* Guard against a second release of this slot. */
			adapter->res[i] = NULL;
		}
	}

	/* Only hand MSI/MSIX messages back if some were allocated. */
	if (adapter->msi != 0)
		pci_release_msi(dev);

	if (adapter->msix != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix);

	if (adapter->memory != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->memory);

	if (adapter->flash != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    EM_FLASH, adapter->flash);

	if (adapter->ioport != NULL)
		bus_release_resource(dev, SYS_RES_IOPORT,
		    adapter->io_rid, adapter->ioport);
}
/*
* Setup MSI or MSI/X
*/
static int
em_setup_msix(struct adapter *adapter)
{
device_t dev = adapter->dev;
int val = 0;
if (adapter->hw.mac.type < e1000_82571)
return (0);
/* Setup MSI/X for Hartwell */
if (adapter->hw.mac.type == e1000_82574) {
/* Map the MSIX BAR */
int rid = PCIR_BAR(EM_MSIX_BAR);
adapter->msix = bus_alloc_resource_any(dev,
SYS_RES_MEMORY, &rid, RF_ACTIVE);
if (!adapter->msix) {
/* May not be enabled */
device_printf(adapter->dev,
"Unable to map MSIX table \n");
goto msi;
}
val = pci_msix_count(dev);
/*
** 82574 can be configured for 5 but
** we limit use to 3.
*/
if (val > 3) val = 3;
if ((val) && pci_alloc_msix(dev, &val) == 0) {
device_printf(adapter->dev,"Using MSIX interrupts\n");
return (val);
}
}
msi:
val = pci_msi_count(dev);
if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
adapter->msi = 1;
device_printf(adapter->dev,"Using MSI interrupt\n");
return (val);
}
return (0);
}
/*********************************************************************
 *
 *  Initialize the hardware to a configuration
 *  as specified by the adapter structure.
 *
 *  Resets the MAC, reclaims control from management firmware on the
 *  MAC types listed below, programs the flow-control parameters and
 *  then calls e1000_init_hw().
 *
 *  Returns 0 on success, EIO when e1000_init_hw() reports failure.
 *
 **********************************************************************/
static int
em_hardware_init(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	u16	rx_buffer_size;

	INIT_DEBUGOUT("em_hardware_init: begin");

	/* Issue a global reset */
	e1000_reset_hw(&adapter->hw);

	/* Get control from any management/hw control */
	if (((adapter->hw.mac.type == e1000_82573) ||
	    (adapter->hw.mac.type == e1000_82583) ||
	    (adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) &&
	    e1000_check_mng_mode(&adapter->hw))
		em_get_hw_control(adapter);

	/* When hardware is reset, fifo_head is also reset */
	adapter->tx_fifo_head = 0;

	/* Set up smart power down as default off on newer adapters. */
	if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 ||
	    adapter->hw.mac.type == e1000_82572)) {
		u16 phy_tmp = 0;

		/* Speed up time to link by disabling smart power down. */
		e1000_read_phy_reg(&adapter->hw,
		    IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
		phy_tmp &= ~IGP02E1000_PM_SPD;
		e1000_write_phy_reg(&adapter->hw,
		    IGP02E1000_PHY_POWER_MGMT, phy_tmp);
	}

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water
	 *   mark.  This allows the receiver to restart by sending XON when
	 *   it has drained a bit.  Here we use an arbitrary value of 1500
	 *   which will restart after one full frame is pulled from the
	 *   buffer.  There could be several smaller frames in the buffer
	 *   and if so they will not trigger the XON until their total
	 *   number reduces the buffer by 1500.
	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
	 *
	 * NOTE(review): rx_buffer_size is a u16, so a PBA value of 64 or
	 * more would wrap after the shift by 10 -- confirm no supported
	 * part reports that much.
	 */
	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
	    0xffff) << 10 );

	adapter->hw.fc.high_water = rx_buffer_size -
	    roundup2(adapter->max_frame_size, 1024);
	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;

	if (adapter->hw.mac.type == e1000_80003es2lan)
		adapter->hw.fc.pause_time = 0xFFFF;
	else
		adapter->hw.fc.pause_time = EM_FC_PAUSE_TIME;
	adapter->hw.fc.send_xon = TRUE;

	/*
	 * Set Flow control, use the tunable location if sane.
	 * Both bounds must hold; anything outside 0..3 falls back to
	 * e1000_fc_none.
	 */
	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
		adapter->hw.fc.requested_mode = em_fc_setting;
	else
		adapter->hw.fc.requested_mode = e1000_fc_none;

	if (e1000_init_hw(&adapter->hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed\n");
		return (EIO);
	}

	e1000_check_for_link(&adapter->hw);

	return (0);
}
/*********************************************************************
 *
 *  Setup networking device structure and register an interface.
 *
 *  Allocates the ifnet, fills in its entry points and queue limits,
 *  attaches it as an Ethernet interface, advertises the offload
 *  capabilities that match the MAC type and registers the supported
 *  media types.  Panics if the ifnet cannot be allocated.
 *
 **********************************************************************/
static void
em_setup_interface(device_t dev, struct adapter *adapter)
{
	/* Copper media words, in the order they are registered. */
	static const int copper_media[] = {
		IFM_10_T,
		IFM_10_T | IFM_FDX,
		IFM_100_TX,
		IFM_100_TX | IFM_FDX
	};
	struct ifnet	*ifp;
	int		txq_len;
	u_int		n;

	INIT_DEBUGOUT("em_setup_interface: begin");

	ifp = if_alloc(IFT_ETHER);
	adapter->ifp = ifp;
	if (ifp == NULL)
		panic("%s: can not if_alloc()", device_get_nameunit(dev));

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_softc = adapter;
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = em_init;
	ifp->if_ioctl = em_ioctl;
	ifp->if_start = em_start;

	/* The send queue holds one entry less than the TX ring. */
	txq_len = adapter->num_tx_desc - 1;
	IFQ_SET_MAXLEN(&ifp->if_snd, txq_len);
	ifp->if_snd.ifq_drv_maxlen = txq_len;
	IFQ_SET_READY(&ifp->if_snd);

	ether_ifattach(ifp, adapter->hw.mac.addr);

	/* Start from an empty capability set and build it up below. */
	ifp->if_capenable = 0;
	ifp->if_capabilities = 0;

#if __FreeBSD_version >= 800000
	/* Multiqueue tx functions */
	ifp->if_transmit = em_mq_start;
	ifp->if_qflush = em_qflush;
	adapter->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &adapter->tx_mtx);
#endif

	/* Checksum offload is advertised and enabled from the 82543 on. */
	if (adapter->hw.mac.type >= e1000_82543) {
#if __FreeBSD_version < 700000
		const int csum_caps = IFCAP_HWCSUM;
#else
		const int csum_caps = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
#endif
		ifp->if_capabilities |= csum_caps;
		ifp->if_capenable |= csum_caps;
	}

#if __FreeBSD_version >= 700000
	/* Identify TSO capable adapters */
	if ((adapter->hw.mac.type != e1000_82547) &&
	    (adapter->hw.mac.type > e1000_82544))
		ifp->if_capabilities |= IFCAP_TSO4;
	/*
	 * TSO is only switched on by default for PCI-E parts;
	 * ifconfig can override this.
	 */
	if (adapter->hw.mac.type >= e1000_82571)
		ifp->if_capenable |= IFCAP_TSO4;
#endif

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;

#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif

	/*
	 * Register the media types this adapter supports together with
	 * the callbacks that change and report the active media.
	 */
	ifmedia_init(&adapter->media, IFM_IMASK,
	    em_media_change, em_media_status);

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		/* 1000BASE-SX unless this is an 82545, which uses LX. */
		int fiber_media;

		if (adapter->hw.mac.type == e1000_82545)
			fiber_media = IFM_1000_LX;
		else
			fiber_media = IFM_1000_SX;

		ifmedia_add(&adapter->media,
		    IFM_ETHER | fiber_media | IFM_FDX, 0, NULL);
		ifmedia_add(&adapter->media,
		    IFM_ETHER | fiber_media, 0, NULL);
	} else {
		for (n = 0;
		    n < sizeof(copper_media) / sizeof(copper_media[0]); n++)
			ifmedia_add(&adapter->media,
			    IFM_ETHER | copper_media[n], 0, NULL);

		/* Gigabit copper is skipped when the PHY type is IFE. */
		if (adapter->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&adapter->media,
			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
			    IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}

	/* Autoselect is always offered and is the initial setting. */
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}
/*
 * Helper for em_smartspeed(): when autonegotiation is in use and the
 * PHY accepts the new advertisement, set the enable and restart bits
 * in PHY_CONTROL so negotiation starts over.
 */
static void
em_smartspeed_restart_aneg(struct adapter *adapter)
{
	u16 phy_ctrl;

	if (!adapter->hw.mac.autoneg)
		return;
	if (e1000_phy_setup_autoneg(&adapter->hw))
		return;
	if (e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_ctrl))
		return;

	phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
	e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_ctrl);
}

/*********************************************************************
 *
 *  Workaround for SmartSpeed on 82541 and 82547 controllers
 *
 *  Does nothing while link is up, on non-IGP PHYs, with
 *  autonegotiation disabled, or when 1000 full duplex is not being
 *  advertised.  adapter->smartspeed counts the calls made since the
 *  workaround was armed and wraps back to 0 at EM_SMARTSPEED_MAX.
 *
 **********************************************************************/
static void
em_smartspeed(struct adapter *adapter)
{
	u16 phy_reg;

	if (adapter->link_active)
		return;
	if (adapter->hw.phy.type != e1000_phy_igp)
		return;
	if (adapter->hw.mac.autoneg == 0)
		return;
	if ((adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
		return;

	if (adapter->smartspeed == 0) {
		/*
		 * A Master/Slave config fault seen on two consecutive
		 * reads is taken to mean a back-to-back connection.
		 */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_reg);
		if (!(phy_reg & SR_1000T_MS_CONFIG_FAULT))
			return;
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_reg);
		if (!(phy_reg & SR_1000T_MS_CONFIG_FAULT))
			return;

		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_reg);
		if (!(phy_reg & CR_1000T_MS_ENABLE))
			return;

		/* Drop the manual Master/Slave setting and renegotiate. */
		phy_reg &= ~CR_1000T_MS_ENABLE;
		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_reg);
		adapter->smartspeed++;
		em_smartspeed_restart_aneg(adapter);
		return;
	}

	if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
		/* If still no link, perhaps using 2/3 pair cable */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_reg);
		phy_reg |= CR_1000T_MS_ENABLE;
		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_reg);
		em_smartspeed_restart_aneg(adapter);
	}

	/* Restart process after EM_SMARTSPEED_MAX iterations */
	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
		adapter->smartspeed = 0;
}
/*
 * Manage DMA'able memory.
 */

/*
 * bus_dmamap_load() callback: on success, store the bus address of
 * the first segment in the bus_addr_t that arg points to.  On error
 * the destination is left untouched.
 */
static void
em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error == 0)
		*(bus_addr_t *) arg = segs[0].ds_addr;
}
/*
 * Allocate a single-segment, EM_DBA_ALIGN-aligned DMA area of `size'
 * bytes and load it.
 *
 * On success dma->dma_tag, dma->dma_map, dma->dma_vaddr and
 * dma->dma_paddr describe the area and 0 is returned.  On failure
 * everything acquired so far is released, dma->dma_tag and
 * dma->dma_map are set to NULL and a non-zero errno value is returned.
 *
 * mapflags is OR-ed with BUS_DMA_NOWAIT for the bus_dmamap_load() call.
 */
static int
em_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct em_dma_alloc *dma, int mapflags)
{
	int error;

#if __FreeBSD_version >= 700000
	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
#else
	error = bus_dma_tag_create(NULL, /* parent */
#endif
				EM_DBA_ALIGN, 0,	/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				size,			/* maxsize */
				1,			/* nsegments */
				size,			/* maxsegsize */
				0,			/* flags */
				NULL,			/* lockfunc */
				NULL,			/* lockarg */
				&dma->dma_tag);
	if (error) {
		device_printf(adapter->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, error);
		goto fail_0;
	}

	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
	if (error) {
		device_printf(adapter->dev,
		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
		    __func__, (uintmax_t)size, error);
		/* Nothing was allocated, so only the tag needs undoing. */
		goto fail_1;
	}

	dma->dma_paddr = 0;
	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
	if (error || dma->dma_paddr == 0) {
		/*
		 * The load can return 0 while the callback saw an error
		 * and left dma_paddr at 0.  Make sure the caller still
		 * gets a failure code, since the area is torn down below.
		 */
		if (error == 0)
			error = ENOMEM;
		device_printf(adapter->dev,
		    "%s: bus_dmamap_load failed: %d\n",
		    __func__, error);
		goto fail_2;
	}

	return (0);

fail_2:
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_map = NULL;
	dma->dma_tag = NULL;

	return (error);
}
/*
 * Release a DMA area set up by em_dma_malloc().  A descriptor whose
 * tag is already NULL is ignored, so the call is safe on an area that
 * was never allocated or has been freed before.
 */
static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
	if (dma->dma_tag != NULL) {
		if (dma->dma_map != NULL) {
			/* Finish outstanding DMA, then unmap and free. */
			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
			    dma->dma_map);
			dma->dma_map = NULL;
		}
		bus_dma_tag_destroy(dma->dma_tag);
		dma->dma_tag = NULL;
	}
}
/*********************************************************************
*
* Allocate memory for tx_buffer structures. The tx_buffer stores all
* the information needed to transmit a packet on the wire.
*
**********************************************************************/
static int
em_allocate_transmit_structures(struct adapter *adapter)
{
device_t dev = adapter->dev;
struct em_buffer *tx_buffer;
int error;
/*
* Create DMA tags for tx descriptors
*/
#if __FreeBSD_version >= 700000
if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
#else
if ((error = bus_dma_tag_create(NULL, /* parent */
#endif
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
EM_TSO_SIZE, /* maxsize */
EM_MAX_SCATTER, /* nsegments */
EM_TSO_SEG_SIZE, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockarg */
&adapter->txtag)) != 0) {
device_printf(dev, "Unable to allocate TX DMA tag\n");
goto fail;
}
adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
if (adapter->tx_buffer_area == NULL) {
device_printf(dev, "Unable to allocate tx_buffer memory\n");
error = ENOMEM;
goto fail;
}
/* Create the descriptor buffer dma maps */
for (int i = 0; i < adapter->num_tx_desc; i++) {
tx_buffer = &adapter->tx_buffer_area[i];
error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
if (error != 0) {
device_printf(dev, "Unable to create TX DMA map\n");
goto fail;
}
tx_buffer->next_eop = -1;
}
return (0);
fail:
em_free_transmit_structures(adapter);
return (error);
}
/*********************************************************************
 *
 *  (Re)Initialize transmit structures.
 *
 *  Zeroes the descriptor ring, releases every mbuf still attached to
 *  a tx_buffer and resets the ring indices so the whole ring is
 *  available again.
 *
 **********************************************************************/
static void
em_setup_transmit_structures(struct adapter *adapter)
{
	struct em_buffer	*txbuf;
	int			n;

	/* Clear the old ring contents */
	bzero(adapter->tx_desc_base,
	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);

	/* Free any existing TX buffers */
	txbuf = adapter->tx_buffer_area;
	for (n = 0; n < adapter->num_tx_desc; n++, txbuf++) {
		bus_dmamap_sync(adapter->txtag, txbuf->map,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(adapter->txtag, txbuf->map);
		m_freem(txbuf->m_head);
		txbuf->m_head = NULL;
		txbuf->next_eop = -1;
	}

	/* Reset state */
	adapter->next_avail_tx_desc = 0;
	adapter->next_tx_to_clean = 0;
	adapter->num_tx_desc_avail = adapter->num_tx_desc;

	/* Hand the cleared ring back to the device. */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
/*********************************************************************
*
* Enable transmit unit.
*
**********************************************************************/
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
u32 tctl, tarc, tipg = 0;
u64 bus_addr;
INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
/* Setup the Base and Length of the Tx Descriptor Ring */
bus_addr = adapter->txdma.dma_paddr;
E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0),
adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0),
(u32)(bus_addr >> 32));
E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0),
(u32)bus_addr);
/* Setup the HW Tx Head and Tail descriptor pointers */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0);
E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0);
HW_DEBUGOUT2("Base = %x, Length = %x\n",
E1000_READ_REG(&adapter->hw, E1000_TDBAL(0)),
E1000_READ_REG(&adapter->hw, E1000_TDLEN(0)));
/* Set the default values for the Tx Inter Packet Gap timer */
switch (adapter->hw.mac.type) {
case e1000_82542:
tipg = DEFAULT_82542_TIPG_IPGT;
tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
break;
case e1000_80003es2lan:
tipg = DEFAULT_82543_TIPG_IPGR1;
tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
E1000_TIPG_IPGR2_SHIFT;
break;
default:
if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
(adapter->hw.phy.media_type ==
e1000_media_type_internal_serdes))
tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
else
tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
}
E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
if(adapter->hw.mac.type >= e1000_82540)
E1000_WRITE_REG(&adapter->hw, E1000_TADV,
adapter->tx_abs_int_delay.value);
if ((adapter->hw.mac.type == e1000_82571) ||
(adapter->hw.mac.type == e1000_82572)) {
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
tarc |= SPEED_MODE_BIT;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
} else if (adapter->hw.mac.type == e1000_80003es2lan) {
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
}
/* Program the Transmit Control Register */
tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
tctl &= ~E1000_TCTL_CT;
tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
if (adapter->hw.mac.type >= e1000_82571)
tctl |= E1000_TCTL_MULR;
/* This write will effectively turn on the transmit unit. */
E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
/* Setup Transmit Descriptor Base Settings */
adapter->txd_cmd = E1000_TXD_CMD_IFCS;
if (adapter->tx_int_delay.value > 0)
adapter->txd_cmd |= E1000_TXD_CMD_IDE;
}
/*********************************************************************
 *
 *  Free all transmit related data structures.
 *
 *  Releases every pending mbuf and DMA map, the tx_buffer array, the
 *  TX DMA tag and (on FreeBSD 8 and later) the buf_ring.  Each
 *  pointer is cleared once its resource is gone, so the routine may
 *  be called again on a partially or fully torn-down adapter.
 *
 **********************************************************************/
static void
em_free_transmit_structures(struct adapter *adapter)
{
	struct em_buffer *tx_buffer;

	INIT_DEBUGOUT("free_transmit_structures: begin");

	if (adapter->tx_buffer_area != NULL) {
		for (int i = 0; i < adapter->num_tx_desc; i++) {
			tx_buffer = &adapter->tx_buffer_area[i];
			if (tx_buffer->m_head != NULL) {
				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			} else if (tx_buffer->map != NULL)
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(adapter->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		}
		free(adapter->tx_buffer_area, M_DEVBUF);
		adapter->tx_buffer_area = NULL;
	}
	if (adapter->txtag != NULL) {
		bus_dma_tag_destroy(adapter->txtag);
		adapter->txtag = NULL;
	}
#if __FreeBSD_version >= 800000
	if (adapter->br != NULL) {
		buf_ring_free(adapter->br, M_DEVBUF);
		/* Clear the pointer so a repeated call cannot double free. */
		adapter->br = NULL;
	}
#endif
}
/*********************************************************************
 *
 *  The offload context needs to be set when we transfer the first
 *  packet of a particular protocol (TCP/UDP). This routine has been
 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
 *
 *  Added back the old method of keeping the current context type
 *  and not setting if unnecessary, as this is reported to be a
 *  big performance win.  -jfv
 *
 *  adapter   - softc; one TX descriptor is consumed when a new
 *              context descriptor is written.
 *  mp        - frame to transmit; the headers are read from the
 *              first mbuf only.
 *  txd_upper - out: option bits for the data descriptors.
 *  txd_lower - out: command bits for the data descriptors.
 *
 *  When no context descriptor ends up being needed (unsupported
 *  protocol, context already loaded, nothing requested) the routine
 *  returns without consuming a descriptor.
 **********************************************************************/
static void
em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
    u32 *txd_upper, u32 *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	struct em_buffer *tx_buffer;
	struct ether_vlan_header *eh;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6;
	int curr_txd, ehdrlen;
	u32 cmd, hdr_len, ip_hlen;
	u16 etype;
	u8 ipproto;

	cmd = hdr_len = ipproto = 0;
	curr_txd = adapter->next_avail_tx_desc;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/*
	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
	 * TODO: Support SCTP too when it hits the tree.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;

		/* Setup of IP header checksum. */
		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
			/*
			 * Start offset for header checksum calculation.
			 * End offset for header checksum calculation.
			 * Offset of place to put the checksum.
			 */
			TXD = (struct e1000_context_desc *)
			    &adapter->tx_desc_base[curr_txd];
			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
			TXD->lower_setup.ip_fields.ipcse =
			    htole16(ehdrlen + ip_hlen);
			TXD->lower_setup.ip_fields.ipcso =
			    ehdrlen + offsetof(struct ip, ip_sum);
			cmd |= E1000_TXD_CMD_IP;
			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		}

		if (mp->m_len < ehdrlen + ip_hlen)
			return;	/* failure */

		hdr_len = ehdrlen + ip_hlen;
		ipproto = ip->ip_p;
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */

		if (mp->m_len < ehdrlen + ip_hlen)
			return;	/* failure */

		/* IPv6 doesn't have a header checksum. */
		hdr_len = ehdrlen + ip_hlen;
		ipproto = ip6->ip6_nxt;
		break;
	default:
		*txd_upper = 0;
		*txd_lower = 0;
		return;
	}

	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
			/* no need for context if already set */
			if (adapter->last_hw_offload == CSUM_TCP)
				return;
			adapter->last_hw_offload = CSUM_TCP;
			/*
			 * Start offset for payload checksum calculation.
			 * End offset for payload checksum calculation.
			 * Offset of place to put the checksum.
			 */
			TXD = (struct e1000_context_desc *)
			    &adapter->tx_desc_base[curr_txd];
			TXD->upper_setup.tcp_fields.tucss = hdr_len;
			TXD->upper_setup.tcp_fields.tucse = htole16(0);
			TXD->upper_setup.tcp_fields.tucso =
			    hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		}
		break;
	case IPPROTO_UDP:
	{
		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
			/* no need for context if already set */
			if (adapter->last_hw_offload == CSUM_UDP)
				return;
			adapter->last_hw_offload = CSUM_UDP;
			/*
			 * Start offset for header checksum calculation.
			 * End offset for header checksum calculation.
			 * Offset of place to put the checksum.
			 */
			TXD = (struct e1000_context_desc *)
			    &adapter->tx_desc_base[curr_txd];
			TXD->upper_setup.tcp_fields.tucss = hdr_len;
			TXD->upper_setup.tcp_fields.tucse = htole16(0);
			TXD->upper_setup.tcp_fields.tucso =
			    hdr_len + offsetof(struct udphdr, uh_sum);
		}
		/* Fall Thru */
	}
	default:
		break;
	}

	/*
	 * No branch above selected a context descriptor (for example an
	 * IPv6 frame whose next header is neither TCP nor UDP, or a
	 * frame whose offload flags do not match its protocol).  There
	 * is nothing to write and no descriptor to consume.
	 */
	if (TXD == NULL)
		return;

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);

	/* The context descriptor carries no mbuf of its own. */
	tx_buffer = &adapter->tx_buffer_area[curr_txd];
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++curr_txd == adapter->num_tx_desc)
		curr_txd = 0;

	adapter->num_tx_desc_avail--;
	adapter->next_avail_tx_desc = curr_txd;
}
#if __FreeBSD_version >= 700000
/**********************************************************************
*
* Setup work for hardware segmentation offload (TSO)
*
* Parses the Ethernet (optionally VLAN-tagged), IP and TCP headers at
* the front of mp and, for IPv4 TCP frames, writes a TSO context
* descriptor at adapter->next_avail_tx_desc.
*
* Returns TRUE after the context descriptor has been written: the
* data-descriptor bits are stored in *txd_lower and *txd_upper,
* next_avail_tx_desc is advanced, num_tx_desc_avail is decremented
* and adapter->tx_tso is set.
*
* Returns FALSE when the headers are not all in the first mbuf, the
* frame is not IPv4 TCP, or the frame is IPv6 (not supported yet).
* No descriptor is consumed on those paths.
*
**********************************************************************/
static bool
em_tso_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper,
u32 *txd_lower)
{
struct e1000_context_desc *TXD;
struct em_buffer *tx_buffer;
struct ether_vlan_header *eh;
struct ip *ip;
struct ip6_hdr *ip6;
struct tcphdr *th;
int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
u16 etype;
/*
* This function could/should be extended to support IP/IPv6
* fragmentation as well. But as they say, one step at a time.
*/
/*
* Determine where frame payload starts.
* Jump over vlan headers if already present,
* helpful for QinQ too.
*/
eh = mtod(mp, struct ether_vlan_header *);
if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
etype = ntohs(eh->evl_proto);
ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
} else {
etype = ntohs(eh->evl_encap_proto);
ehdrlen = ETHER_HDR_LEN;
}
/* Ensure we have at least the IP+TCP header in the first mbuf. */
if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
return FALSE; /* -1 */
/*
* We only support TCP for IPv4 and IPv6 (notyet) for the moment.
* TODO: Support SCTP too when it hits the tree.
*/
switch (etype) {
case ETHERTYPE_IP:
isip6 = 0;
ip = (struct ip *)(mp->m_data + ehdrlen);
if (ip->ip_p != IPPROTO_TCP)
return FALSE; /* 0 */
/*
* The IP length and checksum fields are cleared in the packet
* itself.  Note this happens before the second length check
* below, so a FALSE return from that check leaves the header
* already modified.
*/
ip->ip_len = 0;
ip->ip_sum = 0;
ip_hlen = ip->ip_hl << 2;
/* Re-check now that the real IP header length (with options) is known. */
if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
return FALSE; /* -1 */
th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
#if 1
/* Seed the TCP checksum with the pseudo-header sum of src, dst and protocol. */
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(IPPROTO_TCP));
#else
th->th_sum = mp->m_pkthdr.csum_data;
#endif
break;
case ETHERTYPE_IPV6:
isip6 = 1;
return FALSE; /* Not supported yet. */
/*
* Everything from here to the break is unreachable because of the
* return above; it is a placeholder for future IPv6 support.
*/
ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
if (ip6->ip6_nxt != IPPROTO_TCP)
return FALSE; /* 0 */
ip6->ip6_plen = 0;
ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
return FALSE; /* -1 */
th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
#if 0
th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst,
htons(IPPROTO_TCP)); /* XXX: function notyet. */
#else
th->th_sum = mp->m_pkthdr.csum_data;
#endif
break;
default:
return FALSE;
}
/* Total header length: link header + IP header + TCP header incl. options. */
hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
*txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */
E1000_TXD_DTYP_D | /* Data descr type */
E1000_TXD_CMD_TSE); /* Do TSE on this packet */
/* IP and/or TCP header checksum calculation and insertion. */
*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
E1000_TXD_POPTS_TXSM) << 8;
/* The context descriptor is written into the next free ring slot. */
curr_txd = adapter->next_avail_tx_desc;
tx_buffer = &adapter->tx_buffer_area[curr_txd];
TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
/* IPv6 doesn't have a header checksum. */
if (!isip6) {
/*
* Start offset for header checksum calculation.
* End offset for header checksum calculation.
* Offset of place to put the checksum.
*/
TXD->lower_setup.ip_fields.ipcss = ehdrlen;
TXD->lower_setup.ip_fields.ipcse =
htole16(ehdrlen + ip_hlen - 1);
TXD->lower_setup.ip_fields.ipcso =
ehdrlen + offsetof(struct ip, ip_sum);
}
/*
* Start offset for payload checksum calculation.
* End offset for payload checksum calculation.
* Offset of place to put the checksum.
*/
TXD->upper_setup.tcp_fields.tucss =
ehdrlen + ip_hlen;
TXD->upper_setup.tcp_fields.tucse = 0;
TXD->upper_setup.tcp_fields.tucso =
ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
/*
* Payload size per packet w/o any headers.
* Length of all headers up to payload.
*/
TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
TXD->cmd_and_length = htole32(adapter->txd_cmd |
E1000_TXD_CMD_DEXT | /* Extended descr */
E1000_TXD_CMD_TSE | /* TSE context */
(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
E1000_TXD_CMD_TCP | /* Do TCP checksum */
(mp->m_pkthdr.len - (hdr_len))); /* Total len */
/* The context descriptor owns no mbuf and is not an end-of-packet marker. */
tx_buffer->m_head = NULL;
tx_buffer->next_eop = -1;
/* Advance the ring index, wrapping at the end of the ring. */
if (++curr_txd == adapter->num_tx_desc)
curr_txd = 0;
adapter->num_tx_desc_avail--;
adapter->next_avail_tx_desc = curr_txd;
adapter->tx_tso = TRUE;
return TRUE;
}
#endif /* __FreeBSD_version >= 700000 */
/**********************************************************************
*
* Examine each tx_buffer in the used queue. If the hardware is done
* processing the packet then free associated resources. The
* tx_buffer is put back on the free queue.
*
* Must be called with the TX lock held (asserted below).  Walks the
* ring from adapter->next_tx_to_clean one packet at a time, using the
* next_eop index stored in the first buffer of each packet to find the
* descriptor whose DD status bit signals completion.  On return
* next_tx_to_clean and num_tx_desc_avail are updated, IFF_DRV_OACTIVE
* may have been cleared, and the watchdog timer is cleared or re-armed.
*
**********************************************************************/
static void
em_txeof(struct adapter *adapter)
{
int first, last, done, num_avail;
u32 cleaned = 0;
struct em_buffer *tx_buffer;
struct e1000_tx_desc *tx_desc, *eop_desc;
struct ifnet *ifp = adapter->ifp;
EM_TX_LOCK_ASSERT(adapter);
/* Every descriptor is already free: nothing to clean. */
if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
return;
num_avail = adapter->num_tx_desc_avail;
first = adapter->next_tx_to_clean;
tx_desc = &adapter->tx_desc_base[first];
tx_buffer = &adapter->tx_buffer_area[first];
/*
* NOTE(review): this assumes the buffer at next_tx_to_clean carries a
* valid next_eop (not -1) whenever descriptors are outstanding --
* confirm against the transmit path.
*/
last = tx_buffer->next_eop;
eop_desc = &adapter->tx_desc_base[last];
/*
* What this does is get the index of the
* first descriptor AFTER the EOP of the
* first packet, that way we can do the
* simple comparison on the inner while loop.
*/
if (++last == adapter->num_tx_desc)
last = 0;
done = last;
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/* Outer loop: one iteration per completed packet. */
while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
/* We clean the range of the packet */
while (first != done) {
tx_desc->upper.data = 0;
tx_desc->lower.data = 0;
tx_desc->buffer_addr = 0;
++num_avail; ++cleaned;
/* Only buffers that hold an mbuf count as a sent packet. */
if (tx_buffer->m_head) {
ifp->if_opackets++;
bus_dmamap_sync(adapter->txtag,
tx_buffer->map,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(adapter->txtag,
tx_buffer->map);
m_freem(tx_buffer->m_head);
tx_buffer->m_head = NULL;
}
tx_buffer->next_eop = -1;
if (++first == adapter->num_tx_desc)
first = 0;
tx_buffer = &adapter->tx_buffer_area[first];
tx_desc = &adapter->tx_desc_base[first];
}
/* See if we can continue to the next packet */
last = tx_buffer->next_eop;
if (last != -1) {
eop_desc = &adapter->tx_desc_base[last];
/* Get new done point */
if (++last == adapter->num_tx_desc) last = 0;
done = last;
} else
break;
}
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
adapter->next_tx_to_clean = first;
/*
* If we have enough room, clear IFF_DRV_OACTIVE to
* tell the stack that it is OK to send packets.
* If there are no pending descriptors, clear the timeout.
*/
if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
if (num_avail == adapter->num_tx_desc) {
adapter->watchdog_timer = 0;
adapter->num_tx_desc_avail = num_avail;
return;
}
}
/* If any descriptors cleaned, reset the watchdog */
if (cleaned)
adapter->watchdog_timer = EM_TX_TIMEOUT;
adapter->num_tx_desc_avail = num_avail;
return;
}
/*********************************************************************
 *
 *  When link is lost there is sometimes work still in the TX ring,
 *  which would end in a watchdog timeout.  Rather than allow that,
 *  attempt a cleanup here and, if the ring is still not clean,
 *  reinitialize.  Note that this has been seen mostly with fiber
 *  adapters.
 *
 **********************************************************************/
static void
em_tx_purge(struct adapter *adapter)
{
	/* Only relevant with link down and the watchdog armed. */
	if (adapter->link_active || !adapter->watchdog_timer)
		return;

	EM_TX_LOCK(adapter);
	em_txeof(adapter);
	EM_TX_UNLOCK(adapter);

	/* Still not clean? */
	if (adapter->watchdog_timer) {
		adapter->watchdog_timer = 0;
		em_init_locked(adapter);
	}
}
/*********************************************************************
 *
 *  Get a buffer from system mbuf buffer pool.
 *
 *  Attaches a fresh mbuf cluster to RX slot i.  The cluster is loaded
 *  through the spare DMA map first, so the slot's current buffer is
 *  only given up once the replacement is mapped.  Returns 0 on
 *  success, ENOBUFS when no cluster is available, or the error from
 *  bus_dmamap_load_mbuf_sg().
 *
 **********************************************************************/
static int
em_get_buf(struct adapter *adapter, int i)
{
	struct em_buffer	*rxbuf;
	struct mbuf		*mb;
	bus_dma_segment_t	seg[1];
	bus_dmamap_t		prev_map;
	int			rc, nseg;

	mb = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (mb == NULL) {
		adapter->mbuf_cluster_failed++;
		return (ENOBUFS);
	}
	mb->m_pkthdr.len = MCLBYTES;
	mb->m_len = MCLBYTES;

	/* Offset the data by ETHER_ALIGN when the frame still fits. */
	if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
		m_adj(mb, ETHER_ALIGN);

	/*
	 * Using memory from the mbuf cluster pool, invoke the
	 * bus_dma machinery to arrange the memory mapping.
	 */
	rc = bus_dmamap_load_mbuf_sg(adapter->rxtag,
	    adapter->rx_sparemap, mb, seg, &nseg, BUS_DMA_NOWAIT);
	if (rc != 0) {
		m_free(mb);
		return (rc);
	}

	/* If nsegs is wrong then the stack is corrupt. */
	KASSERT(nseg == 1, ("Too many segments returned!"));

	rxbuf = &adapter->rx_buffer_area[i];
	if (rxbuf->m_head != NULL)
		bus_dmamap_unload(adapter->rxtag, rxbuf->map);

	/* Swap maps: the loaded spare goes to the slot, the old one becomes the spare. */
	prev_map = rxbuf->map;
	rxbuf->map = adapter->rx_sparemap;
	adapter->rx_sparemap = prev_map;

	bus_dmamap_sync(adapter->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD);
	rxbuf->m_head = mb;

	adapter->rx_desc_base[i].buffer_addr = htole64(seg[0].ds_addr);
	return (0);
}
/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
 *
 *  Also creates the RX DMA tag, the spare map used by em_get_buf()
 *  and one DMA map per rx_buffer.  Returns 0 on success, ENOMEM when
 *  the array cannot be allocated, or the bus_dma error after calling
 *  em_free_receive_structures().
 *
 **********************************************************************/
static int
em_allocate_receive_structures(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		n, error;

	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->rx_buffer_area == NULL) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		return (ENOMEM);
	}

#if __FreeBSD_version >= 700000
	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
#else
	error = bus_dma_tag_create(NULL,		 /* parent */
#endif
				1, 0,			/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				MCLBYTES,		/* maxsize */
				1,			/* nsegments */
				MCLBYTES,		/* maxsegsize */
				0,			/* flags */
				NULL,			/* lockfunc */
				NULL,			/* lockarg */
				&adapter->rxtag);
	if (error) {
		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
		    __func__, error);
		goto fail;
	}

	/* Create the spare map (used by getbuf) */
	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
	    &adapter->rx_sparemap);
	if (error) {
		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
		    __func__, error);
		goto fail;
	}

	/* One DMA map per receive buffer. */
	for (n = 0; n < adapter->num_rx_desc; n++) {
		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
		    &adapter->rx_buffer_area[n].map);
		if (error) {
			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
			    __func__, error);
			goto fail;
		}
	}

	return (0);

fail:
	em_free_receive_structures(adapter);
	return (error);
}
/*********************************************************************
*
* (Re)initialize receive structures.
*
**********************************************************************/
static int
em_setup_receive_structures(struct adapter *adapter)
{
struct em_buffer *rx_buffer;
int i, error;
/* Reset descriptor ring */
bzero(adapter->rx_desc_base,
(sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
/* Free current RX buffers. */
rx_buffer = adapter->rx_buffer_area;
for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
if (rx_buffer->m_head != NULL) {
bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
m_freem(rx_buffer->m_head);
rx_buffer->m_head = NULL;
}
}
/* Allocate new ones. */
for (i = 0; i < adapter->num_rx_desc; i++) {
error = em_get_buf(adapter, i);
if (error)
return (error);
}
/* Setup our descriptor pointers */
adapter->next_rx_desc_to_check = 0;
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
return (0);
}
/*********************************************************************
 *
 *  Enable receive unit.
 *
 *  Programs interrupt moderation, the descriptor ring base/length,
 *  the receive control register (buffer size, long packet enable,
 *  filtering bits), optional checksum offload, and finally the
 *  head/tail pointers of RX queue 0.
 *
 **********************************************************************/
#define MAX_INTS_PER_SEC	8000
/*
 * Fully parenthesized so the macro expands as a single value no matter
 * which operators surround it at the point of use.
 */
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	u64	bus_addr;
	u32	rctl, rxcsum;

	INIT_DEBUGOUT("em_initialize_receive_unit: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	if (adapter->hw.mac.type >= e1000_82540) {
		E1000_WRITE_REG(&adapter->hw, E1000_RADV,
		    adapter->rx_abs_int_delay.value);
		/*
		 * Set the interrupt throttling rate. Value is calculated
		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
		 */
		E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
	}

	/*
	** When using MSIX interrupts we need to throttle
	** using the EITR register (82574 only)
	*/
	if (adapter->msix)
		for (int i = 0; i < 4; i++)
			E1000_WRITE_REG(&adapter->hw,
			    E1000_EITR_82574(i), DEFAULT_ITR);

	/* Disable accelerated acknowledge */
	if (adapter->hw.mac.type == e1000_82574)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_RFCTL, E1000_RFCTL_ACK_DIS);

	/* Setup the Base and Length of the Rx Descriptor Ring */
	bus_addr = adapter->rxdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0),
	    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
	E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0),
	    (u32)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0),
	    (u32)bus_addr);

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
	    E1000_RCTL_RDMTS_HALF |
	    (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;

	if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
		rctl |= E1000_RCTL_SBP;
	else
		rctl &= ~E1000_RCTL_SBP;

	/* Select the hardware buffer size matching rx_buffer_len. */
	switch (adapter->rx_buffer_len) {
	default:
	case 2048:
		rctl |= E1000_RCTL_SZ_2048;
		break;
	case 4096:
		rctl |= E1000_RCTL_SZ_4096 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 8192:
		rctl |= E1000_RCTL_SZ_8192 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 16384:
		rctl |= E1000_RCTL_SZ_16384 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	}

	/* The final long-packet setting is decided by the MTU alone. */
	if (ifp->if_mtu > ETHERMTU)
		rctl |= E1000_RCTL_LPE;
	else
		rctl &= ~E1000_RCTL_LPE;

	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
	if ((adapter->hw.mac.type >= e1000_82543) &&
	    (ifp->if_capenable & IFCAP_RXCSUM)) {
		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
	}

	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573
	** long latencies are observed, like Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	*/
	if (adapter->hw.mac.type == e1000_82573)
		E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 0x20);

	/* Enable Receives */
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);

	/*
	 * Setup the HW Rx Head and
	 * Tail Descriptor Pointers
	 */
	E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0);
	E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), adapter->num_rx_desc - 1);

	return;
}
/*********************************************************************
 *
 *  Free receive related data structures: the spare DMA map, every
 *  per-slot mbuf and DMA map, the slot array and the RX DMA tag.
 *  Each resource is released only if it is currently set, and its
 *  pointer is cleared afterwards.
 *
 **********************************************************************/
static void
em_free_receive_structures(struct adapter *adapter)
{
	struct em_buffer *rxb;
	int slot;

	INIT_DEBUGOUT("free_receive_structures: begin");

	if (adapter->rx_sparemap) {
		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
		adapter->rx_sparemap = NULL;
	}

	if (adapter->rx_buffer_area != NULL) {
		/* Cleanup any existing buffers */
		for (slot = 0; slot < adapter->num_rx_desc; slot++) {
			rxb = &adapter->rx_buffer_area[slot];

			if (rxb->m_head != NULL) {
				bus_dmamap_sync(adapter->rxtag, rxb->map,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(adapter->rxtag, rxb->map);
				m_freem(rxb->m_head);
				rxb->m_head = NULL;
			} else if (rxb->map != NULL) {
				bus_dmamap_unload(adapter->rxtag, rxb->map);
			}

			if (rxb->map != NULL) {
				bus_dmamap_destroy(adapter->rxtag, rxb->map);
				rxb->map = NULL;
			}
		}

		/* The slot array itself goes last. */
		free(adapter->rx_buffer_area, M_DEVBUF);
		adapter->rx_buffer_area = NULL;
	}

	if (adapter->rxtag != NULL) {
		bus_dma_tag_destroy(adapter->rxtag);
		adapter->rxtag = NULL;
	}
}
/*********************************************************************
*
* This routine executes in interrupt context. It replenishes
* the mbufs in the descriptor and sends data which has been
* dma'ed into host memory to upper layer.
*
* We loop at most count times if count is > 0, or until done if
* count < 0.
*
* For polling we also now return the number of cleaned packets
*
* Only descriptors carrying the end-of-packet flag decrement count,
* so count limits completed frames rather than descriptors.
* The return value is the number of frames passed to if_input.
*********************************************************************/
static int
em_rxeof(struct adapter *adapter, int count)
{
struct ifnet *ifp = adapter->ifp;
struct mbuf *mp;
u8 status, accept_frame = 0, eop = 0;
u16 len, desc_len, prev_len_adj;
int i, rx_sent = 0;
struct e1000_rx_desc *current_desc;
EM_RX_LOCK(adapter);
i = adapter->next_rx_desc_to_check;
current_desc = &adapter->rx_desc_base[i];
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_POSTREAD);
/* Nothing completed by the hardware yet: leave without touching RDT. */
if (!((current_desc->status) & E1000_RXD_STAT_DD)) {
EM_RX_UNLOCK(adapter);
return (rx_sent);
}
while ((current_desc->status & E1000_RXD_STAT_DD) &&
(count != 0) &&
(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
/* m stays NULL unless this descriptor completes a frame. */
struct mbuf *m = NULL;
mp = adapter->rx_buffer_area[i].m_head;
/*
* Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
* needs to access the last received byte in the mbuf.
*/
bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
BUS_DMASYNC_POSTREAD);
accept_frame = 1;
prev_len_adj = 0;
desc_len = le16toh(current_desc->length);
status = current_desc->status;
if (status & E1000_RXD_STAT_EOP) {
count--;
eop = 1;
/*
* The last descriptor's length includes the CRC; strip it.
* If this descriptor holds fewer than ETHER_CRC_LEN bytes,
* the remainder is trimmed from the previous mbuf below.
*/
if (desc_len < ETHER_CRC_LEN) {
len = 0;
prev_len_adj = ETHER_CRC_LEN - desc_len;
} else
len = desc_len - ETHER_CRC_LEN;
} else {
eop = 0;
len = desc_len;
}
if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
u8 last_byte;
u32 pkt_len = desc_len;
if (adapter->fmp != NULL)
pkt_len += adapter->fmp->m_pkthdr.len;
/* NOTE(review): with desc_len == 0 this reads one byte before m_data -- confirm hardware never reports that here. */
last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
if (TBI_ACCEPT(&adapter->hw, status,
current_desc->errors, pkt_len, last_byte,
adapter->min_frame_size, adapter->max_frame_size)) {
e1000_tbi_adjust_stats_82543(&adapter->hw,
&adapter->stats, pkt_len,
adapter->hw.mac.addr,
adapter->max_frame_size);
if (len > 0)
len--;
} else
accept_frame = 0;
}
if (accept_frame) {
/*
* Try to put a replacement buffer into slot i. On failure the
* frame is dropped and the existing mbuf is recycled at discard.
*/
if (em_get_buf(adapter, i) != 0) {
ifp->if_iqdrops++;
goto discard;
}
/* Assign correct length to the current fragment */
mp->m_len = len;
if (adapter->fmp == NULL) {
mp->m_pkthdr.len = len;
adapter->fmp = mp; /* Store the first mbuf */
adapter->lmp = mp;
} else {
/* Chain mbuf's together */
mp->m_flags &= ~M_PKTHDR;
/*
* Adjust length of previous mbuf in chain if
* we received less than 4 bytes in the last
* descriptor.
*/
if (prev_len_adj > 0) {
adapter->lmp->m_len -= prev_len_adj;
adapter->fmp->m_pkthdr.len -=
prev_len_adj;
}
adapter->lmp->m_next = mp;
adapter->lmp = adapter->lmp->m_next;
adapter->fmp->m_pkthdr.len += len;
}
if (eop) {
adapter->fmp->m_pkthdr.rcvif = ifp;
ifp->if_ipackets++;
em_receive_checksum(adapter, current_desc,
adapter->fmp);
#ifndef __NO_STRICT_ALIGNMENT
/* On em_fixup_rx() failure adapter->fmp is NULL, so m below stays NULL. */
if (adapter->max_frame_size >
(MCLBYTES - ETHER_ALIGN) &&
em_fixup_rx(adapter) != 0)
goto skip;
#endif
if (status & E1000_RXD_STAT_VP) {
#if __FreeBSD_version < 700000
VLAN_INPUT_TAG_NEW(ifp, adapter->fmp,
(le16toh(current_desc->special) &
E1000_RXD_SPC_VLAN_MASK));
#else
adapter->fmp->m_pkthdr.ether_vtag =
(le16toh(current_desc->special) &
E1000_RXD_SPC_VLAN_MASK);
adapter->fmp->m_flags |= M_VLANTAG;
#endif
}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
/* Detach the finished chain so the next frame starts a new one. */
m = adapter->fmp;
adapter->fmp = NULL;
adapter->lmp = NULL;
}
} else {
ifp->if_ierrors++;
/* Also entered by goto from the em_get_buf() failure path above. */
discard:
/* Reuse loaded DMA map and just update mbuf chain */
mp = adapter->rx_buffer_area[i].m_head;
mp->m_len = mp->m_pkthdr.len = MCLBYTES;
mp->m_data = mp->m_ext.ext_buf;
mp->m_next = NULL;
if (adapter->max_frame_size <=
(MCLBYTES - ETHER_ALIGN))
m_adj(mp, ETHER_ALIGN);
/* Any partially assembled frame is thrown away as well. */
if (adapter->fmp != NULL) {
m_freem(adapter->fmp);
adapter->fmp = NULL;
adapter->lmp = NULL;
}
m = NULL;
}
/* Zero out the receive descriptors status. */
current_desc->status = 0;
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* Advance our pointers to the next descriptor. */
if (++i == adapter->num_rx_desc)
i = 0;
/* Call into the stack */
if (m != NULL) {
/*
* The RX lock is dropped around if_input, so the ring position is
* saved first and re-read once the lock is held again.
*/
adapter->next_rx_desc_to_check = i;
EM_RX_UNLOCK(adapter);
(*ifp->if_input)(ifp, m);
EM_RX_LOCK(adapter);
rx_sent++;
i = adapter->next_rx_desc_to_check;
}
current_desc = &adapter->rx_desc_base[i];
}
adapter->next_rx_desc_to_check = i;
/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
/* The tail is set to the slot just before the next one to check, wrapping at 0. */
if (--i < 0)
i = adapter->num_rx_desc - 1;
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
EM_RX_UNLOCK(adapter);
return (rx_sent);
}
#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
 * buffer size to be 2048/4096/8192/16384. What we really want is
 * 2048 - ETHER_ALIGN to align its payload. On architectures without strict
 * alignment restrictions the 8254x still performs unaligned memory access
 * which would reduce the performance too.
 * To avoid copying over an entire frame to align, we allocate a new mbuf and
 * copy the ethernet header to the new mbuf. The new mbuf is prepended to the
 * existing mbuf chain.
 *
 * Be aware, best performance of the 8254x is achieved only when jumbo frames
 * are not used at all on architectures with strict alignment.
 *
 * Returns 0 on success. If no header mbuf can be obtained, the frame in
 * adapter->fmp is freed, adapter->fmp is cleared and ENOMEM is returned.
 */
static int
em_fixup_rx(struct adapter *adapter)
{
	struct mbuf *head, *hdr;

	head = adapter->fmp;

	/*
	 * Enough room behind the data: slide the first mbuf's contents
	 * forward by ETHER_HDR_LEN in place.
	 */
	if (head->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		bcopy(head->m_data, head->m_data + ETHER_HDR_LEN,
		    head->m_len);
		head->m_data += ETHER_HDR_LEN;
		return (0);
	}

	/* Otherwise move just the ethernet header into a new leading mbuf. */
	MGETHDR(hdr, M_DONTWAIT, MT_DATA);
	if (hdr == NULL) {
		adapter->dropped_pkts++;
		m_freem(adapter->fmp);
		adapter->fmp = NULL;
		return (ENOMEM);
	}

	bcopy(head->m_data, hdr->m_data, ETHER_HDR_LEN);
	head->m_data += ETHER_HDR_LEN;
	head->m_len -= ETHER_HDR_LEN;
	hdr->m_len = ETHER_HDR_LEN;
	M_MOVE_PKTHDR(hdr, head);
	hdr->m_next = head;
	adapter->fmp = hdr;

	return (0);
}
#endif
/*********************************************************************
 *
 *  Translate the checksum status bits of a receive descriptor into
 *  mbuf checksum flags, so that the stack does not have to verify
 *  checksums the hardware has already validated.
 *
 *********************************************************************/
static void
em_receive_checksum(struct adapter *adapter,
    struct e1000_rx_desc *rx_desc, struct mbuf *mp)
{
	/* Offload results exist on 82543 or newer only. */
	if (adapter->hw.mac.type < e1000_82543) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* The "ignore checksum" bit is set: report nothing. */
	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IP header checksum was computed; valid only without IPE. */
	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
		if (rx_desc->errors & E1000_RXD_ERR_IPE) {
			mp->m_pkthdr.csum_flags = 0;
		} else {
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		}
	}

	/* TCP/UDP checksum was computed and carries no error. */
	if ((rx_desc->status & E1000_RXD_STAT_TCPCS) &&
	    !(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data = htons(0xffff);
	}
}
#if __FreeBSD_version >= 700029
/*
 * This routine is run via a vlan config EVENT.
 *
 * Records the VLAN id in the shadow filter table (32 ids per word),
 * bumps the adapter's VLAN count and re-initializes the adapter so the
 * change is loaded. Events for other interfaces and ids outside
 * 1..4095 are ignored.
 */
static void
em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		index, bit;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
		return;

	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	/*
	 * Shift an unsigned one: bit may be 31, and shifting a signed
	 * int into its sign bit is undefined behavior.
	 */
	em_shadow_vfta[index] |= ((u32)1 << bit);
	++adapter->num_vlans;
	/* Re-init to load the changes */
	em_init(adapter);
}
/*
 * This routine is run via a vlan unconfig EVENT.
 *
 * Clears the VLAN id from the shadow filter table (32 ids per word),
 * decrements the adapter's VLAN count and re-initializes the adapter
 * so the change is loaded. Events for other interfaces and ids outside
 * 1..4095 are ignored.
 */
static void
em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		index, bit;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	/*
	 * Shift an unsigned one: bit may be 31, and shifting a signed
	 * int into its sign bit is undefined behavior.
	 */
	em_shadow_vfta[index] &= ~((u32)1 << bit);
	--adapter->num_vlans;
	/* Re-init to load the changes */
	em_init(adapter);
}
/*
 * Reload the hardware VLAN state from the shadow filter table:
 * rewrite the non-zero VFTA words, turn on VLAN mode in CTRL, enable
 * the filter table in RCTL and grow the long-packet limit by one tag.
 */
static void
em_setup_vlan_hw_support(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 val;

	/*
	** We get here thru init_locked, meaning
	** a soft reset, this has already cleared
	** the VFTA and other state, so if there
	** have been no vlan's registered do nothing.
	*/
	if (adapter->num_vlans == 0)
		return;

	/*
	** A soft reset zero's out the VFTA, so
	** we need to repopulate it now.
	*/
	for (int i = 0; i < EM_VFTA_SIZE; i++) {
		if (em_shadow_vfta[i] == 0)
			continue;
		E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, em_shadow_vfta[i]);
	}

	/* Set the VME bit in the device control register. */
	val = E1000_READ_REG(hw, E1000_CTRL);
	val |= E1000_CTRL_VME;
	E1000_WRITE_REG(hw, E1000_CTRL, val);

	/* Enable the Filter Table */
	val = E1000_READ_REG(hw, E1000_RCTL);
	val &= ~E1000_RCTL_CFIEN;
	val |= E1000_RCTL_VFE;
	E1000_WRITE_REG(hw, E1000_RCTL, val);

	/* Update the frame size */
	E1000_WRITE_REG(hw, E1000_RLPML,
	    adapter->max_frame_size + VLAN_TAG_SIZE);
}
#endif
/*
 * Unmask the driver's interrupt causes. With MSI-X in use the MSI-X
 * mask is also written to EIAC and added to the IMS value.
 */
static void
em_enable_intr(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mask;

	mask = IMS_ENABLE_MASK;
	if (adapter->msix) {
		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
		mask |= EM_MSIX_MASK;
	}

	E1000_WRITE_REG(hw, E1000_IMS, mask);
}
/*
 * Mask all interrupt causes. With MSI-X in use EIAC is cleared first.
 */
static void
em_disable_intr(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix) {
		E1000_WRITE_REG(hw, EM_EIAC, 0);
	}

	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
}
/*
 * Bit of a misnomer, what this really means is
 * to enable OS management of the system... aka
 * to disable special hardware management features.
 *
 * Does nothing unless the adapter has manageability.
 */
static void
em_init_manageability(struct adapter *adapter)
{
	/* A shared code workaround */
#define E1000_82542_MANC2H E1000_MANC2H
#define E1000_MNG2HOST_PORT_623 (1 << 5)
#define E1000_MNG2HOST_PORT_664 (1 << 6)
	int manc2h, manc;

	if (!adapter->has_manage)
		return;

	manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
	manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

	/* disable hardware interception of ARP */
	manc &= ~(E1000_MANC_ARP_EN);

	/* enable receiving management packets to the host */
	if (adapter->hw.mac.type >= e1000_82571) {
		manc |= E1000_MANC_EN_MNG2HOST;
		manc2h |= E1000_MNG2HOST_PORT_623;
		manc2h |= E1000_MNG2HOST_PORT_664;
		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
	}

	E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
}
/*
 * Give control back to hardware management
 * controller if there is one.
 */
static void
em_release_manageability(struct adapter *adapter)
{
	int manc;

	if (!adapter->has_manage)
		return;

	manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

	/* re-enable hardware interception of ARP */
	manc |= E1000_MANC_ARP_EN;

	/* 82571 and later: stop forwarding management packets to the host */
	if (adapter->hw.mac.type >= e1000_82571) {
		manc &= ~E1000_MANC_EN_MNG2HOST;
	}

	E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
}
/*
* em_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit.
* For ASF and Pass Through versions of f/w this means that
* the driver is loaded. For AMT version (only with 82573)
* of the f/w this means that the network i/f is open.
*
*/
static void
em_get_hw_control(struct adapter *adapter)
{
u32 ctrl_ext, swsm;
/* Let firmware know the driver has taken over */
switch (adapter->hw.mac.type) {
case e1000_82573:
swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
swsm | E1000_SWSM_DRV_LOAD);
break;
case e1000_82571:
case e1000_82572:
case e1000_80003es2lan:
case e1000_ich8lan:
case e1000_ich9lan:
case e1000_ich10lan:
ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
break;
default:
break;
}
}
/*
* em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
* For ASF and Pass Through versions of f/w this means that the
* driver is no longer loaded. For AMT version (only with 82573) i
* of the f/w this means that the network i/f is closed.
*
*/
static void
em_release_hw_control(struct adapter *adapter)
{
u32 ctrl_ext, swsm;
/* Let firmware taken over control of h/w */
switch (adapter->hw.mac.type) {
case e1000_82573:
swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
swsm & ~E1000_SWSM_DRV_LOAD);
break;
case e1000_82571:
case e1000_82572:
case e1000_80003es2lan:
case e1000_ich8lan:
case e1000_ich9lan:
case e1000_ich10lan:
ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
break;
default:
break;
}
}
/*
 * Returns FALSE when the address has its lowest bit of the first byte
 * set or consists only of zero bytes; TRUE otherwise.
 */
static int
em_is_valid_ether_addr(u8 *addr)
{
	char all_zero[6] = { 0, 0, 0, 0, 0, 0 };

	/* Group (multicast/broadcast) bit set */
	if (addr[0] & 1)
		return (FALSE);

	/* All-zero address */
	if (bcmp(addr, all_zero, ETHER_ADDR_LEN) == 0)
		return (FALSE);

	return (TRUE);
}
/*
 * Enable PCI Wake On Lan capability.
 *
 * Walks the device's PCI capability list looking for the power
 * management capability and, when found, sets the PME status and
 * PME enable bits in its power status register. Returns silently
 * if the device exposes no power management capability.
 */
void
em_enable_wakeup(device_t dev)
{
	u16	status;
	u8	ptr, id;
	int	hops;

	/*
	 * The capabilities pointer is a single byte register and its
	 * two low bits are reserved, so read one byte and mask them.
	 */
	ptr = pci_read_config(dev, PCIR_CAP_PTR, 1) & 0xfc;

	/*
	 * Follow the linked list (id at +0, next pointer at +1).
	 * Power management is not guaranteed to be the first entry.
	 * The hop limit guards against a malformed, looping list:
	 * 48 four-byte entries are all that fit after the header.
	 */
	id = 0;
	for (hops = 0; ptr != 0 && hops < 48; hops++) {
		id = pci_read_config(dev, ptr, 1);
		if (id == PCIY_PMG)
			break;
		ptr = pci_read_config(dev, ptr + 1, 1) & 0xfc;
	}
	if (ptr == 0 || id != PCIY_PMG)	/* No PM capability */
		return;

	/*
	 * OK, we have the power capabilities, so
	 * now get the status register
	 */
	status = pci_read_config(dev, ptr + PCIR_POWER_STATUS, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, ptr + PCIR_POWER_STATUS, status, 2);
	return;
}
/*********************************************************************
 * 82544 Coexistence issue workaround.
 *    There are 2 issues.
 *       1. Transmit Hang issue.
 *    To detect this issue, following equation can be used...
 *	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *	  If SUM[3:0] is in between 1 to 4, we will have this issue.
 *
 *       2. DAC issue.
 *    To detect this issue, following equation can be used...
 *	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *	  If SUM[3:0] is in between 9 to c, we will have this issue.
 *
 *
 *    WORKAROUND:
 *	  Make sure we do not have ending address
 *	  as 1,2,3,4(Hang) or 9,a,b,c (DAC)
 *
 *    Fills desc_array with either one descriptor covering the whole
 *    buffer, or two descriptors where the second one carries the last
 *    4 bytes. Returns the number of descriptors written (1 or 2).
 *
 *************************************************************************/
static u32
em_fill_descriptors (bus_addr_t address, u32 length,
    PDESC_ARRAY desc_array)
{
	u32 sum;
	int split = 0;

	/* Buffers of 4 bytes or less are never split. */
	if (length > 4) {
		sum = (u32)((((u32)address & 0x7) + (length & 0xF)) & 0xF);
		/* Splitting is needed for a sum of 0x1-0x4 or 0x9-0xC. */
		if ((sum >= 1 && sum <= 4) || (sum >= 9 && sum <= 0xC))
			split = 1;
	}

	desc_array->descriptor[0].address = address;
	if (!split) {
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

	/* Move the final 4 bytes into a descriptor of their own. */
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}
/**********************************************************************
*
* Update the board statistics counters.
*
**********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
struct ifnet *ifp;
if(adapter->hw.phy.media_type == e1000_media_type_copper ||
(E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
}
adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
/* For the 64-bit byte counters the low dword must be read first. */
/* Both registers clear on the read of the high dword */
adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
if (adapter->hw.mac.type >= e1000_82543) {
adapter->stats.algnerrc +=
E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
adapter->stats.rxerrc +=
E1000_READ_REG(&adapter->hw, E1000_RXERRC);
adapter->stats.tncrs +=
E1000_READ_REG(&adapter->hw, E1000_TNCRS);
adapter->stats.cexterr +=
E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
adapter->stats.tsctc +=
E1000_READ_REG(&adapter->hw, E1000_TSCTC);
adapter->stats.tsctfc +=
E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
}
ifp = adapter->ifp;
ifp->if_collisions = adapter->stats.colc;
/* Rx Errors */
ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
adapter->stats.crcerrs + adapter->stats.algnerrc +
adapter->stats.ruc + adapter->stats.roc +
adapter->stats.mpc + adapter->stats.cexterr;
/* Tx Errors */
ifp->if_oerrors = adapter->stats.ecol +
adapter->stats.latecol + adapter->watchdog_events;
}
/**********************************************************************
*
* This routine is called only when em_display_debug_stats is enabled.
* This routine provides a way to take a look at important statistics
* maintained by the driver and hardware.
*
**********************************************************************/
static void
em_print_debug_info(struct adapter *adapter)
{
device_t dev = adapter->dev;
u8 *hw_addr = adapter->hw.hw_addr;
device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
E1000_READ_REG(&adapter->hw, E1000_CTRL),
E1000_READ_REG(&adapter->hw, E1000_RCTL));
device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
(E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
device_printf(dev, "Flow control watermarks high = %d low = %d\n",
adapter->hw.fc.high_water,
adapter->hw.fc.low_water);
device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
E1000_READ_REG(&adapter->hw, E1000_TIDV),
E1000_READ_REG(&adapter->hw, E1000_TADV));
device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
E1000_READ_REG(&adapter->hw, E1000_RDTR),
E1000_READ_REG(&adapter->hw, E1000_RADV));
device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
(long long)adapter->tx_fifo_wrk_cnt,
(long long)adapter->tx_fifo_reset_cnt);
device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
device_printf(dev, "Num Tx descriptors avail = %d\n",
adapter->num_tx_desc_avail);
device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
adapter->no_tx_desc_avail1);
device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
adapter->no_tx_desc_avail2);
device_printf(dev, "Std mbuf failed = %ld\n",
adapter->mbuf_alloc_failed);
device_printf(dev, "Std mbuf cluster failed = %ld\n",
adapter->mbuf_cluster_failed);
device_printf(dev, "Driver dropped packets = %ld\n",
adapter->dropped_pkts);
device_printf(dev, "Driver tx dma failure in encap = %ld\n",
adapter->no_tx_dma_setup);
}
/*
 * Print one "<label> = <value>" line for a counter in adapter->stats.
 * The label and the format tail are adjacent string literals, so the
 * resulting format string is identical to a hand-written one.
 */
#define EM_PRINT_STAT(label, field)					\
	device_printf(dev, label " = %lld\n",				\
	    (long long)adapter->stats.field)

/*
 * Dump the accumulated hardware statistics and a few driver event
 * counters to the console.
 */
static void
em_print_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	EM_PRINT_STAT("Excessive collisions", ecol);
#if (DEBUG_HW > 0)  /* Dont output these errors normally */
	EM_PRINT_STAT("Symbol errors", symerrs);
#endif
	EM_PRINT_STAT("Sequence errors", sec);
	EM_PRINT_STAT("Defer count", dc);
	EM_PRINT_STAT("Missed Packets", mpc);
	EM_PRINT_STAT("Receive No Buffers", rnbc);
	/* RLEC is inaccurate on some hardware, calculate our own. */
	device_printf(dev, "Receive Length Errors = %lld\n",
	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
	EM_PRINT_STAT("Receive errors", rxerrc);
	EM_PRINT_STAT("Crc errors", crcerrs);
	EM_PRINT_STAT("Alignment errors", algnerrc);
	EM_PRINT_STAT("Collision/Carrier extension errors", cexterr);

	/* Driver-side event counters */
	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
	device_printf(dev, "watchdog timeouts = %ld\n",
	    adapter->watchdog_events);
	device_printf(dev, "RX MSIX IRQ = %ld TX MSIX IRQ = %ld"
	    " LINK MSIX IRQ = %ld\n", adapter->rx_irq,
	    adapter->tx_irq , adapter->link_irq);

	EM_PRINT_STAT("XON Rcvd", xonrxc);
	EM_PRINT_STAT("XON Xmtd", xontxc);
	EM_PRINT_STAT("XOFF Rcvd", xoffrxc);
	EM_PRINT_STAT("XOFF Xmtd", xofftxc);
	EM_PRINT_STAT("Good Packets Rcvd", gprc);
	EM_PRINT_STAT("Good Packets Xmtd", gptc);
	EM_PRINT_STAT("TSO Contexts Xmtd", tsctc);
	EM_PRINT_STAT("TSO Contexts Failed", tsctfc);
}
#undef EM_PRINT_STAT
/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. This only dumps the first
 *  32 words, stuff that matters is in that extent.
 *
 *  Output is laid out as four rows of eight 16-bit words, each row
 *  prefixed with its offset label.
 *
 **********************************************************************/
static void
em_print_nvm_info(struct adapter *adapter)
{
	u16 word;
	int idx;

	/* Its a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000 ");
	for (idx = 0; idx < 32; idx++) {
		/* Every eighth word after the first starts a new row. */
		if (idx != 0 && (idx % 8) == 0)
			printf("\n0x00%x0 ", idx / 8);
		e1000_read_nvm(&adapter->hw, idx, 1, &word);
		printf("%04x ", word);
	}
	printf("\n");
}
/*
 * Sysctl handler: writing 1 prints the driver debug information,
 * writing 2 dumps the first 32 16-bit words of the EEPROM. Reads
 * return -1 and other written values are accepted without action.
 */
static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *)arg1;
	int error, result = -1;

	error = sysctl_handle_int(oidp, &result, 0, req);
	if (error || !req->newptr)
		return (error);

	switch (result) {
	case 1:
		em_print_debug_info(adapter);
		break;
	case 2:
		/*
		 * This value will cause a hex dump of the
		 * first 32 16-bit words of the EEPROM to
		 * the screen.
		 */
		em_print_nvm_info(adapter);
		break;
	default:
		break;
	}

	return (error);
}
/*
 * Sysctl handler: writing 1 prints the hardware statistics. Reads
 * return -1 and other written values are accepted without action.
 */
static int
em_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
	int error, result = -1;

	error = sysctl_handle_int(oidp, &result, 0, req);
	if (error || !req->newptr)
		return (error);

	if (result == 1)
		em_print_hw_stats((struct adapter *)arg1);

	return (error);
}
/*
 * Sysctl handler for the interrupt delay tunables. The value is
 * exchanged with userland in microseconds; on a write it is range
 * checked, stored, converted to hardware ticks and written into the
 * low 16 bits of the register named by the em_int_delay_info.
 */
static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info = (struct em_int_delay_info *)arg1;
	struct adapter *adapter;
	u32 regval;
	int error, usecs, delay_ticks;

	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	/* The register field holds at most 65535 ticks. */
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);

	info->value = usecs;
	delay_ticks = EM_USECS_TO_TICKS(usecs);
	adapter = info->adapter;

	EM_CORE_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (delay_ticks & 0xffff);

	/* Handle a few special cases. */
	if (info->offset == E1000_TIDV) {
		if (delay_ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else {
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		}
	}

	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_CORE_UNLOCK(adapter);

	return (0);
}
/*
 * Fill in an em_int_delay_info record and register a read/write
 * integer sysctl under the device's tree, served by
 * em_sysctl_int_delay().
 */
static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
    const char *description, struct em_int_delay_info *info,
    int offset, int value)
{
	/* Record what the handler needs before the node is created. */
	info->value = value;
	info->offset = offset;
	info->adapter = adapter;

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}
#ifndef EM_LEGACY_IRQ
/*
 * Store the initial receive processing limit in *limit and expose
 * that integer as a read/write sysctl under the device's tree.
 */
static void
em_add_rx_process_limit(struct adapter *adapter, const char *name,
    const char *description, int *limit, int value)
{
	/* Seed the tunable before it becomes visible. */
	*limit = value;

	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    limit, value, description);
}
#endif
Index: stable/8/sys/dev/e1000/if_igb.c
===================================================================
--- stable/8/sys/dev/e1000/if_igb.c (revision 205282)
+++ stable/8/sys/dev/e1000/if_igb.c (revision 205283)
@@ -1,5029 +1,5029 @@
/******************************************************************************
Copyright (c) 2001-2009, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*$FreeBSD$*/
#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>
#ifdef IGB_IEEE1588
#include <sys/ieee1588.h>
#endif
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>
#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"
/*********************************************************************
 *  Set this to one to display debug statistics.
 *  The variable has external linkage; its readers are not visible in
 *  this part of the file.
 *********************************************************************/
int igb_display_debug_stats = 0;
/*********************************************************************
 *  Driver version string.  igb_probe() appends it to the branding
 *  string when it builds the device description.
 *********************************************************************/
char igb_driver_version[] = "version - 1.7.3";
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by igb_probe() to select the devices this driver attaches to.
 *  The last field of each entry is an index into igb_strings.
 *  A subvendor/subdevice of PCI_ANY_ID matches any value.
 *  The last entry must be all 0s; igb_probe() stops at vendor_id 0.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/
static igb_vendor_info_t igb_vendor_info_array[] =
{
{ 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
PCI_ANY_ID, PCI_ANY_ID, 0},
/* required last entry */
{ 0, 0, 0, 0, 0}
};
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *  Indexed by the last field of an igb_vendor_info_array entry; every
 *  entry in that table currently uses index 0.
 *********************************************************************/
static char *igb_strings[] = {
"Intel(R) PRO/1000 Network Connection"
};
/*********************************************************************
* Function prototypes
*********************************************************************/
static int igb_probe(device_t);
static int igb_attach(device_t);
static int igb_detach(device_t);
static int igb_shutdown(device_t);
static int igb_suspend(device_t);
static int igb_resume(device_t);
static void igb_start(struct ifnet *);
static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int igb_mq_start(struct ifnet *, struct mbuf *);
static int igb_mq_start_locked(struct ifnet *,
struct tx_ring *, struct mbuf *);
static void igb_qflush(struct ifnet *);
#endif
static int igb_ioctl(struct ifnet *, u_long, caddr_t);
static void igb_watchdog(struct adapter *);
static void igb_init(void *);
static void igb_init_locked(struct adapter *);
static void igb_stop(void *);
static void igb_media_status(struct ifnet *, struct ifmediareq *);
static int igb_media_change(struct ifnet *);
static void igb_identify_hardware(struct adapter *);
static int igb_allocate_pci_resources(struct adapter *);
static int igb_allocate_msix(struct adapter *);
static int igb_allocate_legacy(struct adapter *);
static int igb_setup_msix(struct adapter *);
static void igb_free_pci_resources(struct adapter *);
static void igb_local_timer(void *);
static int igb_hardware_init(struct adapter *);
static void igb_setup_interface(device_t, struct adapter *);
static int igb_allocate_queues(struct adapter *);
static void igb_configure_queues(struct adapter *);
static int igb_allocate_transmit_buffers(struct tx_ring *);
static void igb_setup_transmit_structures(struct adapter *);
static void igb_setup_transmit_ring(struct tx_ring *);
static void igb_initialize_transmit_units(struct adapter *);
static void igb_free_transmit_structures(struct adapter *);
static void igb_free_transmit_buffers(struct tx_ring *);
static int igb_allocate_receive_buffers(struct rx_ring *);
static int igb_setup_receive_structures(struct adapter *);
static int igb_setup_receive_ring(struct rx_ring *);
static void igb_initialize_receive_units(struct adapter *);
static void igb_free_receive_structures(struct adapter *);
static void igb_free_receive_buffers(struct rx_ring *);
static void igb_enable_intr(struct adapter *);
static void igb_disable_intr(struct adapter *);
static void igb_update_stats_counters(struct adapter *);
static bool igb_txeof(struct tx_ring *);
static bool igb_rxeof(struct rx_ring *, int);
static void igb_rx_checksum(u32, struct mbuf *, bool);
static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void igb_set_promisc(struct adapter *);
static void igb_disable_promisc(struct adapter *);
static void igb_set_multi(struct adapter *);
static void igb_print_hw_stats(struct adapter *);
static void igb_update_link_status(struct adapter *);
static int igb_get_buf(struct rx_ring *, int, u8);
static void igb_register_vlan(void *, struct ifnet *, u16);
static void igb_unregister_vlan(void *, struct ifnet *, u16);
static void igb_setup_vlan_hw_support(struct adapter *);
static int igb_xmit(struct tx_ring *, struct mbuf **);
static int igb_dma_malloc(struct adapter *, bus_size_t,
struct igb_dma_alloc *, int);
static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void igb_print_debug_info(struct adapter *);
static void igb_print_nvm_info(struct adapter *);
static int igb_is_valid_ether_addr(u8 *);
static int igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void igb_init_manageability(struct adapter *);
static void igb_release_manageability(struct adapter *);
static void igb_get_hw_control(struct adapter *);
static void igb_release_hw_control(struct adapter *);
static void igb_enable_wakeup(device_t);
static int igb_irq_fast(void *);
static void igb_add_rx_process_limit(struct adapter *, const char *,
const char *, int *, int);
static void igb_handle_rxtx(void *context, int pending);
static void igb_handle_tx(void *context, int pending);
static void igb_handle_rx(void *context, int pending);
/* These are MSIX only irq handlers */
static void igb_msix_rx(void *);
static void igb_msix_tx(void *);
static void igb_msix_link(void *);
/* Adaptive Interrupt Moderation */
static void igb_update_aim(struct rx_ring *);
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *
 *  igb_methods maps the generic device operations onto the igb_*
 *  routines in this file.  igb_driver names the driver "igb" and
 *  sizes the per-device softc as one struct adapter.
 *********************************************************************/
static device_method_t igb_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, igb_probe),
DEVMETHOD(device_attach, igb_attach),
DEVMETHOD(device_detach, igb_detach),
DEVMETHOD(device_shutdown, igb_shutdown),
DEVMETHOD(device_suspend, igb_suspend),
DEVMETHOD(device_resume, igb_resume),
/* table terminator */
{0, 0}
};
static driver_t igb_driver = {
"igb", igb_methods, sizeof(struct adapter),
};
static devclass_t igb_devclass;
/* Register the driver on the pci bus and declare its module dependencies. */
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
/*********************************************************************
 *  Tunable default values.
 *
 *  Each variable below is paired with a TUNABLE_INT under "hw.igb.".
 *  They are file-scope, so a value applies to every igb device.
 *********************************************************************/
/*
 * Descriptor defaults.  igb_attach() validates these and falls back
 * to IGB_DEFAULT_RXD / IGB_DEFAULT_TXD when a value is unusable.
 */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
/*
** These parameters are used in Adaptive
** Interrupt Moderation. The value is set
** into EITR and controls the interrupt
** frequency. A fixed (non-adaptive) rate can
** be chosen by changing the assigned value
** of igb_ave_latency to the desired value,
** and then setting igb_enable_aim to FALSE.
** This will result in all EITR registers
** getting set to that value statically.
**
** igb_attach() also publishes these four variables
** as read/write sysctl nodes.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
static int igb_low_latency = IGB_LOW_LATENCY;
TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
static int igb_ave_latency = IGB_AVE_LATENCY;
TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
static int igb_bulk_latency = IGB_BULK_LATENCY;
TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);
/*
** This will autoconfigure based on the number
** of CPUs if set to 0. Only a matched pair of
** TX and RX rings are allowed.
** The default here is a single queue.
*/
static int igb_num_queues = 1;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
/*
 * How many packets rxeof tries to clean at a time.
 * igb_attach() copies this into adapter->rx_process_limit.
 */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
/*
 * Flow control setting - default to FULL.
 * Also published by igb_attach() as the "flow_control" sysctl node.
 */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
/*
** Shadow VFTA table, this is needed because
** the real filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe decides whether this driver should claim the device by
 *  matching its PCI vendor, device, subvendor and subdevice IDs
 *  against igb_vendor_info_array.  A table entry whose subvendor or
 *  subdevice ID is PCI_ANY_ID matches any value in that field.
 *
 *  On a match the device description is set to the branding string
 *  selected by the entry followed by the driver version.
 *
 *  return BUS_PROBE_DEFAULT on success, ENXIO (positive) on failure
 *********************************************************************/
static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id;
	uint16_t	pci_device_id;
	uint16_t	pci_subvendor_id;
	uint16_t	pci_subdevice_id;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	/* Cheap rejection before reading the remaining IDs. */
	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	/* The table is terminated by an all-zero entry. */
	for (ent = igb_vendor_info_array; ent->vendor_id != 0; ent++) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			/*
			 * Bound the write to the local buffer so a longer
			 * branding or version string is truncated instead
			 * of overrunning the stack.
			 */
			snprintf(adapter_name, sizeof(adapter_name), "%s %s",
			    igb_strings[ent->index],
			    igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}
/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  Order of work: core lock and sysctl nodes, PCI resources, shared
 *  code setup, descriptor count validation, queue allocation, hardware
 *  reset, EEPROM/MAC validation, hardware init, interrupt allocation,
 *  network interface setup, then WOL and VLAN event registration.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/
static int
igb_attach(device_t dev)
{
struct adapter *adapter;
int error = 0;
u16 eeprom_data;
INIT_DEBUGOUT("igb_attach: begin");
adapter = device_get_softc(dev);
adapter->dev = adapter->osdep.dev = dev;
IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
/*
 * SYSCTL stuff.
 * "debug" and "stats" are handlers that receive this adapter.  The
 * remaining nodes point at file-scope variables, so a write through
 * any one device's node changes the value seen by every igb device.
 */
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
igb_sysctl_debug_info, "I", "Debug Information");
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
igb_sysctl_stats, "I", "Statistics");
SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
&igb_fc_setting, 0, "Flow Control");
SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
&igb_enable_aim, 1, "Interrupt Moderation");
SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
&igb_low_latency, 1, "Low Latency");
SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
&igb_ave_latency, 1, "Average Latency");
SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
&igb_bulk_latency, 1, "Bulk Latency");
/* The timer callout is tied to the core mutex. */
callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
/* Determine hardware and mac info */
igb_identify_hardware(adapter);
/* Setup PCI resources */
if (igb_allocate_pci_resources(adapter)) {
device_printf(dev, "Allocation of PCI resources failed\n");
error = ENXIO;
goto err_pci;
}
/* Do Shared Code initialization */
if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
device_printf(dev, "Setup of Shared code failed\n");
error = ENXIO;
goto err_pci;
}
e1000_get_bus_info(&adapter->hw);
/* Sysctls for limiting the amount of work done in the taskqueue */
igb_add_rx_process_limit(adapter, "rx_processing_limit",
"max number of rx packets to process", &adapter->rx_process_limit,
igb_rx_process_limit);
/*
 * Validate number of transmit and receive descriptors. It
 * must lie within [IGB_MIN_*, IGB_MAX_*], and the ring size in
 * bytes must be a multiple of IGB_DBA_ALIGN.  An unusable tunable
 * is reported and replaced by the compiled-in default.
 */
if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
(igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
device_printf(dev, "Using %d TX descriptors instead of %d!\n",
IGB_DEFAULT_TXD, igb_txd);
adapter->num_tx_desc = IGB_DEFAULT_TXD;
} else
adapter->num_tx_desc = igb_txd;
if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
(igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
device_printf(dev, "Using %d RX descriptors instead of %d!\n",
IGB_DEFAULT_RXD, igb_rxd);
adapter->num_rx_desc = IGB_DEFAULT_RXD;
} else
adapter->num_rx_desc = igb_rxd;
adapter->hw.mac.autoneg = DO_AUTO_NEG;
adapter->hw.phy.autoneg_wait_to_complete = FALSE;
adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
/* Copper options */
if (adapter->hw.phy.media_type == e1000_media_type_copper) {
adapter->hw.phy.mdix = AUTO_ALL_MODES;
adapter->hw.phy.disable_polarity_correction = FALSE;
adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
}
/*
* Set the frame limits assuming
* standard ethernet sized frames.
*/
adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
/*
** Allocate and Setup Queues
*/
if (igb_allocate_queues(adapter)) {
error = ENOMEM;
goto err_pci;
}
/*
** Start from a known state, this is
** important in reading the nvm and
** mac from that.
*/
e1000_reset_hw(&adapter->hw);
/* Make sure we have a good EEPROM before we read from it */
if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
/*
** Some PCI-E parts fail the first check due to
** the link being in sleep state, call it again,
** if it fails a second time its a real issue.
*/
if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
device_printf(dev,
"The EEPROM Checksum Is Not Valid\n");
error = EIO;
goto err_late;
}
}
/*
** Copy the permanent MAC address out of the EEPROM
*/
if (e1000_read_mac_addr(&adapter->hw) < 0) {
device_printf(dev, "EEPROM read error while reading MAC"
" address\n");
error = EIO;
goto err_late;
}
/* Check its sanity */
if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
device_printf(dev, "Invalid MAC address\n");
error = EIO;
goto err_late;
}
/* Now Initialize the hardware */
if (igb_hardware_init(adapter)) {
device_printf(dev, "Unable to initialize the hardware\n");
error = EIO;
goto err_late;
}
/*
** Configure Interrupts
*/
if (adapter->msix > 1) /* MSIX */
error = igb_allocate_msix(adapter);
else /* MSI or Legacy */
error = igb_allocate_legacy(adapter);
if (error)
goto err_late;
/* Setup OS specific network interface */
igb_setup_interface(dev, adapter);
#ifdef IGB_IEEE1588
/*
** Setup the timer: IEEE 1588 support
*/
adapter->cycles.read = igb_read_clock;
adapter->cycles.mask = (u64)-1;
adapter->cycles.mult = 1;
adapter->cycles.shift = IGB_TSYNC_SHIFT;
E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
IGB_TSYNC_CYCLE_TIME * IGB_TSYNC_SHIFT);
E1000_WRITE_REG(&adapter->hw, E1000_SYSTIML, 0x00000000);
E1000_WRITE_REG(&adapter->hw, E1000_SYSTIMH, 0xFF800000);
// JFV - this is not complete yet
#endif
/* Initialize statistics */
igb_update_stats_counters(adapter);
/* Force a fresh link check on the next status update. */
adapter->hw.mac.get_link_status = 1;
igb_update_link_status(adapter);
/* Indicate SOL/IDER usage */
if (e1000_check_reset_block(&adapter->hw))
device_printf(dev,
"PHY reset is blocked due to SOL/IDER session.\n");
/* Determine if we have to control management hardware */
adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
/*
* Setup Wake-on-Lan
*/
/* APME bit in EEPROM is mapped to WUC.APME */
eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
if (eeprom_data)
adapter->wol = E1000_WUFC_MAG;
/* Register for VLAN events */
adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
/* Tell the stack that the interface is not active */
adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
INIT_DEBUGOUT("igb_attach: end");
return (0);
/*
 * Error unwinding.  err_late is used once the queues have been
 * allocated and frees the ring structures before falling into
 * err_pci, which releases the PCI resources and the core lock.
 */
err_late:
igb_free_transmit_structures(adapter);
igb_free_receive_structures(adapter);
igb_release_hw_control(adapter);
err_pci:
igb_free_pci_resources(adapter);
IGB_CORE_LOCK_DESTROY(adapter);
return (error);
}
/*********************************************************************
 *  Device removal routine
 *
 *  Called when the driver is being removed.  Stops the adapter under
 *  the core lock, hands the hardware back to firmware, arms wake-up
 *  if configured, and then releases everything that was set up for
 *  driver operation.
 *
 *  return 0 on success, EBUSY while a vlan is still using the interface
 *********************************************************************/
static int
igb_detach(device_t dev)
{
	struct adapter	*adapter;
	struct ifnet	*ifp;

	adapter = device_get_softc(dev);
	ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Refuse to go away while a vlan is stacked on this interface. */
	if (ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	/* Quiesce the adapter; in_detach makes igb_ioctl() a no-op. */
	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Hand control back to the firmware. */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	/* Arm wake-up when a wake filter has been configured. */
	if (adapter->wol != 0) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Drop the VLAN event hooks registered at attach time. */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	/* Tear down in the same order as before: ifnet, timer, bus, rings. */
	ether_ifdetach(ifp);
	callout_drain(&adapter->timer);
	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}
/*********************************************************************
 *
 *  Shutdown entry point
 *
 *  Shutdown is handled exactly like suspend; the result of
 *  igb_suspend() is passed straight back to the caller.
 *
 **********************************************************************/
static int
igb_shutdown(device_t dev)
{
	int	rc;

	rc = igb_suspend(dev);
	return (rc);
}
/*
 * Suspend device method.
 *
 * With the core lock held: stop the adapter, give manageability and
 * hardware control back, and arm wake-up if a wake filter is set.
 * Returns the result of bus_generic_suspend().
 */
static int
igb_suspend(device_t dev)
{
	struct adapter	*adapter;

	adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol != 0) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return (bus_generic_suspend(dev));
}
/*
 * Resume device method.
 *
 * With the core lock held: re-initialise the adapter and its
 * manageability settings, then kick the transmit path if the
 * interface is both up and running.
 * Returns the result of bus_generic_resume().
 */
static int
igb_resume(device_t dev)
{
	struct adapter	*adapter;
	struct ifnet	*ifp;

	adapter = device_get_softc(dev);
	ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);

	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if (ifp->if_flags & IFF_UP) {
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			igb_start(ifp);
	}

	IGB_CORE_UNLOCK(adapter);

	return (bus_generic_resume(dev));
}
/*********************************************************************
 *  Transmit entry point (TX lock held)
 *
 *  Drains the interface send queue onto the given TX ring for as
 *  long as packets are queued and igb_xmit() accepts them.  When
 *  igb_xmit() fails but leaves the mbuf intact, the interface is
 *  marked OACTIVE and the packet is put back at the head of the
 *  queue; when it fails and consumed the mbuf, the loop simply stops.
 **********************************************************************/
static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*pkt;

	IGB_TX_LOCK_ASSERT(txr);

	/* Only transmit when running, not OACTIVE, and the link is up. */
	if (((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING) || !adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
		if (pkt == NULL)
			break;

		/*
		 * Encapsulation may replace the mbuf pointer, or set it
		 * to NULL on failure, in which case nothing can be
		 * requeued.
		 */
		if (igb_xmit(txr, &pkt) != 0) {
			if (pkt != NULL) {
				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
				IFQ_DRV_PREPEND(&ifp->if_snd, pkt);
			}
			break;
		}

		/* Hand a copy of the frame to any BPF listener. */
		ETHER_BPF_MTAP(ifp, pkt);

		/* Arm the watchdog in case the hardware stalls. */
		txr->watchdog_timer = IGB_TX_TIMEOUT;
	}
}
/*
 * Legacy TX entry point called from the stack.  It always uses the
 * first TX ring and blocks on that ring's lock.
 * Should not be used with multiqueue tx.
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*ring0 = adapter->tx_rings;

	/* Nothing to do unless the interface is running. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	IGB_TX_LOCK(ring0);
	igb_start_locked(ring0, ifp);
	IGB_TX_UNLOCK(ring0);
}
#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
** Picks a TX ring (flowid modulo the queue count when the mbuf
** carries M_FLOWID, ring 0 otherwise).  If the ring lock can be
** taken without waiting the packet is sent through
** igb_mq_start_locked(); otherwise it is queued on the ring's
** buf_ring.
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		qidx = 0;
	int		rc;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		qidx = m->m_pkthdr.flowid % adapter->num_queues;
	txr = &adapter->tx_rings[qidx];

	/* Ring is busy: queue the packet instead of waiting for the lock. */
	if (!IGB_TX_TRYLOCK(txr))
		return (drbr_enqueue(ifp, txr->br, m));

	rc = igb_mq_start_locked(ifp, txr, m);
	IGB_TX_UNLOCK(txr);

	return (rc);
}
/*
 * Multiqueue transmit with the ring's TX lock held.
 *
 * m is the packet to send, or NULL to only drain what is already on
 * the ring's buf_ring.  A new packet is transmitted directly when
 * nothing is queued ahead of it, otherwise it is enqueued; afterwards
 * the buf_ring is drained until it is empty, igb_xmit() fails, or the
 * interface stops running.  The interface is marked OACTIVE when the
 * ring's free descriptors drop to IGB_TX_OP_THRESHOLD or below.
 *
 * Returns 0 or the error from igb_xmit()/drbr_enqueue().
 *
 * NOTE(review): on the not-running path below m is passed to
 * drbr_enqueue() before the m == NULL check, so a NULL m would be
 * enqueued -- confirm no caller passes NULL while the interface is
 * stopped.
 * NOTE(review): this text is a diff rendering; the "-"/"+" pair below
 * is the change from drbr_empty() to !drbr_needs_enqueue().
 */
static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
/* NOTE(review): adapter is not referenced in this function. */
struct adapter *adapter = txr->adapter;
struct mbuf *next;
int err = 0;
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
err = drbr_enqueue(ifp, txr->br, m);
return (err);
}
if (m == NULL) /* Called by tasklet */
goto process;
/* If nothing queued go right to xmit */
- if (drbr_empty(ifp, txr->br)) {
+ if (!drbr_needs_enqueue(ifp, txr->br)) {
if ((err = igb_xmit(txr, &m)) != 0) {
/* igb_xmit() may have consumed the mbuf; only requeue if it survived. */
if (m != NULL)
err = drbr_enqueue(ifp, txr->br, m);
return (err);
} else {
/* Success, update stats */
drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(ifp, m);
/* Set the watchdog */
txr->watchdog_timer = IGB_TX_TIMEOUT;
}
} else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
return (err);
process:
if (drbr_empty(ifp, txr->br))
return (err);
/* Process the queue */
while (TRUE) {
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
next = drbr_dequeue(ifp, txr->br);
if (next == NULL)
break;
if ((err = igb_xmit(txr, &next)) != 0) {
/* Put the packet back on the ring if it was not consumed. */
if (next != NULL)
err = drbr_enqueue(ifp, txr->br, next);
break;
}
drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
ETHER_BPF_MTAP(ifp, next);
/* Set the watchdog */
txr->watchdog_timer = IGB_TX_TIMEOUT;
}
/* Running low on descriptors: tell the stack to back off. */
if (txr->tx_avail <= IGB_TX_OP_THRESHOLD)
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
return (err);
}
/*
** Flush all ring buffers
**
** Every TX ring's buf_ring is emptied under that ring's lock and the
** dequeued mbufs are freed; the generic interface queue is flushed
** last.
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*pkt;
	int		q;

	for (q = 0; q < adapter->num_queues; q++) {
		IGB_TX_LOCK(txr);
		for (;;) {
			pkt = buf_ring_dequeue_sc(txr->br);
			if (pkt == NULL)
				break;
			m_freem(pkt);
		}
		IGB_TX_UNLOCK(txr);
		txr++;
	}

	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */
/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.  Commands not handled here are passed to ether_ioctl().
 *  While the adapter is detaching every request returns 0 without
 *  doing anything.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
struct adapter *adapter = ifp->if_softc;
/* data is interpreted as an ifreq or an ifaddr depending on command. */
struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
struct ifaddr *ifa = (struct ifaddr *)data;
#endif
int error = 0;
/* error is still 0 here: requests are silently accepted during detach. */
if (adapter->in_detach)
return (error);
switch (command) {
case SIOCSIFADDR:
#ifdef INET
if (ifa->ifa_addr->sa_family == AF_INET) {
/*
* XXX
* Since resetting hardware takes a very long time
* and results in link renegotiation we only
* initialize the hardware only when it is absolutely
* required.
*/
ifp->if_flags |= IFF_UP;
if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
IGB_CORE_LOCK(adapter);
igb_init_locked(adapter);
IGB_CORE_UNLOCK(adapter);
}
if (!(ifp->if_flags & IFF_NOARP))
arp_ifinit(ifp, ifa);
} else
#endif
/* Without INET, or for other address families, defer to ether_ioctl(). */
error = ether_ioctl(ifp, command, data);
break;
case SIOCSIFMTU:
{
int max_frame_size;
IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
IGB_CORE_LOCK(adapter);
/* Largest frame accepted; the MTU limit is this minus header and CRC. */
max_frame_size = 9234;
if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
ETHER_CRC_LEN) {
IGB_CORE_UNLOCK(adapter);
error = EINVAL;
break;
}
ifp->if_mtu = ifr->ifr_mtu;
adapter->max_frame_size =
ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
/* Re-initialise so the new frame size takes effect. */
igb_init_locked(adapter);
IGB_CORE_UNLOCK(adapter);
break;
}
case SIOCSIFFLAGS:
IOCTL_DEBUGOUT("ioctl rcv'd:\
SIOCSIFFLAGS (Set Interface Flags)");
IGB_CORE_LOCK(adapter);
if (ifp->if_flags & IFF_UP) {
if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
/* Already running: only react to PROMISC/ALLMULTI changes. */
if ((ifp->if_flags ^ adapter->if_flags) &
(IFF_PROMISC | IFF_ALLMULTI)) {
igb_disable_promisc(adapter);
igb_set_promisc(adapter);
}
} else
igb_init_locked(adapter);
} else
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
igb_stop(adapter);
/* Remember the flags so the next call can detect what changed. */
adapter->if_flags = ifp->if_flags;
IGB_CORE_UNLOCK(adapter);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
IGB_CORE_LOCK(adapter);
/* Reprogram the multicast filter with interrupts masked. */
igb_disable_intr(adapter);
igb_set_multi(adapter);
igb_enable_intr(adapter);
IGB_CORE_UNLOCK(adapter);
}
break;
case SIOCSIFMEDIA:
/* Check SOL/IDER usage */
IGB_CORE_LOCK(adapter);
if (e1000_check_reset_block(&adapter->hw)) {
IGB_CORE_UNLOCK(adapter);
device_printf(adapter->dev, "Media change is"
" blocked due to SOL/IDER session.\n");
/* The request is dropped; error is still 0 at this point. */
break;
}
IGB_CORE_UNLOCK(adapter);
/* FALLTHROUGH */
case SIOCGIFMEDIA:
IOCTL_DEBUGOUT("ioctl rcv'd: \
SIOCxIFMEDIA (Get/Set Interface Media)");
error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
break;
case SIOCSIFCAP:
{
int mask, reinit;
IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
reinit = 0;
/* mask holds the capability bits the request wants toggled. */
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
if (mask & IFCAP_HWCSUM) {
ifp->if_capenable ^= IFCAP_HWCSUM;
reinit = 1;
}
if (mask & IFCAP_TSO4) {
ifp->if_capenable ^= IFCAP_TSO4;
reinit = 1;
}
if (mask & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
reinit = 1;
}
if (mask & IFCAP_LRO) {
ifp->if_capenable ^= IFCAP_LRO;
reinit = 1;
}
/* Any toggled capability needs a re-init, but only if running. */
if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
igb_init(adapter);
VLAN_CAPABILITIES(ifp);
break;
}
#ifdef IGB_IEEE1588
/*
** IOCTL support for Precision Time (IEEE 1588) Support
*/
case SIOCSHWTSTAMP:
error = igb_hwtstamp_ioctl(adapter, ifp);
break;
#endif
default:
error = ether_ioctl(ifp, command, data);
break;
}
return (error);
}
/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 *  Must be called with the core lock held.  Each ring's timer is
 *  examined and updated under that ring's TX lock.
 *
 **********************************************************************/
static void
igb_watchdog(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	bool		tx_hang = FALSE;

	IGB_CORE_LOCK_ASSERT(adapter);

	/*
	** The timer is set to IGB_TX_TIMEOUT every time start() queues
	** a packet.  Then txeof keeps resetting it as long as it cleans
	** at least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	**
	** With TX Multiqueue we need to check every queue's timer,
	** if any time out we do the reset.
	*/
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		/* A timer of 0 is idle; otherwise count it down by one. */
		if (txr->watchdog_timer == 0 ||
		    (--txr->watchdog_timer)) {
			IGB_TX_UNLOCK(txr);
			continue;
		} else {
			/* The countdown just reached 0: this ring is hung. */
			tx_hang = TRUE;
			IGB_TX_UNLOCK(txr);
			break;
		}
	}
	if (tx_hang == FALSE)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		txr = adapter->tx_rings; /* reset pointer */
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
			txr->watchdog_timer = IGB_TX_TIMEOUT;
			IGB_TX_UNLOCK(txr);
		}
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	/*
	 * Rewind to the first ring before dumping per-queue state.  The
	 * scan above leaves txr pointing at the hung ring, so without
	 * this the printout would pair queue index i with the wrong
	 * ring and could read past the end of tx_rings.
	 */
	txr = adapter->tx_rings;
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
		    txr->next_to_clean);
	}

	/* Mark the interface down, count the event and re-initialise. */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	igb_init_locked(adapter);
}
/*********************************************************************
 *  Init entry point
 *
 *  Brings the adapter to a consistent, running state.  It is reached
 *  from igb_init() and is also called directly by the driver (watchdog
 *  and media change) whenever the hardware has to be reprogrammed.
 *
 *  The caller must hold the core lock.  Nothing is returned: on
 *  failure a message is printed and the interface is left without
 *  IFF_DRV_RUNNING set (igb_stop() clears it on entry).
 **********************************************************************/
static void
igb_init_locked(struct adapter *adapter)
{
	struct rx_ring	*rxr = adapter->rx_rings;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba = 0;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 */
	if (adapter->hw.mac.type == e1000_82575) {
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		/* Log after the assignment so the value written is reported */
		INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
	}

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (ifp->if_mtu > ETHERMTU)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MCLBYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		igb_stop(adapter);
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Start the once-per-second housekeeping timer */
	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* Set up VLAN tag offload and filter */
	igb_setup_vlan_hw_support(adapter);

	/* Set default RX interrupt moderation */
	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(rxr->msix), igb_ave_latency);
		rxr->eitr_setting = igb_ave_latency;
	}

	/* Set TX interrupt rate & reset TX watchdog */
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(txr->msix), igb_ave_latency);
		/* The watchdog is a countdown, not a flag: 0 means disarmed */
		txr->watchdog_timer = 0;
	}

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
	igb_enable_intr(adapter);
	/* Raise a link-status-change interrupt cause by software */
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}
/*
 * Locked wrapper around igb_init_locked(): 'arg' is the adapter softc.
 * The core lock is taken for the duration of the (re)initialization.
 */
static void
igb_init(void *arg)
{
	struct adapter *sc;

	sc = (struct adapter *)arg;

	IGB_CORE_LOCK(sc);
	igb_init_locked(sc);
	IGB_CORE_UNLOCK(sc);
}
/*
 * Taskqueue handler behind the Legacy/MSI interrupt (it is the function
 * attached to adapter->rxtx_task).  Services the first RX and TX ring
 * while the interface is running and then turns interrupts back on.
 *
 * context - the adapter softc
 * pending - unused
 */
static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct rx_ring	*rx = adapter->rx_rings;
	struct tx_ring	*tx = adapter->tx_rings;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
		/* A non-zero return means RX work remains: run again later */
		if (igb_rxeof(rx, adapter->rx_process_limit))
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

		IGB_TX_LOCK(tx);
		igb_txeof(tx);

		/* Restart transmission if packets are still queued */
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, tx->br))
			igb_mq_start_locked(ifp, tx, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(tx, ifp);
#endif
		IGB_TX_UNLOCK(tx);
	}

	/* Interrupts are re-enabled whether or not the interface is running */
	igb_enable_intr(adapter);
}
/*
 * Per-ring RX taskqueue handler.  Cleans the ring passed in 'context'
 * and reschedules itself while igb_rxeof() reports more work.
 * 'pending' is unused.
 */
static void
igb_handle_rx(void *context, int pending)
{
	struct rx_ring	*ring = context;
	struct adapter	*adapter = ring->adapter;

	/* Nothing to do once the interface has been stopped */
	if ((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	if (igb_rxeof(ring, adapter->rx_process_limit) == 0)
		return;

	/* More to clean, schedule another task */
	taskqueue_enqueue(adapter->tq, &ring->rx_task);
}
/*
 * Per-ring TX taskqueue handler.  Under the ring's TX lock it reclaims
 * completed descriptors and restarts transmission when packets are
 * still waiting.  'context' is the tx_ring; 'pending' is unused.
 */
static void
igb_handle_tx(void *context, int pending)
{
	struct tx_ring	*ring = context;
	struct ifnet	*ifp = ring->adapter->ifp;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	IGB_TX_LOCK(ring);
	igb_txeof(ring);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, ring->br))
		igb_mq_start_locked(ifp, ring, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(ring, ifp);
#endif
	IGB_TX_UNLOCK(ring);
}
/*********************************************************************
 *
 *  MSI/Legacy interrupt filter.
 *
 *  Reads the interrupt cause, rejects interrupts that are not ours,
 *  masks further interrupts and defers the RX/TX work to the
 *  adapter's rxtx task.  Link changes and RX overruns are noted here.
 *
 *  Returns FILTER_STRAY or FILTER_HANDLED.
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	uint32_t	cause;

	cause = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/*
	 * All ones means the device is gone (hot eject).  A clear
	 * INT_ASSERTED bit - which also covers a cause of zero - means
	 * the interrupt is definitely not ours.
	 */
	if (cause == 0xffffffff ||
	    (cause & E1000_ICR_INT_ASSERTED) == 0)
		return (FILTER_STRAY);

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if ((cause & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) != 0) {
		adapter->hw.mac.get_link_status = 1;
		igb_update_link_status(adapter);
	}

	if ((cause & E1000_ICR_RXO) != 0)
		adapter->rx_overruns++;

	return (FILTER_HANDLED);
}
/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 *  'arg' is the tx_ring that owns the vector.  Completed descriptors
 *  are reclaimed under the TX lock for a bounded number of passes,
 *  a clean task is scheduled, and the vector is unmasked again.
 *
 **********************************************************************/
static void
igb_msix_tx(void *arg)
{
	struct tx_ring	*ring = arg;
	struct adapter	*adapter = ring->adapter;
	u32		budget = IGB_MAX_LOOP;
	bool		again;

	++ring->tx_irq;

	IGB_TX_LOCK(ring);
	for (;;) {
		again = igb_txeof(ring);
		/* Stop when the pass budget is spent or the ring is clean */
		if (budget-- == 0 || !again)
			break;
	}
	IGB_TX_UNLOCK(ring);

	/* Schedule a clean task */
	taskqueue_enqueue(adapter->tq, &ring->tx_task);

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, ring->eims);
}
/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 *  'arg' is the rx_ring that owns the vector.  The ring is cleaned
 *  for a bounded number of passes, the interrupt rate is optionally
 *  adapted (igb_enable_aim), another clean is scheduled and the
 *  vector is unmasked again.
 *
 **********************************************************************/
static void
igb_msix_rx(void *arg)
{
	struct rx_ring	*ring = arg;
	struct adapter	*adapter = ring->adapter;
	u32		budget = IGB_MAX_LOOP;
	bool		again;

	++ring->rx_irq;

	for (;;) {
		again = igb_rxeof(ring, adapter->rx_process_limit);
		/* Stop when the pass budget is spent or nothing is left */
		if (budget-- == 0 || !again)
			break;
	}

	/* Update interrupt rate */
	if (igb_enable_aim == TRUE)
		igb_update_aim(ring);

	/* Schedule another clean */
	taskqueue_enqueue(adapter->tq, &ring->rx_task);

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, ring->eims);
}
/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 *  'arg' is the adapter softc.  A link-status-change cause triggers
 *  a link status refresh; in every case the link interrupt is
 *  rearmed before returning.
 *
 **********************************************************************/
static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		cause;

	++adapter->link_irq;

	cause = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if ((cause & E1000_ICR_LSC) != 0) {
		adapter->hw.mac.get_link_status = 1;
		igb_update_link_status(adapter);
	}

	/* Rearm (also done for a spurious interrupt) */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
}
/*
** Routine to adjust the RX EITR value based on traffic,
** it's a simple three state model, but seems to help.
**
** The ring moves between the low, average and bulk latency settings
** according to the byte count accumulated in rxr->bytes since the
** previous call; the counter is cleared after each evaluation.
**
** Note that the three EITR values are tuneable using
** sysctl in real time. The feature can be effectively
** nullified by setting them equal.
*/
#define BULK_THRESHOLD	10000
#define AVE_THRESHOLD	1600

static void
igb_update_aim(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	u32		olditr, newitr;

	/* Update interrupt moderation based on traffic */
	olditr = rxr->eitr_setting;
	newitr = olditr;

	/* Idle, don't change setting */
	if (rxr->bytes == 0)
		return;

	if (olditr == igb_low_latency) {
		if (rxr->bytes > AVE_THRESHOLD)
			newitr = igb_ave_latency;
	} else if (olditr == igb_ave_latency) {
		if (rxr->bytes < AVE_THRESHOLD)
			newitr = igb_low_latency;
		else if (rxr->bytes > BULK_THRESHOLD)
			newitr = igb_bulk_latency;
	} else if (olditr == igb_bulk_latency) {
		if (rxr->bytes < BULK_THRESHOLD)
			newitr = igb_ave_latency;
	}

	if (olditr != newitr) {
		/* Change interrupt rate */
		rxr->eitr_setting = newitr;
		if (adapter->hw.mac.type == e1000_82575)
			newitr |= newitr << 16;
		else
			newitr |= 0x8000000;
		/*
		 * EITR is selected by the ring's MSIX vector, exactly as
		 * igb_init_locked() does when it programs the default
		 * rate.  The queue index (rxr->me) differs from the
		 * vector because RX vectors are numbered after the TX
		 * vectors, so using it would retune a TX vector instead.
		 */
		E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->msix), newitr);
	}

	rxr->bytes = 0;
}
/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.  It refreshes the link state under
 *  the core lock and fills in ifmr->ifm_status / ifmr->ifm_active.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	/* Without link only "valid, ethernet" is reported */
	if (adapter->link_active) {
		ifmr->ifm_status |= IFM_ACTIVE;

		switch (adapter->hw.phy.media_type) {
		case e1000_media_type_fiber:
		case e1000_media_type_internal_serdes:
			/* Fiber and serdes are reported as 1000SX full duplex */
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
			break;
		default:
			if (adapter->link_speed == 10)
				ifmr->ifm_active |= IFM_10_T;
			else if (adapter->link_speed == 100)
				ifmr->ifm_active |= IFM_100_TX;
			else if (adapter->link_speed == 1000)
				ifmr->ifm_active |= IFM_1000_T;

			ifmr->ifm_active |=
			    (adapter->link_duplex == FULL_DUPLEX) ?
			    IFM_FDX : IFM_HDX;
			break;
		}
	}

	IGB_CORE_UNLOCK(adapter);
}
/*********************************************************************
*
* Media Ioctl callback
*
* This routine is called when the user changes speed/duplex using
* media/mediopt option with ifconfig.
*
**********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
struct adapter *adapter = ifp->if_softc;
struct ifmedia *ifm = &adapter->media;
INIT_DEBUGOUT("igb_media_change: begin");
if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
return (EINVAL);
IGB_CORE_LOCK(adapter);
switch (IFM_SUBTYPE(ifm->ifm_media)) {
case IFM_AUTO:
adapter->hw.mac.autoneg = DO_AUTO_NEG;
adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
break;
case IFM_1000_LX:
case IFM_1000_SX:
case IFM_1000_T:
adapter->hw.mac.autoneg = DO_AUTO_NEG;
adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
break;
case IFM_100_TX:
adapter->hw.mac.autoneg = FALSE;
adapter->hw.phy.autoneg_advertised = 0;
if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
else
adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
break;
case IFM_10_T:
adapter->hw.mac.autoneg = FALSE;
adapter->hw.phy.autoneg_advertised = 0;
if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
else
adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
break;
default:
device_printf(adapter->dev, "Unsupported media type\n");
}
/* As the speed/duplex settings my have changed we need to
* reset the PHY.
*/
adapter->hw.phy.reset_disable = FALSE;
igb_init_locked(adapter);
IGB_CORE_UNLOCK(adapter);
return (0);
}
/*********************************************************************
*
* This routine maps the mbufs to Advanced TX descriptors.
* used by the 82575 adapter.
*
**********************************************************************/
static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
struct adapter *adapter = txr->adapter;
bus_dma_segment_t segs[IGB_MAX_SCATTER];
bus_dmamap_t map;
struct igb_tx_buffer *tx_buffer, *tx_buffer_mapped;
union e1000_adv_tx_desc *txd = NULL;
struct mbuf *m_head;
u32 olinfo_status = 0, cmd_type_len = 0;
int nsegs, i, j, error, first, last = 0;
u32 hdrlen = 0;
m_head = *m_headp;
/* Set basic descriptor constants */
cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
if (m_head->m_flags & M_VLANTAG)
cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
/*
* Force a cleanup if number of TX descriptors
* available hits the threshold
*/
if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
igb_txeof(txr);
/* Now do we at least have a minimal? */
if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
txr->no_desc_avail++;
return (ENOBUFS);
}
}
/*
* Map the packet for DMA.
*
* Capture the first descriptor index,
* this descriptor will have the index
* of the EOP which is the only one that
* now gets a DONE bit writeback.
*/
first = txr->next_avail_desc;
tx_buffer = &txr->tx_buffers[first];
tx_buffer_mapped = tx_buffer;
map = tx_buffer->map;
error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
*m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
if (error == EFBIG) {
struct mbuf *m;
m = m_defrag(*m_headp, M_DONTWAIT);
if (m == NULL) {
adapter->mbuf_defrag_failed++;
m_freem(*m_headp);
*m_headp = NULL;
return (ENOBUFS);
}
*m_headp = m;
/* Try it again */
error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
*m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
if (error == ENOMEM) {
adapter->no_tx_dma_setup++;
return (error);
} else if (error != 0) {
adapter->no_tx_dma_setup++;
m_freem(*m_headp);
*m_headp = NULL;
return (error);
}
} else if (error == ENOMEM) {
adapter->no_tx_dma_setup++;
return (error);
} else if (error != 0) {
adapter->no_tx_dma_setup++;
m_freem(*m_headp);
*m_headp = NULL;
return (error);
}
/* Check again to be sure we have enough descriptors */
if (nsegs > (txr->tx_avail - 2)) {
txr->no_desc_avail++;
bus_dmamap_unload(txr->txtag, map);
return (ENOBUFS);
}
m_head = *m_headp;
/*
* Set up the context descriptor:
* used when any hardware offload is done.
* This includes CSUM, VLAN, and TSO. It
* will use the first descriptor.
*/
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
if (igb_tso_setup(txr, m_head, &hdrlen)) {
cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
} else
return (ENXIO);
} else if (igb_tx_ctx_setup(txr, m_head))
olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
#ifdef IGB_IEEE1588
/* This is changing soon to an mtag detection */
if (we detect this mbuf has a TSTAMP mtag)
cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
#endif
/* Calculate payload length */
olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
<< E1000_ADVTXD_PAYLEN_SHIFT);
/* Set up our transmit descriptors */
i = txr->next_avail_desc;
for (j = 0; j < nsegs; j++) {
bus_size_t seg_len;
bus_addr_t seg_addr;
tx_buffer = &txr->tx_buffers[i];
txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
seg_addr = segs[j].ds_addr;
seg_len = segs[j].ds_len;
txd->read.buffer_addr = htole64(seg_addr);
txd->read.cmd_type_len = htole32(
adapter->txd_cmd | cmd_type_len | seg_len);
txd->read.olinfo_status = htole32(olinfo_status);
last = i;
if (++i == adapter->num_tx_desc)
i = 0;
tx_buffer->m_head = NULL;
tx_buffer->next_eop = -1;
}
txr->next_avail_desc = i;
txr->tx_avail -= nsegs;
tx_buffer->m_head = m_head;
tx_buffer_mapped->map = tx_buffer->map;
tx_buffer->map = map;
bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
/*
* Last Descriptor of Packet
* needs End Of Packet (EOP)
* and Report Status (RS)
*/
txd->read.cmd_type_len |=
htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
/*
* Keep track in the first buffer which
* descriptor will be written back
*/
tx_buffer = &txr->tx_buffers[first];
tx_buffer->next_eop = last;
/*
* Advance the Transmit Descriptor Tail (TDT), this tells the E1000
* that this frame is available to transmit.
*/
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
++txr->tx_packets;
return (0);
}
/*
 * Apply the interface's IFF_PROMISC / IFF_ALLMULTI flags to the
 * receive control register.  RCTL is left unwritten when neither
 * flag is set.
 */
static void
igb_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	rctl;

	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		/* Accept all unicast and all multicast */
		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		/* Accept all multicast, but not all unicast */
		rctl |= E1000_RCTL_MPE;
		rctl &= ~E1000_RCTL_UPE;
	} else
		return;

	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
}
/*
 * Clear both the unicast and multicast promiscuous enable bits in
 * the receive control register.
 */
static void
igb_disable_promisc(struct adapter *adapter)
{
	uint32_t rctl;

	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
}
/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever multicast address list is updated.
 *  The interface's link-level multicast addresses are gathered into a
 *  flat table and handed to the shared code.  When the list reaches
 *  MAX_NUM_MULTICAST_ADDRESSES entries, multicast promiscuous mode is
 *  enabled instead.
 *
 **********************************************************************/
static void
igb_set_multi(struct adapter *adapter)
{
	struct ifnet		*ifp = adapter->ifp;
	struct ifmultiaddr	*ifma;
	u8			mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
	u32			rctl;
	int			count = 0;

	IOCTL_DEBUGOUT("igb_set_multi: begin");

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		/* Table is full; stop collecting */
		if (count == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[count * ETH_ADDR_LEN], ETH_ADDR_LEN);
		count++;
	}
	if_maddr_runlock(ifp);

	if (count < MAX_NUM_MULTICAST_ADDRESSES) {
		e1000_update_mc_addr_list(&adapter->hw, mta, count);
		return;
	}

	/* Too many groups to filter: receive all multicast */
	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	rctl |= E1000_RCTL_MPE;
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
}
/*********************************************************************
 *  Timer routine
 *
 *  This routine checks for link status and updates statistics.
 *  It also runs the TX watchdog, kicks every RX queue with a software
 *  interrupt and rearms itself to fire again in one second (hz ticks).
 *  The core lock must be held.
 *
 **********************************************************************/
static void
igb_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_update_link_status(adapter);
	igb_update_stats_counters(adapter);

	if (igb_display_debug_stats &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_print_hw_stats(adapter);

	/*
	 * Each second we check the watchdog to
	 * protect against hardware hangs.
	 */
	igb_watchdog(adapter);

	/* Trigger an RX interrupt on all queues */
	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
}
/*
 * Determine the current link state for the adapter's media type and,
 * when it differs from adapter->link_active, record the transition:
 * speed/duplex and baudrate are updated, the stack is notified through
 * if_link_state_change(), and on link loss every TX watchdog is
 * disarmed.
 */
static void
igb_update_link_status(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		link_up = 0;

	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_up = !hw->mac.get_link_status;
		} else
			link_up = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_up = (E1000_READ_REG(hw, E1000_STATUS) &
		    E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_up = hw->mac.serdes_has_link;
		break;
	case e1000_media_type_unknown:
	default:
		/* No way to probe: treated as link down */
		break;
	}

	/* Now we check if a transition has happened */
	if (link_up && (adapter->link_active == 0)) {
		e1000_get_speed_and_duplex(hw,
		    &adapter->link_speed, &adapter->link_duplex);
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		ifp->if_baudrate = adapter->link_speed * 1000000;
		if_link_state_change(ifp, LINK_STATE_UP);
	} else if (!link_up && (adapter->link_active == 1)) {
		adapter->link_speed = 0;
		ifp->if_baudrate = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		if_link_state_change(ifp, LINK_STATE_DOWN);

		/* Turn off watchdogs */
		for (int q = 0; q < adapter->num_queues; q++)
			adapter->tx_rings[q].watchdog_timer = FALSE;
	}
}
/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC.  Interrupts are masked, the local timer
 *  is stopped, the RUNNING/OACTIVE driver flags are cleared and the
 *  wake-up control register is zeroed.
 *
 *  'arg' is the adapter softc; the core lock must be held.
 *
 **********************************************************************/
static void
igb_stop(void *arg)
{
	struct adapter	*adapter = arg;
	struct e1000_hw	*hw = &adapter->hw;

	IGB_CORE_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("igb_stop: begin");

	igb_disable_intr(adapter);

	callout_stop(&adapter->timer);

	/* Tell the stack that the interface is no longer active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	e1000_reset_hw(hw);
	E1000_WRITE_REG(hw, E1000_WUC, 0);
}
/*********************************************************************
*
* Determine hardware revision.
*
**********************************************************************/
static void
igb_identify_hardware(struct adapter *adapter)
{
device_t dev = adapter->dev;
/* Make sure our PCI config space has the necessary stuff set */
adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
(adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
device_printf(dev, "Memory Access and/or Bus Master bits "
"were not set!\n");
adapter->hw.bus.pci_cmd_word |=
(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
pci_write_config(dev, PCIR_COMMAND,
adapter->hw.bus.pci_cmd_word, 2);
}
/* Save off the information about this board */
adapter->hw.vendor_id = pci_get_vendor(dev);
adapter->hw.device_id = pci_get_device(dev);
adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
adapter->hw.subsystem_vendor_id =
pci_read_config(dev, PCIR_SUBVEND_0, 2);
adapter->hw.subsystem_device_id =
pci_read_config(dev, PCIR_SUBDEV_0, 2);
/* Do Shared Code Init and Setup */
if (e1000_set_mac_type(&adapter->hw)) {
device_printf(dev, "Setup init failure\n");
return;
}
}
/*
 * Map the register BAR (BAR 0), publish its bus tag/handle to the
 * shared code through adapter->osdep, and choose the interrupt scheme
 * via igb_setup_msix().
 *
 * Returns 0 on success, ENXIO when the memory BAR cannot be allocated.
 */
static int
igb_allocate_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		bar = PCIR_BAR(0);

	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &bar, RF_ACTIVE);
	if (adapter->pci_mem == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		return (ENXIO);
	}

	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->pci_mem);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->pci_mem);
	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;

	/* Defaults for Legacy or MSI */
	adapter->num_queues = 1;

	/* This will setup either MSI/X or MSI */
	adapter->msix = igb_setup_msix(adapter);

	adapter->hw.back = &adapter->osdep;

	return (0);
}
/*********************************************************************
*
* Setup the Legacy or MSI Interrupt handler
*
**********************************************************************/
static int
igb_allocate_legacy(struct adapter *adapter)
{
device_t dev = adapter->dev;
int error, rid = 0;
/* Turn off all interrupts */
E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* MSI RID is 1 */
if (adapter->msix == 1)
rid = 1;
/* We allocate a single interrupt resource */
adapter->res = bus_alloc_resource_any(dev,
SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
if (adapter->res == NULL) {
device_printf(dev, "Unable to allocate bus resource: "
"interrupt\n");
return (ENXIO);
}
/*
* Try allocating a fast interrupt and the associated deferred
* processing contexts.
*/
TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
taskqueue_thread_enqueue, &adapter->tq);
taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
device_get_nameunit(adapter->dev));
if ((error = bus_setup_intr(dev, adapter->res,
INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
adapter, &adapter->tag)) != 0) {
device_printf(dev, "Failed to register fast interrupt "
"handler: %d\n", error);
taskqueue_free(adapter->tq);
adapter->tq = NULL;
return (error);
}
return (0);
}
/*********************************************************************
 *
 *  Setup the MSIX Interrupt handlers:
 *
 *  One vector per TX ring, then one per RX ring, then a final vector
 *  for link events.  The IRQ rid of a vector is its number plus one.
 *  Each ring also gets a task for deferred processing, and with more
 *  than one queue the TX/RX vectors of queue i are bound to CPU i.
 *
 *  Returns 0 on success or an errno value.
 *
 **********************************************************************/
static int
igb_allocate_msix(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	int		error, rid, vector = 0;

	/*
	 * Setup the interrupt handlers
	 */

	/* TX Setup */
	for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
		rid = vector + 1;
		txr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
		if (txr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "MSIX TX Interrupt\n");
			return (ENXIO);
		}
		error = bus_setup_intr(dev, txr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
		    igb_msix_tx, txr, &txr->tag);
		if (error) {
			/* Release the IRQ before dropping the only reference */
			bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res);
			txr->res = NULL;
			device_printf(dev, "Failed to register TX handler\n");
			return (error);
		}
		/* Make tasklet for deferred handling - one per queue */
		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
		txr->msix = vector;
		if (adapter->hw.mac.type == e1000_82575)
			txr->eims = E1000_EICR_TX_QUEUE0 << i;
		else
			txr->eims = 1 << vector;
		/*
		** Bind the msix vector, and thus the
		** ring to the corresponding cpu.
		*/
		if (adapter->num_queues > 1)
			bus_bind_intr(dev, txr->res, i);
	}

	/* RX Setup */
	for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) {
		rid = vector + 1;
		rxr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
		if (rxr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "MSIX RX Interrupt\n");
			return (ENXIO);
		}
		error = bus_setup_intr(dev, rxr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
		    igb_msix_rx, rxr, &rxr->tag);
		if (error) {
			/* Release the IRQ before dropping the only reference */
			bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res);
			rxr->res = NULL;
			device_printf(dev, "Failed to register RX handler\n");
			return (error);
		}
		/* Make tasklet for deferred handling - one per queue */
		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
		rxr->msix = vector;
		if (adapter->hw.mac.type == e1000_82575)
			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
		else
			rxr->eims = 1 << vector;
		/* Get a mask for local timer */
		adapter->rx_mask |= rxr->eims;
		/*
		** Bind the msix vector, and thus the
		** ring to the corresponding cpu.
		** Notice that this makes an RX/TX pair
		** bound to each CPU, limited by the MSIX
		** vectors.
		*/
		if (adapter->num_queues > 1)
			bus_bind_intr(dev, rxr->res, i);
	}

	/* And Link */
	rid = vector + 1;
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res == NULL) {
		device_printf(dev,
		    "Unable to allocate bus resource: "
		    "MSIX Link Interrupt\n");
		return (ENXIO);
	}
	if ((error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    igb_msix_link, adapter, &adapter->tag)) != 0) {
		device_printf(dev, "Failed to register Link handler\n");
		return (error);
	}
	adapter->linkvec = vector;

	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	/* An M_NOWAIT allocation may fail; do not start threads on NULL */
	if (adapter->tq == NULL) {
		device_printf(dev, "Unable to create taskqueue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));

	return (0);
}
static void
igb_configure_queues(struct adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
struct tx_ring *txr;
struct rx_ring *rxr;
/* Turn on MSIX */
/*
** 82576 uses IVARs to route MSI/X
** interrupts, its not very intuitive,
** study the code carefully :)
*/
if (adapter->hw.mac.type == e1000_82576) {
u32 ivar = 0;
/* First turn on the capability */
E1000_WRITE_REG(hw, E1000_GPIE,
E1000_GPIE_MSIX_MODE |
E1000_GPIE_EIAME |
E1000_GPIE_PBA | E1000_GPIE_NSICR);
/* RX */
for (int i = 0; i < adapter->num_queues; i++) {
u32 index = i & 0x7; /* Each IVAR has two entries */
ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
rxr = &adapter->rx_rings[i];
if (i < 8) {
ivar &= 0xFFFFFF00;
ivar |= rxr->msix | E1000_IVAR_VALID;
} else {
ivar &= 0xFF00FFFF;
ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
}
E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
adapter->eims_mask |= rxr->eims;
}
/* TX */
for (int i = 0; i < adapter->num_queues; i++) {
u32 index = i & 0x7; /* Each IVAR has two entries */
ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
txr = &adapter->tx_rings[i];
if (i < 8) {
ivar &= 0xFFFF00FF;
ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
} else {
ivar &= 0x00FFFFFF;
ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
}
E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
adapter->eims_mask |= txr->eims;
}
/* And for the link interrupt */
ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
adapter->link_mask = 1 << adapter->linkvec;
adapter->eims_mask |= adapter->link_mask;
E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
} else
{ /* 82575 */
int tmp;
/* enable MSI-X PBA support*/
tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
tmp |= E1000_CTRL_EXT_PBA_CLR;
/* Auto-Mask interrupts upon ICR read. */
tmp |= E1000_CTRL_EXT_EIAME;
tmp |= E1000_CTRL_EXT_IRCA;
E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
/* TX */
for (int i = 0; i < adapter->num_queues; i++) {
txr = &adapter->tx_rings[i];
E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
txr->eims);
adapter->eims_mask |= txr->eims;
}
/* RX */
for (int i = 0; i < adapter->num_queues; i++) {
rxr = &adapter->rx_rings[i];
E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
rxr->eims);
adapter->eims_mask |= rxr->eims;
}
/* Link */
E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
E1000_EIMS_OTHER);
adapter->link_mask |= E1000_EIMS_OTHER;
adapter->eims_mask |= adapter->link_mask;
}
return;
}
/*
 * Undo igb_allocate_pci_resources() and the interrupt allocation:
 * tear down and release every ring's IRQ, then the Legacy/MSI or link
 * IRQ, then the MSI/MSI-X messages and finally the memory BARs.
 */
static void
igb_free_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int		rid;

	/*
	** There is a slight possibility of a failure mode
	** in attach that will result in entering this function
	** before interrupt resources have been initialized, and
	** in that case we do not want to execute the loops below
	** We can detect this reliably by the state of the adapter
	** res pointer.
	*/
	if (adapter->res == NULL)
		goto mem;

	/*
	 * First release all the TX/RX interrupt resources:
	 * (the rid of a ring's IRQ is its MSIX vector plus one)
	 */
	for (int q = 0; q < adapter->num_queues; q++) {
		txr = &adapter->tx_rings[q];
		rid = txr->msix + 1;
		if (txr->tag != NULL) {
			bus_teardown_intr(dev, txr->res, txr->tag);
			txr->tag = NULL;
		}
		if (txr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res);
	}

	for (int q = 0; q < adapter->num_queues; q++) {
		rxr = &adapter->rx_rings[q];
		rid = rxr->msix + 1;
		if (rxr->tag != NULL) {
			bus_teardown_intr(dev, rxr->res, rxr->tag);
			rxr->tag = NULL;
		}
		if (rxr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res);
	}

	/* Clean the Legacy or Link interrupt last */
	if (adapter->linkvec)	/* we are doing MSIX */
		rid = adapter->linkvec + 1;
	else			/* MSI uses rid 1, legacy uses rid 0 */
		rid = (adapter->msix != 0) ? 1 : 0;

	if (adapter->tag != NULL) {
		bus_teardown_intr(dev, adapter->res, adapter->tag);
		adapter->tag = NULL;
	}
	if (adapter->res != NULL)
		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);

mem:
	if (adapter->msix)
		pci_release_msi(dev);

	if (adapter->msix_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);

	if (adapter->pci_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->pci_mem);
}
/*
 * Setup Either MSI/X or MSI
 *
 * Tries MSI-X first, wanting two vectors (RX/TX pair) per queue plus
 * one for the link interrupt.  If MSI-X cannot be used the MSI-X table
 * mapping is released and a single MSI message is tried instead.
 *
 * Returns the number of interrupt messages actually allocated:
 * more than 1 for MSI-X, 1 for MSI, and 0 when neither could be
 * allocated (the caller then uses a legacy interrupt).  The value is
 * always a count, never an errno.
 */
static int
igb_setup_msix(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		rid, want, queues, msgs;

	/* First try MSI/X */
	rid = PCIR_BAR(IGB_MSIX_BAR);
	adapter->msix_mem = bus_alloc_resource_any(dev,
	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (!adapter->msix_mem) {
		/* May not be enabled */
		device_printf(adapter->dev,
		    "Unable to map MSIX table \n");
		goto msi;
	}

	msgs = pci_msix_count(dev);
	if (msgs == 0)	/* system has msix disabled */
		goto msix_release;

	/* Figure out a reasonable auto config value */
	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;

	if (igb_num_queues == 0)
		igb_num_queues = queues;

	/* Too few vectors for even one RX/TX pair plus link */
	if (igb_num_queues < 1)
		goto msix_release;

	/*
	** Two vectors (RX/TX pair) per queue
	** plus an additional for Link interrupt
	*/
	want = (igb_num_queues * 2) + 1;
	if (msgs < want) {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors configured, but %d queues wanted!\n",
		    msgs, want);
		/*
		 * Fall back to MSI.  Returning an errno here would be
		 * taken by the caller as a vector count.
		 */
		goto msix_release;
	}
	msgs = want;

	if (pci_alloc_msix(dev, &msgs) == 0) {
		device_printf(adapter->dev,
		    "Using MSIX interrupts with %d vectors\n", msgs);
		adapter->num_queues = igb_num_queues;
		return (msgs);
	}

msix_release:
	/* MSI-X is not going to be used: drop the table mapping */
	bus_release_resource(dev, SYS_RES_MEMORY,
	    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
	adapter->msix_mem = NULL;

msi:
	msgs = pci_msi_count(dev);
	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
		device_printf(adapter->dev,"Using MSI interrupt\n");
		return (msgs);
	}

	/* Nothing was allocated: report that, not the advertised count */
	return (0);
}
/*********************************************************************
 *
 *  Initialize the hardware to a configuration
 *  as specified by the adapter structure.
 *
 *  Resets the MAC, takes control from firmware, programs the flow
 *  control parameters and runs e1000_init_hw().
 *
 *  Returns 0 on success, EIO if e1000_init_hw() fails.
 *
 **********************************************************************/
static int
igb_hardware_init(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	u32		rx_buffer_size;

	INIT_DEBUGOUT("igb_hardware_init: begin");

	/* Issue a global reset */
	e1000_reset_hw(&adapter->hw);

	/* Let the firmware know the OS is in control */
	igb_get_hw_control(adapter);

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitrary value of 1500 which will
	 *   restart after one full frame is pulled from the buffer. There
	 *   could be several smaller frames in the buffer and if so they will
	 *   not trigger the XON until their total number reduces the buffer
	 *   by 1500.
	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
	 */
	if (adapter->hw.mac.type == e1000_82576)
		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
		    E1000_RXPBS) & 0xffff) << 10 );
	else
		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
		    E1000_PBA) & 0xffff) << 10 );

	adapter->hw.fc.high_water = rx_buffer_size -
	    roundup2(adapter->max_frame_size, 1024);
	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;

	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
	adapter->hw.fc.send_xon = TRUE;

	/*
	 * Set Flow control, use the tunable location if sane.
	 * Both bounds must hold; with "||" the test was always true and
	 * any out-of-range tunable value was passed straight through.
	 */
	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
		adapter->hw.fc.requested_mode = igb_fc_setting;
	else
		adapter->hw.fc.requested_mode = e1000_fc_none;

	if (e1000_init_hw(&adapter->hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed\n");
		return (EIO);
	}

	e1000_check_for_link(&adapter->hw);

	return (0);
}
/*********************************************************************
 *
 *  Create the ifnet for this adapter, fill in its entry points and
 *  capabilities, attach it as an Ethernet interface and register the
 *  supported media types.  Panics if the ifnet cannot be allocated.
 *
 **********************************************************************/
static void
igb_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ifnet	*ifp;
	int		optical;

	INIT_DEBUGOUT("igb_setup_interface: begin");

	adapter->ifp = if_alloc(IFT_ETHER);
	ifp = adapter->ifp;
	if (ifp == NULL)
		panic("%s: can not if_alloc()", device_get_nameunit(dev));

	/* Identity and driver entry points. */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init = igb_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = igb_ioctl;
	ifp->if_start = igb_start;
#if __FreeBSD_version >= 800000
	ifp->if_transmit = igb_mq_start;
	ifp->if_qflush = igb_qflush;
#endif

	/* The legacy send queue holds one entry less than the TX ring. */
	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
	IFQ_SET_READY(&ifp->if_snd);

	ether_ifattach(ifp, adapter->hw.mac.addr);

	/* Offload capabilities, all enabled by default. */
	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU |
	    IFCAP_TSO4 | IFCAP_JUMBO_MTU;
	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;

	/*
	 * Register the media change/status callbacks and the media
	 * types this adapter supports.
	 */
	ifmedia_init(&adapter->media, IFM_IMASK,
	    igb_media_change, igb_media_status);

	optical =
	    (adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes);

	if (optical) {
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL);
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_1000_SX, 0, NULL);
	} else {
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL);
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_100_TX, 0, NULL);
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL);
		/* Gigabit entries are skipped for the IFE PHY type. */
		if (adapter->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&adapter->media,
			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
			    IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}

	/* Autoselect is always offered and is the default. */
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}
/*
 * Manage DMA'able memory.
 *
 * bus_dmamap_load() callback: on success, store the bus address of the
 * first segment through 'arg' (a bus_addr_t pointer).  On error the
 * destination is left untouched.
 */
static void
igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *paddr = arg;

	if (error == 0)
		*paddr = segs[0].ds_addr;
}
/*
 * Allocate 'size' bytes of single-segment DMA memory and load it.
 *
 * On success returns 0 with dma->dma_tag, dma->dma_map, dma->dma_vaddr
 * and dma->dma_paddr filled in.  On failure returns a non-zero errno
 * with everything released and dma->dma_tag / dma->dma_map set to NULL.
 *
 * mapflags is OR-ed with BUS_DMA_NOWAIT and passed to bus_dmamap_load().
 */
static int
igb_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct igb_dma_alloc *dma, int mapflags)
{
	int error;

	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    size,			/* maxsize */
	    1,				/* nsegments */
	    size,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &dma->dma_tag);
	if (error) {
		device_printf(adapter->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, error);
		goto fail_0;
	}

	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (error) {
		device_printf(adapter->dev,
		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
		    __func__, (uintmax_t)size, error);
		/* Nothing was allocated, so only the tag must go. */
		goto fail_1;
	}

	dma->dma_paddr = 0;
	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
	if (error || dma->dma_paddr == 0) {
		/*
		 * The callback leaves dma_paddr at 0 when it is handed
		 * an error.  Never report success from this path: the
		 * resources are torn down below.
		 */
		if (error == 0)
			error = ENOMEM;
		device_printf(adapter->dev,
		    "%s: bus_dmamap_load failed: %d\n",
		    __func__, error);
		goto fail_3;
	}

	return (0);

fail_3:
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_map = NULL;
	dma->dma_tag = NULL;

	return (error);
}
/*
 * Undo igb_dma_malloc(): sync, unload and free the memory behind the
 * map (if there is one) and destroy the tag.  Does nothing when the
 * tag is already NULL, so it is safe on a never-allocated descriptor.
 */
static void
igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
{
	if (dma->dma_tag != NULL) {
		if (dma->dma_map != NULL) {
			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
			    dma->dma_map);
			dma->dma_map = NULL;
		}
		bus_dma_tag_destroy(dma->dma_tag);
		dma->dma_tag = NULL;
	}
}
/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 *  Returns 0 on success or ENOMEM on any failure.  On failure the
 *  descriptor memory of the rings that were completely set up and both
 *  ring arrays are released.
 *
 *  NOTE(review): the error paths do not destroy the ring mutexes that
 *  were already initialized, and the ring that failed part-way is not
 *  counted in txconf/rxconf -- confirm whether attach cleans those up.
 *
 **********************************************************************/
static int
igb_allocate_queues(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring *txr;
	struct rx_ring *rxr;
	int rsize, tsize, error = E1000_SUCCESS;
	int txconf = 0, rxconf = 0;

	/* First allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->me = i;

		/* Initialize the TX lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (igb_dma_malloc(adapter, tsize,
		    &txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (igb_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#if __FreeBSD_version >= 800000
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
#endif
	}

	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		rxr->adapter = adapter;
		rxr->me = i;

		/*
		 * Initialize the RX lock.  The name must carry this RX
		 * ring's own index; using the TX ring pointer here gave
		 * every RX lock the index of the last TX ring.
		 */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (igb_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring*/
		if (igb_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	return (0);

err_rx_desc:
	/* Release descriptor memory of the fully set-up RX rings. */
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		igb_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	/* Release descriptor memory of the fully set-up TX rings. */
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		igb_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
fail:
	return (error);
}
/*********************************************************************
*
* Allocate memory for tx_buffer structures. The tx_buffer stores all
* the information needed to transmit a packet on the wire. This is
* called only once at attach, setup is done every reset.
*
**********************************************************************/
static int
igb_allocate_transmit_buffers(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
device_t dev = adapter->dev;
struct igb_tx_buffer *txbuf;
int error, i;
/*
* Setup DMA descriptor areas.
*/
if ((error = bus_dma_tag_create(NULL, /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
IGB_TSO_SIZE, /* maxsize */
IGB_MAX_SCATTER, /* nsegments */
PAGE_SIZE, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&txr->txtag))) {
device_printf(dev,"Unable to allocate TX DMA tag\n");
goto fail;
}
if (!(txr->tx_buffers =
(struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate tx_buffer memory\n");
error = ENOMEM;
goto fail;
}
/* Create the descriptor buffer dma maps */
txbuf = txr->tx_buffers;
for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
if (error != 0) {
device_printf(dev, "Unable to create TX DMA map\n");
goto fail;
}
}
return 0;
fail:
/* We free all, it handles case where we are in the middle */
igb_free_transmit_structures(adapter);
return (error);
}
/*********************************************************************
 *
 *  Reset one transmit ring to its empty state: zero the descriptors,
 *  rewind the indices, drop any mbufs still attached to the buffers
 *  and mark every descriptor as available.
 *
 **********************************************************************/
static void
igb_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	int slot;

	/* Wipe whatever the descriptors held before. */
	bzero((void *)txr->tx_base,
	    (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);

	/* Both ring indices start over at the beginning. */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Release mbufs left over from a previous run. */
	for (slot = 0; slot < adapter->num_tx_desc; slot++) {
		struct igb_tx_buffer *tb = &txr->tx_buffers[slot];

		if (tb->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tb->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, tb->map);
			m_freem(tb->m_head);
			tb->m_head = NULL;
		}
		/* -1 means "no end-of-packet recorded for this slot". */
		tb->next_eop = -1;
	}

	/* The whole ring is free again. */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
/*********************************************************************
 *
 *  Reset every transmit ring of the adapter via
 *  igb_setup_transmit_ring().
 *
 **********************************************************************/
static void
igb_setup_transmit_structures(struct adapter *adapter)
{
	int q;

	for (q = 0; q < adapter->num_queues; q++)
		igb_setup_transmit_ring(&adapter->tx_rings[q]);
}
/*********************************************************************
 *
 *  Program the transmit side of the hardware: per-queue descriptor
 *  ring base/length/head/tail and queue enable, then the global
 *  transmit control register.
 *
 **********************************************************************/
static void
igb_initialize_transmit_units(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	u32		tctl, txdctl;

	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");

	/* Per-queue ring registers. */
	for (int i = 0; i < adapter->num_queues; i++) {
		struct tx_ring *txr = &adapter->tx_rings[i];
		u64 bus_addr = txr->txdma.dma_paddr;

		/* Ring length, then the 64-bit base split in two halves. */
		E1000_WRITE_REG(hw, E1000_TDLEN(i),
		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(i),
		    (uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(i),
		    (uint32_t)bus_addr);

		/* Head and tail both start at descriptor 0. */
		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(hw, E1000_TDH(i), 0);

		HW_DEBUGOUT2("Base = %x, Length = %x\n",
		    E1000_READ_REG(hw, E1000_TDBAL(i)),
		    E1000_READ_REG(hw, E1000_TDLEN(i)));

		/* Default descriptor command bits used when transmitting. */
		adapter->txd_cmd = E1000_TXD_CMD_IFCS;

		/* Turn the queue on. */
		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
	}

	/* Build the new Transmit Control Register value. */
	tctl = E1000_READ_REG(hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	e1000_config_collision_dist(hw);

	/* Writing TCTL with the enable bit set starts the transmit unit. */
	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
}
/*********************************************************************
 *
 *  Tear down every transmit ring: free its buffers and descriptor
 *  memory under the ring lock, destroy the lock, and finally free
 *  the ring array itself.
 *
 **********************************************************************/
static void
igb_free_transmit_structures(struct adapter *adapter)
{
	int q;

	for (q = 0; q < adapter->num_queues; q++) {
		struct tx_ring *txr = &adapter->tx_rings[q];

		IGB_TX_LOCK(txr);
		igb_free_transmit_buffers(txr);
		igb_dma_free(adapter, &txr->txdma);
		IGB_TX_UNLOCK(txr);
		IGB_TX_LOCK_DESTROY(txr);
	}

	free(adapter->tx_rings, M_DEVBUF);
}
/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 *  Releases, for one ring: any mbufs still attached to tx_buffers,
 *  the per-descriptor DMA maps, the buf_ring (FreeBSD >= 8), the
 *  tx_buffer array and the TX DMA tag.  Every step is guarded, so the
 *  function is safe on a partially set-up ring and may be called more
 *  than once.
 *
 **********************************************************************/
static void
igb_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct igb_tx_buffer *tx_buffer;
	int i;

	INIT_DEBUGOUT("free_transmit_ring: begin");

	/*
	 * A missing buffer array only means there are no maps or mbufs
	 * to release; the buf_ring and the DMA tag may still exist and
	 * are handled below instead of being leaked by an early return.
	 */
	if (txr->tx_buffers != NULL) {
		tx_buffer = txr->tx_buffers;
		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
			if (tx_buffer->m_head != NULL) {
				bus_dmamap_sync(txr->txtag, tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
				if (tx_buffer->map != NULL) {
					bus_dmamap_destroy(txr->txtag,
					    tx_buffer->map);
					tx_buffer->map = NULL;
				}
			} else if (tx_buffer->map != NULL) {
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		}
	}
#if __FreeBSD_version >= 800000
	if (txr->br != NULL) {
		buf_ring_free(txr->br, M_DEVBUF);
		/* Do not leave a dangling pointer behind. */
		txr->br = NULL;
	}
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	/* The tag goes last, after every map created from it. */
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}
/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors (82575)
 *
 *  Writes one context descriptor at txr->next_avail_desc, consumes it
 *  (tx_avail is decremented) and stores the total header length in
 *  *hdrlen.  Returns TRUE on success.  Returns FALSE, without touching
 *  the ring or the packet, when the Ethernet/IP/TCP headers are not
 *  all in the first mbuf or the packet is not IPv4 TCP.
 *
 **********************************************************************/
static boolean_t
igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_tx_buffer *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0;
	u16 vtag = 0;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct tcphdr *th;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehdrlen = ETHER_HDR_LEN;

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;

	/* Only supports IPV4 for now */
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;   /* 0 */
	ip_hlen = ip->ip_hl << 2;
	/*
	 * The check above assumed an option-less IP header.  With IP
	 * options the TCP header sits further in; make sure it is still
	 * inside the first mbuf before it is written to below.
	 */
	if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
		return FALSE;
	ip->ip_sum = 0;
	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	/* Seed the TCP checksum with the pseudo-header sum. */
	th->th_sum = in_pseudo(ip->ip_src.s_addr,
	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	tcp_hlen = th->th_off << 2;
	/*
	 * Calculate header length, this is used
	 * in the transmit desc in igb_xmit
	 */
	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		/*
		 * Widen before shifting: a u16 promotes to int, and
		 * shifting a tag with its top bit set into the sign bit
		 * is undefined behavior.
		 */
		vlan_macip_lens |= ((u32)vtag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	/* A context descriptor carries no mbuf and no end-of-packet. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	return TRUE;
}
/*********************************************************************
*
* Context Descriptor setup for VLAN or CSUM
*
**********************************************************************/
static bool
igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
{
struct adapter *adapter = txr->adapter;
struct e1000_adv_tx_context_desc *TXD;
struct igb_tx_buffer *tx_buffer;
uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
struct ether_vlan_header *eh;
struct ip *ip = NULL;
struct ip6_hdr *ip6;
int ehdrlen, ctxd, ip_hlen = 0;
u16 etype, vtag = 0;
u8 ipproto = 0;
bool offload = TRUE;
if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
offload = FALSE;
ctxd = txr->next_avail_desc;
tx_buffer = &txr->tx_buffers[ctxd];
TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
/*
** In advanced descriptors the vlan tag must
** be placed into the context descriptor, thus
** we need to be here just for that setup.
*/
if (mp->m_flags & M_VLANTAG) {
vtag = htole16(mp->m_pkthdr.ether_vtag);
vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
} else if (offload == FALSE)
return FALSE;
/*
* Determine where frame payload starts.
* Jump over vlan headers if already present,
* helpful for QinQ too.
*/
eh = mtod(mp, struct ether_vlan_header *);
if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
etype = ntohs(eh->evl_proto);
ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
} else {
etype = ntohs(eh->evl_encap_proto);
ehdrlen = ETHER_HDR_LEN;
}
/* Set the ether header length */
vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
switch (etype) {
case ETHERTYPE_IP:
ip = (struct ip *)(mp->m_data + ehdrlen);
ip_hlen = ip->ip_hl << 2;
if (mp->m_len < ehdrlen + ip_hlen) {
offload = FALSE;
break;
}
ipproto = ip->ip_p;
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
break;
case ETHERTYPE_IPV6:
ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
ip_hlen = sizeof(struct ip6_hdr);
if (mp->m_len < ehdrlen + ip_hlen)
return (FALSE);
ipproto = ip6->ip6_nxt;
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
break;
default:
offload = FALSE;
break;
}
vlan_macip_lens |= ip_hlen;
type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
switch (ipproto) {
case IPPROTO_TCP:
if (mp->m_pkthdr.csum_flags & CSUM_TCP)
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
break;
case IPPROTO_UDP:
if (mp->m_pkthdr.csum_flags & CSUM_UDP)
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
break;
#if __FreeBSD_version >= 800000
case IPPROTO_SCTP:
if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
break;
#endif
default:
offload = FALSE;
break;
}
/* Now copy bits into descriptor */
TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
TXD->seqnum_seed = htole32(0);
TXD->mss_l4len_idx = htole32(0);
tx_buffer->m_head = NULL;
tx_buffer->next_eop = -1;
/* We've consumed the first desc, adjust counters */
if (++ctxd == adapter->num_tx_desc)
ctxd = 0;
txr->next_avail_desc = ctxd;
--txr->tx_avail;
return (offload);
}
/**********************************************************************
 *
 *  Reclaim transmit descriptors the hardware has finished with.
 *
 *  Walks the ring from next_to_clean one packet at a time; a packet is
 *  reclaimed once its end-of-packet descriptor has the DD status bit
 *  set.  Reclaimed descriptors are zeroed and their mbufs freed.
 *
 *  Must be called with the ring's TX lock held.
 *
 *  Returns FALSE when the ring is (or has become) completely empty,
 *  TRUE when descriptors are still outstanding.
 *
 **********************************************************************/
static bool
igb_txeof(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
	struct ifnet		*ifp = adapter->ifp;
	struct igb_tx_buffer	*buf;
	struct e1000_tx_desc	*desc, *eop;
	int			idx, eop_idx, stop, avail;
	u32			reclaimed = 0;

	IGB_TX_LOCK_ASSERT(txr);

	/* Nothing is outstanding on a completely free ring. */
	if (txr->tx_avail == adapter->num_tx_desc)
		return FALSE;

	avail = txr->tx_avail;
	idx = txr->next_to_clean;
	desc = &txr->tx_base[idx];
	buf = &txr->tx_buffers[idx];

	/*
	 * 'stop' is the index of the first descriptor AFTER the
	 * end-of-packet of the packet being examined, so the inner loop
	 * can run on a plain inequality test.
	 */
	eop_idx = buf->next_eop;
	eop = &txr->tx_base[eop_idx];
	stop = eop_idx + 1;
	if (stop == adapter->num_tx_desc)
		stop = 0;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	for (;;) {
		/* Hardware is not done with this packet yet. */
		if ((eop->upper.fields.status & E1000_TXD_STAT_DD) == 0)
			break;

		/* Reclaim every descriptor belonging to the packet. */
		while (idx != stop) {
			desc->upper.data = 0;
			desc->lower.data = 0;
			desc->buffer_addr = 0;
			++avail;
			++reclaimed;

			if (buf->m_head) {
				ifp->if_opackets++;
				bus_dmamap_sync(txr->txtag, buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag, buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			buf->next_eop = -1;

			if (++idx == adapter->num_tx_desc)
				idx = 0;
			buf = &txr->tx_buffers[idx];
			desc = &txr->tx_base[idx];
		}

		/* Move on to the next packet, if one has been queued. */
		eop_idx = buf->next_eop;
		if (eop_idx == -1)
			break;
		eop = &txr->tx_base[eop_idx];
		stop = eop_idx + 1;
		if (stop == adapter->num_tx_desc)
			stop = 0;
	}

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = idx;
	txr->tx_avail = avail;

	/*
	 * With enough free descriptors, clear IFF_DRV_OACTIVE so the
	 * stack may send again.  A fully drained ring also stops the
	 * watchdog; otherwise any progress restarts it.
	 */
	if (avail > IGB_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		if (avail == adapter->num_tx_desc) {
			/* All clean, turn off the timer */
			txr->watchdog_timer = 0;
			return FALSE;
		}
	}

	/* Some cleaned, reset the timer */
	if (reclaimed)
		txr->watchdog_timer = IGB_TX_TIMEOUT;

	return TRUE;
}
/*********************************************************************
 *
 *  Setup descriptor buffer(s) from system mbuf buffer pools.
 *  		i - designates the ring index
 *		clean - tells the function whether to update
 *		        the header, the packet buffer, or both.
 *
 *  A header mbuf and a payload cluster are chained, DMA-loaded through
 *  the ring's spare map, and the descriptor at index i is pointed at
 *  the two resulting segments.  Parts not selected by 'clean' are
 *  reused from the ring slot.
 *
 *  Returns 0 on success, ENOBUFS when an mbuf could not be allocated
 *  (the slot's existing mbufs are then re-loaded, if there are any),
 *  or the error from the DMA load.
 *
 **********************************************************************/
static int
igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
{
	struct adapter		*adapter = rxr->adapter;
	struct mbuf		*mh, *mp;
	bus_dma_segment_t	seg[2];
	bus_dmamap_t		map;
	struct igb_rx_buffer	*rx_buffer;
	int			error, nsegs;
	int			merr = 0;

	rx_buffer = &rxr->rx_buffers[i];

	/* First get our header and payload mbuf */
	if (clean & IGB_CLEAN_HEADER) {
		mh = m_gethdr(M_DONTWAIT, MT_DATA);
		if (mh == NULL)
			goto remap;
	} else  /* reuse */
		mh = rxr->rx_buffers[i].m_head;

	mh->m_len = MHLEN;
	mh->m_flags |= M_PKTHDR;

	if (clean & IGB_CLEAN_PAYLOAD) {
		mp = m_getjcl(M_DONTWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (mp == NULL) {
			/*
			 * The remap path falls back to the slot's old
			 * mbufs, so a header allocated just above would
			 * be lost; give it back first.
			 */
			if (clean & IGB_CLEAN_HEADER)
				m_free(mh);
			goto remap;
		}
		mp->m_len = adapter->rx_mbuf_sz;
		mp->m_flags &= ~M_PKTHDR;
	} else {	/* reusing */
		mp = rxr->rx_buffers[i].m_pack;
		mp->m_len = adapter->rx_mbuf_sz;
		mp->m_flags &= ~M_PKTHDR;
	}
	/*
	** Need to create a chain for the following
	** dmamap call at this point.
	*/
	mh->m_next = mp;
	mh->m_pkthdr.len = mh->m_len + mp->m_len;

	/* Get the memory mapping */
	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
	    rxr->rx_spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		printf("GET BUF: dmamap load failure - %d\n", error);
		/*
		 * Release exactly what this call allocated.  Freeing
		 * only the head leaked a fresh payload cluster, and
		 * freeing a reused header left the ring slot pointing
		 * at freed memory.
		 * NOTE(review): assumes the caller keeps using the
		 * slot's existing mbufs after an error -- confirm
		 * against igb_rxeof().
		 */
		if (clean & IGB_CLEAN_PAYLOAD) {
			m_free(mp);
			if ((clean & IGB_CLEAN_HEADER) == 0)
				mh->m_next = NULL;
		}
		if (clean & IGB_CLEAN_HEADER)
			m_free(mh);
		return (error);
	}

	/* Unload old mapping and update buffer struct */
	if (rx_buffer->m_head != NULL)
		bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
	/* Swap the freshly loaded spare map into the slot. */
	map = rx_buffer->map;
	rx_buffer->map = rxr->rx_spare_map;
	rxr->rx_spare_map = map;
	rx_buffer->m_head = mh;
	rx_buffer->m_pack = mp;
	bus_dmamap_sync(rxr->rxtag,
	    rx_buffer->map, BUS_DMASYNC_PREREAD);

	/* Update descriptor */
	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);

	return (0);

	/*
	** If we get here, we have an mbuf resource
	** issue, so we discard the incoming packet
	** and attempt to reuse existing mbufs next
	** pass thru the ring, but to do so we must
	** fix up the descriptor which had the address
	** clobbered with writeback info.
	*/
remap:
	adapter->mbuf_header_failed++;
	merr = ENOBUFS;
	/* Is there a reusable buffer? */
	mh = rxr->rx_buffers[i].m_head;
	if (mh == NULL) /* Nope, init error */
		return (merr);
	mp = rxr->rx_buffers[i].m_pack;
	if (mp == NULL) /* Nope, init error */
		return (merr);
	/* Get our old mapping */
	rx_buffer = &rxr->rx_buffers[i];
	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
	    rx_buffer->map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		/*
		 * We really have a problem.
		 * NOTE(review): this frees the slot's header while
		 * rx_buffer->m_head still points at it; left as is
		 * because the caller's recovery is not visible here.
		 */
		m_free(mh);
		return (error);
	}
	/* Now fix the descriptor as needed */
	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
	return (merr);
}
/*********************************************************************
*
* Allocate memory for rx_buffer structures. Since we use one
* rx_buffer per received packet, the maximum number of rx_buffer's
* that we'll need is equal to the number of receive descriptors
* that we've allocated.
*
**********************************************************************/
static int
igb_allocate_receive_buffers(struct rx_ring *rxr)
{
struct adapter *adapter = rxr->adapter;
device_t dev = adapter->dev;
struct igb_rx_buffer *rxbuf;
int i, bsize, error;
bsize = sizeof(struct igb_rx_buffer) * adapter->num_rx_desc;
if (!(rxr->rx_buffers =
(struct igb_rx_buffer *) malloc(bsize,
M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate rx_buffer memory\n");
error = ENOMEM;
goto fail;
}
/*
** The tag is made to accomodate the largest buffer size
** with packet split (hence the two segments, even though
** it may not always use this.
*/
if ((error = bus_dma_tag_create(NULL, /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
MJUM16BYTES, /* maxsize */
2, /* nsegments */
MJUMPAGESIZE, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&rxr->rxtag))) {
device_printf(dev, "Unable to create RX DMA tag\n");
goto fail;
}
/* Create the spare map (used by getbuf) */
error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
&rxr->rx_spare_map);
if (error) {
device_printf(dev,
"%s: bus_dmamap_create header spare failed: %d\n",
__func__, error);
goto fail;
}
for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
rxbuf = &rxr->rx_buffers[i];
error = bus_dmamap_create(rxr->rxtag,
BUS_DMA_NOWAIT, &rxbuf->map);
if (error) {
device_printf(dev, "Unable to create RX DMA maps\n");
goto fail;
}
}
return (0);
fail:
/* Frees all, but can handle partial completion */
igb_free_receive_structures(adapter);
return (error);
}
/*********************************************************************
*
* Initialize a receive ring and its buffers.
*
**********************************************************************/
static int
igb_setup_receive_ring(struct rx_ring *rxr)
{
struct adapter *adapter;
struct ifnet *ifp;
device_t dev;
struct igb_rx_buffer *rxbuf;
struct lro_ctrl *lro = &rxr->lro;
int j, rsize;
adapter = rxr->adapter;
dev = adapter->dev;
ifp = adapter->ifp;
rxr->lro_enabled = FALSE;
rxr->hdr_split = FALSE;
/* Clear the ring contents */
rsize = roundup2(adapter->num_rx_desc *
sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
bzero((void *)rxr->rx_base, rsize);
/*
** Free current RX buffer structures and their mbufs
*/
for (int i = 0; i < adapter->num_rx_desc; i++) {
rxbuf = &rxr->rx_buffers[i];
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(rxr->rxtag, rxbuf->map);
if (rxbuf->m_head) {
rxbuf->m_head->m_next = rxbuf->m_pack;
m_freem(rxbuf->m_head);
}
rxbuf->m_head = NULL;
rxbuf->m_pack = NULL;
}
/* Next replenish the ring */
for (j = 0; j < adapter->num_rx_desc; j++) {
if (igb_get_buf(rxr, j, IGB_CLEAN_BOTH) == ENOBUFS) {
rxr->rx_buffers[j].m_head = NULL;
rxr->rx_buffers[j].m_pack = NULL;
rxr->rx_base[j].read.hdr_addr = 0;
rxr->rx_base[j].read.pkt_addr = 0;
goto fail;
}
}
/* Setup our descriptor indices */
rxr->next_to_check = 0;
rxr->last_cleaned = 0;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
** Now set up the LRO interface, we
** also only do head split when LRO
** is enabled, since so often they
** are undesireable in similar setups.
*/
if (ifp->if_capenable & IFCAP_LRO) {
int err = tcp_lro_init(lro);
if (err) {
device_printf(dev,"LRO Initialization failed!\n");
goto fail;
}
INIT_DEBUGOUT("RX LRO Initialized\n");
rxr->lro_enabled = TRUE;
rxr->hdr_split = TRUE;
lro->ifp = adapter->ifp;
}
return (0);
fail:
/*
* We need to clean up any buffers allocated
* so far, 'j' is the failing index.
*/
for (int i = 0; i < j; i++) {
rxbuf = &rxr->rx_buffers[i];
if (rxbuf->m_head != NULL) {
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(rxr->rxtag, rxbuf->map);
m_freem(rxbuf->m_head);
rxbuf->m_head = NULL;
}
}
return (ENOBUFS);
}
/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 *  Calls igb_setup_receive_ring() for each of the adapter's
 *  num_queues rings.  Returns 0 when every ring was set up; if one
 *  fails, the buffers of all rings that had already completed are
 *  released and ENOBUFS is returned.
 *
 **********************************************************************/
static int
igb_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int i, j;

	for (i = 0; i < adapter->num_queues; i++, rxr++)
		if (igb_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself.  'i' is the index of the failed
	 * ring, so rings 0 .. i-1 (all of them, including ring 0
	 * and ring i-1) must be walked.
	 */
	rxr = adapter->rx_rings;
	for (int q = 0; q < i; q++, rxr++) {
		for (j = 0; j < adapter->num_rx_desc; j++) {
			struct igb_rx_buffer *rxbuf;
			rxbuf = &rxr->rx_buffers[j];
			if (rxbuf->m_head != NULL) {
				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
				m_freem(rxbuf->m_head);
				rxbuf->m_head = NULL;
			}
		}
	}

	return (ENOBUFS);
}
/*********************************************************************
*
* Enable receive unit.
*
* Programs the receive side of the hardware for all queues of the
* adapter: receives are turned off in RCTL, the per-queue descriptor
* ring base/length, SRRCTL and RXDCTL registers are written, RSS or
* the single-queue checksum setup is applied to RXCSUM, RCTL is
* written back with receives enabled, and finally the head/tail
* registers of every queue are set.
*
**********************************************************************/
static void
igb_initialize_receive_units(struct adapter *adapter)
{
struct rx_ring *rxr = adapter->rx_rings;
struct ifnet *ifp = adapter->ifp;
u32 rctl, rxcsum, psize, srrctl = 0;
INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
/*
* Make sure receives are disabled while setting
* up the descriptor ring
*/
rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
/*
** Set up for header split
**
** rxr still points at the first ring here, so its hdr_split
** flag decides the descriptor type; the resulting srrctl value
** is written to every queue in the loop below.
*/
if (rxr->hdr_split) {
/* Use a standard mbuf for the header */
srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
} else
srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
/*
** Set up for jumbo frames
**
** An MTU above ETHERMTU selects the 4096 packet buffer size
** and long packet enable; otherwise the 2048 size is used and
** long packet enable is cleared.
*/
if (ifp->if_mtu > ETHERMTU) {
rctl |= E1000_RCTL_LPE;
srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
/* Set maximum packet len */
psize = adapter->max_frame_size;
/* are we on a vlan? */
if (adapter->ifp->if_vlantrunk != NULL)
psize += VLAN_TAG_SIZE;
E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
} else {
rctl &= ~E1000_RCTL_LPE;
srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
rctl |= E1000_RCTL_SZ_2048;
}
/* Setup the Base and Length of the Rx Descriptor Rings */
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
u64 bus_addr = rxr->rxdma.dma_paddr;
u32 rxdctl;
/*
* NOTE(review): the length is computed with
* sizeof(struct e1000_rx_desc) while the ring itself is
* sized with union e1000_adv_rx_desc elsewhere in this
* file; presumably both are the same size - confirm.
*/
E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
/* 64-bit ring base address, high dword then low dword */
E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
(uint32_t)(bus_addr >> 32));
E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
(uint32_t)bus_addr);
E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
/* Enable this Queue */
rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
/*
* Clear the low 20 bits, then place the three threshold
* values at bit offsets 0, 8 and 16.
*/
rxdctl &= 0xFFF00000;
rxdctl |= IGB_RX_PTHRESH;
rxdctl |= IGB_RX_HTHRESH << 8;
rxdctl |= IGB_RX_WTHRESH << 16;
E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
}
/*
** Setup for RX MultiQueue
*/
rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
if (adapter->num_queues >1) {
u32 random[10], mrqc, shift = 0;
/* Four one-byte table entries packed into one register write */
union igb_reta {
u32 dword;
u8 bytes[4];
} reta;
/* Ten random dwords, written to the RSSRK array below */
arc4rand(&random, sizeof(random), 0);
if (adapter->hw.mac.type == e1000_82575)
shift = 6;
/* Warning FM follows */
/*
* 128 table entries, assigned to the queues round-robin
* (i % num_queues); a register is written each time four
* bytes have been collected.
*/
for (int i = 0; i < 128; i++) {
reta.bytes[i & 3] =
(i % adapter->num_queues) << shift;
if ((i & 3) == 3)
E1000_WRITE_REG(&adapter->hw,
E1000_RETA(i >> 2), reta.dword);
}
/* Now fill in hash table */
mrqc = E1000_MRQC_ENABLE_RSS_4Q;
for (int i = 0; i < 10; i++)
E1000_WRITE_REG_ARRAY(&adapter->hw,
E1000_RSSRK(0), i, random[i]);
/* Select the packet fields that feed the RSS hash */
mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
E1000_MRQC_RSS_FIELD_IPV4_TCP);
mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
E1000_MRQC_RSS_FIELD_IPV6_TCP);
mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
E1000_MRQC_RSS_FIELD_IPV6_UDP);
mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
/*
** NOTE: Receive Full-Packet Checksum Offload
** is mutually exclusive with Multiqueue. However
** this is not the same as TCP/IP checksums which
** still work.
*/
rxcsum |= E1000_RXCSUM_PCSD;
#if __FreeBSD_version >= 800000
/* For SCTP Offload */
if ((adapter->hw.mac.type == e1000_82576)
&& (ifp->if_capenable & IFCAP_RXCSUM))
rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
} else {
/* Non RSS setup */
if (ifp->if_capenable & IFCAP_RXCSUM) {
rxcsum |= E1000_RXCSUM_IPPCSE;
#if __FreeBSD_version >= 800000
if (adapter->hw.mac.type == e1000_82576)
rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
} else
rxcsum &= ~E1000_RXCSUM_TUOFL;
}
E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
/* Setup the Receive Control Register */
/* Replace the MO field with the MAC's multicast filter type */
rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
E1000_RCTL_RDMTS_HALF |
(adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
/* Make sure VLAN Filters are off */
rctl &= ~E1000_RCTL_VFE;
/* Don't store bad packets */
rctl &= ~E1000_RCTL_SBP;
/* Enable Receives */
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
/*
* Setup the HW Rx Head and Tail Descriptor Pointers
* - needs to be after enable
*
* Head starts at 0 and tail at the last descriptor index.
*/
for (int i = 0; i < adapter->num_queues; i++) {
E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
adapter->num_rx_desc - 1);
}
return;
}
/*********************************************************************
 *
 *  Free receive rings.
 *
 *  For every queue: release the ring's buffers, its LRO state and the
 *  DMA memory of its descriptor area, then free the ring array.
 *
 **********************************************************************/
static void
igb_free_receive_structures(struct adapter *adapter)
{
	int q;

	for (q = 0; q < adapter->num_queues; q++) {
		struct rx_ring *ring = &adapter->rx_rings[q];

		igb_free_receive_buffers(ring);
		tcp_lro_free(&ring->lro);
		igb_dma_free(adapter, &ring->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}
/*********************************************************************
 *
 *  Free receive ring data structures.
 *
 *  Destroys the spare DMA map, unloads and destroys each buffer's
 *  DMA map (freeing the attached header mbuf chain when present),
 *  frees the rx_buffers array and finally destroys the ring's DMA
 *  tag.  Every pointer that is released is reset to NULL.
 *
 **********************************************************************/
static void
igb_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	struct igb_rx_buffer	*buf;
	int			 slot;

	INIT_DEBUGOUT("free_receive_structures: begin");

	if (rxr->rx_spare_map) {
		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
		rxr->rx_spare_map = NULL;
	}

	if (rxr->rx_buffers != NULL) {
		/* Cleanup any existing buffers */
		for (slot = 0; slot < adapter->num_rx_desc; slot++) {
			buf = &rxr->rx_buffers[slot];

			if (buf->m_head != NULL) {
				bus_dmamap_sync(rxr->rxtag, buf->map,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->rxtag, buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			} else if (buf->map != NULL) {
				bus_dmamap_unload(rxr->rxtag, buf->map);
			}

			if (buf->map == NULL)
				continue;
			bus_dmamap_destroy(rxr->rxtag, buf->map);
			buf->map = NULL;
		}

		/* The slot array itself goes last */
		free(rxr->rx_buffers, M_DEVBUF);
		rxr->rx_buffers = NULL;
	}

	if (rxr->rxtag != NULL) {
		bus_dma_tag_destroy(rxr->rxtag);
		rxr->rxtag = NULL;
	}
}
/*********************************************************************
*
* This routine executes in interrupt context. It replenishes
* the mbufs in the descriptor and sends data which has been
* dma'ed into host memory to upper layer.
*
* We loop at most count times if count is > 0, or until done if
* count < 0.
*
* rxr:   the receive ring to clean; its RX lock is taken for the
*        duration of the descriptor processing.
* count: packet budget; it is decremented once per end-of-packet
*        descriptor, not once per descriptor.
*
* Return TRUE if more to clean, FALSE otherwise
*********************************************************************/
static bool
igb_rxeof(struct rx_ring *rxr, int count)
{
struct adapter *adapter = rxr->adapter;
struct ifnet *ifp;
struct lro_ctrl *lro = &rxr->lro;
struct lro_entry *queued;
int i;
u32 staterr;
union e1000_adv_rx_desc *cur;
IGB_RX_LOCK(rxr);
ifp = adapter->ifp;
i = rxr->next_to_check;
cur = &rxr->rx_base[i];
staterr = cur->wb.upper.status_error;
/* Nothing completed at the current position: leave right away */
if (!(staterr & E1000_RXD_STAT_DD)) {
IGB_RX_UNLOCK(rxr);
return FALSE;
}
/* Sync the ring */
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* Main clean loop: runs while the current descriptor is done,
* the budget is not used up and the interface is still running.
*/
while ((staterr & E1000_RXD_STAT_DD) &&
(count != 0) &&
(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
struct mbuf *sendmp, *mh, *mp;
u16 hlen, plen, hdr, ptype, len_adj, vtag;
u8 dopayload, accept_frame, eop;
accept_frame = 1;
hlen = plen = len_adj = vtag = 0;
sendmp = mh = mp = NULL;
ptype = (u16)(cur->wb.lower.lo_dword.data >> 4);
/* Sync the buffers */
bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
BUS_DMASYNC_POSTREAD);
/*
** The way the hardware is configured to
** split, it will ONLY use the header buffer
** when header split is enabled, otherwise we
** get normal behavior, ie, both header and
** payload are DMA'd into the payload buffer.
**
** The fmp test is to catch the case where a
** packet spans multiple descriptors, in that
** case only the first header is valid.
*/
if ((rxr->hdr_split) && (rxr->fmp == NULL)){
hdr = le16toh(cur->
wb.lower.lo_dword.hs_rss.hdr_info);
hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
E1000_RXDADV_HDRBUFLEN_SHIFT;
/* Never trust a header length beyond the header buffer */
if (hlen > IGB_HDR_BUF)
hlen = IGB_HDR_BUF;
plen = le16toh(cur->wb.upper.length);
/* Handle the header mbuf */
mh = rxr->rx_buffers[i].m_head;
mh->m_len = hlen;
dopayload = IGB_CLEAN_HEADER;
/*
** Get the payload length, this
** could be zero if its a small
** packet.
*/
if (plen) {
mp = rxr->rx_buffers[i].m_pack;
mp->m_len = plen;
mp->m_next = NULL;
mp->m_flags &= ~M_PKTHDR;
mh->m_next = mp;
mh->m_flags |= M_PKTHDR;
dopayload = IGB_CLEAN_BOTH;
rxr->rx_split_packets++;
} else { /* small packets */
mh->m_flags &= ~M_PKTHDR;
mh->m_next = NULL;
}
} else {
/*
** Either no header split, or a
** secondary piece of a fragmented
** split packet.
*/
mh = rxr->rx_buffers[i].m_pack;
mh->m_flags |= M_PKTHDR;
mh->m_len = le16toh(cur->wb.upper.length);
dopayload = IGB_CLEAN_PAYLOAD;
}
if (staterr & E1000_RXD_STAT_EOP) {
/* The budget counts whole packets, i.e. EOP descriptors */
count--;
eop = 1;
/*
** Strip CRC and account for frag
**
** When this buffer holds fewer than ETHER_CRC_LEN
** bytes, the remainder (len_adj) is taken off the
** chain further down.
*/
if (mp) {
if (mp->m_len < ETHER_CRC_LEN) {
/* a frag, how much is left? */
len_adj = ETHER_CRC_LEN - mp->m_len;
mp->m_len = 0;
} else
mp->m_len -= ETHER_CRC_LEN;
} else { /* not split */
if (mh->m_len < ETHER_CRC_LEN) {
len_adj = ETHER_CRC_LEN - mh->m_len;
mh->m_len = 0;
} else
mh->m_len -= ETHER_CRC_LEN;
}
} else
eop = 0;
if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
accept_frame = 0;
/*
* The region below is only seen by the compiler when
* IGB_IEEE1588 is defined; it is unported Linux code and
* is not valid C for this driver as it stands.
*/
#ifdef IGB_IEEE1588
This linux code needs to be converted to work here
-----------------------------------------------------
if (unlikely(staterr & E1000_RXD_STAT_TS)) {
u64 regval;
u64 ns;
// Create an mtag and set it up
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
"igb: no RX time stamp available for time stamped packet");
regval = rd32(E1000_RXSTMPL);
regval |= (u64)rd32(E1000_RXSTMPH) << 32;
// Do time conversion from the register
ns = timecounter_cyc2time(&adapter->clock, regval);
clocksync_update(&adapter->sync, ns);
memset(shhwtstamps, 0, sizeof(*shhwtstamps));
shhwtstamps->hwtstamp = ns_to_ktime(ns);
shhwtstamps->syststamp =
clocksync_hw2sys(&adapter->sync, ns);
}
#endif
if (accept_frame) {
/*
** get_buf will overwrite the writeback
** descriptor so save the VLAN tag now.
*/
vtag = le16toh(cur->wb.upper.vlan);
/* No replacement buffer: count a drop and recycle this one */
if (igb_get_buf(rxr, i, dopayload) != 0) {
ifp->if_iqdrops++;
goto discard;
}
/* Initial frame - setup */
if (rxr->fmp == NULL) {
mh->m_flags |= M_PKTHDR;
mh->m_pkthdr.len = mh->m_len;
rxr->fmp = mh; /* Store the first mbuf */
rxr->lmp = mh;
if (mp) { /* Add payload if split */
mh->m_pkthdr.len += mp->m_len;
rxr->lmp = mh->m_next;
}
} else {
/* Chain mbuf's together */
mh->m_flags &= ~M_PKTHDR;
rxr->lmp->m_next = mh;
rxr->lmp = rxr->lmp->m_next;
rxr->fmp->m_pkthdr.len += mh->m_len;
/* Adjust for CRC frag */
if (len_adj) {
rxr->lmp->m_len -= len_adj;
rxr->fmp->m_pkthdr.len -= len_adj;
}
}
/* Packet complete: fill in the header and hand it off below */
if (eop) {
bool sctp = ((ptype & 0x40) != 0);
rxr->fmp->m_pkthdr.rcvif = ifp;
ifp->if_ipackets++;
rxr->rx_packets++;
/* capture data for AIM */
rxr->bytes += rxr->fmp->m_pkthdr.len;
rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
igb_rx_checksum(staterr, rxr->fmp, sctp);
if (staterr & E1000_RXD_STAT_VP) {
rxr->fmp->m_pkthdr.ether_vtag = vtag;
rxr->fmp->m_flags |= M_VLANTAG;
}
#if __FreeBSD_version >= 800000
rxr->fmp->m_pkthdr.flowid = curcpu;
rxr->fmp->m_flags |= M_FLOWID;
#endif
sendmp = rxr->fmp;
rxr->fmp = NULL;
rxr->lmp = NULL;
}
} else {
ifp->if_ierrors++;
/* Also entered by goto when igb_get_buf() fails above */
discard:
/* Reuse loaded DMA map and just update mbuf chain */
if (hlen) {
mh = rxr->rx_buffers[i].m_head;
mh->m_len = MHLEN;
mh->m_next = NULL;
}
mp = rxr->rx_buffers[i].m_pack;
mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
mp->m_data = mp->m_ext.ext_buf;
mp->m_next = NULL;
if (adapter->max_frame_size <=
(MCLBYTES - ETHER_ALIGN))
m_adj(mp, ETHER_ALIGN);
/* Drop whatever part of the packet was already chained */
if (rxr->fmp != NULL) {
/* handles the whole chain */
m_freem(rxr->fmp);
rxr->fmp = NULL;
rxr->lmp = NULL;
}
sendmp = NULL;
}
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
rxr->last_cleaned = i; /* For updating tail */
/* Advance our pointers to the next descriptor. */
if (++i == adapter->num_rx_desc)
i = 0;
/*
** Note that we hold the RX lock thru
** the following call so this ring's
** next_to_check is not gonna change.
*/
if (sendmp != NULL) {
/*
** Send to the stack if:
** - LRO not enabled, or
** - no LRO resources, or
** - lro enqueue fails
*/
if ((!rxr->lro_enabled) ||
((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0))))
(*ifp->if_input)(ifp, sendmp);
}
/* Get the next descriptor */
cur = &rxr->rx_base[i];
staterr = cur->wb.upper.status_error;
}
rxr->next_to_check = i;
/* Advance this ring's (queue rxr->me) receive "Tail Pointer". */
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
/*
* Flush any outstanding LRO work
*/
while (!SLIST_EMPTY(&lro->lro_active)) {
queued = SLIST_FIRST(&lro->lro_active);
SLIST_REMOVE_HEAD(&lro->lro_active, next);
tcp_lro_flush(lro, queued);
}
IGB_RX_UNLOCK(rxr);
/*
** We still have cleaning to do?
** Schedule another interrupt if so.
**
** staterr holds the status of the first descriptor that was
** not processed by the loop above.
*/
if (staterr & E1000_RXD_STAT_DD) {
E1000_WRITE_REG(&adapter->hw, E1000_EICS, rxr->eims);
return TRUE;
}
return FALSE;
}
/*********************************************************************
 *
 *  Translate the checksum status bits of a receive descriptor into
 *  mbuf checksum flags so the stack can skip its own verification.
 *
 *  staterr: status/error word of the descriptor; the low 16 bits are
 *           used as status, bits 24-31 as errors.
 *  mp:      first mbuf of the received packet.
 *  sctp:    TRUE when the caller identified the packet as SCTP.
 *
 *********************************************************************/
static void
igb_rx_checksum(u32 staterr, struct mbuf *mp, bool sctp)
{
	const u16 status = (u16)staterr;
	const u8  errors = (u8) (staterr >> 24);
	u16 l4_flags;

	/* Hardware says: ignore the checksum indication altogether */
	if (status & E1000_RXD_STAT_IXSM) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IP header checksum was evaluated by the hardware */
	if (status & E1000_RXD_STAT_IPCS) {
		if (errors & E1000_RXD_ERR_IPE)
			mp->m_pkthdr.csum_flags = 0;
		else
			mp->m_pkthdr.csum_flags =
			    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	/* Nothing more to do unless an L4 checksum was evaluated */
	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) == 0)
		return;

	l4_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
	if (sctp) /* SCTP has its own "valid" flag */
		l4_flags = CSUM_SCTP_VALID;
#endif

	/* L4 checksum error: leave the flags as they are */
	if (errors & E1000_RXD_ERR_TCPE)
		return;

	mp->m_pkthdr.csum_flags |= l4_flags;
	if (!sctp)
		mp->m_pkthdr.csum_data = htons(0xffff);
}
/*
 * This routine is run via a vlan config EVENT.
 *
 * Sets the bit for 'vtag' in the shadow VLAN filter table (word
 * index = bits 5-11 of the tag, bit position = low 5 bits), bumps
 * the adapter's VLAN count and re-initializes the interface so the
 * change is loaded.  Events for other interfaces and tags outside
 * 1..4095 are ignored.
 */
static void
igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		index, bit;

	if (ifp->if_softc != arg)   /* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	/* Unsigned constant: bit may be 31, which overflows a signed int */
	igb_shadow_vfta[index] |= (1U << bit);
	++adapter->num_vlans;
	/* Re-init to load the changes */
	igb_init(adapter);
}
/*
 * This routine is run via a vlan unconfig EVENT.
 *
 * Clears the bit for 'vtag' in the shadow VLAN filter table (word
 * index = bits 5-11 of the tag, bit position = low 5 bits), drops
 * the adapter's VLAN count and re-initializes the interface so the
 * change is loaded.  Events for other interfaces and tags outside
 * 1..4095 are ignored.
 */
static void
igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		index, bit;

	if (ifp->if_softc != arg)   /* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	/* Unsigned constant: bit may be 31, which overflows a signed int */
	igb_shadow_vfta[index] &= ~(1U << bit);
	--adapter->num_vlans;
	/* Re-init to load the changes */
	igb_init(adapter);
}
/*
 * Restore VLAN hardware state: rewrite the non-zero words of the
 * shadow filter table to the VFTA, set VME in CTRL, enable the
 * filter table in RCTL and enlarge RLPML by one VLAN tag.
 * Does nothing when no VLAN is registered.
 */
static void
igb_setup_vlan_hw_support(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	u32		 ctrl, rctl;
	int		 idx;

	/*
	** We get here thru init_locked, meaning
	** a soft reset, this has already cleared
	** the VFTA and other state, so if there
	** have been no vlan's registered do nothing.
	*/
	if (adapter->num_vlans == 0)
		return;

	/*
	** A soft reset zero's out the VFTA, so
	** we need to repopulate it now.
	*/
	for (idx = 0; idx < IGB_VFTA_SIZE; idx++) {
		if (igb_shadow_vfta[idx] == 0)
			continue;
		E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
		    idx, igb_shadow_vfta[idx]);
	}

	ctrl = E1000_READ_REG(hw, E1000_CTRL);
	ctrl |= E1000_CTRL_VME;
	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);

	/* Enable the Filter Table */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	rctl &= ~E1000_RCTL_CFIEN;
	rctl |= E1000_RCTL_VFE;
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	/* Update the frame size */
	E1000_WRITE_REG(hw, E1000_RLPML,
	    adapter->max_frame_size + VLAN_TAG_SIZE);
}
/*
 * Unmask the adapter's interrupts.  With MSI-X memory present the
 * queue mask is written to EIAC, EIAM and EIMS and only the link
 * status change is enabled in IMS; otherwise IMS gets the full
 * IMS_ENABLE_MASK.  The writes are flushed before returning.
 */
static void
igb_enable_intr(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (!adapter->msix_mem) {
		E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK);
	} else {
		/* With RSS set up what to auto clear */
		E1000_WRITE_REG(hw, E1000_EIAC, adapter->eims_mask);
		E1000_WRITE_REG(hw, E1000_EIAM, adapter->eims_mask);
		E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_mask);
		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
	}

	E1000_WRITE_FLUSH(hw);
}
/*
 * Mask all adapter interrupts.  With MSI-X memory present the
 * extended mask is cleared (EIMC) and auto-clear is switched off
 * (EIAC) first; IMC is always written.  The writes are flushed
 * before returning.
 */
static void
igb_disable_intr(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_mem) {
		E1000_WRITE_REG(hw, E1000_EIMC, ~0);
		E1000_WRITE_REG(hw, E1000_EIAC, 0);
	}
	E1000_WRITE_REG(hw, E1000_IMC, ~0);

	E1000_WRITE_FLUSH(hw);
}
/*
 * Bit of a misnomer, what this really means is
 * to enable OS management of the system... aka
 * to disable special hardware management features.
 *
 * Only acts when the adapter has manageability: ARP interception
 * is turned off in MANC, management-to-host is turned on, and bits
 * 5 and 6 are set in MANC2H.
 */
static void
igb_init_manageability(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	int		 manc, manc2h;

	if (!adapter->has_manage)
		return;

	manc2h = E1000_READ_REG(hw, E1000_MANC2H);
	manc = E1000_READ_REG(hw, E1000_MANC);

	/* disable hardware interception of ARP */
	manc &= ~(E1000_MANC_ARP_EN);

	/* enable receiving management packets to the host */
	manc |= E1000_MANC_EN_MNG2HOST;
	manc2h |= 1 << 5;  /* Mng Port 623 */
	manc2h |= 1 << 6;  /* Mng Port 664 */

	E1000_WRITE_REG(hw, E1000_MANC2H, manc2h);
	E1000_WRITE_REG(hw, E1000_MANC, manc);
}
/*
 * Give control back to hardware management
 * controller if there is one: ARP interception is re-enabled and
 * management-to-host forwarding is switched off in MANC.
 */
static void
igb_release_manageability(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	int		 manc;

	if (!adapter->has_manage)
		return;

	manc = E1000_READ_REG(hw, E1000_MANC);

	/* re-enable hardware interception of ARP */
	manc |= E1000_MANC_ARP_EN;
	manc &= ~E1000_MANC_EN_MNG2HOST;

	E1000_WRITE_REG(hw, E1000_MANC, manc);
}
/*
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 */
static void
igb_get_hw_control(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	u32		 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
	ctrl_ext |= E1000_CTRL_EXT_DRV_LOAD;
	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
}
/*
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 */
static void
igb_release_hw_control(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	u32		 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
	ctrl_ext &= ~E1000_CTRL_EXT_DRV_LOAD;
	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
}
/*
 * Check an Ethernet address for use as a station address.
 * Returns FALSE when the low bit of the first octet is set or when
 * all ETHER_ADDR_LEN octets are zero, TRUE otherwise.
 */
static int
igb_is_valid_ether_addr(uint8_t *addr)
{
	char all_zero[6] = { 0, 0, 0, 0, 0, 0 };

	/* Low bit of the first octet set: reject */
	if (addr[0] & 1)
		return (FALSE);

	/* All-zero address: reject */
	if (bcmp(addr, all_zero, ETHER_ADDR_LEN) == 0)
		return (FALSE);

	return (TRUE);
}
/*
 * Enable PCI Wake On Lan capability.
 *
 * Reads the capability pointer from config space and, if the
 * capability found there is the power management one, sets the PME
 * status and PME enable bits in its status register.  Any other
 * capability id makes the function return without writing.
 */
void
igb_enable_wakeup(device_t dev)
{
	u16	pm_off, pm_status;
	u8	cap_id;

	/* First find the capabilities pointer */
	pm_off = pci_read_config(dev, PCIR_CAP_PTR, 2);

	/* Read the capability id found there */
	cap_id = pci_read_config(dev, pm_off, 1);

	if (cap_id == PCIY_PMG) {
		/* Power capability present: update its status register */
		pm_off += PCIR_POWER_STATUS;
		pm_status = pci_read_config(dev, pm_off, 2);
		pm_status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
		pci_write_config(dev, pm_off, pm_status, 2);
	}
}
/**********************************************************************
*
* Update the board statistics counters.
*
**********************************************************************/
static void
igb_update_stats_counters(struct adapter *adapter)
{
struct ifnet *ifp;
if(adapter->hw.phy.media_type == e1000_media_type_copper ||
(E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
}
adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
/* For the 64-bit byte counters the low dword must be read first. */
/* Both registers clear on the read of the high dword */
adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
adapter->stats.algnerrc +=
E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
adapter->stats.rxerrc +=
E1000_READ_REG(&adapter->hw, E1000_RXERRC);
adapter->stats.tncrs +=
E1000_READ_REG(&adapter->hw, E1000_TNCRS);
adapter->stats.cexterr +=
E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
adapter->stats.tsctc +=
E1000_READ_REG(&adapter->hw, E1000_TSCTC);
adapter->stats.tsctfc +=
E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
ifp = adapter->ifp;
ifp->if_collisions = adapter->stats.colc;
/* Rx Errors */
ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
adapter->stats.crcerrs + adapter->stats.algnerrc +
adapter->stats.ruc + adapter->stats.roc +
adapter->stats.mpc + adapter->stats.cexterr;
/* Tx Errors */
ifp->if_oerrors = adapter->stats.ecol +
adapter->stats.latecol + adapter->watchdog_events;
}
/**********************************************************************
*
* This routine is called only when igb_display_debug_stats is enabled.
* This routine provides a way to take a look at important statistics
* maintained by the driver and hardware.
*
**********************************************************************/
static void
igb_print_debug_info(struct adapter *adapter)
{
device_t dev = adapter->dev;
struct rx_ring *rxr = adapter->rx_rings;
struct tx_ring *txr = adapter->tx_rings;
uint8_t *hw_addr = adapter->hw.hw_addr;
device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
E1000_READ_REG(&adapter->hw, E1000_CTRL),
E1000_READ_REG(&adapter->hw, E1000_RCTL));
#if (DEBUG_HW > 0) /* Dont output these errors normally */
device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
E1000_READ_REG(&adapter->hw, E1000_IMS),
E1000_READ_REG(&adapter->hw, E1000_EIMS));
#endif
device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
(E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
device_printf(dev, "Flow control watermarks high = %d low = %d\n",
adapter->hw.fc.high_water,
adapter->hw.fc.low_water);
for (int i = 0; i < adapter->num_queues; i++, txr++) {
device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
txr->me, (long long)txr->no_desc_avail);
device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
(long long)txr->tx_irq);
device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
(long long)txr->tx_packets);
}
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
struct lro_ctrl *lro = &rxr->lro;
device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
(long long)rxr->rx_packets);
device_printf(dev, "RX(%d) Split Packets = %lld\n", rxr->me,
(long long)rxr->rx_split_packets);
device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
(long long)rxr->rx_bytes);
device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
(long long)rxr->rx_irq);
device_printf(dev,"RX(%d) LRO Queued= %d\n",
rxr->me, lro->lro_queued);
device_printf(dev,"RX(%d) LRO Flushed= %d\n",
rxr->me, lro->lro_flushed);
}
device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
device_printf(dev, "Mbuf defrag failed = %ld\n",
adapter->mbuf_defrag_failed);
device_printf(dev, "Std mbuf header failed = %ld\n",
adapter->mbuf_header_failed);
device_printf(dev, "Std mbuf packet failed = %ld\n",
adapter->mbuf_packet_failed);
device_printf(dev, "Driver dropped packets = %ld\n",
adapter->dropped_pkts);
device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
adapter->no_tx_dma_setup);
}
/*
 * Print the accumulated hardware statistics kept in adapter->stats,
 * plus the driver's RX overrun and watchdog counters, through
 * device_printf().  Nothing is read from the hardware here.
 */
static void
igb_print_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* Error counters */
	device_printf(dev, "Excessive collisions = %lld\n",
	    (long long)adapter->stats.ecol);
#if	(DEBUG_HW > 0)  /* Dont output these errors normally */
	device_printf(dev, "Symbol errors = %lld\n",
	    (long long)adapter->stats.symerrs);
#endif
	device_printf(dev, "Sequence errors = %lld\n",
	    (long long)adapter->stats.sec);
	device_printf(dev, "Defer count = %lld\n",
	    (long long)adapter->stats.dc);
	device_printf(dev, "Missed Packets = %lld\n",
	    (long long)adapter->stats.mpc);
	device_printf(dev, "Receive No Buffers = %lld\n",
	    (long long)adapter->stats.rnbc);

	/* RLEC is inaccurate on some hardware, calculate our own. */
	device_printf(dev, "Receive Length Errors = %lld\n",
	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));

	device_printf(dev, "Receive errors = %lld\n",
	    (long long)adapter->stats.rxerrc);
	device_printf(dev, "Crc errors = %lld\n",
	    (long long)adapter->stats.crcerrs);
	device_printf(dev, "Alignment errors = %lld\n",
	    (long long)adapter->stats.algnerrc);

	/* On 82575 these are collision counts */
	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
	    (long long)adapter->stats.cexterr);

	/* Driver-maintained counters */
	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
	device_printf(dev, "watchdog timeouts = %ld\n",
	    adapter->watchdog_events);

	/* Flow control frames */
	device_printf(dev, "XON Rcvd = %lld\n",
	    (long long)adapter->stats.xonrxc);
	device_printf(dev, "XON Xmtd = %lld\n",
	    (long long)adapter->stats.xontxc);
	device_printf(dev, "XOFF Rcvd = %lld\n",
	    (long long)adapter->stats.xoffrxc);
	device_printf(dev, "XOFF Xmtd = %lld\n",
	    (long long)adapter->stats.xofftxc);

	/* Traffic totals */
	device_printf(dev, "Good Packets Rcvd = %lld\n",
	    (long long)adapter->stats.gprc);
	device_printf(dev, "Good Packets Xmtd = %lld\n",
	    (long long)adapter->stats.gptc);
	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
	    (long long)adapter->stats.tsctc);
	device_printf(dev, "TSO Contexts Failed = %lld\n",
	    (long long)adapter->stats.tsctfc);
}
/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. This only dumps the first
 *  32 words, stuff that matters is in that extent.
 *
 *  Output is four rows of eight 16-bit words, each row prefixed by
 *  its offset.  A word whose read fails is shown as "????" instead
 *  of printing whatever happened to be in the local buffer.
 *
 **********************************************************************/
static void
igb_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* Its a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000 ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0 ",row);
		}
		/* Do not print eeprom_data unless the read filled it in */
		if (e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data) != 0) {
			printf("???? ");
			continue;
		}
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}
static int
igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
struct adapter *adapter;
int error;
int result;
result = -1;
error = sysctl_handle_int(oidp, &result, 0, req);
if (error || !req->newptr)
return (error);
if (result == 1) {
adapter = (struct adapter *)arg1;
igb_print_debug_info(adapter);
}
/*
* This value will cause a hex dump of the
* first 32 16-bit words of the EEPROM to
* the screen.
*/
if (result == 2) {
adapter = (struct adapter *)arg1;
igb_print_nvm_info(adapter);
}
return (error);
}
static int
igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
struct adapter *adapter;
int error;
int result;
result = -1;
error = sysctl_handle_int(oidp, &result, 0, req);
if (error || !req->newptr)
return (error);
if (result == 1) {
adapter = (struct adapter *)arg1;
igb_print_hw_stats(adapter);
}
return (error);
}
static void
igb_add_rx_process_limit(struct adapter *adapter, const char *name,
const char *description, int *limit, int value)
{
*limit = value;
SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
#ifdef IGB_IEEE1588
/*
** igb_hwtstamp_ioctl - control hardware time stamping
**
** Outgoing time stamping can be enabled and disabled. Play nice and
** disable it when requested, although it shouldn't cause any overhead
** when no packet needs it. At most one packet in the queue may be
** marked for time stamping, otherwise it would be impossible to tell
** for sure to which packet the hardware time stamp belongs.
**
** Incoming time stamping has to be configured via the hardware
** filters. Not all combinations are supported, in particular event
** type has to be specified. Matching the kind of event packet is
** not supported, with the exception of "all V2 events regardless of
** level 2 or 4".
**
** The request is read from ifr->ifr_data; config->rx_filter may be
** rewritten to the filter that was actually programmed.
**
** Returns 0 on success, EINVAL if any reserved flag is set, and
** ERANGE for an unsupported tx_type or rx_filter (positive errno
** values throughout).
*/
static int
igb_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr)
{
	struct e1000_hw *hw = &adapter->hw;
	struct hwtstamp_ctrl *config;
	u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
	u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
	u32 tsync_rx_ctl_type = 0;
	u32 tsync_rx_cfg = 0;
	int is_l4 = 0;
	int is_l2 = 0;
	u16 port = 319; /* PTP */
	u32 regval;

	config = (struct hwtstamp_ctrl *) ifr->ifr_data;

	/* reserved for future extensions */
	if (config->flags)
		return (EINVAL);

	switch (config->tx_type) {
	case HWTSTAMP_TX_OFF:
		tsync_tx_ctl_bit = 0;
		break;
	case HWTSTAMP_TX_ON:
		tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
		break;
	default:
		return (ERANGE);
	}

	switch (config->rx_filter) {
	case HWTSTAMP_FILTER_NONE:
		tsync_rx_ctl_bit = 0;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_ALL:
		/*
		 * register TSYNCRXCFG must be set, therefore it is not
		 * possible to time stamp both Sync and Delay_Req messages
		 * => fall back to time stamping all packets
		 */
		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
		config->rx_filter = HWTSTAMP_FILTER_ALL;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
		is_l4 = 1;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
		is_l4 = 1;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
		is_l2 = 1;
		is_l4 = 1;
		config->rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
		is_l2 = 1;
		is_l4 = 1;
		config->rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
		is_l2 = 1;
		break;
	default:
		/* Positive errno, matching the tx_type switch above */
		return (ERANGE);
	}

	/* enable/disable TX */
	regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
	regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
	E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);

	/* enable/disable RX, define which PTP packets are time stamped */
	regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
	regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
	regval = (regval & ~0xE) | tsync_rx_ctl_type;
	E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
	E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);

	/*
	 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
	 *                                          (Ethertype to filter on)
	 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
	 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
	 */
	E1000_WRITE_REG(hw, E1000_ETQF0, is_l2 ? 0x440088f7 : 0);

	/* L4 Queue Filter[0]: only filter by source and destination port */
	E1000_WRITE_REG(hw, E1000_SPQF0, htons(port));
	E1000_WRITE_REG(hw, E1000_IMIREXT(0), is_l4 ?
	     ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
	E1000_WRITE_REG(hw, E1000_IMIR(0), is_l4 ?
	     (htons(port)
	      | (0<<16) /* immediate interrupt disabled */
	      | 0 /* (1<<17) bit cleared: do not bypass
		     destination port check */)
		: 0);
	E1000_WRITE_REG(hw, E1000_FTQF0, is_l4 ?
	     (0x11 /* UDP */
	      | (1<<15) /* VF not compared */
	      | (1<<27) /* Enable Timestamping */
	      | (7<<28) /* only source port filter enabled,
			   source/target address and protocol
			   masked */)
	     : ((1<<15) | (15<<28) /* all mask bits set = filter not
				      enabled */));

	wrfl();

	adapter->hwtstamp_ctrl = config;

	/* clear TX/RX time stamp registers, just to be sure */
	regval = E1000_READ_REG(hw, E1000_TXSTMPH);
	regval = E1000_READ_REG(hw, E1000_RXSTMPH);

	/* Every failure path returned above; no `error' variable exists */
	return (0);
}
/*
** igb_read_clock - read raw cycle counter (to be used by time counter)
*/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
struct igb_adapter *adapter =
container_of(tc, struct igb_adapter, cycles);
struct e1000_hw *hw = &adapter->hw;
u64 stamp;
stamp = E1000_READ_REG(hw, E1000_SYSTIML);
stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << 32ULL;
return (stamp);
}
#endif /* IGB_IEEE1588 */
Index: stable/8/sys/dev/ixgbe/ixgbe.c
===================================================================
--- stable/8/sys/dev/ixgbe/ixgbe.c (revision 205282)
+++ stable/8/sys/dev/ixgbe/ixgbe.c (revision 205283)
@@ -1,4947 +1,4947 @@
/******************************************************************************
Copyright (c) 2001-2009, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*$FreeBSD$*/
#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif
#include "ixgbe.h"
/*********************************************************************
* Set this to one to display debug statistics
*********************************************************************/
int ixgbe_display_debug_stats = 0;
/*********************************************************************
* Driver version
*********************************************************************/
char ixgbe_driver_version[] = "1.8.9";
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  Each device ID appears once: probe returns on the first matching
 *  entry, so a repeated entry could never be reached.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/
static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};
/*********************************************************************
* Table of branding strings
*********************************************************************/
static char *ixgbe_strings[] = {
"Intel(R) PRO/10GbE PCI-Express Network Driver"
};
/*********************************************************************
* Function prototypes
*********************************************************************/
static int ixgbe_probe(device_t);
static int ixgbe_attach(device_t);
static int ixgbe_detach(device_t);
static int ixgbe_shutdown(device_t);
static void ixgbe_start(struct ifnet *);
static void ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#if __FreeBSD_version >= 800000
static int ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int ixgbe_mq_start_locked(struct ifnet *,
struct tx_ring *, struct mbuf *);
static void ixgbe_qflush(struct ifnet *);
#endif
static int ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
static void ixgbe_watchdog(struct adapter *);
static void ixgbe_init(void *);
static void ixgbe_init_locked(struct adapter *);
static void ixgbe_stop(void *);
static void ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int ixgbe_media_change(struct ifnet *);
static void ixgbe_identify_hardware(struct adapter *);
static int ixgbe_allocate_pci_resources(struct adapter *);
static int ixgbe_allocate_msix(struct adapter *);
static int ixgbe_allocate_legacy(struct adapter *);
static int ixgbe_allocate_queues(struct adapter *);
static int ixgbe_setup_msix(struct adapter *);
static void ixgbe_free_pci_resources(struct adapter *);
static void ixgbe_local_timer(void *);
static int ixgbe_hardware_init(struct adapter *);
static void ixgbe_setup_interface(device_t, struct adapter *);
static int ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int ixgbe_setup_transmit_structures(struct adapter *);
static void ixgbe_setup_transmit_ring(struct tx_ring *);
static void ixgbe_initialize_transmit_units(struct adapter *);
static void ixgbe_free_transmit_structures(struct adapter *);
static void ixgbe_free_transmit_buffers(struct tx_ring *);
static int ixgbe_allocate_receive_buffers(struct rx_ring *);
static int ixgbe_setup_receive_structures(struct adapter *);
static int ixgbe_setup_receive_ring(struct rx_ring *);
static void ixgbe_initialize_receive_units(struct adapter *);
static void ixgbe_free_receive_structures(struct adapter *);
static void ixgbe_free_receive_buffers(struct rx_ring *);
static void ixgbe_init_moderation(struct adapter *);
static void ixgbe_enable_intr(struct adapter *);
static void ixgbe_disable_intr(struct adapter *);
static void ixgbe_update_stats_counters(struct adapter *);
static bool ixgbe_txeof(struct tx_ring *);
static bool ixgbe_rxeof(struct rx_ring *, int);
static void ixgbe_rx_checksum(u32, struct mbuf *);
static void ixgbe_set_promisc(struct adapter *);
static void ixgbe_disable_promisc(struct adapter *);
static void ixgbe_set_multi(struct adapter *);
static void ixgbe_print_hw_stats(struct adapter *);
static void ixgbe_print_debug_info(struct adapter *);
static void ixgbe_update_link_status(struct adapter *);
static int ixgbe_get_buf(struct rx_ring *, int, u8);
static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int ixgbe_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int ixgbe_sysctl_debug(SYSCTL_HANDLER_ARGS);
static int ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
struct ixgbe_dma_alloc *, int);
static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static void ixgbe_add_rx_process_limit(struct adapter *, const char *,
const char *, int *, int);
static bool ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void ixgbe_configure_ivars(struct adapter *);
static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
static void ixgbe_setup_vlan_hw_support(struct adapter *);
static void ixgbe_register_vlan(void *, struct ifnet *, u16);
static void ixgbe_unregister_vlan(void *, struct ifnet *, u16);
static void ixgbe_update_aim(struct rx_ring *);
/* Support for pluggable optic modules */
static bool ixgbe_sfp_probe(struct adapter *);
/* Legacy (single vector) interrupt handler */
static void ixgbe_legacy_irq(void *);
/* The MSI/X Interrupt handlers */
static void ixgbe_msix_tx(void *);
static void ixgbe_msix_rx(void *);
static void ixgbe_msix_link(void *);
/* Deferred interrupt tasklets */
static void ixgbe_handle_tx(void *, int);
static void ixgbe_handle_rx(void *, int);
static void ixgbe_handle_link(void *, int);
static void ixgbe_handle_msf(void *, int);
static void ixgbe_handle_mod(void *, int);
/*********************************************************************
* FreeBSD Device Interface Entry Points
*********************************************************************/
static device_method_t ixgbe_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, ixgbe_probe),
DEVMETHOD(device_attach, ixgbe_attach),
DEVMETHOD(device_detach, ixgbe_detach),
DEVMETHOD(device_shutdown, ixgbe_shutdown),
{0, 0}
};
static driver_t ixgbe_driver = {
"ix", ixgbe_methods, sizeof(struct adapter),
};
static devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
/*
** TUNEABLE PARAMETERS:
*/
/*
** These parameters are used in Adaptive
** Interrupt Moderation. The value is set
** into EITR and controls the interrupt
** frequency. They can be modified but
** be careful in tuning them.
*/
static int ixgbe_enable_aim = TRUE;
TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
static int ixgbe_low_latency = IXGBE_LOW_LATENCY;
TUNABLE_INT("hw.ixgbe.low_latency", &ixgbe_low_latency);
static int ixgbe_ave_latency = IXGBE_AVE_LATENCY;
TUNABLE_INT("hw.ixgbe.ave_latency", &ixgbe_ave_latency);
static int ixgbe_bulk_latency = IXGBE_BULK_LATENCY;
TUNABLE_INT("hw.ixgbe.bulk_latency", &ixgbe_bulk_latency);
/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 100;
TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
/* Flow control setting, default to full */
static int ixgbe_flow_control = ixgbe_fc_full;
TUNABLE_INT("hw.ixgbe.flow_control", &ixgbe_flow_control);
/*
* MSIX should be the default for best performance,
* but this allows it to be forced off for testing.
*/
static int ixgbe_enable_msix = 1;
TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
/*
 * Header split has seemed to be beneficial in
 * all circumstances tested, so it's on by default;
 * however this variable will allow it to be disabled
 * for some debug purposes.
 *
 * This must be an int, not a bool: TUNABLE_INT stores a full int
 * through the pointer it is given, so a narrower object would be
 * overrun when the tunable is fetched.
 */
static int ixgbe_header_split = TRUE;
TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);
/*
* Number of Queues, should normally
* be left at 0, it then autoconfigures to
* the number of cpus. Each queue is a pair
* of RX and TX rings with a dedicated interrupt
*/
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
/* Number of TX descriptors per ring */
static int ixgbe_txd = DEFAULT_TXD;
TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
/* Number of RX descriptors per ring */
static int ixgbe_rxd = DEFAULT_RXD;
TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
/* Total number of Interfaces - need for config sanity check */
static int ixgbe_total_ports;
/*
** Shadow VFTA table, this is needed because
** the real filter table gets cleared during
** a soft reset and we need to repopulate it.
*/
static u32 ixgbe_shadow_vfta[IXGBE_VFTA_SIZE];
/*
** The number of scatter-gather segments
** differs for 82598 and 82599, default to
** the former.
*/
static int ixgbe_num_segs = IXGBE_82598_SCATTER;
/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  The device is compared against each ixgbe_vendor_info_array entry;
 *  a subvendor or subdevice ID of 0 in the table acts as a wildcard.
 *  On the first match the device description is set and the global
 *  port count is incremented.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/
static int
ixgbe_probe(device_t dev)
{
	ixgbe_vendor_info_t *ent;

	u16	pci_vendor_id = 0;
	u16	pci_device_id = 0;
	u16	pci_subvendor_id = 0;
	u16	pci_subdevice_id = 0;
	char	adapter_name[256];

	INIT_DEBUGOUT("ixgbe_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = ixgbe_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		     (ent->subvendor_id == 0)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		     (ent->subdevice_id == 0))) {
			/* Bounded so a longer string cannot overrun the stack */
			snprintf(adapter_name, sizeof(adapter_name),
			    "%s, Version - %s",
			    ixgbe_strings[ent->index],
			    ixgbe_driver_version);
			device_set_desc_copy(dev, adapter_name);
			++ixgbe_total_ports;
			return (0);
		}
		ent++;
	}
	return (ENXIO);
}
/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  Failures before the queues are allocated unwind through err_out
 *  (PCI resources only); later failures unwind through err_late, which
 *  also frees the transmit and receive structures.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/
static int
ixgbe_attach(device_t dev)
{
	struct adapter *adapter;
	struct ixgbe_hw *hw;
	int             error = 0;
	u16		pci_device_id;
	u32		ctrl_ext;

	INIT_DEBUGOUT("ixgbe_attach: begin");

	/* Allocate, clear, and link in our adapter structure */
	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;

	/* Core Lock Init */
	IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* Keep track of optics */
	pci_device_id = pci_get_device(dev);
	switch (pci_device_id) {
		case IXGBE_DEV_ID_82598_CX4_DUAL_PORT :
		case IXGBE_DEV_ID_82598EB_CX4 :
			adapter->optics = IFM_10G_CX4;
			break;
		case IXGBE_DEV_ID_82598AF_DUAL_PORT :
		case IXGBE_DEV_ID_82598_DA_DUAL_PORT :
		case IXGBE_DEV_ID_82598AF_SINGLE_PORT :
		case IXGBE_DEV_ID_82598AT :
			adapter->optics = IFM_10G_SR;
			break;
		case IXGBE_DEV_ID_82598EB_XF_LR :
			adapter->optics = IFM_10G_LR;
			break;
		case IXGBE_DEV_ID_82599_SFP :
			adapter->optics = IFM_10G_SR;
			ixgbe_num_segs = IXGBE_82599_SCATTER;
			break;
		case IXGBE_DEV_ID_82599_KX4 :
			adapter->optics = IFM_10G_CX4;
			ixgbe_num_segs = IXGBE_82599_SCATTER;
			break;
		case IXGBE_DEV_ID_82599_XAUI_LOM :
			ixgbe_num_segs = IXGBE_82599_SCATTER;
			/* FALLTHROUGH */
		default:
			break;
	}

	/* SYSCTL APIs */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW,
			adapter, 0, ixgbe_sysctl_stats, "I", "Statistics");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW,
			adapter, 0, ixgbe_sysctl_debug, "I", "Debug Info");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "flow_control", CTLTYPE_INT | CTLFLAG_RW,
			adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
			&ixgbe_enable_aim, 1, "Interrupt Moderation");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
			&ixgbe_low_latency, 1, "Low Latency");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
			&ixgbe_ave_latency, 1, "Average Latency");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
			&ixgbe_bulk_latency, 1, "Bulk Latency");

	/* Set up the timer callout */
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware revision */
	ixgbe_identify_hardware(adapter);

	/* Do base PCI setup - map BAR0 */
	if (ixgbe_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_out;
	}

	/* Do descriptor calc and sanity checks */
	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
		device_printf(dev, "TXD config issue, using default!\n");
		adapter->num_tx_desc = DEFAULT_TXD;
	} else
		adapter->num_tx_desc = ixgbe_txd;

	/*
	** With many RX rings it is easy to exceed the
	** system mbuf allocation. Tuning nmbclusters
	** can alleviate this.
	*/
	if (nmbclusters > 0 ) {
		int s;
		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
		if (s > nmbclusters) {
			device_printf(dev, "RX Descriptors exceed "
			    "system mbuf max, using default instead!\n");
			ixgbe_rxd = DEFAULT_RXD;
		}
	}

	/* Validate the RX count against the RX limits, not the TX ones */
	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
		device_printf(dev, "RXD config issue, using default!\n");
		adapter->num_rx_desc = DEFAULT_RXD;
	} else
		adapter->num_rx_desc = ixgbe_rxd;

	/* Allocate our TX/RX Queues */
	if (ixgbe_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_out;
	}

	/* Initialize the shared code */
	error = ixgbe_init_shared_code(hw);
	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
		/*
		** No optics in this port, set up
		** so the timer routine will probe
		** for later insertion.
		*/
		adapter->sfp_probe = TRUE;
		error = 0;
	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
		device_printf(dev,"Unsupported SFP+ module detected!\n");
		error = EIO;
		goto err_late;
	} else if (error) {
		device_printf(dev,"Unable to initialize the shared code\n");
		error = EIO;
		goto err_late;
	}

	/* Initialize the hardware */
	if (ixgbe_hardware_init(adapter)) {
		device_printf(dev,"Unable to initialize the hardware\n");
		error = EIO;
		goto err_late;
	}

	if ((adapter->msix > 1) && (ixgbe_enable_msix))
		error = ixgbe_allocate_msix(adapter);
	else
		error = ixgbe_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	ixgbe_setup_interface(dev, adapter);

#ifdef IXGBE_IEEE1588
	/*
	** Setup the timer: IEEE 1588 support
	*/
	adapter->cycles.read = ixgbe_read_clock;
	adapter->cycles.mask = (u64)-1;
	adapter->cycles.mult = 1;
	adapter->cycles.shift = IXGBE_TSYNC_SHIFT;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TIMINCA, (1<<24) |
	    IXGBE_TSYNC_CYCLE_TIME * IXGBE_TSYNC_SHIFT);
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_SYSTIML, 0x00000000);
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_SYSTIMH, 0xFF800000);

	// JFV - this is not complete yet
#endif

	/* Sysctl for limiting the amount of work done in the taskqueue */
	ixgbe_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    ixgbe_rx_process_limit);

	/* Initialize statistics */
	ixgbe_update_stats_counters(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* let hardware know driver is loaded */
	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

	INIT_DEBUGOUT("ixgbe_attach: end");
	return (0);
err_late:
	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
err_out:
	ixgbe_free_pci_resources(adapter);
	return (error);

}
/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  Detach is refused with EBUSY while a VLAN trunk is still attached
 *  to the interface.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/
static int
ixgbe_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	u32		ctrl_ext;

	INIT_DEBUGOUT("ixgbe_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);

	/* Drain and release the per-ring TX taskqueues */
	for (int i = 0; i < adapter->num_queues; i++) {
		struct tx_ring *txr = &adapter->tx_rings[i];

		if (!txr->tq)
			continue;
		taskqueue_drain(txr->tq, &txr->tx_task);
		taskqueue_free(txr->tq);
	}

	/* Drain and release the per-ring RX taskqueues */
	for (int i = 0; i < adapter->num_queues; i++) {
		struct rx_ring *rxr = &adapter->rx_rings[i];

		if (!rxr->tq)
			continue;
		taskqueue_drain(rxr->tq, &rxr->rx_task);
		taskqueue_free(rxr->tq);
	}

	/* Drain the Link queue */
	if (adapter->tq) {
		taskqueue_drain(adapter->tq, &adapter->link_task);
		taskqueue_drain(adapter->tq, &adapter->mod_task);
		taskqueue_drain(adapter->tq, &adapter->msf_task);
		taskqueue_free(adapter->tq);
	}

	/* let hardware know driver is unloading */
	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);
	ixgbe_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(adapter->ifp);

	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);

	IXGBE_CORE_LOCK_DESTROY(adapter);
	return (0);
}
/*********************************************************************
 *
 *  Shutdown entry point
 *
 *  Stops the adapter while holding the core lock.  Always returns 0.
 *
 **********************************************************************/
static int
ixgbe_shutdown(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	IXGBE_CORE_LOCK(sc);
	ixgbe_stop(sc);
	IXGBE_CORE_UNLOCK(sc);

	return (0);
}
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available stack is notified and
 *  the packet is requeued.
 *
 *  The caller must hold the TX lock of the ring.
 **********************************************************************/
static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*m;

	IXGBE_TX_LOCK_ASSERT(txr);

	/* Only proceed when running, not output-blocked, and link is up */
	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || !adapter->link_active)
		return;

	for (;;) {
		if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		if (ixgbe_xmit(txr, &m)) {
			/*
			 * Requeue only if the mbuf pointer was left
			 * non-NULL by the failed transmit.
			 */
			if (m != NULL) {
				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
				IFQ_DRV_PREPEND(&ifp->if_snd, m);
			}
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m);

		/* Set timeout in case hardware has problems transmitting */
		txr->watchdog_timer = IXGBE_TX_TIMEOUT;
	}
}
/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 *
 * Does nothing unless the interface is marked running.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*ring0 = adapter->tx_rings;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	IXGBE_TX_LOCK(ring0);
	ixgbe_start_locked(ring0, ifp);
	IXGBE_TX_UNLOCK(ring0);
}
#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
** Chooses a TX ring (flowid modulo the queue count when the mbuf
** carries M_FLOWID, ring 0 otherwise).  If the ring's TX lock can be
** taken without blocking the mbuf goes to ixgbe_mq_start_locked();
** otherwise it is put on the ring's buf ring.
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		queue = 0;
	int		rc;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		queue = m->m_pkthdr.flowid % adapter->num_queues;
	txr = adapter->tx_rings + queue;

	/* Lock is busy: enqueue on the ring's buf ring instead */
	if (!IXGBE_TX_TRYLOCK(txr))
		return (drbr_enqueue(ifp, txr->br, m));

	rc = ixgbe_mq_start_locked(ifp, txr, m);
	IXGBE_TX_UNLOCK(txr);
	return (rc);
}
/*
 * Transmit `m' on `txr', or queue it on the ring's buf ring, and then
 * drain whatever is waiting on the buf ring.
 *
 * `m' may be NULL (the "Called by tasklet" case), in which case only
 * the drain step runs.  Returns 0 or the error from drbr_enqueue().
 * NOTE(review): callers in this file take the ring's TX lock before
 * calling; no lock assertion is made here.
 */
static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
struct adapter *adapter = txr->adapter;
struct mbuf *next;
int err = 0;
/* Interface not running: just park the mbuf on the buf ring */
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
err = drbr_enqueue(ifp, txr->br, m);
return (err);
}
if (m == NULL) /* Called by tasklet */
goto process;
/* If nothing queued go right to xmit */
- if (drbr_empty(ifp, txr->br)) {
+ if (!drbr_needs_enqueue(ifp, txr->br)) {
if (ixgbe_xmit(txr, &m)) {
/* Transmit failed: queue the mbuf if the pointer is still set */
if (m && (err = drbr_enqueue(ifp, txr->br, m)) != 0)
return (err);
} else {
/* Success, update stats */
drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(ifp, m);
/* Set the watchdog */
txr->watchdog_timer = IXGBE_TX_TIMEOUT;
}
} else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
return (err);
process:
if (drbr_empty(ifp, txr->br))
return (err);
/* Process the queue */
while (TRUE) {
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
next = drbr_dequeue(ifp, txr->br);
if (next == NULL)
break;
/*
 * NOTE(review): on failure the dequeued mbuf is not put back on
 * the buf ring; whether ixgbe_xmit() releases it cannot be
 * determined from here -- confirm it is not leaked.
 */
if (ixgbe_xmit(txr, &next))
break;
ETHER_BPF_MTAP(ifp, next);
/* Set the watchdog */
txr->watchdog_timer = IXGBE_TX_TIMEOUT;
}
/* Few descriptors left: flag the interface as output-active */
if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
return (err);
}
/*
** Flush all ring buffers
**
** Every mbuf still waiting on each TX ring's buf ring is dequeued and
** freed under that ring's TX lock, then if_qflush() is called for the
** interface.
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;

	for (int i = 0; i < adapter->num_queues; i++) {
		struct tx_ring	*txr = &adapter->tx_rings[i];
		struct mbuf	*pending;

		IXGBE_TX_LOCK(txr);
		for (;;) {
			pending = buf_ring_dequeue_sc(txr->br);
			if (pending == NULL)
				break;
			m_freem(pending);
		}
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */
/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  Commands not handled here are passed to ether_ioctl(), and its
 *  result is returned to the caller.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifreq   *ifr = (struct ifreq *) data;
#ifdef INET
	struct ifaddr   *ifa = (struct ifaddr *) data;
#endif
	int             error = 0;

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		IOCTL_DEBUGOUT("ioctl: SIOCxIFADDR (Get/Set Interface Addr)");
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IXGBE_CORE_LOCK(adapter);
				ixgbe_init_locked(adapter);
				IXGBE_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			/* Propagate the result instead of reporting success */
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
		if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
			error = EINVAL;
		} else {
			IXGBE_CORE_LOCK(adapter);
			ifp->if_mtu = ifr->ifr_mtu;
			adapter->max_frame_size =
				ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
			ixgbe_init_locked(adapter);
			IXGBE_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
		IXGBE_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/* Only reprogram when a promisc flag changed */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					ixgbe_disable_promisc(adapter);
					ixgbe_set_promisc(adapter);
                                }
			} else
				ixgbe_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				ixgbe_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IXGBE_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IXGBE_CORE_LOCK(adapter);
			ixgbe_disable_intr(adapter);
			ixgbe_set_multi(adapter);
			ixgbe_enable_intr(adapter);
			IXGBE_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	{
		/* Bits that differ between requested and enabled caps */
		int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
		if (mask & IFCAP_HWCSUM)
			ifp->if_capenable ^= IFCAP_HWCSUM;
		if (mask & IFCAP_TSO4)
			ifp->if_capenable ^= IFCAP_TSO4;
		if (mask & IFCAP_LRO)
			ifp->if_capenable ^= IFCAP_LRO;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			ixgbe_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	}

#ifdef IXGBE_IEEE1588
	/*
	** IOCTL support for Precision Time (IEEE 1588) Support
	*/
	case SIOCSHWTSTAMP:
		error = ixgbe_hwtstamp_ioctl(adapter, ifp);
		break;
#endif

	default:
		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called by the local timer to detect hardware
 *  hangs.  The core lock must be held by the caller.  When a TX
 *  queue's timer expires with work still outstanding, and the stall
 *  is not caused by pause frames, per-queue state is logged and the
 *  adapter is reinitialized.
 *
 **********************************************************************/
static void
ixgbe_watchdog(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ixgbe_hw	*hw = &adapter->hw;
	bool		tx_hang = FALSE;

	IXGBE_CORE_LOCK_ASSERT(adapter);
	/*
	 * The timer is set to 5 every time ixgbe_start() queues a packet.
	 * Then ixgbe_txeof() keeps resetting to 5 as long as it cleans at
	 * least one descriptor.
	 * Finally, anytime all descriptors are clean the timer is
	 * set to 0.
	 */
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		u32 head, tail;

		IXGBE_TX_LOCK(txr);
		/* Timer idle, or still counting down: nothing to do */
		if (txr->watchdog_timer == 0 || --txr->watchdog_timer) {
			IXGBE_TX_UNLOCK(txr);
			continue;
		} else {
			head = IXGBE_READ_REG(hw, IXGBE_TDH(i));
			tail = IXGBE_READ_REG(hw, IXGBE_TDT(i));
			if (head == tail) { /* last minute check */
				IXGBE_TX_UNLOCK(txr);
				continue;
			}
			/* Well, seems something is really hung */
			tx_hang = TRUE;
			IXGBE_TX_UNLOCK(txr);
			break;
		}
	}
	if (tx_hang == FALSE)
		return;
	/*
	 * If we are in this routine because of pause frames, then don't
	 * reset the hardware.
	 */
	if (IXGBE_READ_REG(hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
		txr = adapter->tx_rings;	/* reset pointer */
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IXGBE_TX_LOCK(txr);
			txr->watchdog_timer = IXGBE_TX_TIMEOUT;
			IXGBE_TX_UNLOCK(txr);
		}
		return;
	}
	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
	/*
	 * The scan above left txr at the hung ring; rewind it so the
	 * dump starts at ring 0 and stays inside the ring array.
	 */
	txr = adapter->tx_rings;
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", i,
		    IXGBE_READ_REG(hw, IXGBE_TDH(i)),
		    IXGBE_READ_REG(hw, IXGBE_TDT(i)));
		device_printf(dev,"TX(%d) desc avail = %d,"
		    "Next TX to Clean = %d\n",
		    i, txr->tx_avail, txr->next_tx_to_clean);
	}
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;
	ixgbe_init_locked(adapter);
}
/*********************************************************************
* Init entry point
*
* This routine is used in two ways. It is used by the stack as
* init entry point in network interface structure. It is also used
* by the driver as a hw/sw initialization routine to get to a
* consistent state.
*
* return 0 on success, positive on failure
**********************************************************************/
#define IXGBE_MHADD_MFS_SHIFT 16
static void
ixgbe_init_locked(struct adapter *adapter)
{
struct ifnet *ifp = adapter->ifp;
device_t dev = adapter->dev;
struct ixgbe_hw *hw;
u32 k, txdctl, mhadd, gpie;
u32 rxdctl, rxctrl;
int err;
INIT_DEBUGOUT("ixgbe_init: begin");
hw = &adapter->hw;
mtx_assert(&adapter->core_mtx, MA_OWNED);
ixgbe_stop(adapter);
/* Get the latest mac address, User can use a LAA */
bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
IXGBE_ETH_LENGTH_OF_ADDRESS);
ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, 1);
adapter->hw.addr_ctrl.rar_used_count = 1;
/* Initialize the hardware */
if (ixgbe_hardware_init(adapter)) {
device_printf(dev, "Unable to initialize the hardware\n");
return;
}
/* Prepare transmit descriptors and buffers */
if (ixgbe_setup_transmit_structures(adapter)) {
device_printf(dev,"Could not setup transmit structures\n");
ixgbe_stop(adapter);
return;
}
ixgbe_initialize_transmit_units(adapter);
/* Setup Multicast table */
ixgbe_set_multi(adapter);
/*
** Determine the correct mbuf pool
** for doing jumbo/headersplit
*/
if (ifp->if_mtu > ETHERMTU)
adapter->rx_mbuf_sz = MJUMPAGESIZE;
else
adapter->rx_mbuf_sz = MCLBYTES;
/* Prepare receive descriptors and buffers */
if (ixgbe_setup_receive_structures(adapter)) {
device_printf(dev,"Could not setup receive structures\n");
ixgbe_stop(adapter);
return;
}
/* Configure RX settings */
ixgbe_initialize_receive_units(adapter);
/* Configure Interrupt Moderation */
ixgbe_init_moderation(adapter);
gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
if (adapter->hw.mac.type == ixgbe_mac_82599EB) {
gpie |= IXGBE_SDP1_GPIEN;
gpie |= IXGBE_SDP2_GPIEN;
}
/* Enable Fan Failure Interrupt */
if (hw->device_id == IXGBE_DEV_ID_82598AT)
gpie |= IXGBE_SDP1_GPIEN;
if (adapter->msix > 1) {
/* Enable Enhanced MSIX mode */
gpie |= IXGBE_GPIE_MSIX_MODE;
gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
IXGBE_GPIE_OCD;
}
IXGBE_WRITE_REG(&adapter->hw, IXGBE_GPIE, gpie);
/* Set the various hardware offload abilities */
ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TSO4)
ifp->if_hwassist |= CSUM_TSO;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist = (CSUM_TCP | CSUM_UDP);
/* Set MTU size */
if (ifp->if_mtu > ETHERMTU) {
mhadd = IXGBE_READ_REG(&adapter->hw, IXGBE_MHADD);
mhadd &= ~IXGBE_MHADD_MFS_MASK;
mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
IXGBE_WRITE_REG(&adapter->hw, IXGBE_MHADD, mhadd);
}
/* Now enable all the queues */
for (int i = 0; i < adapter->num_queues; i++) {
txdctl = IXGBE_READ_REG(&adapter->hw, IXGBE_TXDCTL(i));
txdctl |= IXGBE_TXDCTL_ENABLE;
/* Set WTHRESH to 8, burst writeback */
txdctl |= (8 << 16);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_TXDCTL(i), txdctl);
}
for (int i = 0; i < adapter->num_queues; i++) {
rxdctl = IXGBE_READ_REG(&adapter->hw, IXGBE_RXDCTL(i));
/* PTHRESH set to 32 */
rxdctl |= 0x0020;
rxdctl |= IXGBE_RXDCTL_ENABLE;
IXGBE_WRITE_REG(&adapter->hw, IXGBE_RXDCTL(i), rxdctl);
for (k = 0; k < 10; k++) {
if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
IXGBE_RXDCTL_ENABLE)
break;
else
msec_delay(1);
}
wmb();
IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
}
/* Set up VLAN offloads and filter */
ixgbe_setup_vlan_hw_support(adapter);
/* Enable Receive engine */
rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
if (adapter->hw.mac.type == ixgbe_mac_82598EB)
rxctrl |= IXGBE_RXCTRL_DMBYPS;
rxctrl |= IXGBE_RXCTRL_RXEN;
IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl);
callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
/* Set up MSI/X routing */
if (ixgbe_enable_msix)
ixgbe_configure_ivars(adapter);
else { /* Simple settings for Legacy/MSI */
ixgbe_set_ivar(adapter, 0, 0, 0);
ixgbe_set_ivar(adapter, 0, 0, 1);
}
ixgbe_enable_intr(adapter);
/*
** Check on any SFP devices that
** need to be kick-started
*/
err = hw->phy.ops.identify(hw);
if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
device_printf(dev,
"Unsupported SFP+ module type was detected.\n");
ixgbe_detach(dev);
return;
}
if (ixgbe_is_sfp(hw)) {
if (hw->phy.multispeed_fiber) {
hw->mac.ops.setup_sfp(hw);
taskqueue_enqueue(adapter->tq, &adapter->msf_task);
} else
taskqueue_enqueue(adapter->tq, &adapter->mod_task);
} else
taskqueue_enqueue(adapter->tq, &adapter->link_task);
/* Now inform the stack we're ready */
ifp->if_drv_flags |= IFF_DRV_RUNNING;
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
return;
}
/*
 * Unlocked wrapper around ixgbe_init_locked(): takes the core lock,
 * runs the initialization, and drops the lock again.  'arg' is the
 * adapter softc.
 */
static void
ixgbe_init(void *arg)
{
	struct adapter *adapter = (struct adapter *)arg;

	IXGBE_CORE_LOCK(adapter);
	ixgbe_init_locked(adapter);
	IXGBE_CORE_UNLOCK(adapter);
}
/*
**
** MSIX Interrupt Handlers and Tasklets
**
*/
/*
 * Unmask the interrupt for a single queue vector.
 *
 * On 82598 the bit is written to EIMS (restricted to the RTX queue
 * bits); on other MACs the 64-bit queue mask is split across
 * EIMS_EX(0) and EIMS_EX(1), writing only the half that is non-zero.
 */
static inline void
ixgbe_enable_queue(struct adapter *adapter, u32 vector)
{
	struct ixgbe_hw *hw = &adapter->hw;
	/* Widen before shifting so vectors >= 31 yield the right bit */
	u64	queue = (u64)1 << vector;
	u32	mask;

	if (hw->mac.type == ixgbe_mac_82598EB) {
		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
		IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
	} else {
		mask = (queue & 0xFFFFFFFF);
		if (mask)
			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
		mask = (queue >> 32);
		if (mask)
			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
	}
}
/*
 * Mask the interrupt for a single queue vector.
 *
 * On 82598 the bit is written to EIMC (restricted to the RTX queue
 * bits); on other MACs the 64-bit queue mask is split across
 * EIMC_EX(0) and EIMC_EX(1), writing only the half that is non-zero.
 */
static inline void
ixgbe_disable_queue(struct adapter *adapter, u32 vector)
{
	struct ixgbe_hw *hw = &adapter->hw;
	/* Widen before shifting so vectors >= 31 yield the right bit */
	u64	queue = (u64)1 << vector;
	u32	mask;

	if (hw->mac.type == ixgbe_mac_82598EB) {
		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
		IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
	} else {
		mask = (queue & 0xFFFFFFFF);
		if (mask)
			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
		mask = (queue >> 32);
		if (mask)
			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
	}
}
/*
 * Write the given queue bitmap to the interrupt cause set
 * register(s): EICS on 82598, EICS_EX(0)/EICS_EX(1) otherwise.
 */
static inline void
ixgbe_rearm_rx_queues(struct adapter *adapter, u64 queues)
{
	u32	low, high;

	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
		/* Single 32-bit register, limited to the RTX queue bits */
		low = (IXGBE_EIMS_RTX_QUEUE & queues);
		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, low);
		return;
	}

	/* Split the 64-bit bitmap over the two extended registers */
	low = (queues & 0xFFFFFFFF);
	high = (queues >> 32);
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), low);
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), high);
}
/*
 * Deferred RX task: keep calling ixgbe_rxeof() while it reports more
 * work, bounded by MAX_LOOP, then re-enable the ring's interrupt.
 * 'context' is the rx_ring; 'pending' is unused.
 */
static void
ixgbe_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	u32		budget = MAX_LOOP;

	for (;;) {
		bool again = ixgbe_rxeof(rxr, -1);

		/* Stop when the budget is spent or the ring is clean */
		if (budget-- == 0 || !again)
			break;
	}

	/* Reenable this interrupt */
	ixgbe_enable_queue(adapter, rxr->msix);
}
/*
 * Deferred TX task: with the TX lock held, clean completed
 * descriptors (bounded by MAX_LOOP) and, if the interface is running
 * and packets are waiting, restart transmission.  The ring's
 * interrupt is re-enabled after the lock is dropped.
 * 'context' is the tx_ring; 'pending' is unused.
 */
static void
ixgbe_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;
	u32		budget = MAX_LOOP;

	IXGBE_TX_LOCK(txr);
	for (;;) {
		bool again = ixgbe_txeof(txr);

		/* Stop when the budget is spent or nothing is left */
		if (budget-- == 0 || !again)
			break;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			ixgbe_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			ixgbe_start_locked(txr, ifp);
#endif
	}
	IXGBE_TX_UNLOCK(txr);

	/* Reenable this interrupt */
	ixgbe_enable_queue(adapter, txr->msix);
}
/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *  Services the first RX and TX ring, reports fan failure and link
 *  changes found in the cause register, then re-enables interrupts.
 *
 **********************************************************************/
static void
ixgbe_legacy_irq(void *arg)
{
	struct adapter	*adapter = arg;
	struct ixgbe_hw	*hw = &adapter->hw;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		eicr, budget = MAX_LOOP;
	bool		tx_pending;

	/* No cause bits set: just re-enable and leave */
	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
	if (eicr == 0) {
		ixgbe_enable_intr(adapter);
		return;
	}

	/* RX: hand leftover work to the ring's task */
	if (ixgbe_rxeof(rxr, adapter->rx_process_limit))
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);

	/* TX: clean under the ring lock, bounded by MAX_LOOP */
	IXGBE_TX_LOCK(txr);
	++txr->tx_irq;
	for (;;) {
		tx_pending = ixgbe_txeof(txr);
		if (budget-- == 0 || !tx_pending)
			break;
	}
	IXGBE_TX_UNLOCK(txr);
	if (tx_pending)
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	/* Check for fan failure */
	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
	    (eicr & IXGBE_EICR_GPI_SDP1)) {
		device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
		    "REPLACE IMMEDIATELY!!\n");
		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
	}

	/* Link status change */
	if (eicr & IXGBE_EICR_LSC) {
		ixgbe_check_link(&adapter->hw,
		    &adapter->link_speed, &adapter->link_up, 0);
		ixgbe_update_link_status(adapter);
	}

	/* Update interrupt rate */
	if (ixgbe_enable_aim == TRUE)
		ixgbe_update_aim(rxr);

	ixgbe_enable_intr(adapter);
}
/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 *  Masks the ring's vector, cleans completed descriptors once under
 *  the TX lock, then either defers the remaining work to the ring's
 *  task or unmasks the vector again.
 *
 **********************************************************************/
void
ixgbe_msix_tx(void *arg)
{
	struct tx_ring	*txr = arg;
	struct adapter	*adapter = txr->adapter;
	bool		pending;

	ixgbe_disable_queue(adapter, txr->msix);

	IXGBE_TX_LOCK(txr);
	++txr->tx_irq;
	pending = ixgbe_txeof(txr);
	IXGBE_TX_UNLOCK(txr);

	if (!pending) {
		/* Reenable this interrupt */
		ixgbe_enable_queue(adapter, txr->msix);
		return;
	}
	/* The task re-enables the vector when it is done */
	taskqueue_enqueue(txr->tq, &txr->tx_task);
}
/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 *  Masks the ring's vector, processes up to rx_process_limit
 *  packets, optionally adjusts interrupt moderation, then either
 *  defers the remaining work to the ring's task or unmasks the
 *  vector again.
 *
 **********************************************************************/
static void
ixgbe_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		pending;

	ixgbe_disable_queue(adapter, rxr->msix);

	++rxr->rx_irq;
	pending = ixgbe_rxeof(rxr, adapter->rx_process_limit);

	/* Update interrupt rate */
	if (ixgbe_enable_aim == TRUE)
		ixgbe_update_aim(rxr);

	if (!pending) {
		ixgbe_enable_queue(adapter, rxr->msix);
		return;
	}
	/* The task re-enables the vector when it is done */
	taskqueue_enqueue(rxr->tq, &rxr->rx_task);
}
/*
 * MSIX handler for the link ("other") vector.  Fetches the pending
 * causes, acknowledges them, and dispatches link, ECC, SFP module
 * and fan-failure events before re-enabling the vector.
 */
static void
ixgbe_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		cause;

	++adapter->link_irq;

	/* First get the cause */
	cause = IXGBE_READ_REG(hw, IXGBE_EICS);
	/* Clear interrupt with write */
	IXGBE_WRITE_REG(hw, IXGBE_EICR, cause);

	/* Link status change */
	if (cause & IXGBE_EICR_LSC)
		taskqueue_enqueue(adapter->tq, &adapter->link_task);

	/* 82599-only events; at most one of these is handled per call */
	if (hw->mac.type == ixgbe_mac_82599EB) {
		if (cause & IXGBE_EICR_ECC) {
			device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
			    "Please Reboot!!\n");
			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
		} else if (cause & IXGBE_EICR_GPI_SDP1) {
			/* Clear the interrupt */
			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
			taskqueue_enqueue(adapter->tq, &adapter->msf_task);
		} else if (cause & IXGBE_EICR_GPI_SDP2) {
			/* Clear the interrupt */
			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
		}
	}

	/* Check for fan failure */
	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
	    (cause & IXGBE_EICR_GPI_SDP1)) {
		device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
		    "REPLACE IMMEDIATELY!!\n");
		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
	}

	/* Unmask the "other" cause again */
	IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
}
/*
** Routine to adjust the RX EITR value based on traffic.
** It is a simple three state model (low / average / bulk latency)
** driven by the byte count accumulated on the ring since the last
** call.
**
** Note that the three EITR values are tuneable using
** sysctl in real time. The feature can be effectively
** nullified by setting them equal.
*/
#define BULK_THRESHOLD	10000
#define AVE_THRESHOLD	1600

static void
ixgbe_update_aim(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	u32		current, target;

	current = rxr->eitr_setting;
	target = current;

	/* Idle, don't change setting */
	if (rxr->bytes == 0)
		return;

	/* Move at most one state up or down per call */
	if (current == ixgbe_low_latency) {
		if (rxr->bytes > AVE_THRESHOLD)
			target = ixgbe_ave_latency;
	} else if (current == ixgbe_ave_latency) {
		if (rxr->bytes < AVE_THRESHOLD)
			target = ixgbe_low_latency;
		else if (rxr->bytes > BULK_THRESHOLD)
			target = ixgbe_bulk_latency;
	} else if (current == ixgbe_bulk_latency) {
		if (rxr->bytes < BULK_THRESHOLD)
			target = ixgbe_ave_latency;
	}

	if (target != current) {
		/* Change interrupt rate */
		rxr->eitr_setting = target;
		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(rxr->me),
		    target | (target << 16));
	}

	/* Start a fresh measurement interval */
	rxr->bytes = 0;
}
/*
 * Program the initial interrupt throttle (EITR) values.  With a
 * single interrupt only EITR(0) is written; otherwise every TX, RX
 * and the link vector get their own setting.
 */
static void
ixgbe_init_moderation(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	int		q;

	/* Single interrupt - MSI or Legacy? */
	if (adapter->msix < 2) {
		IXGBE_WRITE_REG(hw, IXGBE_EITR(0), 100);
		return;
	}

	/* TX irq moderation rate is fixed */
	for (q = 0; q < adapter->num_queues; q++) {
		struct tx_ring *txr = &adapter->tx_rings[q];

		IXGBE_WRITE_REG(hw, IXGBE_EITR(txr->msix),
		    ixgbe_ave_latency);
		txr->watchdog_timer = FALSE;
	}

	/* RX moderation will be adapted over time, set default */
	for (q = 0; q < adapter->num_queues; q++) {
		struct rx_ring *rxr = &adapter->rx_rings[q];

		IXGBE_WRITE_REG(hw, IXGBE_EITR(rxr->msix),
		    ixgbe_low_latency);
	}

	/* Set Link moderation */
	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
}
/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.  It refreshes the link state under
 *  the core lock and fills in ifmr accordingly.
 *
 **********************************************************************/
static void
ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("ixgbe_media_status: begin");
	IXGBE_CORE_LOCK(adapter);
	ixgbe_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	/* Speed and duplex are only reported while the link is active */
	if (adapter->link_active) {
		ifmr->ifm_status |= IFM_ACTIVE;
		if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
			ifmr->ifm_active |= adapter->optics | IFM_FDX;
	}

	IXGBE_CORE_UNLOCK(adapter);
}
/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options of ifconfig.  Only the Ethernet
 *  "auto" media subtype is accepted.
 *
 *  return 0 on success, EINVAL for any other media type
 **********************************************************************/
static int
ixgbe_media_change(struct ifnet * ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia *ifm = &adapter->media;

	INIT_DEBUGOUT("ixgbe_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO) {
		device_printf(adapter->dev, "Only auto media type\n");
		return (EINVAL);
	}

	/* Autonegotiate, advertising both supported speeds */
	adapter->hw.mac.autoneg = TRUE;
	adapter->hw.phy.autoneg_advertised =
	    IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_10GB_FULL;
	return (0);
}
/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *    WARNING: while this code is using an MQ style infrastructure,
 *    it would NOT work as is with more than 1 queue.
 *
 *  On failure *m_headp is either left intact (the caller still owns
 *  the chain) or freed and set to NULL, depending on the error.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter	*adapter = txr->adapter;
	u32		olinfo_status = 0, cmd_type_len;
	u32		paylen = 0;
	int		i, j, error, nsegs;
	int		first, last = 0;
	struct mbuf	*m_head;
	bus_dma_segment_t segs[ixgbe_num_segs];
	bus_dmamap_t	map;
	struct ixgbe_tx_buf *txbuf, *txbuf_mapped;
	union ixgbe_adv_tx_desc *txd = NULL;

	m_head = *m_headp;

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/* Do a clean if descriptors are low */
	if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
		ixgbe_txeof(txr);
		/* Now do we at least have a minimal? */
		if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
			return (ENOBUFS);
	}

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail_tx_desc;
	txbuf = &txr->tx_buffers[first];
	txbuf_mapped = txbuf;
	map = txbuf->map;

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		/* Too many segments: compact the chain and retry once */
		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->tx_avail - 2) {
		txr->no_tx_desc_avail++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/*
	** Set up the appropriate offload context
	** this becomes the first descriptor of
	** a packet.
	*/
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (ixgbe_tso_setup(txr, m_head, &paylen)) {
			cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
			olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
			olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
			++adapter->tso_tx;
		} else {
			/* The map is loaded here, so unload on the way out */
			error = ENXIO;
			goto xmit_fail;
		}
	} else if (ixgbe_tx_ctx_setup(txr, m_head))
		olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

#ifdef IXGBE_IEEE1588
	/* This is changing soon to an mtag detection */
	if (we detect this mbuf has a TSTAMP mtag)
		cmd_type_len |= IXGBE_ADVTXD_MAC_TSTAMP;
#endif

	/* Record payload length */
	if (paylen == 0)
		olinfo_status |= m_head->m_pkthdr.len <<
		    IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* Fill one data descriptor per DMA segment */
	i = txr->next_avail_tx_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len |seglen);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i; /* Next descriptor that will get completed */

		if (++i == adapter->num_tx_desc)
			i = 0;

		txbuf->m_head = NULL;
		txbuf->eop_index = -1;
	}

	/* Mark the final descriptor: end of packet, report status */
	txd->read.cmd_type_len |=
	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
	txr->tx_avail -= nsegs;
	txr->next_avail_tx_desc = i;

	txbuf->m_head = m_head;
	/*
	 * Swap the maps: the last buffer, which holds the mbuf, takes
	 * the loaded map, and the first buffer takes the last buffer's
	 * unused map.  Otherwise both buffers would reference the same
	 * map and the last buffer's own map would be lost.
	 */
	txbuf_mapped->map = txbuf->map;
	txbuf->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];
	txbuf->eop_index = last;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);

	return (0);

xmit_fail:
	/* On both paths here 'map' is still the first buffer's loaded map */
	bus_dmamap_unload(txr->txtag, map);
	return (error);
}
/*
 * Apply the interface's IFF_PROMISC / IFF_ALLMULTI flags to FCTRL.
 * When neither flag is set the register is read but not written.
 */
static void
ixgbe_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	u_int32_t	fctrl;

	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);

	if (ifp->if_flags & IFF_PROMISC) {
		/* Accept all unicast and all multicast */
		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		/* Accept all multicast only */
		fctrl |= IXGBE_FCTRL_MPE;
		fctrl &= ~IXGBE_FCTRL_UPE;
	} else {
		return;
	}

	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
}
/*
 * Clear both the unicast (UPE) and multicast (MPE) promiscuous bits
 * in FCTRL.
 */
static void
ixgbe_disable_promisc(struct adapter * adapter)
{
	u_int32_t fctrl;

	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
}
/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever multicast address list is updated.
 *  It programs FCTRL from the interface flags, copies the link-level
 *  multicast addresses into a local array and hands that array to the
 *  shared code.
 *
 **********************************************************************/
#define IXGBE_RAR_ENTRIES 16

static void
ixgbe_set_multi(struct adapter *adapter)
{
	u32	fctrl;
	u8	mta[MAX_NUM_MULTICAST_ADDRESSES * IXGBE_ETH_LENGTH_OF_ADDRESS];
	u8	*update_ptr;
	struct	ifmultiaddr *ifma;
	int	mcnt = 0;
	struct ifnet   *ifp = adapter->ifp;

	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");

	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	if (ifp->if_flags & IFF_PROMISC)
		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	else if (ifp->if_flags & IFF_ALLMULTI) {
		fctrl |= IXGBE_FCTRL_MPE;
		fctrl &= ~IXGBE_FCTRL_UPE;
	} else
		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);

	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);

#if __FreeBSD_version < 800000
	IF_ADDR_LOCK(ifp);
#else
	if_maddr_rlock(ifp);
#endif
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		/*
		 * mta[] holds at most MAX_NUM_MULTICAST_ADDRESSES
		 * entries; stop here rather than write past the end
		 * of the on-stack array.  Addresses beyond the limit
		 * are not programmed.
		 */
		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;
		bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
		    IXGBE_ETH_LENGTH_OF_ADDRESS);
		mcnt++;
	}
#if __FreeBSD_version < 800000
	IF_ADDR_UNLOCK(ifp);
#else
	if_maddr_runlock(ifp);
#endif

	update_ptr = mta;
	ixgbe_update_mc_addr_list(&adapter->hw,
	    update_ptr, mcnt, ixgbe_mc_array_itr);

	return;
}
/*
 * Iterator callback needed by the multicast shared code.  It feeds
 * the shared code the addresses in the array built by
 * ixgbe_set_multi() one by one: each call returns the address at
 * *update_ptr, advances *update_ptr to the next entry and sets
 * *vmdq to 0.  'hw' is unused.
 */
static u8 *
ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
{
	u8 *current = *update_ptr;

	*vmdq = 0;
	*update_ptr = current + IXGBE_ETH_LENGTH_OF_ADDRESS;
	return (current);
}
/*********************************************************************
 *  Timer routine
 *
 *  This routine checks for link status, updates statistics,
 *  and runs the watchdog timer.  It reschedules itself to run
 *  again after hz ticks.  The core mutex must be held.
 *
 **********************************************************************/
static void
ixgbe_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	mtx_assert(&adapter->core_mtx, MA_OWNED);

	/*
	 * Check for pluggable optics: while a probe is pending and it
	 * keeps failing there is nothing else to do this tick.
	 */
	if (!adapter->sfp_probe || ixgbe_sfp_probe(adapter)) {
		ixgbe_update_link_status(adapter);
		ixgbe_update_stats_counters(adapter);
		if (ixgbe_display_debug_stats &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING))
			ixgbe_print_hw_stats(adapter);
		/*
		 * Each tick we check the watchdog
		 * to protect against hardware hangs.
		 */
		ixgbe_watchdog(adapter);
	}

	/* Trigger an RX interrupt on all queues */
	ixgbe_rearm_rx_queues(adapter, adapter->rx_mask);
	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
}
/*
** Note: this routine updates the OS on the link state;
**	the real check of the hardware only happens with
**	a link interrupt.
**
** It acts only on a transition between adapter->link_up and the
** last state reported to the stack (adapter->link_active).
*/
static void
ixgbe_update_link_status(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	int		q;

	if (adapter->link_up) {
		/* Already reported as up */
		if (adapter->link_active != FALSE)
			return;
		if (bootverbose)
			device_printf(dev,"Link is up %d Gbps %s \n",
			    ((adapter->link_speed == 128)? 10:1),
			    "Full Duplex");
		adapter->link_active = TRUE;
		if_link_state_change(ifp, LINK_STATE_UP);
		return;
	}

	/* Link down; nothing to do unless it was reported as up */
	if (adapter->link_active != TRUE)
		return;
	if (bootverbose)
		device_printf(dev,"Link is Down\n");
	if_link_state_change(ifp, LINK_STATE_DOWN);
	adapter->link_active = FALSE;
	/* Disarm every TX watchdog timer */
	for (q = 0; q < adapter->num_queues; q++)
		adapter->tx_rings[q].watchdog_timer = FALSE;
}
/*********************************************************************
*
* This routine disables all traffic on the adapter by issuing a
* global reset on the MAC and deallocates TX/RX buffers.
*
**********************************************************************/
static void
ixgbe_stop(void *arg)
{
struct ifnet *ifp;
struct adapter *adapter = arg;
ifp = adapter->ifp;
mtx_assert(&adapter->core_mtx, MA_OWNED);
INIT_DEBUGOUT("ixgbe_stop: begin\n");
ixgbe_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
ixgbe_reset_hw(&adapter->hw);
adapter->hw.adapter_stopped = FALSE;
ixgbe_stop_adapter(&adapter->hw);
callout_stop(&adapter->timer);
/* reprogram the RAR[0] in case user changed it. */
ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
return;
}
/*********************************************************************
*
* Determine hardware revision.
*
**********************************************************************/
static void
ixgbe_identify_hardware(struct adapter *adapter)
{
device_t dev = adapter->dev;
/* Save off the information about this board */
adapter->hw.vendor_id = pci_get_vendor(dev);
adapter->hw.device_id = pci_get_device(dev);
adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
adapter->hw.subsystem_vendor_id =
pci_read_config(dev, PCIR_SUBVEND_0, 2);
adapter->hw.subsystem_device_id =
pci_read_config(dev, PCIR_SUBDEV_0, 2);
return;
}
/*********************************************************************
 *
 *  Setup the Legacy or MSI Interrupt handler
 *
 *  Allocates the single IRQ resource, creates the TX, RX and link
 *  taskqueues, and registers ixgbe_legacy_irq as the handler.
 *
 *  return 0 on success, positive errno on failure
 **********************************************************************/
static int
ixgbe_allocate_legacy(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct		tx_ring *txr = adapter->tx_rings;
	struct		rx_ring *rxr = adapter->rx_rings;
	int error, rid = 0;

	/* MSI RID at 1 */
	if (adapter->msix == 1)
		rid = 1;

	/* We allocate a single interrupt resource */
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&txr->tx_task, 0, ixgbe_handle_tx, txr);
	TASK_INIT(&rxr->rx_task, 0, ixgbe_handle_rx, rxr);
	txr->tq = taskqueue_create_fast("ixgbe_txq", M_NOWAIT,
	    taskqueue_thread_enqueue, &txr->tq);
	rxr->tq = taskqueue_create_fast("ixgbe_rxq", M_NOWAIT,
	    taskqueue_thread_enqueue, &rxr->tq);
	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
	    device_get_nameunit(adapter->dev));
	taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
	    device_get_nameunit(adapter->dev));

	/* Tasklets for Link, SFP and Multispeed Fiber */
	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
	adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
	    device_get_nameunit(adapter->dev));

	if ((error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
	    adapter, &adapter->tag)) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
		    "handler: %d\n", error);
		taskqueue_free(txr->tq);
		taskqueue_free(rxr->tq);
		txr->tq = NULL;
		rxr->tq = NULL;
		/* Also release the link taskqueue created above */
		if (adapter->tq != NULL) {
			taskqueue_free(adapter->tq);
			adapter->tq = NULL;
		}
		return (error);
	}

	return (0);
}
/*********************************************************************
 *
 *  Setup MSIX Interrupt resources and handlers
 *
 *  Vectors are assigned in order: one per TX ring, then one per RX
 *  ring, then one for link events.  The IRQ rid of a vector is
 *  vector + 1.
 *
 *  return 0 on success, positive errno on failure
 **********************************************************************/
static int
ixgbe_allocate_msix(struct adapter *adapter)
{
	device_t        dev = adapter->dev;
	struct		tx_ring *txr = adapter->tx_rings;
	struct		rx_ring *rxr = adapter->rx_rings;
	int		error, rid, vector = 0;

	/* TX setup: the code is here for multi tx,
	   there are other parts of the driver not ready for it */
	for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
		rid = vector + 1;
		txr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
		    RF_SHAREABLE | RF_ACTIVE);
		if (!txr->res) {
			device_printf(dev,"Unable to allocate"
			    " bus resource: tx interrupt [%d]\n", vector);
			return (ENXIO);
		}
		/* Set the handler function */
		error = bus_setup_intr(dev, txr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
		    ixgbe_msix_tx, txr, &txr->tag);
		if (error) {
			txr->res = NULL;
			device_printf(dev, "Failed to register TX handler");
			return (error);
		}
		txr->msix = vector;
		/*
		** Bind the msix vector, and thus the
		** ring to the corresponding cpu.
		*/
		if (adapter->num_queues > 1)
			bus_bind_intr(dev, txr->res, i);

		TASK_INIT(&txr->tx_task, 0, ixgbe_handle_tx, txr);
		txr->tq = taskqueue_create_fast("ixgbe_txq", M_NOWAIT,
		    taskqueue_thread_enqueue, &txr->tq);
		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
		    device_get_nameunit(adapter->dev));
	}

	/* RX setup */
	for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) {
		rid = vector + 1;
		rxr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
		if (!rxr->res) {
			device_printf(dev,"Unable to allocate"
			    " bus resource: rx interrupt [%d],"
			    "rid = %d\n", i, rid);
			return (ENXIO);
		}
		/* Set the handler function */
		error = bus_setup_intr(dev, rxr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
		    ixgbe_msix_rx, rxr, &rxr->tag);
		if (error) {
			rxr->res = NULL;
			device_printf(dev, "Failed to register RX handler");
			return (error);
		}
		rxr->msix = vector;
		/*
		 * used in local timer; widen before shifting so that
		 * vectors >= 31 land on the correct bit of the mask
		 */
		adapter->rx_mask |= (u64)1 << vector;
		/*
		** Bind the msix vector, and thus the
		** ring to the corresponding cpu.
		*/
		if (adapter->num_queues > 1)
			bus_bind_intr(dev, rxr->res, i);

		TASK_INIT(&rxr->rx_task, 0, ixgbe_handle_rx, rxr);
		rxr->tq = taskqueue_create_fast("ixgbe_rxq", M_NOWAIT,
		    taskqueue_thread_enqueue, &rxr->tq);
		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
		    device_get_nameunit(adapter->dev));
	}

	/* Now for Link changes */
	rid = vector + 1;
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (!adapter->res) {
		device_printf(dev,"Unable to allocate"
		    " bus resource: Link interrupt [%d]\n", rid);
		return (ENXIO);
	}
	/* Set the link handler function */
	error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    ixgbe_msix_link, adapter, &adapter->tag);
	if (error) {
		adapter->res = NULL;
		device_printf(dev, "Failed to register LINK handler");
		return (error);
	}
	adapter->linkvec = vector;
	/* Tasklets for Link, SFP and Multispeed Fiber */
	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
	adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
	    device_get_nameunit(adapter->dev));

	return (0);
}
/*
 * Setup Either MSI/X or MSI
 *
 * Returns the number of MSIX vectors allocated, 0 when the vector
 * count does not cover the wanted queues, or otherwise the MSI
 * message count reported by pci_msi_count().
 */
static int
ixgbe_setup_msix(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int rid, want, queues, msgs;

	/* Override by tuneable */
	if (ixgbe_enable_msix == 0)
		goto msi;

	/* First try MSI/X */
	rid = PCIR_BAR(MSIX_82598_BAR);
	adapter->msix_mem = bus_alloc_resource_any(dev,
	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (adapter->msix_mem == NULL) {
		rid += 4;	/* 82599 maps in higher BAR */
		adapter->msix_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->msix_mem == NULL) {
			/* May not be enabled */
			device_printf(adapter->dev,
			    "Unable to map MSIX table \n");
			goto msi;
		}
	}

	msgs = pci_msix_count(dev);
	if (msgs == 0) { /* system has msix disabled */
		bus_release_resource(dev, SYS_RES_MEMORY,
		    rid, adapter->msix_mem);
		adapter->msix_mem = NULL;
		goto msi;
	}

	/*
	 * Figure out a reasonable auto config value: the smaller of
	 * the CPU count and half of the non-link vectors.
	 */
	queues = (msgs - 1) / 2;
	if (mp_ncpus <= queues)
		queues = mp_ncpus;

	if (ixgbe_num_queues == 0)
		ixgbe_num_queues = queues;

	/*
	** Want two vectors (RX/TX) per queue
	** plus an additional for Link.
	*/
	want = (ixgbe_num_queues * 2) + 1;
	if (msgs < want) {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors but %d queues wanted!\n",
		    msgs, want);
		return (0); /* Will go to Legacy setup */
	}
	msgs = want;

	if (msgs != 0 && pci_alloc_msix(dev, &msgs) == 0) {
		device_printf(adapter->dev,
		    "Using MSIX interrupts with %d vectors\n", msgs);
		adapter->num_queues = ixgbe_num_queues;
		return (msgs);
	}

msi:
	msgs = pci_msi_count(dev);
	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
		device_printf(adapter->dev,"Using MSI interrupt\n");
	return (msgs);
}
static int
ixgbe_allocate_pci_resources(struct adapter *adapter)
{
int rid;
device_t dev = adapter->dev;
rid = PCIR_BAR(0);
adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
&rid, RF_ACTIVE);
if (!(adapter->pci_mem)) {
device_printf(dev,"Unable to allocate bus resource: memory\n");
return (ENXIO);
}
adapter->osdep.mem_bus_space_tag =
rman_get_bustag(adapter->pci_mem);
adapter->osdep.mem_bus_space_handle =
rman_get_bushandle(adapter->pci_mem);
adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
/* Legacy defaults */
adapter->num_queues = 1;
adapter->hw.back = &adapter->osdep;
/*
** Now setup MSI or MSI/X, should
** return us the number of supported
** vectors. (Will be 1 for MSI)
*/
adapter->msix = ixgbe_setup_msix(adapter);
return (0);
}
/*
 * Release the interrupt handlers, IRQ resources, MSI/MSI-X messages
 * and memory BARs held by the adapter.
 */
static void
ixgbe_free_pci_resources(struct adapter * adapter)
{
	device_t dev = adapter->dev;
	int irq_rid, msix_bar;

	/*
	 * adapter->res is still NULL when attach failed before the
	 * interrupt resources were initialized; in that case none of
	 * the interrupt teardown below may run.
	 */
	if (adapter->res != NULL) {
		/*
		 * Per-queue TX vectors.  For Legacy or MSI the tag and
		 * res pointers are NULL, so these loops do nothing.
		 */
		for (int q = 0; q < adapter->num_queues; q++) {
			struct tx_ring *txr = &adapter->tx_rings[q];

			irq_rid = txr->msix + 1;
			if (txr->tag != NULL) {
				bus_teardown_intr(dev, txr->res, txr->tag);
				txr->tag = NULL;
			}
			if (txr->res != NULL)
				bus_release_resource(dev, SYS_RES_IRQ,
				    irq_rid, txr->res);
		}

		/* Per-queue RX vectors */
		for (int q = 0; q < adapter->num_queues; q++) {
			struct rx_ring *rxr = &adapter->rx_rings[q];

			irq_rid = rxr->msix + 1;
			if (rxr->tag != NULL) {
				bus_teardown_intr(dev, rxr->res, rxr->tag);
				rxr->tag = NULL;
			}
			if (rxr->res != NULL)
				bus_release_resource(dev, SYS_RES_IRQ,
				    irq_rid, rxr->res);
		}

		/* The Legacy or Link interrupt goes last */
		if (adapter->linkvec)	/* we are doing MSIX */
			irq_rid = adapter->linkvec + 1;
		else
			irq_rid = (adapter->msix != 0) ? 1 : 0;

		if (adapter->tag != NULL) {
			bus_teardown_intr(dev, adapter->res, adapter->tag);
			adapter->tag = NULL;
		}
		bus_release_resource(dev, SYS_RES_IRQ, irq_rid, adapter->res);
	}

	if (adapter->msix)
		pci_release_msi(dev);

	/* The MSI-X table BAR depends on the MAC generation */
	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
		msix_bar = PCIR_BAR(MSIX_82598_BAR);
	else
		msix_bar = PCIR_BAR(MSIX_82599_BAR);

	if (adapter->msix_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    msix_bar, adapter->msix_mem);

	if (adapter->pci_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->pci_mem);
}
/*********************************************************************
 *
 *  Bring the hardware into a known state: stop the adapter, check
 *  the EEPROM checksum, load the flow control defaults and run the
 *  shared-code hardware init.
 *
 *  Returns 0 on success (including the pre-production EEPROM
 *  warning case) and EIO on any failure.
 *
 **********************************************************************/
static int
ixgbe_hardware_init(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	u16 csum = 0;
	u32 ret;

	/* Issue a global reset */
	adapter->hw.adapter_stopped = FALSE;
	ixgbe_stop_adapter(&adapter->hw);

	/* Nothing is read from the EEPROM unless its checksum is good */
	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
		device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
		return (EIO);
	}

	/* Flow control defaults */
	adapter->hw.fc.requested_mode = ixgbe_fc_full;
	adapter->hw.fc.pause_time = IXGBE_FC_PAUSE;
	adapter->hw.fc.low_water = IXGBE_FC_LO;
	adapter->hw.fc.high_water = IXGBE_FC_HI;
	adapter->hw.fc.send_xon = TRUE;

	ret = ixgbe_init_hw(&adapter->hw);

	/* A pre-production EEPROM only earns a warning */
	if (ret == IXGBE_ERR_EEPROM_VERSION) {
		device_printf(dev, "This device is a pre-production adapter/"
		    "LOM. Please be aware there may be issues associated "
		    "with your hardware.\n If you are experiencing problems "
		    "please contact your Intel or hardware representative "
		    "who provided you with this hardware.\n");
		return (0);
	}
	if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
		device_printf(dev,"Unsupported SFP+ Module\n");
		return (EIO);
	}
	if (ret != 0 ) {
		device_printf(dev,"Hardware Initialization Failure\n");
		return (EIO);
	}

	return (0);
}
/*********************************************************************
 *
 *  Allocate the ifnet, fill in its methods and capabilities, attach
 *  it as an Ethernet interface and register the supported media.
 *  Panics if the ifnet cannot be allocated.
 *
 **********************************************************************/
static void
ixgbe_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	struct ifnet *ifp;
	int is_82598at;

	INIT_DEBUGOUT("ixgbe_setup_interface: begin");

	ifp = if_alloc(IFT_ETHER);
	adapter->ifp = ifp;
	if (ifp == NULL)
		panic("%s: can not if_alloc()\n", device_get_nameunit(dev));

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/* Basic interface parameters and entry points */
	ifp->if_mtu = ETHERMTU;
	ifp->if_baudrate = 1000000000;
	ifp->if_init = ixgbe_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = ixgbe_ioctl;
	ifp->if_start = ixgbe_start;
#if __FreeBSD_version >= 800000
	ifp->if_transmit = ixgbe_mq_start;
	ifp->if_qflush = ixgbe_qflush;
#endif
	ifp->if_timer = 0;
	ifp->if_watchdog = NULL;
	ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2;

	ether_ifattach(ifp, adapter->hw.mac.addr);

	adapter->max_frame_size =
	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

	/* Advertise the offloads and enable every one of them */
	ifp->if_capabilities |=
	    IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM |
	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU |
	    IFCAP_JUMBO_MTU | IFCAP_LRO;
	ifp->if_capenable = ifp->if_capabilities;

	/* The 82598AT additionally gets the 1G speed and media entries */
	is_82598at = (hw->device_id == IXGBE_DEV_ID_82598AT);

	if (is_82598at)
		ixgbe_setup_link_speed(hw, (IXGBE_LINK_SPEED_10GB_FULL |
		    IXGBE_LINK_SPEED_1GB_FULL), TRUE, TRUE);
	else
		ixgbe_setup_link_speed(hw, IXGBE_LINK_SPEED_10GB_FULL,
		    TRUE, FALSE);

	/*
	 * Register the media types this adapter supports together with
	 * the callbacks that change and report media state.
	 */
	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
	    ixgbe_media_status);
	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics |
	    IFM_FDX, 0, NULL);
	if (is_82598at) {
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_1000_T, 0, NULL);
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}
/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/

/*
 * bus_dmamap_load() callback: on success store the bus address of the
 * first segment into the bus_addr_t that 'arg' points to; on error
 * leave the destination untouched.
 */
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
{
	if (error == 0)
		*(bus_addr_t *) arg = segs[0].ds_addr;
}
/*
 * Allocate 'size' bytes of single-segment DMA memory and load it.
 * On success dma->dma_tag, dma_map, dma_vaddr, dma_paddr and dma_size
 * describe the region and 0 is returned.  On failure everything
 * obtained so far is released, dma_tag/dma_map are set to NULL and
 * the bus_dma error code is returned.
 */
static int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int err;

	/* Step 1: a tag describing one contiguous segment of 'size' */
	err = bus_dma_tag_create(NULL,	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    size,			/* maxsize */
	    1,				/* nsegments */
	    size,			/* maxsegsize */
	    BUS_DMA_ALLOCNOW,		/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &dma->dma_tag);
	if (err != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
		    "error %u\n", err);
		goto clear_handles;
	}

	/* Step 2: the memory itself */
	err = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (err != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
		    "error %u\n", err);
		goto destroy_tag;
	}

	/* Step 3: load it; the callback records the bus address */
	err = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size, ixgbe_dmamap_cb, &dma->dma_paddr,
	    mapflags | BUS_DMA_NOWAIT);
	if (err == 0) {
		dma->dma_size = size;
		return (0);
	}

	device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
	    "error %u\n", err);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
destroy_tag:
	bus_dma_tag_destroy(dma->dma_tag);
clear_handles:
	dma->dma_map = NULL;
	dma->dma_tag = NULL;
	return (err);
}
/*
 * Undo ixgbe_dma_malloc(): sync, unload and free the DMA memory and
 * destroy its tag.  The handles in 'dma' are not cleared.
 */
static void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	int sync_ops = BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE;

	bus_dmamap_sync(dma->dma_tag, dma->dma_map, sync_ops);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);

	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}
/*********************************************************************
*
* Allocate memory for the transmit and receive rings, and then
* the descriptors associated with each, called only once at attach.
*
* For every queue this initializes the ring mutex, allocates the
* descriptor DMA area (rounded up to a multiple of 4096 bytes) and
* the per-descriptor buffer bookkeeping.
*
* Returns 0 on success and ENOMEM on any allocation failure.
*
* NOTE(review): the failure paths of ixgbe_allocate_transmit_buffers()
* call ixgbe_free_transmit_structures(), which already frees every
* TX ring's descriptor DMA and adapter->tx_rings; the unwind labels
* below then release ring DMA and the ring arrays again. This looks
* like a double release -- confirm before relying on this path.
*
* NOTE(review): the unwind labels only release descriptor DMA and the
* ring arrays; the mutexes, TX buffers and buf rings set up earlier
* are not released here -- confirm whether the caller handles them.
*
**********************************************************************/
static int
ixgbe_allocate_queues(struct adapter *adapter)
{
device_t dev = adapter->dev;
struct tx_ring *txr;
struct rx_ring *rxr;
int rsize, tsize, error = IXGBE_SUCCESS;
/* Number of TX / RX rings whose loop iteration fully completed */
int txconf = 0, rxconf = 0;
/* First allocate the TX ring struct memory */
if (!(adapter->tx_rings =
(struct tx_ring *) malloc(sizeof(struct tx_ring) *
adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate TX ring memory\n");
error = ENOMEM;
goto fail;
}
txr = adapter->tx_rings;
/* Next allocate the RX */
if (!(adapter->rx_rings =
(struct rx_ring *) malloc(sizeof(struct rx_ring) *
adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate RX ring memory\n");
error = ENOMEM;
goto rx_fail;
}
rxr = adapter->rx_rings;
/* For the ring itself */
tsize = roundup2(adapter->num_tx_desc *
sizeof(union ixgbe_adv_tx_desc), 4096);
/*
* Now set up the TX queues, txconf is needed to handle the
* possibility that things fail midcourse and we need to
* undo memory gracefully
*/
for (int i = 0; i < adapter->num_queues; i++, txconf++) {
/* Set up some basics */
txr = &adapter->tx_rings[i];
txr->adapter = adapter;
txr->me = i;
/* Initialize the TX side lock */
snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
device_get_nameunit(dev), txr->me);
mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
if (ixgbe_dma_malloc(adapter, tsize,
&txr->txdma, BUS_DMA_NOWAIT)) {
device_printf(dev,
"Unable to allocate TX Descriptor memory\n");
error = ENOMEM;
goto err_tx_desc;
}
txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
bzero((void *)txr->tx_base, tsize);
/* Now allocate transmit buffers for the ring */
/*
* txconf has not been incremented for this ring yet, so on
* failure here the unwind loop does not cover this ring's
* descriptor DMA.
*/
if (ixgbe_allocate_transmit_buffers(txr)) {
device_printf(dev,
"Critical Failure setting up transmit buffers\n");
error = ENOMEM;
goto err_tx_desc;
}
#if __FreeBSD_version >= 800000
/* Allocate a buf ring */
/* The result is not checked; the allocation uses M_WAITOK */
txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
M_WAITOK, &txr->tx_mtx);
#endif
}
/*
* Next the RX queues...
*/
rsize = roundup2(adapter->num_rx_desc *
sizeof(union ixgbe_adv_rx_desc), 4096);
for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
rxr = &adapter->rx_rings[i];
/* Set up some basics */
rxr->adapter = adapter;
rxr->me = i;
/* Initialize the RX side lock */
snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
device_get_nameunit(dev), rxr->me);
mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
if (ixgbe_dma_malloc(adapter, rsize,
&rxr->rxdma, BUS_DMA_NOWAIT)) {
device_printf(dev,
"Unable to allocate RxDescriptor memory\n");
error = ENOMEM;
goto err_rx_desc;
}
rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
bzero((void *)rxr->rx_base, rsize);
/* Allocate receive buffers for the ring*/
if (ixgbe_allocate_receive_buffers(rxr)) {
device_printf(dev,
"Critical Failure setting up receive buffers\n");
error = ENOMEM;
goto err_rx_desc;
}
}
return (0);
/*
* Unwind: each label falls through into the next, releasing the
* descriptor DMA of the completed rings and then the ring arrays.
*/
err_rx_desc:
for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
ixgbe_dma_free(adapter, &txr->txdma);
free(adapter->rx_rings, M_DEVBUF);
rx_fail:
free(adapter->tx_rings, M_DEVBUF);
fail:
return (error);
}
/*********************************************************************
*
* Allocate memory for tx_buffer structures. The tx_buffer stores all
* the information needed to transmit a packet on the wire. This is
* called only once at attach, setup is done every reset.
*
**********************************************************************/
static int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
device_t dev = adapter->dev;
struct ixgbe_tx_buf *txbuf;
int error, i;
/*
* Setup DMA descriptor areas.
*/
if ((error = bus_dma_tag_create(NULL, /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
IXGBE_TSO_SIZE, /* maxsize */
ixgbe_num_segs, /* nsegments */
PAGE_SIZE, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&txr->txtag))) {
device_printf(dev,"Unable to allocate TX DMA tag\n");
goto fail;
}
if (!(txr->tx_buffers =
(struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate tx_buffer memory\n");
error = ENOMEM;
goto fail;
}
/* Create the descriptor buffer dma maps */
txbuf = txr->tx_buffers;
for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
if (error != 0) {
device_printf(dev, "Unable to create TX DMA map\n");
goto fail;
}
}
return 0;
fail:
/* We free all, it handles case where we are in the middle */
ixgbe_free_transmit_structures(adapter);
return (error);
}
/*********************************************************************
 *
 *  Reset one transmit ring to its empty state: zero the descriptors,
 *  rewind the indices, drop any mbufs still attached to the ring and
 *  mark every descriptor as available.
 *
 **********************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *slot;
	int idx;

	/* Wipe the descriptor memory */
	bzero((void *)txr->tx_base,
	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);

	/* Start over at the beginning of the ring */
	txr->next_avail_tx_desc = 0;
	txr->next_tx_to_clean = 0;

	/* Release packets that were still queued on the ring */
	for (idx = 0; idx < adapter->num_tx_desc; idx++) {
		slot = &txr->tx_buffers[idx];
		if (slot->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, slot->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, slot->map);
			m_freem(slot->m_head);
			slot->m_head = NULL;
		}
		/* No end-of-packet recorded for this slot */
		slot->eop_index = -1;
	}

	/* The whole ring is free again */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
/*********************************************************************
 *
 *  Reset every transmit ring of the adapter.  Always returns 0.
 *
 **********************************************************************/
static int
ixgbe_setup_transmit_structures(struct adapter *adapter)
{
	for (int q = 0; q < adapter->num_queues; q++)
		ixgbe_setup_transmit_ring(&adapter->tx_rings[q]);

	return (0);
}
/*********************************************************************
 *
 *  Program the transmit unit: per-queue descriptor ring base, length
 *  and head/tail registers, plus the DMA TX enable bit on 82599.
 *
 **********************************************************************/
static void
ixgbe_initialize_transmit_units(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int q = 0; q < adapter->num_queues; q++) {
		struct tx_ring *txr = &adapter->tx_rings[q];
		u64 tdba = txr->txdma.dma_paddr;

		/* Ring base (low/high halves) and length in bytes */
		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(q),
		    (tdba & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(q), (tdba >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(q),
		    adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));

		/* Head and tail both start at the first descriptor */
		IXGBE_WRITE_REG(hw, IXGBE_TDH(q), 0);
		IXGBE_WRITE_REG(hw, IXGBE_TDT(q), 0);

		/* Default descriptor command bits; watchdog idle */
		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
		txr->watchdog_timer = 0;
	}

	if (hw->mac.type != ixgbe_mac_82599EB)
		return;

	/* 82599 only: set the TE bit in DMATXCTL */
	u32 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
	dmatxctl |= IXGBE_DMATXCTL_TE;
	IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
}
/*********************************************************************
 *
 *  Release everything owned by the transmit rings: per-ring buffers,
 *  descriptor DMA memory and lock, then the ring array itself.
 *
 **********************************************************************/
static void
ixgbe_free_transmit_structures(struct adapter *adapter)
{
	for (int q = 0; q < adapter->num_queues; q++) {
		struct tx_ring *ring = &adapter->tx_rings[q];

		/* Buffers and descriptors are released under the ring lock */
		IXGBE_TX_LOCK(ring);
		ixgbe_free_transmit_buffers(ring);
		ixgbe_dma_free(adapter, &ring->txdma);
		IXGBE_TX_UNLOCK(ring);

		IXGBE_TX_LOCK_DESTROY(ring);
	}

	free(adapter->tx_rings, M_DEVBUF);
}
/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 *  Releases, for one ring: any mbufs still attached to descriptors,
 *  the per-descriptor DMA maps, the buf ring (FreeBSD >= 8), the
 *  tx_buffer array and the TX DMA tag.  Every pointer is cleared
 *  after release, so the function is safe to call on a partially
 *  set up ring and safe to call more than once.
 *
 *  A missing tx_buffer array no longer short-circuits the whole
 *  function: the DMA tag is created before that array is allocated
 *  (see ixgbe_allocate_transmit_buffers), so returning early would
 *  leak the tag when the array allocation failed.
 *
 **********************************************************************/
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;
	int i;

	INIT_DEBUGOUT("free_transmit_ring: begin");

	if (txr->tx_buffers != NULL) {
		tx_buffer = txr->tx_buffers;
		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
			if (tx_buffer->m_head != NULL) {
				/* Packet still queued: unmap and free it */
				bus_dmamap_sync(txr->txtag, tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
				if (tx_buffer->map != NULL) {
					bus_dmamap_destroy(txr->txtag,
					    tx_buffer->map);
					tx_buffer->map = NULL;
				}
			} else if (tx_buffer->map != NULL) {
				/* Idle slot: only the map needs to go */
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		}
	}
#if __FreeBSD_version >= 800000
	if (txr->br != NULL) {
		buf_ring_free(txr->br, M_DEVBUF);
		txr->br = NULL;		/* guard against a second free */
	}
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	/* The tag must outlive the maps destroyed above */
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}
/*********************************************************************
*
* Advanced Context Descriptor setup for VLAN or CSUM
*
* Builds a context descriptor in the slot at txr->next_avail_tx_desc
* for the packet 'mp', then advances next_avail_tx_desc and
* decrements tx_avail.
*
* Returns FALSE without consuming a descriptor when the packet has
* neither a VLAN tag nor any CSUM_OFFLOAD flag, or when the IP/IPv6
* header does not fit in the first mbuf. Otherwise a descriptor is
* consumed and the return value is the 'offload' flag: it is FALSE
* for ethertypes other than IP/IPv6 and for protocols other than
* TCP/UDP.
*
**********************************************************************/
static boolean_t
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
{
struct adapter *adapter = txr->adapter;
struct ixgbe_adv_tx_context_desc *TXD;
struct ixgbe_tx_buf *tx_buffer;
u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
struct ether_vlan_header *eh;
struct ip *ip;
struct ip6_hdr *ip6;
int ehdrlen, ip_hlen = 0;
u16 etype;
u8 ipproto = 0;
bool offload = TRUE;
int ctxd = txr->next_avail_tx_desc;
u16 vtag = 0;
/* No checksum offload requested by the stack for this packet */
if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
offload = FALSE;
tx_buffer = &txr->tx_buffers[ctxd];
TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
/*
** In advanced descriptors the vlan tag must
** be placed into the descriptor itself.
*/
if (mp->m_flags & M_VLANTAG) {
vtag = htole16(mp->m_pkthdr.ether_vtag);
vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
} else if (offload == FALSE)
/* Neither VLAN nor offload: no context descriptor needed */
return FALSE;
/*
* Determine where frame payload starts.
* Jump over vlan headers if already present,
* helpful for QinQ too.
*/
eh = mtod(mp, struct ether_vlan_header *);
if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
etype = ntohs(eh->evl_proto);
ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
} else {
etype = ntohs(eh->evl_encap_proto);
ehdrlen = ETHER_HDR_LEN;
}
/* Set the ether header length */
vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
/* Layer 3: record the IP header length and the L4 protocol */
switch (etype) {
case ETHERTYPE_IP:
ip = (struct ip *)(mp->m_data + ehdrlen);
/* NOTE(review): ip_hl is read before the m_len check below */
ip_hlen = ip->ip_hl << 2;
if (mp->m_len < ehdrlen + ip_hlen)
return (FALSE);
ipproto = ip->ip_p;
type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
break;
case ETHERTYPE_IPV6:
ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
ip_hlen = sizeof(struct ip6_hdr);
if (mp->m_len < ehdrlen + ip_hlen)
return (FALSE);
ipproto = ip6->ip6_nxt;
type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
break;
default:
offload = FALSE;
break;
}
vlan_macip_lens |= ip_hlen;
type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
/* Layer 4: select the checksum type only if the stack asked for it */
switch (ipproto) {
case IPPROTO_TCP:
if (mp->m_pkthdr.csum_flags & CSUM_TCP)
type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
break;
case IPPROTO_UDP:
if (mp->m_pkthdr.csum_flags & CSUM_UDP)
type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
break;
default:
offload = FALSE;
break;
}
/* Now copy bits into descriptor */
/* The first two fields are OR-ed into whatever the slot holds */
TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
TXD->seqnum_seed = htole32(0);
TXD->mss_l4len_idx = htole32(0);
/* A context descriptor carries no mbuf and ends no packet */
tx_buffer->m_head = NULL;
tx_buffer->eop_index = -1;
/* We've consumed the first desc, adjust counters */
if (++ctxd == adapter->num_tx_desc)
ctxd = 0;
txr->next_avail_tx_desc = ctxd;
--txr->tx_avail;
return (offload);
}
/**********************************************************************
 *
 *  Build the context descriptor for a TSO (TCP segmentation offload)
 *  packet using advanced TX descriptors.
 *
 *  Returns FALSE, consuming no descriptor, when the first mbuf is too
 *  short to hold the IP and TCP headers or the packet is not TCP.
 *  Otherwise the IP checksum is cleared, the TCP checksum is seeded
 *  with the pseudo-header sum, *paylen receives the payload length,
 *  one descriptor is consumed and TRUE is returned.
 *
 **********************************************************************/
static boolean_t
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ixgbe_tx_buf *tx_buffer;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct tcphdr *th;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0;
	u16 vtag = 0;
	int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen;

	/* Ethernet header length, allowing for an in-frame VLAN header */
	eh = mtod(mp, struct ether_vlan_header *);
	ehdrlen = ETHER_HDR_LEN;
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
		ehdrlen += ETHER_VLAN_ENCAP_LEN;

	/* The IP and TCP headers have to sit in the first mbuf */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;

	ctxd = txr->next_avail_tx_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;	/* 0 */

	/* Prepare the header checksums for the hardware */
	ip->ip_sum = 0;
	ip_hlen = ip->ip_hl << 2;
	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	th->th_sum = in_pseudo(ip->ip_src.s_addr,
	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	tcp_hlen = th->th_off << 2;
	hdrlen = ehdrlen + ip_hlen + tcp_hlen;

	/* Payload length; the caller puts it in the data descriptor */
	*paylen = mp->m_pkthdr.len - hdrlen;

	/* VLAN tag, MAC header length, IP header length */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* Descriptor type and TCP over IPv4 command bits */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT |
	    IXGBE_ADVTXD_TUCMD_L4T_TCP | IXGBE_ADVTXD_TUCMD_IPV4;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* Segment size and TCP header length */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);

	/* The context slot carries no mbuf and ends no packet */
	tx_buffer->m_head = NULL;
	tx_buffer->eop_index = -1;

	/* Account for the descriptor just used */
	ctxd++;
	if (ctxd == adapter->num_tx_desc)
		ctxd = 0;
	txr->tx_avail--;
	txr->next_avail_tx_desc = ctxd;

	return TRUE;
}
/**********************************************************************
*
* Examine each tx_buffer in the used queue. If the hardware is done
* processing the packet then free associated resources. The
* tx_buffer is put back on the free queue.
*
* Must be called with the ring's tx_mtx held (asserted below).
*
* Returns FALSE when the ring is completely clean, either on entry
* or after cleaning, or when no end-of-packet index is recorded at
* the clean position; returns TRUE otherwise.
*
**********************************************************************/
static boolean_t
ixgbe_txeof(struct tx_ring *txr)
{
struct adapter * adapter = txr->adapter;
struct ifnet *ifp = adapter->ifp;
/* Unsigned ring indices; the "== -1" tests rely on conversion */
u32 first, last, done, num_avail;
u32 cleaned = 0;
struct ixgbe_tx_buf *tx_buffer;
struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
mtx_assert(&txr->tx_mtx, MA_OWNED);
/* Every descriptor is already free: nothing to clean */
if (txr->tx_avail == adapter->num_tx_desc)
return FALSE;
num_avail = txr->tx_avail;
first = txr->next_tx_to_clean;
tx_buffer = &txr->tx_buffers[first];
/* For cleanup we just use legacy struct */
tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
last = tx_buffer->eop_index;
/* No end-of-packet index recorded for the first slot */
if (last == -1)
return FALSE;
eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
/*
** Get the index of the first descriptor
** BEYOND the EOP and call that 'done'.
** I do this so the comparison in the
** inner while loop below can be simple
*/
if (++last == adapter->num_tx_desc) last = 0;
done = last;
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
** Only the EOP descriptor of a packet now has the DD
** bit set, this is what we look for...
*/
while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
/* We clean the range of the packet */
while (first != done) {
tx_desc->upper.data = 0;
tx_desc->lower.data = 0;
tx_desc->buffer_addr = 0;
num_avail++; cleaned++;
/* Only slots that carry an mbuf count as a sent packet */
if (tx_buffer->m_head) {
ifp->if_opackets++;
bus_dmamap_sync(txr->txtag,
tx_buffer->map,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(txr->txtag,
tx_buffer->map);
m_freem(tx_buffer->m_head);
tx_buffer->m_head = NULL;
/*
* NOTE(review): the map handle is cleared without being
* destroyed -- confirm the transmit path re-establishes it.
*/
tx_buffer->map = NULL;
}
tx_buffer->eop_index = -1;
if (++first == adapter->num_tx_desc)
first = 0;
tx_buffer = &txr->tx_buffers[first];
tx_desc =
(struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
}
/* See if there is more work now */
last = tx_buffer->eop_index;
if (last != -1) {
eop_desc =
(struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
/* Get next done point */
if (++last == adapter->num_tx_desc) last = 0;
done = last;
} else
break;
}
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* Remember where the next cleaning pass has to start */
txr->next_tx_to_clean = first;
/*
* If we have enough room, clear IFF_DRV_OACTIVE to tell the stack that
* it is OK to send packets. If there are no pending descriptors,
* clear the timeout. Otherwise, if some descriptors have been freed,
* restart the timeout.
*/
if (num_avail > IXGBE_TX_CLEANUP_THRESHOLD) {
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
/* If all are clean turn off the timer */
if (num_avail == adapter->num_tx_desc) {
txr->watchdog_timer = 0;
txr->tx_avail = num_avail;
return FALSE;
}
}
/* Some were cleaned, so reset timer */
if (cleaned)
txr->watchdog_timer = IXGBE_TX_TIMEOUT;
txr->tx_avail = num_avail;
return TRUE;
}
/*********************************************************************
 *
 *  Get a buffer from system mbuf buffer pool.
 *
 *  Refreshes RX slot 'i' with a header mbuf and a payload cluster.
 *  'clean' selects which of the two are newly allocated
 *  (IXGBE_CLEAN_HDR / IXGBE_CLEAN_PKT); the other one is reused from
 *  the slot.  The pair is chained, loaded through the ring's spare
 *  DMA map, and the spare map is then swapped with the slot's map.
 *
 *  Returns 0 on success.  Returns ENOBUFS when an mbuf allocation
 *  failed; in that case the slot's old buffers are remapped, if it
 *  has any, so the descriptor can be reused.  Returns the bus_dma
 *  error code when a DMA load fails.
 *
 **********************************************************************/
static int
ixgbe_get_buf(struct rx_ring *rxr, int i, u8 clean)
{
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	seg[2];
	struct ixgbe_rx_buf	*rxbuf;
	struct mbuf		*mh, *mp;
	bus_dmamap_t		map;
	int			nsegs, error;
	int			merr = 0;

	rxbuf = &rxr->rx_buffers[i];

	/* First get our header and payload mbuf */
	if (clean & IXGBE_CLEAN_HDR) {
		mh = m_gethdr(M_DONTWAIT, MT_DATA);
		if (mh == NULL)
			goto remap;
	} else  /* reuse */
		mh = rxr->rx_buffers[i].m_head;

	mh->m_len = MHLEN;
	mh->m_flags |= M_PKTHDR;

	if (clean & IXGBE_CLEAN_PKT) {
		mp = m_getjcl(M_DONTWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (mp == NULL) {
			/*
			 * A header mbuf allocated just above is not
			 * referenced by the slot yet, and the remap
			 * path reloads 'mh' from the slot; free it
			 * here or it is leaked.
			 */
			if (clean & IXGBE_CLEAN_HDR)
				m_free(mh);
			goto remap;
		}
		mp->m_len = adapter->rx_mbuf_sz;
		mp->m_flags &= ~M_PKTHDR;
	} else {	/* reusing */
		mp = rxr->rx_buffers[i].m_pack;
		mp->m_len = adapter->rx_mbuf_sz;
		mp->m_flags &= ~M_PKTHDR;
	}

	/*
	** Need to create a chain for the following
	** dmamap call at this point.
	*/
	mh->m_next = mp;
	mh->m_pkthdr.len = mh->m_len + mp->m_len;

	/* Get the memory mapping */
	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
	    rxr->spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		printf("GET BUF: dmamap load failure - %d\n", error);
		/*
		 * NOTE(review): only the header mbuf is freed here; the
		 * chained payload is left alone, and a reused header is
		 * still referenced by the slot -- confirm how the
		 * caller recovers from this.
		 */
		m_free(mh);
		return (error);
	}

	/* Unload old mapping and update buffer struct */
	if (rxbuf->m_head != NULL)
		bus_dmamap_unload(rxr->rxtag, rxbuf->map);
	/* The freshly loaded spare map becomes the slot's map */
	map = rxbuf->map;
	rxbuf->map = rxr->spare_map;
	rxr->spare_map = map;
	rxbuf->m_head = mh;
	rxbuf->m_pack = mp;
	bus_dmamap_sync(rxr->rxtag,
	    rxbuf->map, BUS_DMASYNC_PREREAD);

	/* Update descriptor */
	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);

	return (0);

	/*
	** If we get here, we have an mbuf resource
	** issue, so we discard the incoming packet
	** and attempt to reuse existing mbufs next
	** pass thru the ring, but to do so we must
	** fix up the descriptor which had the address
	** clobbered with writeback info.
	*/
remap:
	adapter->mbuf_header_failed++;
	merr = ENOBUFS;
	/* Is there a reusable buffer? */
	mh = rxr->rx_buffers[i].m_head;
	if (mh == NULL) /* Nope, init error */
		return (merr);
	mp = rxr->rx_buffers[i].m_pack;
	if (mp == NULL) /* Nope, init error */
		return (merr);
	/* Get our old mapping */
	rxbuf = &rxr->rx_buffers[i];
	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
	    rxbuf->map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		/* We really have a problem */
		m_free(mh);
		return (error);
	}
	/* Now fix the descriptor as needed */
	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);

	return (merr);
}
/*********************************************************************
*
* Allocate memory for rx_buffer structures. Since we use one
* rx_buffer per received packet, the maximum number of rx_buffer's
* that we'll need is equal to the number of receive descriptors
* that we've allocated.
*
**********************************************************************/
static int
ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
{
struct adapter *adapter = rxr->adapter;
device_t dev = adapter->dev;
struct ixgbe_rx_buf *rxbuf;
int i, bsize, error;
bsize = sizeof(struct ixgbe_rx_buf) * adapter->num_rx_desc;
if (!(rxr->rx_buffers =
(struct ixgbe_rx_buf *) malloc(bsize,
M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate rx_buffer memory\n");
error = ENOMEM;
goto fail;
}
/*
** The tag is made to accomodate the largest buffer size
** with packet split (hence the two segments, even though
** it may not always use this.
*/
if ((error = bus_dma_tag_create(NULL, /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
MJUM16BYTES, /* maxsize */
2, /* nsegments */
MJUMPAGESIZE, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&rxr->rxtag))) {
device_printf(dev, "Unable to create RX DMA tag\n");
goto fail;
}
/* Create the spare map (used by getbuf) */
error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
&rxr->spare_map);
if (error) {
device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
__func__, error);
goto fail;
}
for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
rxbuf = &rxr->rx_buffers[i];
error = bus_dmamap_create(rxr->rxtag,
BUS_DMA_NOWAIT, &rxbuf->map);
if (error) {
device_printf(dev, "Unable to create RX DMA map\n");
goto fail;
}
}
return (0);
fail:
/* Frees all, but can handle partial completion */
ixgbe_free_receive_structures(adapter);
return (error);
}
/*********************************************************************
*
* Initialize a receive ring and its buffers.
*
**********************************************************************/
/*
 * (Re)initialize one receive ring: zero the descriptor memory, release
 * any mbufs still attached to the ring, load a fresh buffer into every
 * descriptor via ixgbe_get_buf(), reset the ring indices and, when the
 * interface has IFCAP_LRO enabled, initialize LRO for the ring.
 *
 * Returns 0 on success.  Returns ENOBUFS if a buffer could not be
 * obtained or if tcp_lro_init() failed; in both cases the mbufs loaded
 * so far are released again before returning.
 */
static int
ixgbe_setup_receive_ring(struct rx_ring *rxr)
{
	struct adapter		*adapter;
	struct ifnet		*ifp;
	device_t		dev;
	struct ixgbe_rx_buf	*rxbuf;
	struct lro_ctrl		*lro = &rxr->lro;
	int			j, rsize;

	adapter = rxr->adapter;
	ifp = adapter->ifp;
	dev = adapter->dev;

	/* Clear the ring contents */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);

	/*
	** Free current RX buffer structs and their mbufs.
	** The payload mbuf is linked behind the header mbuf
	** so a single m_freem() releases both.
	*/
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
			rxbuf->m_head->m_next = rxbuf->m_pack;
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
			rxbuf->m_pack = NULL;
		}
	}

	/* Now refresh the mbufs */
	for (j = 0; j < adapter->num_rx_desc; j++) {
		if (ixgbe_get_buf(rxr, j, IXGBE_CLEAN_ALL) == ENOBUFS) {
			rxr->rx_buffers[j].m_head = NULL;
			rxr->rx_buffers[j].m_pack = NULL;
			rxr->rx_base[j].read.hdr_addr = 0;
			rxr->rx_base[j].read.pkt_addr = 0;
			goto fail;
		}
	}

	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->last_cleaned = 0;
	rxr->lro_enabled = FALSE;

	/* Use header split if configured */
	if (ixgbe_header_split)
		rxr->hdr_split = TRUE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	** Now set up the LRO interface.  LRO is only
	** initialized when the interface has IFCAP_LRO
	** enabled; header split above is controlled solely
	** by ixgbe_header_split.
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			INIT_DEBUGOUT("LRO Initialization failed!\n");
			/* j == num_rx_desc here, so every slot is unwound */
			goto fail;
		}
		INIT_DEBUGOUT("RX LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;
	}

	return (0);

fail:
	/*
	 * We need to clean up any buffers allocated
	 * so far, 'j' is the failing index.  Release the
	 * payload mbuf together with the header mbuf, exactly
	 * as the release loop above does, so that m_pack is
	 * neither leaked nor left dangling.
	 */
	for (int i = 0; i < j; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
			rxbuf->m_head->m_next = rxbuf->m_pack;
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
			rxbuf->m_pack = NULL;
		}
	}
	return (ENOBUFS);
}
/*********************************************************************
*
* Initialize all receive rings.
*
**********************************************************************/
/*
 * Run ixgbe_setup_receive_ring() on every RX ring of the adapter.
 *
 * Returns 0 when all rings were set up.  If a ring fails, the mbufs of
 * the rings that had already completed are released and ENOBUFS is
 * returned.
 */
static int
ixgbe_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring	*rxr = adapter->rx_rings;
	int		j;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))
			goto fail;

	return (0);

fail:
	/*
	 * Free RX buffers allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'j' failed, so it's the terminus.
	 *
	 * The payload mbuf is linked behind the header mbuf before
	 * the free (as ixgbe_setup_receive_ring() does when it
	 * recycles a ring) so that it is released too and m_pack
	 * does not keep a stale pointer.
	 */
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		for (int n = 0; n < adapter->num_rx_desc; n++) {
			struct ixgbe_rx_buf *rxbuf;

			rxbuf = &rxr->rx_buffers[n];
			if (rxbuf->m_head != NULL) {
				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
				rxbuf->m_head->m_next = rxbuf->m_pack;
				m_freem(rxbuf->m_head);
				rxbuf->m_head = NULL;
				rxbuf->m_pack = NULL;
			}
		}
	}

	return (ENOBUFS);
}
/*********************************************************************
*
* Setup receive registers and features.
*
**********************************************************************/
#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
/*
 * Program the receive side of the hardware for every queue of the
 * adapter: filter control, jumbo frame enable, per-queue descriptor
 * ring base/length, SRRCTL buffer sizing and descriptor type, head and
 * tail pointers, the RSS redirection table and hash keys (only when
 * more than one queue is in use) and the RX checksum control register.
 *
 * The RXEN bit is cleared at the start and is not set again anywhere
 * in this function.
 */
static void
ixgbe_initialize_receive_units(struct adapter *adapter)
{
struct rx_ring *rxr = adapter->rx_rings;
struct ixgbe_hw *hw = &adapter->hw;
struct ifnet *ifp = adapter->ifp;
u32 bufsz, rxctrl, fctrl, srrctl, rxcsum;
u32 reta, mrqc = 0, hlreg, random[10];
/*
* Make sure receives are disabled while
* setting up the descriptor ring
*/
rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
rxctrl & ~IXGBE_RXCTRL_RXEN);
/* Enable broadcasts */
/* (the DPF and PMCF bits of FCTRL are set in the same write) */
fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
fctrl |= IXGBE_FCTRL_BAM;
fctrl |= IXGBE_FCTRL_DPF;
fctrl |= IXGBE_FCTRL_PMCF;
IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
/* Set for Jumbo Frames? */
/*
* An MTU above ETHERMTU turns on JUMBOEN and selects a 4096 byte
* packet buffer, otherwise 2048; bufsz holds that size already
* shifted into SRRCTL units.
*/
hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
if (ifp->if_mtu > ETHERMTU) {
hlreg |= IXGBE_HLREG0_JUMBOEN;
bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
} else {
hlreg &= ~IXGBE_HLREG0_JUMBOEN;
bufsz = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
}
IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
/* Per-queue setup; rxr advances in step with the queue index i */
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
u64 rdba = rxr->rxdma.dma_paddr;
/* Setup the Base and Length of the Rx Descriptor Ring */
IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
(rdba & 0x00000000ffffffffULL));
IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
/* Set up the SRRCTL register */
srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
srrctl |= bufsz;
if (rxr->hdr_split) {
/* Use a standard mbuf for the header */
srrctl |= ((IXGBE_RX_HDR <<
IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT)
& IXGBE_SRRCTL_BSIZEHDR_MASK);
srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
if (adapter->hw.mac.type == ixgbe_mac_82599EB) {
/* PSRTYPE must be initialized in 82599 */
/* NOTE(review): index 0 is written on every queue iteration - confirm intended */
u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
IXGBE_PSRTYPE_UDPHDR |
IXGBE_PSRTYPE_IPV4HDR |
IXGBE_PSRTYPE_IPV6HDR;
IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
}
} else
srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
/* Setup the HW Rx Head and Tail Descriptor Pointers */
IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
}
rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
/* Setup RSS */
if (adapter->num_queues > 1) {
int i, j;
reta = 0;
/* set up random bits */
arc4rand(&random, sizeof(random), 0);
/* Set up the redirection table */
/*
* 128 entries, one byte each; j cycles through the queue
* numbers and each register write carries the four most
* recently shifted-in entries.
*/
for (i = 0, j = 0; i < 128; i++, j++) {
if (j == adapter->num_queues) j = 0;
reta = (reta << 8) | (j * 0x11);
if ((i & 3) == 3)
IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
}
/* Now fill our hash function seeds */
for (int i = 0; i < 10; i++)
IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
/* Perform hash on these packet types */
mrqc = IXGBE_MRQC_RSSEN
| IXGBE_MRQC_RSS_FIELD_IPV4
| IXGBE_MRQC_RSS_FIELD_IPV4_TCP
| IXGBE_MRQC_RSS_FIELD_IPV4_UDP
| IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
| IXGBE_MRQC_RSS_FIELD_IPV6_EX
| IXGBE_MRQC_RSS_FIELD_IPV6
| IXGBE_MRQC_RSS_FIELD_IPV6_TCP
| IXGBE_MRQC_RSS_FIELD_IPV6_UDP
| IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
/* RSS and RX IPP Checksum are mutually exclusive */
rxcsum |= IXGBE_RXCSUM_PCSD;
}
/* PCSD is also set when RX checksum offload is enabled ... */
if (ifp->if_capenable & IFCAP_RXCSUM)
rxcsum |= IXGBE_RXCSUM_PCSD;
/* ... and IPPCSE is only turned on when PCSD ended up clear */
if (!(rxcsum & IXGBE_RXCSUM_PCSD))
rxcsum |= IXGBE_RXCSUM_IPPCSE;
IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
return;
}
/*********************************************************************
*
* Free all receive rings.
*
**********************************************************************/
/*
 * Tear down every RX ring of the adapter: its receive buffers, its LRO
 * state and its descriptor DMA memory, then release the ring array.
 */
static void
ixgbe_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring	*ring;
	int		q;

	for (q = 0; q < adapter->num_queues; q++) {
		ring = &adapter->rx_rings[q];

		ixgbe_free_receive_buffers(ring);
		/* Free LRO memory */
		tcp_lro_free(&ring->lro);
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &ring->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}
/*********************************************************************
*
* Free receive ring data structures
*
**********************************************************************/
/*
 * Release everything ixgbe_allocate_receive_buffers() set up for one
 * ring: the per-descriptor DMA maps and any mbufs still attached, the
 * rx_buffers array, the spare DMA map and finally the DMA tag.
 *
 * Pointers are cleared as they are released, so the routine copes with
 * a partially constructed ring and with being called more than once.
 */
void
ixgbe_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = NULL;
	struct ixgbe_rx_buf	*rxbuf = NULL;

	INIT_DEBUGOUT("free_receive_buffers: begin");

	adapter = rxr->adapter;

	if (rxr->rx_buffers != NULL) {
		rxbuf = &rxr->rx_buffers[0];
		for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
			if (rxbuf->map != NULL) {
				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
			}
			if (rxbuf->m_head != NULL) {
				/*
				 * Link the payload mbuf behind the header
				 * so that both are released, the same way
				 * ixgbe_setup_receive_ring() recycles a
				 * slot.
				 */
				rxbuf->m_head->m_next = rxbuf->m_pack;
				m_freem(rxbuf->m_head);
			}
			rxbuf->m_head = NULL;
			rxbuf->m_pack = NULL;
		}
		free(rxr->rx_buffers, M_DEVBUF);
		rxr->rx_buffers = NULL;
	}

	if (rxr->rxtag != NULL) {
		/*
		 * The spare map was created on this tag by
		 * ixgbe_allocate_receive_buffers(); destroy it before
		 * the tag itself goes away.
		 * NOTE(review): assumes no other teardown path destroys
		 * spare_map - confirm against the rest of the driver.
		 */
		if (rxr->spare_map != NULL) {
			bus_dmamap_destroy(rxr->rxtag, rxr->spare_map);
			rxr->spare_map = NULL;
		}
		bus_dma_tag_destroy(rxr->rxtag);
		rxr->rxtag = NULL;
	}

	return;
}
/*********************************************************************
*
* This routine executes in interrupt context. It replenishes
* the mbufs in the descriptor and sends data which has been
* dma'ed into host memory to upper layer.
*
* We loop at most count times if count is > 0, or until done if
* count < 0.
*
* Return TRUE for more work, FALSE for all clean.
*********************************************************************/
/*
 * Receive completion processing for one ring.
 *
 * Walks the descriptors starting at rxr->next_to_check for as long as
 * the descriptor has the DD status bit, 'count' is non-zero and the
 * interface is running.  'count' is decremented for every descriptor
 * that carries EOP, so a positive value bounds the number of packets
 * and a negative value never stops the loop by itself.  Completed
 * packets are handed to LRO or to if_input, the tail register is
 * advanced to the last cleaned descriptor and pending LRO work is
 * flushed.
 *
 * Returns TRUE (after re-arming the queue interrupt) if the next
 * descriptor was already done when the loop ended, FALSE otherwise.
 */
static bool
ixgbe_rxeof(struct rx_ring *rxr, int count)
{
	struct adapter		*adapter = rxr->adapter;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i;
	u32			staterr;
	union ixgbe_adv_rx_desc	*cur;

	IXGBE_RX_LOCK(rxr);
	i = rxr->next_to_check;
	cur = &rxr->rx_base[i];
	staterr = cur->wb.upper.status_error;

	if (!(staterr & IXGBE_RXD_STAT_DD)) {
		IXGBE_RX_UNLOCK(rxr);
		return FALSE;
	}

	/* Sync the ring */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while ((staterr & IXGBE_RXD_STAT_DD) && (count != 0) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		struct mbuf	*sendmp, *mh, *mp;
		u16		hlen, plen, hdr, vtag;
		u8		dopayload, accept_frame, eop;

		accept_frame = 1;
		hlen = plen = vtag = 0;
		sendmp = mh = mp = NULL;

		/* Sync the buffers */
		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
		    BUS_DMASYNC_POSTREAD);

		/*
		** The way the hardware is configured to
		** split, it will ONLY use the header buffer
		** when header split is enabled, otherwise we
		** get normal behavior, ie, both header and
		** payload are DMA'd into the payload buffer.
		**
		** The fmp test is to catch the case where a
		** packet spans multiple descriptors, in that
		** case only the first header is valid.
		*/
		if ((rxr->hdr_split) && (rxr->fmp == NULL)){
			hdr = le16toh(cur->
			    wb.lower.lo_dword.hs_rss.hdr_info);
			hlen = (hdr & IXGBE_RXDADV_HDRBUFLEN_MASK) >>
			    IXGBE_RXDADV_HDRBUFLEN_SHIFT;
			if (hlen > IXGBE_RX_HDR)
				hlen = IXGBE_RX_HDR;
			plen = le16toh(cur->wb.upper.length);
			/* Handle the header mbuf */
			mh = rxr->rx_buffers[i].m_head;
			mh->m_len = hlen;
			dopayload = IXGBE_CLEAN_HDR;
			/*
			** Get the payload length, this
			** could be zero if it's a small
			** packet.
			*/
			if (plen) {
				mp = rxr->rx_buffers[i].m_pack;
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_flags |= M_PKTHDR;
				dopayload = IXGBE_CLEAN_ALL;
				rxr->rx_split_packets++;
			} else {  /* small packets */
				mh->m_flags &= ~M_PKTHDR;
				mh->m_next = NULL;
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mh = rxr->rx_buffers[i].m_pack;
			mh->m_flags |= M_PKTHDR;
			mh->m_len = le16toh(cur->wb.upper.length);
			dopayload = IXGBE_CLEAN_PKT;
		}

		if (staterr & IXGBE_RXD_STAT_EOP) {
			count--;
			eop = 1;
		} else
			eop = 0;

#ifdef IXGBE_IEEE1588
	This code needs to be converted to work here
	-----------------------------------------------------
		if (unlikely(staterr & IXGBE_RXD_STAT_TS)) {
			u64 regval;
			u64 ns;
// Create an mtag and set it up
			struct skb_shared_hwtstamps *shhwtstamps =
			    skb_hwtstamps(skb);

			rd32(IXGBE_TSYNCRXCTL) & IXGBE_TSYNCRXCTL_VALID),
			"igb: no RX time stamp available for time stamped packet");
			regval = rd32(IXGBE_RXSTMPL);
			regval |= (u64)rd32(IXGBE_RXSTMPH) << 32;
// Do time conversion from the register
			ns = timecounter_cyc2time(&adapter->clock, regval);
			clocksync_update(&adapter->sync, ns);
			memset(shhwtstamps, 0, sizeof(*shhwtstamps));
			shhwtstamps->hwtstamp = ns_to_ktime(ns);
			shhwtstamps->syststamp =
			    clocksync_hw2sys(&adapter->sync, ns);
		}
#endif

		if (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK)
			accept_frame = 0;

		if (accept_frame) {
			/*
			** Save the vlan id, because get_buf will
			** clobber the writeback descriptor...
			*/
			vtag = le16toh(cur->wb.upper.vlan);
			if (ixgbe_get_buf(rxr, i, dopayload) != 0) {
				ifp->if_iqdrops++;
				goto discard;
			}
			/* Initial frame - setup */
			if (rxr->fmp == NULL) {
				mh->m_flags |= M_PKTHDR;
				mh->m_pkthdr.len = mh->m_len;
				rxr->fmp = mh; /* Store the first mbuf */
				rxr->lmp = mh;
				if (mp) { /* Add payload if split */
					mh->m_pkthdr.len += mp->m_len;
					rxr->lmp = mh->m_next;
				}
			} else {
				/* Chain mbuf's together */
				mh->m_flags &= ~M_PKTHDR;
				rxr->lmp->m_next = mh;
				rxr->lmp = rxr->lmp->m_next;
				rxr->fmp->m_pkthdr.len += mh->m_len;
			}

			if (eop) {
				rxr->fmp->m_pkthdr.rcvif = ifp;
				ifp->if_ipackets++;
				rxr->rx_packets++;
				/* capture data for AIM */
				rxr->bytes += rxr->fmp->m_pkthdr.len;
				/*
				 * Total byte counter: add this packet's
				 * length only, not the running AIM
				 * accumulator, which would count earlier
				 * packets again.
				 */
				rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
				if (ifp->if_capenable & IFCAP_RXCSUM)
					ixgbe_rx_checksum(staterr, rxr->fmp);
				else
					rxr->fmp->m_pkthdr.csum_flags = 0;
				if (staterr & IXGBE_RXD_STAT_VP) {
					rxr->fmp->m_pkthdr.ether_vtag = vtag;
					rxr->fmp->m_flags |= M_VLANTAG;
				}
#if __FreeBSD_version >= 800000
				rxr->fmp->m_pkthdr.flowid = curcpu;
				rxr->fmp->m_flags |= M_FLOWID;
#endif
				sendmp = rxr->fmp;
				rxr->fmp = NULL;
				rxr->lmp = NULL;
			}
		} else {
			ifp->if_ierrors++;
discard:
			/* Reuse loaded DMA map and just update mbuf chain */
			if (hlen) {
				mh = rxr->rx_buffers[i].m_head;
				mh->m_len = MHLEN;
				mh->m_next = NULL;
			}
			mp = rxr->rx_buffers[i].m_pack;
			mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
			mp->m_data = mp->m_ext.ext_buf;
			mp->m_next = NULL;
			if (adapter->max_frame_size <=
			    (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
			if (rxr->fmp != NULL) {
				/* handles the whole chain */
				m_freem(rxr->fmp);
				rxr->fmp = NULL;
				rxr->lmp = NULL;
			}
			sendmp = NULL;
		}

		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		rxr->last_cleaned = i; /* for updating tail */

		if (++i == adapter->num_rx_desc)
			i = 0;

		/*
		** Now send up to the stack,
		** note that the value of next_to_check
		** is safe because we keep the RX lock
		** thru this call.
		*/
		if (sendmp != NULL) {
			/*
			** Send to the stack if:
			**  - LRO not enabled, or
			**  - no LRO resources, or
			**  - lro enqueue fails
			*/
			if ((!rxr->lro_enabled) ||
			    ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0))))
				(*ifp->if_input)(ifp, sendmp);
		}

		/* Get next descriptor */
		cur = &rxr->rx_base[i];
		staterr = cur->wb.upper.status_error;
	}
	rxr->next_to_check = i;

	/* Advance the IXGB's Receive Queue "Tail Pointer" */
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), rxr->last_cleaned);

	/*
	 * Flush any outstanding LRO work
	 */
	while (!SLIST_EMPTY(&lro->lro_active)) {
		queued = SLIST_FIRST(&lro->lro_active);
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Leaving with more to clean?
	** then schedule another interrupt.
	*/
	if (staterr & IXGBE_RXD_STAT_DD) {
		/*
		 * Widen before shifting: the queue mask is 64 bits
		 * wide and the vector number may be 32 or above.
		 */
		ixgbe_rearm_rx_queues(adapter, (u64)1 << rxr->msix);
		return TRUE;
	}

	return FALSE;
}
/*********************************************************************
*
* Verify that the hardware indicated that the checksum is valid.
* Inform the stack about the status of checksum so that stack
* doesn't spend time verifying the checksum.
*
*********************************************************************/
/*
 * Translate the checksum bits of a receive descriptor's status/error
 * word into mbuf checksum flags.
 *
 * staterr: status in the low 16 bits, error bits in the top byte.
 * mp:      packet header mbuf whose m_pkthdr checksum fields are set.
 */
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf * mp)
{
	const u16	stat = (u16) staterr;
	const u8	err = (u8) (staterr >> 24);

	/* IP header checksum was examined by the hardware. */
	if (stat & IXGBE_RXD_STAT_IPCS) {
		if (err & IXGBE_RXD_ERR_IPE)
			mp->m_pkthdr.csum_flags = 0;
		else
			mp->m_pkthdr.csum_flags =
			    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	/* L4 checksum was examined and no error was flagged. */
	if ((stat & IXGBE_RXD_STAT_L4CS) &&
	    !(err & IXGBE_RXD_ERR_TCPE)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data = htons(0xffff);
	}
}
/*
** This routine is run via a vlan config EVENT,
** it enables us to use the HW Filter table since
** we can get the vlan id. This just creates the
** entry in the soft version of the VFTA, init will
** repopulate the real table.
*/
/*
 * VLAN config event handler: record 'vtag' in the soft copy of the
 * VLAN filter table, bump the adapter's VLAN count and re-initialize
 * the interface so the change is loaded.
 *
 * Events for another softc and tags outside 1..4095 are ignored.
 */
static void
ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u16		index, bit;

	if (ifp->if_softc != arg)   /* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	/* 32 tags per table word: word index above, bit position below */
	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	/* Unsigned constant: bit may be 31, which a signed 1 cannot hold */
	ixgbe_shadow_vfta[index] |= (1U << bit);
	++adapter->num_vlans;
	/* Re-init to load the changes */
	ixgbe_init(adapter);
}
/*
** This routine is run via an vlan
** unconfig EVENT, remove our entry
** in the soft vfta.
*/
/*
 * VLAN unconfig event handler: clear 'vtag' in the soft copy of the
 * VLAN filter table, drop the adapter's VLAN count and re-initialize
 * the interface so the change is loaded.
 *
 * Events for another softc and tags outside 1..4095 are ignored.
 */
static void
ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u16		index, bit;

	if (ifp->if_softc != arg)
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	/* 32 tags per table word: word index above, bit position below */
	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	/* Unsigned constant: bit may be 31, which a signed 1 cannot hold */
	ixgbe_shadow_vfta[index] &= ~(1U << bit);
	--adapter->num_vlans;
	/* Re-init to load the changes */
	ixgbe_init(adapter);
}
/*
 * Reload the hardware VLAN filter table from the soft copy and enable
 * VLAN filtering.  Does nothing when no VLAN has been registered.
 */
static void
ixgbe_setup_vlan_hw_support(struct adapter *adapter)
{
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		reg;
	int		n;

	/*
	 * Reached through init_locked, i.e. after a soft reset that
	 * has already cleared the VFTA and related state; with no
	 * registered vlans there is nothing to restore.
	 */
	if (adapter->num_vlans == 0)
		return;

	/* Write back every non-empty word of the shadow table. */
	for (n = 0; n < IXGBE_VFTA_SIZE; n++) {
		if (ixgbe_shadow_vfta[n] == 0)
			continue;
		IXGBE_WRITE_REG(hw, IXGBE_VFTA(n),
		    ixgbe_shadow_vfta[n]);
	}

	/* Turn the filter table on (CFI checking off). */
	reg = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
	reg &= ~IXGBE_VLNCTRL_CFIEN;
	reg |= IXGBE_VLNCTRL_VFE;
	if (hw->mac.type == ixgbe_mac_82598EB)
		reg |= IXGBE_VLNCTRL_VME;
	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, reg);

	/* On 82599 the VLAN enable lives per queue in RXDCTL. */
	if (hw->mac.type != ixgbe_mac_82599EB)
		return;
	for (n = 0; n < adapter->num_queues; n++) {
		reg = IXGBE_READ_REG(hw, IXGBE_RXDCTL(n));
		reg |= IXGBE_RXDCTL_VME;
		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(n), reg);
	}
}
/*
 * Enable adapter interrupts: the non-queue causes through EIMS, the
 * auto-clear mask when MSIX is in use, and then every RX and TX queue
 * vector individually.
 */
static void
ixgbe_enable_intr(struct adapter *adapter)
{
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		bits;
	int		q;

	/* Everything except the queue causes to begin with. */
	bits = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);

	/* Fan failure detection */
	if (hw->device_id == IXGBE_DEV_ID_82598AT)
		bits |= IXGBE_EIMS_GPI_SDP1;

	/* Causes that only exist on 82599 */
	if (adapter->hw.mac.type == ixgbe_mac_82599EB) {
		bits |= IXGBE_EIMS_ECC;
		bits |= IXGBE_EIMS_GPI_SDP1;
		bits |= IXGBE_EIMS_GPI_SDP2;
	}
	IXGBE_WRITE_REG(hw, IXGBE_EIMS, bits);

	/* With RSS we use auto clear, but never for the link causes. */
	if (adapter->msix_mem) {
		bits = IXGBE_EIMS_ENABLE_MASK;
		bits &= ~IXGBE_EIMS_OTHER;
		bits &= ~IXGBE_EIMS_LSC;
		IXGBE_WRITE_REG(hw, IXGBE_EIAC, bits);
	}

	/*
	 * Queues are enabled one by one so that the extended
	 * (beyond 32) MSIX vectors usable on 82599 are covered.
	 */
	for (q = 0; q < adapter->num_queues; q++)
		ixgbe_enable_queue(adapter, adapter->rx_rings[q].msix);
	for (q = 0; q < adapter->num_queues; q++)
		ixgbe_enable_queue(adapter, adapter->tx_rings[q].msix);

	IXGBE_WRITE_FLUSH(hw);
}
static void
ixgbe_disable_intr(struct adapter *adapter)
{
if (adapter->msix_mem)
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
} else {
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
}
IXGBE_WRITE_FLUSH(&adapter->hw);
return;
}
/*
 * Read a 16-bit value from PCI configuration space at offset 'reg' of
 * the device behind 'hw'.
 */
u16
ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
{
	struct ixgbe_osdep *os = (struct ixgbe_osdep *)hw->back;

	return ((u16)pci_read_config(os->dev, reg, 2));
}
/*
 * Write the 16-bit 'value' to PCI configuration space at offset 'reg'
 * of the device behind 'hw'.
 */
void
ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
{
	struct ixgbe_osdep *os = (struct ixgbe_osdep *)hw->back;

	pci_write_config(os->dev, reg, value, 2);
}
/*
** Setup the correct IVAR register for a particular MSIX interrupt
** (yes this is all very magic and confusing :)
** - entry is the register array entry
** - vector is the MSIX vector for this queue
** - type is RX/TX/MISC
*/
/*
 * Route one interrupt cause to an MSIX vector by updating the matching
 * byte of the IVAR (or IVAR_MISC) register.
 *
 * entry:  register array entry (queue index, or selector for MISC)
 * vector: MSIX vector; IXGBE_IVAR_ALLOC_VAL is OR-ed in here
 * type:   0 for RX, 1 for TX, -1 for the misc/other cause
 *
 * MAC types other than 82598EB and 82599EB are left untouched.
 */
static void
ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
{
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		ivar, index;

	vector |= IXGBE_IVAR_ALLOC_VAL;

	switch (hw->mac.type) {

	case ixgbe_mac_82598EB:
		if (type == -1)
			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
		else
			entry += (type * 64);
		/* Four one-byte entries per 32-bit IVAR register */
		index = (entry >> 2) & 0x1F;
		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
		/*
		 * Shift unsigned values: the byte lane can start at
		 * bit 24, which would push a signed int into its
		 * sign bit.
		 */
		ivar &= ~(0xFFU << (8 * (entry & 0x3)));
		ivar |= ((u32)vector << (8 * (entry & 0x3)));
		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
		break;

	case ixgbe_mac_82599EB:
		if (type == -1) { /* MISC IVAR */
			index = (entry & 1) * 8;
			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
			ivar &= ~(0xFFU << index);
			ivar |= ((u32)vector << index);
			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
		} else {	/* RX/TX IVARS */
			/* Two queues per register, RX and TX byte each */
			index = (16 * (entry & 1)) + (8 * type);
			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
			ivar &= ~(0xFFU << index);
			ivar |= ((u32)vector << index);
			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
		}
		break;

	default:
		break;
	}
}
/*
 * Program the IVAR entries for every RX queue (type 0), every TX queue
 * (type 1) and the link interrupt (type -1).
 */
static void
ixgbe_configure_ivars(struct adapter *adapter)
{
	int q;

	for (q = 0; q < adapter->num_queues; q++)
		ixgbe_set_ivar(adapter, q, adapter->rx_rings[q].msix, 0);

	for (q = 0; q < adapter->num_queues; q++)
		ixgbe_set_ivar(adapter, q, adapter->tx_rings[q].msix, 1);

	/* For the Link interrupt */
	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
}
/*
** ixgbe_sfp_probe - called in the local timer to
** determine if a port had optics inserted.
*/
/*
 * Check whether an SFP+ module has been inserted into a port that had
 * none.  Only acts when the PHY type is ixgbe_phy_nl and no SFP is
 * currently recorded.
 *
 * Returns TRUE when a supported module was identified and the PHY
 * reset did not report it as unsupported, FALSE otherwise.
 * adapter->sfp_probe is cleared once the reset step has been reached.
 */
static bool ixgbe_sfp_probe(struct adapter *adapter)
{
	struct ixgbe_hw	*hw = &adapter->hw;
	device_t	dev = adapter->dev;
	s32		rc;

	if ((hw->phy.type != ixgbe_phy_nl) ||
	    (hw->phy.sfp_type != ixgbe_sfp_type_not_present))
		return (FALSE);

	/* Nothing identifiable yet; leave sfp_probe armed. */
	rc = hw->phy.ops.identify_sfp(hw);
	if (rc)
		return (FALSE);

	rc = hw->phy.ops.reset(hw);
	if (rc == IXGBE_ERR_SFP_NOT_SUPPORTED) {
		device_printf(dev,"Unsupported SFP+ module detected!");
		printf(" Reload driver with supported module.\n");
		adapter->sfp_probe = FALSE;
		return (FALSE);
	}

	device_printf(dev,"SFP+ module detected!\n");
	/* We now have supported optics */
	adapter->sfp_probe = FALSE;
	return (TRUE);
}
/*
** Tasklet handler for MSIX Link interrupts
** - do outside interrupt since it might sleep
*/
/*
 * Deferred task for link interrupts: refresh the cached link speed and
 * state, then publish the new status.  'pending' is not used.
 */
static void
ixgbe_handle_link(void *context, int pending)
{
	struct adapter	*sc = context;
	struct ixgbe_hw	*hw = &sc->hw;

	ixgbe_check_link(hw, &sc->link_speed, &sc->link_up, 0);
	ixgbe_update_link_status(sc);
}
/*
** Tasklet for handling SFP module interrupts
*/
static void
ixgbe_handle_mod(void *context, int pending)
{
struct adapter *adapter = context;
struct ixgbe_hw *hw = &adapter->hw;
device_t dev = adapter->dev;
u32 err;
err = hw->phy.ops.identify_sfp(hw);
if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
device_printf(dev,
"Unsupported SFP+ module type was detected.\n");
return;
}
hw->mac.ops.setup_sfp(hw);
taskqueue_enqueue(adapter->tq, &adapter->msf_task);
return;
}
/*
** Tasklet for handling MSF (multispeed fiber) interrupts
*/
/*
 * Deferred task for multispeed-fiber interrupts: query the link
 * capabilities, request link setup with them, then refresh and publish
 * the link state.  'pending' is not used.
 */
static void
ixgbe_handle_msf(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ixgbe_hw	*hw = &adapter->hw;
	/*
	 * Start from a defined value: if the MAC provides no
	 * get_link_capabilities op, setup_link_speed would otherwise
	 * be handed an indeterminate value.
	 * NOTE(review): 0 is only a deterministic placeholder -
	 * confirm what setup_link_speed does with it.
	 */
	u32		autoneg = 0;

	if (hw->mac.ops.get_link_capabilities)
		hw->mac.ops.get_link_capabilities(hw, &autoneg,
		    &hw->mac.autoneg);
	if (hw->mac.ops.setup_link_speed)
		hw->mac.ops.setup_link_speed(hw, autoneg, TRUE, TRUE);
	ixgbe_check_link(&adapter->hw,
	    &adapter->link_speed, &adapter->link_up, 0);
	ixgbe_update_link_status(adapter);
	return;
}
#ifdef IXGBE_FDIR
/*
** Tasklet for reinitializing the Flow Director filter table
*/
/*
 * Deferred task that rebuilds the Flow Director tables.  Runs only
 * when fdir_reinit is 1; afterwards the flag is cleared and the
 * interface is marked running again.  'pending' is not used.
 */
static void
ixgbe_reinit_fdir(void *context, int pending)
{
	struct adapter *sc = context;

	/* Anything other than 1 should not happen. */
	if (sc->fdir_reinit != 1)
		return;

	ixgbe_reinit_fdir_tables_82599(&sc->hw);
	sc->fdir_reinit = 0;

	/* Restart the interface */
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
}
#endif
/**********************************************************************
*
* Update the board statistics counters.
*
**********************************************************************/
/*
 * Read the hardware statistics registers, accumulate them into
 * adapter->stats and refresh the ifnet counters derived from them.
 *
 * Several counters are corrected after reading: missed packets are
 * subtracted from gprc, broadcasts from mprc, and transmitted XON/XOFF
 * frames from gptc, mptc, ptc64 and (as minimum-size frames) gotc.
 */
static void
ixgbe_update_stats_counters(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		missed_rx = 0, bprc, lxon, lxoff, total;
	u64		total_missed_rx = 0;

	adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);

	for (int i = 0; i < 8; i++) {
		u32 mp;

		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
		/* missed_rx tallies misses for the gprc workaround */
		missed_rx += mp;
		/* each mpc[i] accumulates only its own register */
		adapter->stats.mpc[i] += mp;
		/* Running comprehensive total for stats display */
		total_missed_rx += adapter->stats.mpc[i];
		if (hw->mac.type == ixgbe_mac_82598EB)
			adapter->stats.rnbc[i] +=
			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
	}

	/* Hardware workaround, gprc counts missed packets */
	adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
	adapter->stats.gprc -= missed_rx;

	if (hw->mac.type == ixgbe_mac_82599EB) {
		adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL);
		IXGBE_READ_REG(hw, IXGBE_GORCH); /* clears register */
		adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL);
		IXGBE_READ_REG(hw, IXGBE_GOTCH); /* clears register */
		adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL);
		IXGBE_READ_REG(hw, IXGBE_TORH); /* clears register */
		adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
		adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
	} else {
		adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
		adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
		/* 82598 only has a counter in the high register */
		adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
		/* transmitted octets belong in gotc, not gorc */
		adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
		adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
	}

	/*
	 * Workaround: mprc hardware is incorrectly counting
	 * broadcasts, so for now we subtract those.
	 */
	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
	adapter->stats.bprc += bprc;
	adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
	adapter->stats.mprc -= bprc;

	adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
	adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
	adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
	adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
	adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
	adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
	adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
	adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);

	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
	adapter->stats.lxontxc += lxon;
	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
	adapter->stats.lxofftxc += lxoff;
	total = lxon + lxoff;

	/* Transmitted pause frames are taken back out of the TX counts */
	adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
	adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
	adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
	adapter->stats.gptc -= total;
	adapter->stats.mptc -= total;
	adapter->stats.ptc64 -= total;
	adapter->stats.gotc -= total * ETHER_MIN_LEN;

	adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
	adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
	adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
	adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
	adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
	adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
	adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
	adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
	adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
	adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);

	/* Fill out the OS statistics structure */
	ifp->if_ipackets = adapter->stats.gprc;
	ifp->if_opackets = adapter->stats.gptc;
	ifp->if_ibytes = adapter->stats.gorc;
	ifp->if_obytes = adapter->stats.gotc;
	ifp->if_imcasts = adapter->stats.mprc;
	ifp->if_collisions = 0;

	/* Rx Errors */
	ifp->if_ierrors = total_missed_rx + adapter->stats.crcerrs +
	    adapter->stats.rlec;
}
/**********************************************************************
*
* This routine is called only when ixgbe_display_debug_stats is enabled.
* This routine provides a way to take a look at important statistics
* maintained by the driver and hardware.
*
**********************************************************************/
/*
 * Print a summary of driver and hardware statistics for the adapter
 * through device_printf().  Values printed with %llu are cast to
 * unsigned long long so the argument type matches the conversion.
 */
static void
ixgbe_print_hw_stats(struct adapter * adapter)
{
	device_t dev = adapter->dev;

	device_printf(dev,"Std Mbuf Failed = %lu\n",
	    adapter->mbuf_defrag_failed);
	/* Only the first of the missed-packet counters is shown */
	device_printf(dev,"Missed Packets = %llu\n",
	    (unsigned long long)adapter->stats.mpc[0]);
	device_printf(dev,"Receive length errors = %llu\n",
	    ((unsigned long long)adapter->stats.roc +
	    (unsigned long long)adapter->stats.ruc));
	device_printf(dev,"Crc errors = %llu\n",
	    (unsigned long long)adapter->stats.crcerrs);
	device_printf(dev,"Driver dropped packets = %lu\n",
	    adapter->dropped_pkts);
	device_printf(dev, "watchdog timeouts = %ld\n",
	    adapter->watchdog_events);

	device_printf(dev,"XON Rcvd = %llu\n",
	    (unsigned long long)adapter->stats.lxonrxc);
	device_printf(dev,"XON Xmtd = %llu\n",
	    (unsigned long long)adapter->stats.lxontxc);
	device_printf(dev,"XOFF Rcvd = %llu\n",
	    (unsigned long long)adapter->stats.lxoffrxc);
	device_printf(dev,"XOFF Xmtd = %llu\n",
	    (unsigned long long)adapter->stats.lxofftxc);

	device_printf(dev,"Total Packets Rcvd = %llu\n",
	    (unsigned long long)adapter->stats.tpr);
	device_printf(dev,"Good Packets Rcvd = %llu\n",
	    (unsigned long long)adapter->stats.gprc);
	device_printf(dev,"Good Packets Xmtd = %llu\n",
	    (unsigned long long)adapter->stats.gptc);
	device_printf(dev,"TSO Transmissions = %lu\n",
	    adapter->tso_tx);

	return;
}
/**********************************************************************
*
* This routine is called only when ixgbe_display_debug_stats is enabled.
* This routine provides a way to take a look at important statistics
* maintained by the driver and hardware.
*
**********************************************************************/
/*
 * Print per-queue debugging information: the hardware head/tail
 * registers plus the driver's software counters for every RX and TX
 * ring, followed by the link interrupt count.  Values printed with %lu
 * are cast to unsigned long so the argument type matches the
 * conversion.
 */
static void
ixgbe_print_debug_info(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ixgbe_hw	*hw = &adapter->hw;

	device_printf(dev,"Error Byte Count = %u \n",
	    IXGBE_READ_REG(hw, IXGBE_ERRBC));

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl *lro = &rxr->lro;

		device_printf(dev,"Queue[%d]: rdh = %d, hw rdt = %d\n",
		    i, IXGBE_READ_REG(hw, IXGBE_RDH(i)),
		    IXGBE_READ_REG(hw, IXGBE_RDT(i)));
		device_printf(dev,"RX(%d) Packets Received: %lld\n",
		    rxr->me, (long long)rxr->rx_packets);
		device_printf(dev,"RX(%d) Split RX Packets: %lld\n",
		    rxr->me, (long long)rxr->rx_split_packets);
		device_printf(dev,"RX(%d) Bytes Received: %lu\n",
		    rxr->me, (unsigned long)rxr->rx_bytes);
		device_printf(dev,"RX(%d) IRQ Handled: %lu\n",
		    rxr->me, (unsigned long)rxr->rx_irq);
		device_printf(dev,"RX(%d) LRO Queued= %d\n",
		    rxr->me, lro->lro_queued);
		device_printf(dev,"RX(%d) LRO Flushed= %d\n",
		    rxr->me, lro->lro_flushed);
	}

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", i,
		    IXGBE_READ_REG(hw, IXGBE_TDH(i)),
		    IXGBE_READ_REG(hw, IXGBE_TDT(i)));
		device_printf(dev,"TX(%d) Packets Sent: %lu\n",
		    txr->me, (unsigned long)txr->total_packets);
		device_printf(dev,"TX(%d) IRQ Handled: %lu\n",
		    txr->me, (unsigned long)txr->tx_irq);
		device_printf(dev,"TX(%d) NO Desc Avail: %lu\n",
		    txr->me, (unsigned long)txr->no_tx_desc_avail);
	}

	device_printf(dev,"Link IRQ Handled: %lu\n",
	    (unsigned long)adapter->link_irq);
	return;
}
/*
 * Sysctl handler: writing 1 prints the hardware statistics of the
 * adapter passed in arg1.  Reads report -1 and do nothing else.
 */
static int
ixgbe_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
	int value = -1;
	int rc;

	rc = sysctl_handle_int(oidp, &value, 0, req);
	if (rc || !req->newptr)
		return (rc);

	if (value == 1)
		ixgbe_print_hw_stats((struct adapter *) arg1);

	return rc;
}
/*
 * Sysctl handler: writing 1 prints the debug information of the
 * adapter passed in arg1.  Reads report -1 and do nothing else.
 */
static int
ixgbe_sysctl_debug(SYSCTL_HANDLER_ARGS)
{
	int value = -1;
	int rc;

	rc = sysctl_handle_int(oidp, &value, 0, req);
	if (rc || !req->newptr)
		return (rc);

	if (value == 1)
		ixgbe_print_debug_info((struct adapter *) arg1);

	return rc;
}
/*
** Set flow control using sysctl:
** Flow control values:
** 0 - off
** 1 - rx pause
** 2 - tx pause
** 3 - full
*/
/*
 * Sysctl handler for the flow control setting (0 off, 1 rx pause,
 * 2 tx pause, 3 full).  A written value is stored in
 * ixgbe_flow_control, mapped onto the adapter's requested mode (any
 * unrecognized value means "none") and applied to the hardware.
 *
 * A plain read only reports the current value; like the other sysctl
 * handlers in this file it returns before touching the hardware when
 * no new value was supplied.
 */
static int
ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int		error;
	struct adapter	*adapter;

	error = sysctl_handle_int(oidp, &ixgbe_flow_control, 0, req);
	if (error || !req->newptr)
		return (error);

	adapter = (struct adapter *) arg1;
	switch (ixgbe_flow_control) {
	case ixgbe_fc_rx_pause:
	case ixgbe_fc_tx_pause:
	case ixgbe_fc_full:
		adapter->hw.fc.requested_mode = ixgbe_flow_control;
		break;
	case ixgbe_fc_none:
	default:
		adapter->hw.fc.requested_mode = ixgbe_fc_none;
		break;
	}

	ixgbe_fc_enable(&adapter->hw, 0);
	return error;
}
static void
ixgbe_add_rx_process_limit(struct adapter *adapter, const char *name,
const char *description, int *limit, int value)
{
*limit = value;
SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
#ifdef IXGBE_IEEE1588
/*
** ixgbe_hwtstamp_ioctl - control hardware time stamping
**
** Outgoing time stamping can be enabled and disabled. Play nice and
** disable it when requested, although it shouldn't case any overhead
** when no packet needs it. At most one packet in the queue may be
** marked for time stamping, otherwise it would be impossible to tell
** for sure to which packet the hardware time stamp belongs.
**
** Incoming time stamping has to be configured via the hardware
** filters. Not all combinations are supported, in particular event
** type has to be specified. Matching the kind of event packet is
** not supported, with the exception of "all V2 events regardless of
** level 2 or 4".
**
*/
static int
ixgbe_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr)
{
struct ixgbe_hw *hw = &adapter->hw;
struct hwtstamp_ctrl *config;
u32 tsync_tx_ctl_bit = IXGBE_TSYNCTXCTL_ENABLED;
u32 tsync_rx_ctl_bit = IXGBE_TSYNCRXCTL_ENABLED;
u32 tsync_rx_ctl_type = 0;
u32 tsync_rx_cfg = 0;
int is_l4 = 0;
int is_l2 = 0;
u16 port = 319; /* PTP */
u32 regval;
config = (struct hwtstamp_ctrl *) ifr->ifr_data;
/* reserved for future extensions */
if (config->flags)
return (EINVAL);
switch (config->tx_type) {
case HWTSTAMP_TX_OFF:
tsync_tx_ctl_bit = 0;
break;
case HWTSTAMP_TX_ON:
tsync_tx_ctl_bit = IXGBE_TSYNCTXCTL_ENABLED;
break;
default:
return (ERANGE);
}
switch (config->rx_filter) {
case HWTSTAMP_FILTER_NONE:
tsync_rx_ctl_bit = 0;
break;
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
case HWTSTAMP_FILTER_ALL:
/*
* register TSYNCRXCFG must be set, therefore it is not
* possible to time stamp both Sync and Delay_Req messages
* => fall back to time stamping all packets
*/
tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_ALL;
config->rx_filter = HWTSTAMP_FILTER_ALL;
break;
case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_L4_V1;
tsync_rx_cfg = IXGBE_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
is_l4 = 1;
break;
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_L4_V1;
tsync_rx_cfg = IXGBE_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
is_l4 = 1;
break;
case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2;
tsync_rx_cfg = IXGBE_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
is_l2 = 1;
is_l4 = 1;
config->rx_filter = HWTSTAMP_FILTER_SOME;
break;
case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2;
tsync_rx_cfg = IXGBE_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
is_l2 = 1;
is_l4 = 1;
config->rx_filter = HWTSTAMP_FILTER_SOME;
break;
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_EVENT_V2;
config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
is_l2 = 1;
break;
default:
return -ERANGE;
}
/* enable/disable TX */
regval = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL);
regval = (regval & ~IXGBE_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
IXGBE_WRITE_REG(hw, IXGBE_TSYNCTXCTL, regval);
/* enable/disable RX, define which PTP packets are time stamped */
regval = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
regval = (regval & ~IXGBE_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
regval = (regval & ~0xE) | tsync_rx_ctl_type;
IXGBE_WRITE_REG(hw, IXGBE_TSYNCRXCTL, regval);
IXGBE_WRITE_REG(hw, IXGBE_TSYNCRXCFG, tsync_rx_cfg);
/*
* Ethertype Filter Queue Filter[0][15:0] = 0x88F7
* (Ethertype to filter on)
* Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
* Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
*/
IXGBE_WRITE_REG(hw, IXGBE_ETQF0, is_l2 ? 0x440088f7 : 0);
/* L4 Queue Filter[0]: only filter by source and destination port */
IXGBE_WRITE_REG(hw, IXGBE_SPQF0, htons(port));
IXGBE_WRITE_REG(hw, IXGBE_IMIREXT(0), is_l4 ?
((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
IXGBE_WRITE_REG(hw, IXGBE_IMIR(0), is_l4 ?
(htons(port)
| (0<<16) /* immediate interrupt disabled */
| 0 /* (1<<17) bit cleared: do not bypass
destination port check */)
: 0);
IXGBE_WRITE_REG(hw, IXGBE_FTQF0, is_l4 ?
(0x11 /* UDP */
| (1<<15) /* VF not compared */
| (1<<27) /* Enable Timestamping */
| (7<<28) /* only source port filter enabled,
source/target address and protocol
masked */)
: ((1<<15) | (15<<28) /* all mask bits set = filter not
enabled */));
wrfl();
adapter->hwtstamp_ctrl = config;
/* clear TX/RX time stamp registers, just to be sure */
regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
return (error);
}
/*
** ixgbe_read_clock - read raw cycle counter (to be used by time counter)
**
** tc is expected to be the `cycles' member embedded in a struct adapter;
** the enclosing adapter is recovered from it with container_of().
** Returns the 64-bit value assembled from SYSTIML (low 32 bits) and
** SYSTIMH (high 32 bits).
*/
static cycle_t ixgbe_read_clock(const struct cyclecounter *tc)
{
	/* the enclosing object is this driver's struct adapter */
	struct adapter *adapter =
	    container_of(tc, struct adapter, cycles);
	struct ixgbe_hw *hw = &adapter->hw;
	u64 stamp;

	stamp = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
	stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32ULL;

	return (stamp);
}
#endif /* IXGBE_IEEE1588 */
Index: stable/8/sys/dev/mxge/if_mxge.c
===================================================================
--- stable/8/sys/dev/mxge/if_mxge.c (revision 205282)
+++ stable/8/sys/dev/mxge/if_mxge.c (revision 205283)
@@ -1,4858 +1,4858 @@
/******************************************************************************
Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Myricom Inc, nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/bpf.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>
#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif
#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif
#include "opt_inet.h"
/* tunable params */
/* when 0, mxge_enable_nvidia_ecrc() returns without touching the bridge */
static int mxge_nvidia_ecrc_enable = 1;
/*
 * non-zero skips firmware probing in mxge_select_firmware():
 * 1 selects the aligned firmware, any other non-zero value the
 * unaligned one
 */
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
/* non-zero enables additional device_printf() diagnostics */
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
/* non-zero makes mxge_change_promisc() always enable promiscuous mode */
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
/* firmware image names handed to firmware_get() via sc->fw_name */
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/* newbus entry points and the interrupt handler, defined in this file */
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);
/* method table binding the generic device interface to this driver */
static device_method_t mxge_methods[] =
{
/* Device interface */
DEVMETHOD(device_probe, mxge_probe),
DEVMETHOD(device_attach, mxge_attach),
DEVMETHOD(device_detach, mxge_detach),
DEVMETHOD(device_shutdown, mxge_shutdown),
/* table terminator */
{0, 0}
};
/* driver description: name, method table and per-device softc size */
static driver_t mxge_driver =
{
"mxge",
mxge_methods,
sizeof(mxge_softc_t),
};
static devclass_t mxge_devclass;
/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* the firmware(9) loader and zlib are used by mxge_load_firmware_helper() */
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);
/* forward declarations for functions used before their definition */
static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
/*
 * Probe routine: claim Myricom Z8E / Z8E_9 PCI devices and set a
 * description based on the PCI revision id.  Returns 0 when the device
 * is ours, ENXIO otherwise.
 */
static int
mxge_probe(device_t dev)
{
	int revision;

	/* not a Myricom device at all */
	if (pci_get_vendor(dev) != MXGE_PCI_VENDOR_MYRICOM)
		return ENXIO;

	/* Myricom, but not one of the device ids we drive */
	if (pci_get_device(dev) != MXGE_PCI_DEVICE_Z8E &&
	    pci_get_device(dev) != MXGE_PCI_DEVICE_Z8E_9)
		return ENXIO;

	revision = pci_get_revid(dev);
	if (revision == MXGE_PCI_REV_Z8E) {
		device_set_desc(dev, "Myri10G-PCIE-8A");
	} else if (revision == MXGE_PCI_REV_Z8ES) {
		device_set_desc(dev, "Myri10G-PCIE-8B");
	} else {
		/* unknown revision: still attach, but say so */
		device_set_desc(dev, "Myri10G-PCIE-8??");
		device_printf(dev, "Unrecognized rev %d NIC\n", revision);
	}
	return 0;
}
/*
 * On i386/amd64, try to switch the mapping of the NIC SRAM to
 * write-combining.  sc->wc records whether that succeeded (1) or
 * not (0).  On other architectures this function does nothing.
 */
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t sram_len;
	int rc;

	/* assume success; reverted below if the attribute change fails */
	sc->wc = 1;
	sram_len = rman_get_size(sc->mem_res);
	rc = pmap_change_attr((vm_offset_t) sc->sram, sram_len,
	    PAT_WRITE_COMBINING);
	if (rc == 0)
		return;

	device_printf(sc->dev, "pmap_change_attr failed, %d\n", rc);
	sc->wc = 0;
#endif
}
/*
 * bus_dmamap_load() callback: on success store the bus address of the
 * first segment into the bus_addr_t that arg points to.  On error the
 * destination is left untouched.
 */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
    int error)
{
	if (error != 0)
		return;

	*(bus_addr_t *) arg = segs[0].ds_addr;
}
/*
 * Allocate `bytes' of DMA-able memory with the given alignment as a
 * single segment, and load it so that dma->bus_addr holds its bus
 * address.  Creates dma->dmat, dma->addr and dma->map.
 *
 * Allocations larger than 4KB that are 4KB aligned get no boundary
 * restriction; everything else is constrained to a 4KB boundary and
 * 4KB maximum segment size.
 *
 * Returns 0 on success or the error from the failing bus_dma call,
 * after releasing whatever had been set up so far.
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
    bus_size_t alignment)
{
	device_t dev = sc->dev;
	bus_size_t boundary = 4096;
	bus_size_t maxsegsize = 4096;
	int rc;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	}

	/* allocate DMAable memory tags */
	rc = bus_dma_tag_create(sc->parent_dmat,	/* parent */
	    alignment,			/* alignment */
	    boundary,			/* boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    NULL, NULL,			/* filter */
	    bytes,			/* maxsize */
	    1,				/* num segs */
	    maxsegsize,			/* maxsegsize */
	    BUS_DMA_COHERENT,		/* flags */
	    NULL, NULL,			/* lock */
	    &dma->dmat);		/* tag */
	if (rc != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", rc);
		return rc;
	}

	/* allocate DMAable memory & map */
	rc = bus_dmamem_alloc(dma->dmat, &dma->addr,
	    (BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO),
	    &dma->map);
	if (rc != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", rc);
		(void)bus_dma_tag_destroy(dma->dmat);
		return rc;
	}

	/* load the memory */
	rc = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
	    mxge_dmamap_callback, (void *)&dma->bus_addr, 0);
	if (rc != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", rc);
		bus_dmamem_free(dma->dmat, dma->addr, dma->map);
		(void)bus_dma_tag_destroy(dma->dmat);
		return rc;
	}

	return 0;
}
/*
 * Undo mxge_dma_alloc(): unload the map, free the memory and destroy
 * the tag, in that order.
 */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dma_tag_t tag = dma->dmat;
	bus_dmamap_t map = dma->map;

	bus_dmamap_unload(tag, map);
	bus_dmamem_free(tag, dma->addr, map);
	(void)bus_dma_tag_destroy(tag);
}
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 *
 * Walk the NUL-separated strings in sc->eeprom_strings and fill in
 * sc->mac_addr, sc->mac_addr_string, sc->product_code_string and
 * sc->serial_number_string.  Returns 0 when a MAC address was found,
 * ENXIO otherwise (including when the MAC entry is truncated by the
 * end of the buffer).
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
/* advance p just past the terminating NUL of the current string */
#define MXGE_NEXT_STRING(p) while ((p) < limit && *(p)++)
	char *ptr, *limit;
	size_t avail;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		/* never let a key comparison read beyond the buffer */
		avail = (size_t)(limit - ptr);
		if (avail >= 4 && memcmp(ptr, "MAC=", 4) == 0) {
			/*
			 * NOTE(review): mac_addr_string is left pointing
			 * at "AC=..." (one byte into the entry), exactly as
			 * before; confirm against its users before changing.
			 */
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				/*
				 * need 3 bytes to skip to the next octet
				 * plus 2 hex digits, all inside the buffer
				 */
				if ((limit - ptr) < 3 + 2)
					goto abort;
				ptr += 3;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (avail >= 3 && memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
			    sizeof (sc->product_code_string) - 1);
			/* strncpy does not terminate on truncation */
			sc->product_code_string[
			    sizeof (sc->product_code_string) - 1] = '\0';
		} else if (avail >= 3 && memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
			    sizeof (sc->serial_number_string) - 1);
			sc->serial_number_string[
			    sizeof (sc->serial_number_string) - 1] = '\0';
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");
	return ENXIO;
}
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
/*
 * x86 version: if the device two levels above the NIC in the device
 * tree is a supported Nvidia (vendor 0x10de) bridge, set bit 0x40 in
 * its config register at offset 0x178 by mapping the chipset's config
 * window directly.  Does nothing when mxge_nvidia_ecrc_enable is 0,
 * when the bridge is not recognized, or when the mapping cannot be
 * verified.
 */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
uint32_t val;
unsigned long base, off;
char *va, *cfgptr;
device_t pdev, mcp55;
uint16_t vendor_id, device_id, word;
uintptr_t bus, slot, func, ivend, idev;
uint32_t *ptr32;
if (!mxge_nvidia_ecrc_enable)
return;
/* grandparent of the NIC in the device tree */
pdev = device_get_parent(device_get_parent(sc->dev));
if (pdev == NULL) {
device_printf(sc->dev, "could not find parent?\n");
return;
}
vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
/* only Nvidia bridges are handled */
if (vendor_id != 0x10de)
return;
base = 0;
if (device_id == 0x005d) {
/* ck804, base address is magic */
base = 0xe0000000UL;
} else if (device_id >= 0x0374 && device_id <= 0x378) {
/* mcp55, base address stored in chipset */
mcp55 = pci_find_bsf(0, 0, 0);
if (mcp55 &&
0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
word = pci_read_config(mcp55, 0x90, 2);
base = ((unsigned long)word & 0x7ffeU) << 25;
}
}
/* unknown bridge, or base could not be determined */
if (!base)
return;
/* XXXX
Test below is commented because it is believed that doing
config read/write beyond 0xff will access the config space
for the next larger function. Uncomment this and remove
the hacky pmap_mapdev() way of accessing config space when
FreeBSD grows support for extended pcie config space access
*/
#if 0
/* See if we can, by some miracle, access the extended
config space */
val = pci_read_config(pdev, 0x178, 4);
if (val != 0xffffffff) {
val |= 0x40;
pci_write_config(pdev, 0x178, val, 4);
return;
}
#endif
/* Rather than using normal pci config space writes, we must
 * map the Nvidia config space ourselves. This is because on
 * opteron/nvidia class machine the 0xe0000000 mapping is
 * handled by the nvidia chipset, that means the internal PCI
 * device (the on-chip northbridge), or the amd-8131 bridge
 * and things behind them are not visible by this method.
 */
BUS_READ_IVAR(device_get_parent(pdev), pdev,
PCI_IVAR_BUS, &bus);
BUS_READ_IVAR(device_get_parent(pdev), pdev,
PCI_IVAR_SLOT, &slot);
BUS_READ_IVAR(device_get_parent(pdev), pdev,
PCI_IVAR_FUNCTION, &func);
BUS_READ_IVAR(device_get_parent(pdev), pdev,
PCI_IVAR_VENDOR, &ivend);
BUS_READ_IVAR(device_get_parent(pdev), pdev,
PCI_IVAR_DEVICE, &idev);
/* physical address of the bridge's config space: 1MB per bus, 4KB per function */
off = base
+ 0x00100000UL * (unsigned long)bus
+ 0x00001000UL * (unsigned long)(func
+ 8 * slot);
/* map it into the kernel */
va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
if (va == NULL) {
device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
return;
}
/* get a pointer to the config space mapped into the kernel */
cfgptr = va + (off & PAGE_MASK);
/* make sure that we can really access it */
vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
if (! (vendor_id == ivend && device_id == idev)) {
device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
vendor_id, device_id);
pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
return;
}
/* register 0x178 holds the bit that is set below */
ptr32 = (uint32_t*)(cfgptr + 0x178);
val = *ptr32;
/* all ones means the extended register is not reachable */
if (val == 0xffffffff) {
device_printf(sc->dev, "extended mapping failed\n");
pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
return;
}
*ptr32 = val | 0x40;
pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
if (mxge_verbose)
device_printf(sc->dev,
"Enabled ECRC on upstream Nvidia bridge "
"at %d:%d:%d\n",
(int)bus, (int)slot, (int)func);
return;
}
#else
/* non-x86 version: only reports that the call was unexpected */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
device_printf(sc->dev,
"Nforce 4 chipset on non-x86/amd64!?!?!\n");
return;
}
#endif
/*
 * Run the firmware DMA benchmark `test_type' three times (read, write,
 * read+write) against sc->dmabench_dma and store the results in
 * sc->read_dma, sc->write_dma and sc->read_write_dma.
 *
 * Returns 0 on success or the mxge_send_cmd() error of the first
 * failing step.  Failures are logged unless test_type is
 * MXGEFW_CMD_UNALIGNED_TEST.
 */
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len, ticks;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	/* a zero tick count would divide by zero; report 0 instead */
	ticks = cmd.data0 & 0xffff;
	sc->read_dma = (ticks != 0) ?
	    ((cmd.data0>>16) * len * 2) / ticks : 0;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	ticks = cmd.data0 & 0xffff;
	sc->write_dma = (ticks != 0) ?
	    ((cmd.data0>>16) * len * 2) / ticks : 0;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	ticks = cmd.data0 & 0xffff;
	sc->read_write_dma = (ticks != 0) ?
	    ((cmd.data0>>16) * len * 2 * 2) / ticks : 0;

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
		    test, status);

	return status;
}
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 *
 * Returns 0 when the aligned firmware was loaded and passed the
 * unaligned-completion DMA test; otherwise the error from
 * mxge_load_firmware() or mxge_dma_test().
 */
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
			    pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	/* E2BIG is how mxge_send_cmd() reports an unaligned completion */
	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
		    "Please install up to date fw\n");
	return status;
}
/*
 * Decide between the aligned and unaligned firmware image, set
 * sc->fw_name and sc->tx_boundary accordingly and load it.
 *
 * Order of precedence:
 *  1. a forced choice (sc->throttle if non-zero, else the
 *     mxge_force_firmware tunable): 1 means aligned, any other
 *     non-zero value means unaligned;
 *  2. a known PCIe link width of x4 or less: aligned;
 *  3. mxge_firmware_probe(): on success its result is kept as is,
 *     on failure the unaligned image is used.
 *
 * Returns 0 or the error from mxge_load_firmware().
 */
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int use_aligned = 0;
	int forced = mxge_force_firmware;

	if (sc->throttle)
		forced = sc->throttle;

	if (forced != 0) {
		use_aligned = (forced == 1) ? 1 : 0;
		if (mxge_verbose)
			device_printf(sc->dev,
			    "Assuming %s completions (forced)\n",
			    use_aligned ? "aligned" : "unaligned");
	} else if (sc->link_width != 0 && sc->link_width <= 4) {
		/* if the PCIe link width is 4 or less, we can use the
		   aligned firmware and skip any checks */
		device_printf(sc->dev,
		    "PCIe x%d Link, expect reduced performance\n",
		    sc->link_width);
		use_aligned = 1;
	} else if (mxge_firmware_probe(sc) == 0) {
		/* probe loaded and validated the aligned firmware */
		return 0;
	}

	if (use_aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}
/*
 * Overlays a const and a non-const char pointer on the same storage.
 * NOTE(review): presumably used to strip a const qualifier without a
 * cast; no user is visible in this part of the file -- confirm.
 */
union qualhack
{
const char *ro_char;
char *rw_char;
};
/*
 * Check that the firmware described by `hdr' is an ethernet MCP whose
 * major.minor version matches what the driver was built for.
 *
 * Side effects: the version string is saved in sc->fw_version and the
 * parsed numbers in sc->fw_ver_major/minor/tiny.
 *
 * Returns 0 when acceptable, EIO for a wrong MCP type and EINVAL for a
 * version mismatch.
 */
static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{
	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
		    be32toh(hdr->mcp_type));
		return EIO;
	}

	/*
	 * save firmware version for sysctl; strncpy() does not terminate
	 * on truncation, so leave room for and write the NUL ourselves
	 * before the string is parsed and printed below
	 */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version) - 1);
	sc->fw_version[sizeof (sc->fw_version) - 1] = '\0';
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", sc->fw_version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	    &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	    && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
		    sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
		    MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}
/*
 * zlib allocation hook: non-sleeping allocation of items * size bytes
 * from M_TEMP.  The first argument is unused.
 */
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	return malloc(items * size, M_TEMP, M_NOWAIT);
}
/*
 * zlib release hook matching z_alloc(): return ptr to M_TEMP.
 * The first argument is unused.
 */
static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}
/*
 * Fetch the firmware image named sc->fw_name, inflate it, validate
 * its header and copy it into NIC SRAM at MXGE_FW_OFFSET.
 *
 * On success *limit is set to the inflated length and 0 is returned.
 * Errors: ENOENT when the image cannot be found, ENOMEM when the
 * inflate buffer cannot be allocated, EIO for zlib failures or a bad
 * header offset, or the mxge_validate_firmware() error.
 */
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
		    sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		/*
		 * status still holds Z_OK (0) here; without an explicit
		 * error the caller would believe the firmware was loaded
		 */
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
	    (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
		    inflate_buffer + i,
		    min(256U, (unsigned)(fw_len - i)));
		wmb();
		/* read back from SRAM between chunks */
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 *
 * The request is written to NIC SRAM at MXGEFW_BOOT_DUMMY_RDMA and the
 * function then polls the first word of sc->cmd for the 0xffffffff
 * confirmation (about 1ms + up to 20 x 1ms).  A missing confirmation
 * is only logged; nothing is returned to the caller.
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
char buf_bytes[72];
volatile uint32_t *confirm;
volatile char *submit;
uint32_t *buf, dma_low, dma_high;
int i;
/* round up to an 8-byte aligned address inside buf_bytes */
buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
/* clear confirmation addr */
confirm = (volatile uint32_t *)sc->cmd;
*confirm = 0;
wmb();
/* send an rdma command to the PCIe engine, and wait for the
response in the confirmation address. The firmware should
write a -1 there to indicate it is alive and well
*/
dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
buf[0] = htobe32(dma_high); /* confirm addr MSW */
buf[1] = htobe32(dma_low); /* confirm addr LSW */
buf[2] = htobe32(0xffffffff); /* confirm data */
dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
buf[3] = htobe32(dma_high); /* dummy addr MSW */
buf[4] = htobe32(dma_low); /* dummy addr LSW */
buf[5] = htobe32(enable); /* enable? */
submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
/* NOTE(review): 64 bytes are copied but only buf[0..5] are initialized */
mxge_pio_copy(submit, buf, 64);
wmb();
DELAY(1000);
wmb();
/* poll for the confirmation word, 1ms per iteration, 20 at most */
i = 0;
while (*confirm != 0xffffffff && i < 20) {
DELAY(1000);
i++;
}
if (*confirm != 0xffffffff) {
device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
(enable ? "enable" : "disable"), confirm,
*confirm);
}
return;
}
/*
 * Send command `cmd' with arguments data->data0..data2 to the firmware
 * and wait for its response in sc->cmd.
 *
 * The command block is written to NIC SRAM at MXGEFW_ETH_CMD while
 * holding sc->cmd_mtx; the response is then polled up to 20 times with
 * a 1ms delay while it still reads 0xffffffff.
 *
 * Returns 0 on success (data->data0 then holds the response data),
 * ENOSYS for MXGEFW_CMD_UNKNOWN, E2BIG for MXGEFW_CMD_ERROR_UNALIGNED,
 * EBUSY for MXGEFW_CMD_ERROR_BUSY, ENXIO for any other result and
 * EAGAIN when no response arrived in time.
 */
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
mcp_cmd_t *buf;
char buf_bytes[sizeof(*buf) + 8];
volatile mcp_cmd_response_t *response = sc->cmd;
volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
uint32_t dma_low, dma_high;
int err, sleep_total = 0;
/* ensure buf is aligned to 8 bytes */
buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
/* all fields are handed to the firmware in big-endian byte order */
buf->data0 = htobe32(data->data0);
buf->data1 = htobe32(data->data1);
buf->data2 = htobe32(data->data2);
buf->cmd = htobe32(cmd);
/* bus address the response is expected at */
dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
buf->response_addr.low = htobe32(dma_low);
buf->response_addr.high = htobe32(dma_high);
mtx_lock(&sc->cmd_mtx);
/* 0xffffffff marks "no response yet"; see the polling loop below */
response->result = 0xffffffff;
wmb();
mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
/* wait up to 20ms */
err = EAGAIN;
for (sleep_total = 0; sleep_total < 20; sleep_total++) {
bus_dmamap_sync(sc->cmd_dma.dmat,
sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
wmb();
switch (be32toh(response->result)) {
case 0:
data->data0 = be32toh(response->data);
err = 0;
break;
case 0xffffffff:
/* still pending: err stays EAGAIN so the loop continues */
DELAY(1000);
break;
case MXGEFW_CMD_UNKNOWN:
err = ENOSYS;
break;
case MXGEFW_CMD_ERROR_UNALIGNED:
err = E2BIG;
break;
case MXGEFW_CMD_ERROR_BUSY:
err = EBUSY;
break;
default:
device_printf(sc->dev,
"mxge: command %d "
"failed, result = %d\n",
cmd, be32toh(response->result));
err = ENXIO;
break;
}
/* any definitive answer ends the polling */
if (err != EAGAIN)
break;
}
/* NOTE(review): the two string pieces below join as "timed outresult" */
if (err == EAGAIN)
device_printf(sc->dev, "mxge: command %d timed out"
"result = %d\n",
cmd, be32toh(response->result));
mtx_unlock(&sc->cmd_mtx);
return err;
}
/*
 * Try to keep using the firmware that is already running on the NIC.
 * Its header is located through the pointer stored in SRAM at
 * MCP_HEADER_PTR_OFFSET, copied to host memory and passed to
 * mxge_validate_firmware().
 *
 * Firmware 1.4.4 through 1.4.11 additionally gets
 * sc->adopted_rx_filter_bug set.
 *
 * Returns EIO for an implausible header offset, ENOMEM when the
 * temporary header copy cannot be allocated, otherwise the validation
 * result.
 */
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	const size_t hdr_len = sizeof (struct mcp_gen_header);
	struct mcp_gen_header *fw_hdr;
	size_t offset;
	int rc;

	/* find running firmware header */
	offset = htobe32(*(volatile uint32_t *)
	    (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((offset & 3) != 0 || offset + sizeof(*fw_hdr) > sc->sram_size) {
		device_printf(sc->dev,
		    "Running firmware has bad header offset (%d)\n",
		    (int)offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	fw_hdr = malloc(hdr_len, M_DEVBUF, M_NOWAIT);
	if (fw_hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    offset, (char *)fw_hdr, hdr_len);
	rc = mxge_validate_firmware(sc, fw_hdr);
	free(fw_hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major != 1 || sc->fw_ver_minor != 4 ||
	    sc->fw_ver_tiny < 4 || sc->fw_ver_tiny > 11)
		return rc;

	sc->adopted_rx_filter_bug = 1;
	device_printf(sc->dev, "Adopting fw %d.%d.%d: "
	    "working around rx filter bug\n",
	    sc->fw_ver_major, sc->fw_ver_minor,
	    sc->fw_ver_tiny);
	return rc;
}
/*
 * Load the firmware named by sc->fw_name into the NIC and hand control
 * to it.
 *
 * If loading fails and `adopt' is non-zero, the firmware already
 * running on the NIC is adopted instead; in that case sc->fw_name and
 * sc->tx_boundary are switched to the unaligned settings.
 *
 * Returns 0 on success, the mxge_load_firmware_helper() or
 * mxge_adopt_running_firmware() error, or ENXIO when the hand-off is
 * not confirmed.
 */
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
volatile uint32_t *confirm;
volatile char *submit;
char buf_bytes[72];
uint32_t *buf, size, dma_low, dma_high;
int status, i;
/* round up to an 8-byte aligned address inside buf_bytes */
buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
size = sc->sram_size;
/* on success the helper replaces size with the inflated firmware length */
status = mxge_load_firmware_helper(sc, &size);
if (status) {
if (!adopt)
return status;
/* Try to use the currently running firmware, if
it is new enough */
status = mxge_adopt_running_firmware(sc);
if (status) {
device_printf(sc->dev,
"failed to adopt running firmware\n");
return status;
}
device_printf(sc->dev,
"Successfully adopted running firmware\n");
if (sc->tx_boundary == 4096) {
device_printf(sc->dev,
"Using firmware currently running on NIC"
". For optimal\n");
device_printf(sc->dev,
"performance consider loading optimized "
"firmware\n");
}
/* adopted firmware is treated like the unaligned image */
sc->fw_name = mxge_fw_unaligned;
sc->tx_boundary = 2048;
return 0;
}
/* clear confirmation addr */
confirm = (volatile uint32_t *)sc->cmd;
*confirm = 0;
wmb();
/* send a reload command to the bootstrap MCP, and wait for the
response in the confirmation address. The firmware should
write a -1 there to indicate it is alive and well
*/
dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
buf[0] = htobe32(dma_high); /* confirm addr MSW */
buf[1] = htobe32(dma_low); /* confirm addr LSW */
buf[2] = htobe32(0xffffffff); /* confirm data */
/* FIX: All newest firmware should un-protect the bottom of
the sram before handoff. However, the very first interfaces
do not. Therefore the handoff copy must skip the first 8 bytes
*/
/* where the code starts*/
buf[3] = htobe32(MXGE_FW_OFFSET + 8);
buf[4] = htobe32(size - 8); /* length of code */
buf[5] = htobe32(8); /* where to copy to */
buf[6] = htobe32(0); /* where to jump to */
submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
mxge_pio_copy(submit, buf, 64);
wmb();
DELAY(1000);
wmb();
/* poll for the confirmation word, 10ms per iteration, 20 at most */
i = 0;
while (*confirm != 0xffffffff && i < 20) {
DELAY(1000*10);
i++;
bus_dmamap_sync(sc->cmd_dma.dmat,
sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
}
if (*confirm != 0xffffffff) {
device_printf(sc->dev,"handoff failed (%p = 0x%x)",
confirm, *confirm);
return ENXIO;
}
return 0;
}
/*
 * Tell the firmware the station address stored in sc->mac_addr.
 * Bytes 0-3 are packed into data0 and bytes 4-5 into data1, most
 * significant byte first.  Returns the mxge_send_cmd() status.
 */
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	/*
	 * Widen before shifting: a uint8_t promotes to int, and shifting
	 * a value >= 0x80 left by 24 would overflow a signed int.
	 */
	cmd.data0 = (((uint32_t)addr[0] << 24) | ((uint32_t)addr[1] << 16)
	    | ((uint32_t)addr[2] << 8) | (uint32_t)addr[3]);

	cmd.data1 = (((uint32_t)addr[4] << 8) | (uint32_t)addr[5]);

	/* mxge_send_cmd() forwards data2 as well; don't send stack garbage */
	cmd.data2 = 0;

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}
/*
 * Ask the firmware to enable (pause != 0) or disable (pause == 0)
 * flow control.  On success sc->pause is updated and 0 is returned;
 * on failure a message is logged, sc->pause is left alone and ENXIO
 * is returned.
 */
static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	uint32_t fw_cmd;

	fw_cmd = pause ? MXGEFW_ENABLE_FLOW_CONTROL :
	    MXGEFW_DISABLE_FLOW_CONTROL;

	if (mxge_send_cmd(sc, fw_cmd, &cmd) != 0) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}

	sc->pause = pause;
	return 0;
}
/*
 * Ask the firmware to enable or disable promiscuous mode.  The
 * mxge_always_promisc tunable overrides the request and forces it on.
 * Failures are logged only.
 */
static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	uint32_t fw_cmd;

	if (mxge_always_promisc)
		promisc = 1;

	fw_cmd = promisc ? MXGEFW_ENABLE_PROMISC : MXGEFW_DISABLE_PROMISC;

	if (mxge_send_cmd(sc, fw_cmd, &cmd) != 0)
		device_printf(sc->dev, "Failed to set promisc mode\n");
}
/*
 * Reprogram the firmware's multicast filter from the interface's
 * multicast address list.
 *
 * Sequence: switch the firmware to all-multicast, flush the existing
 * groups, join every AF_LINK address on ifp->if_multiaddrs, then switch
 * all-multicast back off so the filter takes effect.  The function
 * returns early, leaving all-multicast enabled, when the firmware lacks
 * multicast support (nothing is sent at all in that case), when
 * sc->adopted_rx_filter_bug is set, when IFF_ALLMULTI is requested, or
 * when any command fails.
 */
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/*
	 * The first commands carry no arguments, but &cmd is still handed
	 * to mxge_send_cmd(); zero it so that routine never sees
	 * indeterminate stack contents.
	 */
	memset(&cmd, 0, sizeof (cmd));

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
		    " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
		    "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
		    ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		/*
		 * First four address bytes go into data0, last two into the
		 * leading bytes of data1; both words are then byte-swapped.
		 * NOTE(review): only two bytes of data1 are overwritten, so
		 * its remaining bytes keep whatever the previous command
		 * left there -- confirm the firmware ignores them.
		 */
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		    &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			/* message used to end in a tab; terminate the line */
			device_printf(sc->dev, "Failed "
			    "MXGEFW_JOIN_MULTICAST_GROUP, error status: "
			    "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
		    ", error status: %d\n", err);
	}
}
/*
 * Work out the largest MTU the driver can offer.
 *
 * If a single MJUMPAGESIZE buffer already exceeds the firmware limit,
 * or if the firmware accepts MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS (i.e.
 * virtually contiguous jumbos are usable), the answer is the firmware
 * limit minus the pad.  Otherwise one MJUMPAGESIZE buffer is the cap.
 */
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t nbufs_cmd;

	if (MJUMPAGESIZE - MXGEFW_PAD <= MXGEFW_MAX_MTU) {
		/* probe: can the firmware be told how many big buffers
		   to use? */
		nbufs_cmd.data0 = 0;
		if (mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
		    &nbufs_cmd) != 0) {
			/* no: a frame must fit in one MJUMPAGESIZE buffer */
			return MJUMPAGESIZE - MXGEFW_PAD;
		}
	}
	return MXGEFW_MAX_MTU - MXGEFW_PAD;
}
/*
 * Reset the NIC firmware and re-establish the state shared between the
 * driver and the firmware.
 *
 * Steps, in order: send MXGEFW_CMD_RESET; call mxge_dummy_rdma(sc, 1);
 * program the interrupt queue size; when more than one slice is in use,
 * query and enable the RSS queues; when interrupts_setup is non-zero,
 * hand each slice's rx_done DMA address to the firmware; fetch the SRAM
 * offsets of the interrupt coalescing delay, IRQ ack and IRQ deassert
 * words; run the DMA benchmark; zero the per-slice counters; and finally
 * re-apply the MAC address, promiscuous, pause, multicast and throttle
 * settings.
 *
 * Returns ENXIO if the reset command fails, the (possibly OR-ed)
 * non-zero status of a failed setup command, and otherwise the status
 * of mxge_update_mac_address().
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
struct mxge_slice_state *ss;
mxge_rx_done_t *rx_done;
volatile uint32_t *irq_claim;
mxge_cmd_t cmd;
int slice, status;
/* try to send a reset command to the card to see if it
is alive */
memset(&cmd, 0, sizeof (cmd));
status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
if (status != 0) {
device_printf(sc->dev, "failed reset\n");
return ENXIO;
}
mxge_dummy_rdma(sc, 1);
/* set the intrq size */
cmd.data0 = sc->rx_ring_size;
/* NOTE(review): this status is overwritten below when num_slices > 1,
   so a failure here is only noticed in the single-slice case */
status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
/*
* Even though we already know how many slices are supported
* via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
* has magic side effects, and must be called after a reset.
* It must be called prior to calling any RSS related cmds,
* including assigning an interrupt queue for anything but
* slice 0. It must also be called *after*
* MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
* the firmware to compute offsets.
*/
if (sc->num_slices > 1) {
/* ask the maximum number of slices it supports */
status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
&cmd);
if (status != 0) {
device_printf(sc->dev,
"failed to get number of slices\n");
return status;
}
/*
* MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
* to setting up the interrupt queue DMA
*/
cmd.data0 = sc->num_slices;
cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd);
if (status != 0) {
device_printf(sc->dev,
"failed to set number of slices\n");
return status;
}
}
if (interrupts_setup) {
/* Now exchange information about interrupts */
for (slice = 0; slice < sc->num_slices; slice++) {
rx_done = &sc->ss[slice].rx_done;
/* clear the slice's rx_done entries (rx_ring_size bytes) before
   telling the firmware where they live */
memset(rx_done->entry, 0, sc->rx_ring_size);
cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
cmd.data2 = slice;
status |= mxge_send_cmd(sc,
MXGEFW_CMD_SET_INTRQ_DMA,
&cmd);
}
}
/* The three queries below each return an offset in cmd.data0 that is
   added to sc->sram; failures are OR-ed into status and checked once
   afterwards, before any of the resulting pointers is dereferenced. */
status |= mxge_send_cmd(sc,
MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
&cmd);
sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
if (status != 0) {
device_printf(sc->dev, "failed set interrupt parameters\n");
return status;
}
/* publish the configured coalescing delay, stored big-endian */
*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
/* run a DMA benchmark */
(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
for (slice = 0; slice < sc->num_slices; slice++) {
ss = &sc->ss[slice];
/* each slice's claim word sits two uint32_t's after the previous one */
ss->irq_claim = irq_claim + (2 * slice);
/* reset mcp/driver shared state back to 0 */
ss->rx_done.idx = 0;
ss->rx_done.cnt = 0;
ss->tx.req = 0;
ss->tx.done = 0;
ss->tx.pkt_done = 0;
ss->tx.queue_active = 0;
ss->tx.activate = 0;
ss->tx.deactivate = 0;
ss->tx.wake = 0;
ss->tx.defrag = 0;
ss->tx.stall = 0;
ss->rx_big.cnt = 0;
ss->rx_small.cnt = 0;
ss->lro_bad_csum = 0;
ss->lro_queued = 0;
ss->lro_flushed = 0;
if (ss->fw_stats != NULL) {
bzero(ss->fw_stats, sizeof *ss->fw_stats);
}
}
sc->rdma_tags_available = 15;
/* re-apply the software-visible settings; only the MAC address update
   contributes to the return value, the other results are not checked */
status = mxge_update_mac_address(sc);
mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
mxge_change_pause(sc, sc->pause);
mxge_set_multicast_list(sc);
if (sc->throttle) {
cmd.data0 = sc->throttle;
if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
&cmd)) {
device_printf(sc->dev,
"can't enable throttle\n");
}
}
return status;
}
/*
 * sysctl handler for the transmit throttle factor.
 *
 * Reports the current sc->throttle; on a write, accepts a new value in
 * [MXGE_MIN_THROTTLE, MXGE_MAX_THROTTLE], pushes it to the firmware
 * under sc->driver_mtx and records it only if the firmware accepted it.
 * Returns 0, EINVAL for an out-of-range value, or the error from
 * sysctl_handle_int() / mxge_send_cmd().
 */
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc = arg1;
	mxge_cmd_t cmd;
	unsigned int requested = sc->throttle;
	int rc;

	rc = sysctl_handle_int(oidp, &requested, arg2, req);
	if (rc != 0)
		return rc;

	/* nothing to do when the value did not change */
	if (requested == sc->throttle)
		return 0;

	if (requested < MXGE_MIN_THROTTLE || requested > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = requested;
	rc = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (rc == 0)
		sc->throttle = requested;
	mtx_unlock(&sc->driver_mtx);

	return rc;
}
/*
 * sysctl handler for the interrupt coalescing delay.
 *
 * Reports sc->intr_coal_delay; on a write, accepts a value in
 * 1..1000000, stores it big-endian through sc->intr_coal_delay_ptr and
 * records it in the softc, both under sc->driver_mtx.
 * Returns 0, EINVAL for an out-of-range value, or the error from
 * sysctl_handle_int().
 */
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc = arg1;
	unsigned int delay = sc->intr_coal_delay;
	int rc;

	rc = sysctl_handle_int(oidp, &delay, arg2, req);
	if (rc != 0)
		return rc;

	/* unchanged value: nothing to write */
	if (delay == sc->intr_coal_delay)
		return 0;

	if (delay == 0 || delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(delay);
	sc->intr_coal_delay = delay;
	mtx_unlock(&sc->driver_mtx);

	return rc;
}
/*
 * sysctl handler for the flow-control switch.
 *
 * Reports sc->pause; on a write with a different value, calls
 * mxge_change_pause() under sc->driver_mtx.
 * Returns 0 or the error from sysctl_handle_int() / mxge_change_pause().
 */
static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc = arg1;
	unsigned int want = sc->pause;
	int rc;

	rc = sysctl_handle_int(oidp, &want, arg2, req);
	if (rc != 0)
		return rc;

	/* unchanged value: leave the firmware alone */
	if (want == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	rc = mxge_change_pause(sc, want);
	mtx_unlock(&sc->driver_mtx);

	return rc;
}
/*
 * Apply a new LRO queue count.
 *
 * Sets or clears IFCAP_LRO in the interface's enabled capabilities
 * (cleared when lro_cnt is 0), stores the count in sc->lro_cnt and, if
 * the interface is running, closes and re-opens it.
 * Returns 0, or the result of mxge_open() when a re-open was needed.
 */
static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp = sc->ifp;

	if (lro_cnt != 0)
		ifp->if_capenable |= IFCAP_LRO;
	else
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->lro_cnt = lro_cnt;

	/* interface is down: the new count is picked up on the next open */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return 0;

	mxge_close(sc, 0);
	return mxge_open(sc);
}
/*
 * sysctl handler for the number of LRO merge queues.
 *
 * Reports sc->lro_cnt; on a write, accepts at most 128 and applies the
 * change via mxge_change_lro_locked() under sc->driver_mtx.
 * Returns 0, EINVAL for a value above 128, or the error from
 * sysctl_handle_int() / mxge_change_lro_locked().
 */
static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc = arg1;
	unsigned int wanted = sc->lro_cnt;
	int rc;

	rc = sysctl_handle_int(oidp, &wanted, arg2, req);
	if (rc != 0)
		return rc;

	/* unchanged value: nothing to reconfigure */
	if (wanted == sc->lro_cnt)
		return 0;

	if (wanted > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	rc = mxge_change_lro_locked(sc, wanted);
	mtx_unlock(&sc->driver_mtx);

	return rc;
}
/*
 * sysctl handler for a 32-bit big-endian value.
 *
 * arg1 points at the big-endian word; it is converted to host order
 * and passed to sysctl_handle_int() by value (in arg2, with a NULL
 * arg1).  Returns EFAULT when arg1 is NULL, otherwise the result of
 * sysctl_handle_int().
 */
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	if (arg1 == NULL)
		return EFAULT;

	/* swap into host order, then drop the pointer so the generic
	   handler uses the value in arg2 */
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;

	return sysctl_handle_int(oidp, arg1, arg2, req);
}
/*
 * Tear down the per-slice sysctl nodes created by mxge_add_sysctls().
 *
 * Does nothing if the "slice" tree was never created.  Otherwise frees
 * every slice's sysctl context that has a tree, then the context that
 * owns the "slice" node itself, clearing the tree pointers as it goes.
 */
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *cur;
	int i;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		cur = &sc->ss[i];
		if (cur != NULL && cur->sysctl_tree != NULL) {
			sysctl_ctx_free(&cur->sysctl_ctx);
			cur->sysctl_tree = NULL;
		}
	}

	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
/*
 * Register the driver's sysctl nodes.
 *
 * Device-wide nodes (identification strings, DMA benchmark results,
 * tunables, and the firmware statistics of slice 0) are attached to the
 * device's own sysctl context.  Per-slice counters are placed under a
 * "slice" node, each slice in its own context so mxge_rem_sysctls() can
 * free them individually.
 *
 * The firmware statistics are stored big-endian and are therefore
 * exported through mxge_handle_be32().
 */
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "firmware_version",
	    CTLFLAG_RD, &sc->fw_version,
	    0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "serial_number",
	    CTLFLAG_RD, &sc->serial_number_string,
	    0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "product_code",
	    CTLFLAG_RD, &sc->product_code_string,
	    0, "product_code");
	/* description used to read "tx_boundary" (copy/paste slip) */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "pcie_link_width",
	    CTLFLAG_RD, &sc->link_width,
	    0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "tx_boundary",
	    CTLFLAG_RD, &sc->tx_boundary,
	    0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "write_combine",
	    CTLFLAG_RD, &sc->wc,
	    0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "read_dma_MBs",
	    CTLFLAG_RD, &sc->read_dma,
	    0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "write_dma_MBs",
	    CTLFLAG_RD, &sc->write_dma,
	    0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "read_write_dma_MBs",
	    CTLFLAG_RD, &sc->read_write_dma,
	    0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "watchdog_resets",
	    CTLFLAG_RD, &sc->watchdog_resets,
	    0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal_delay",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, mxge_change_intr_coal,
	    "I", "interrupt coalescing delay in usecs");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "throttle",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, mxge_change_throttle,
	    "I", "transmit throttling");
	/* description used to repeat the intr_coal_delay text */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "flow_control_enabled",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, mxge_change_flow_control,
	    "I", "flow control enabled");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "deassert_wait",
	    CTLFLAG_RW, &mxge_deassert_wait,
	    0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "link_up",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
	    0, mxge_handle_be32,
	    "I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "rdma_tags_available",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
	    0, mxge_handle_be32,
	    "I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_crc32",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_bad_crc32,
	    0, mxge_handle_be32,
	    "I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_phy",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_bad_phy,
	    0, mxge_handle_be32,
	    "I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_error_or_filtered",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_link_error_or_filtered,
	    0, mxge_handle_be32,
	    "I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_overflow",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
	    0, mxge_handle_be32,
	    "I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_multicast_filtered",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_multicast_filtered,
	    0, mxge_handle_be32,
	    "I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_big_buffer",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
	    0, mxge_handle_be32,
	    "I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_small_buffer",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_no_small_buffer,
	    0, mxge_handle_be32,
	    "I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_overrun",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
	    0, mxge_handle_be32,
	    "I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_pause",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_pause,
	    0, mxge_handle_be32,
	    "I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_runt",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
	    0, mxge_handle_be32,
	    "I", "dropped_runt");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_unicast_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
	    0, mxge_handle_be32,
	    "I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "verbose",
	    CTLFLAG_RW, &mxge_verbose,
	    0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "lro_cnt",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, mxge_change_lro,
	    "I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
	    SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
	    "slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		/* bounded: slice_num is only 8 bytes */
		snprintf(slice_num, sizeof(slice_num), "%d", slice);
		ss->sysctl_tree =
		    SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
		    CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "rx_small_cnt",
		    CTLFLAG_RD, &ss->rx_small.cnt,
		    0, "rx_small_cnt");
		/* description used to read "rx_small_cnt" */
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "rx_big_cnt",
		    CTLFLAG_RD, &ss->rx_big.cnt,
		    0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
		    0, "number of lro merge queues flushed");
		/* the two literal halves used to join as "mergequeues" */
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "lro_queued", CTLFLAG_RD, &ss->lro_queued,
		    0, "number of frames appended to lro merge "
		    "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_req",
		    CTLFLAG_RD, &ss->tx.req,
		    0, "tx_req");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_done",
		    CTLFLAG_RD, &ss->tx.done,
		    0, "tx_done");
		/* description used to read "tx_done" */
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_pkt_done",
		    CTLFLAG_RD, &ss->tx.pkt_done,
		    0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_stall",
		    CTLFLAG_RD, &ss->tx.stall,
		    0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_wake",
		    CTLFLAG_RD, &ss->tx.wake,
		    0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_defrag",
		    CTLFLAG_RD, &ss->tx.defrag,
		    0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_queue_active",
		    CTLFLAG_RD, &ss->tx.queue_active,
		    0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_activate",
		    CTLFLAG_RD, &ss->tx.activate,
		    0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_deactivate",
		    CTLFLAG_RD, &ss->tx.deactivate,
		    0, "tx_deactivate");
	}
}
/*
 * Copy send requests src[cnt-1] down to src[1] into the NIC's request
 * ring, last one first and one request per PIO copy, masking the slot
 * index so the copy wraps around the ring.  src[0] is deliberately not
 * written here.
 */
static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
    mcp_kreq_ether_send_t *src, int cnt)
{
	int first_slot = tx->req;
	int n, slot;

	for (n = cnt - 1; n >= 1; n--) {
		slot = (first_slot + n) & tx->mask;
		mxge_pio_copy(&tx->lanai[slot], &src[n], sizeof(*src));
		wmb();
	}
}
/*
* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
* at most 32 bytes at a time, so as to avoid involving the software
* pio handler in the nic. We re-write the first segment's flags
* to mark them valid only after writing the entire chain
*
* tx:  transmit ring; tx->req selects the first slot and is advanced
*      by cnt on return.
* src: request array; src[0].flags is cleared while the chain is being
*      copied and restored before returning.
* cnt: number of requests in src.
*/
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
int cnt)
{
int idx, i;
uint32_t *src_ints;
volatile uint32_t *dst_ints;
mcp_kreq_ether_send_t *srcp;
volatile mcp_kreq_ether_send_t *dstp, *dst;
uint8_t last_flags;
idx = tx->req & tx->mask;
/* stash the first request's flags and copy it with flags == 0 so the
   chain is not marked valid until the final 32-bit store below */
last_flags = src->flags;
src->flags = 0;
wmb();
dst = dstp = &tx->lanai[idx];
srcp = src;
if ((idx + cnt) < tx->mask) {
/* chain stays below the ring mask: copy the requests two at a time */
for (i = 0; i < (cnt - 1); i += 2) {
mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
wmb(); /* force write every 32 bytes */
srcp += 2;
dstp += 2;
}
} else {
/* submit all but the first request, and ensure
that it is submitted below */
mxge_submit_req_backwards(tx, src, cnt);
i = 0;
}
/* i < cnt here means one request is still pending: the odd trailing
   one after the pairwise loop, or the first one after the backwards
   copy (dstp/srcp were never advanced on that path) */
if (i < cnt) {
/* submit the first request */
mxge_pio_copy(dstp, srcp, sizeof(*src));
wmb(); /* barrier before setting valid flag */
}
/* re-write the last 32-bits with the valid flags */
src->flags = last_flags;
/* copy the fourth 32-bit word (index 3) of the first request only */
src_ints = (uint32_t *)src;
src_ints+=3;
dst_ints = (volatile uint32_t *)dst;
dst_ints+=3;
*dst_ints = *src_ints;
tx->req += cnt;
wmb();
}
#if IFCAP_TSO4
/*
 * Build and submit the send-request chain for a TSO packet.
 *
 * ss:             slice whose transmit ring is used.
 * m:              the packet; already DMA-loaded by the caller, whose
 *                 segments are in tx->seg_list.
 * busdma_seg_cnt: number of valid entries in tx->seg_list.
 * ip_off:         offset of the IP header from the start of the frame.
 *
 * The busdma segments are cut into send requests at header/payload and
 * MSS boundaries.  If more than tx->max_desc requests would be needed
 * the DMA map is unloaded, the mbuf freed and ss->oerrors incremented.
 */
static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
    int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
		    ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
	    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
		    + sizeof (*tcp), ss->scratch);
		/*
		 * The headers were just gathered into the scratch buffer
		 * because the first mbuf does not hold all of them, so the
		 * IP (and thereby TCP) header must be read from the scratch
		 * copy.  This used to point back into the mbuf, which made
		 * the tcp pointer below run past the mbuf's valid data.
		 */
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
				    MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
				    MXGEFW_FLAGS_FIRST |
				    (small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
			    MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	/* walk back from the end, tagging requests as TSO_LAST until the
	   one that carries a CHOP or FIRST flag has been tagged as well */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	/* complain only the first time this happens */
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		    (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}
#endif /* IFCAP_TSO4 */
#ifdef MXGE_NEW_VLAN_API
/*
 * Software VLAN tag insertion, mirroring what net/if_vlan.c:vlan_start()
 * does.  Doing it here lets the driver advertise "hardware" tag
 * insertion, which is required for the vlan interface to honour our
 * checksum offload flags.
 *
 * Returns the (possibly reallocated) mbuf with the 802.1Q header in
 * place and M_VLANTAG cleared, or NULL if M_PREPEND()/m_pullup() failed.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *vhdr;
	char *frame;

	/* open up room for the encapsulation in front of the frame */
	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;

	/* the whole vlan header must be contiguous in the first mbuf */
	if (m->m_len < sizeof(*vhdr)) {
		m = m_pullup(m, sizeof(*vhdr));
		if (__predict_false(m == NULL))
			return NULL;
	}

	/*
	 * Slide the two Ethernet addresses forward over the new space,
	 * then fill in the 802.1Q encapsulation type and the tag.
	 */
	vhdr = mtod(m, struct ether_vlan_header *);
	frame = (char *)vhdr;
	bcopy(frame + ETHER_VLAN_ENCAP_LEN, frame,
	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
	vhdr->evl_encap_proto = htons(ETHERTYPE_VLAN);
	vhdr->evl_tag = htons(m->m_pkthdr.ether_vtag);

	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
/*
 * Map one outgoing packet for DMA and queue it on the slice's transmit
 * ring.
 *
 * ss: slice whose tx ring receives the packet.
 * m:  the packet; ownership passes to this function.
 *
 * A VLAN tag is inserted in software first when MXGE_NEW_VLAN_API is
 * defined and the mbuf carries M_VLANTAG.  If the DMA load reports
 * EFBIG the chain is defragmented once and the load retried.  TSO
 * packets are handed off to mxge_encap_tso().  On any failure the mbuf
 * is freed and ss->oerrors is incremented.
 */
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
mxge_softc_t *sc;
mcp_kreq_ether_send_t *req;
bus_dma_segment_t *seg;
struct mbuf *m_tmp;
struct ifnet *ifp;
mxge_tx_ring_t *tx;
struct ip *ip;
int cnt, cum_len, err, i, idx, odd_flag, ip_off;
uint16_t pseudo_hdr_offset;
uint8_t flags, cksum_offset;
sc = ss->sc;
ifp = sc->ifp;
tx = &ss->tx;
ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
if (m->m_flags & M_VLANTAG) {
m = mxge_vlan_tag_insert(m);
if (__predict_false(m == NULL))
goto drop;
/* the IP header now sits behind the inserted 802.1Q encapsulation */
ip_off += ETHER_VLAN_ENCAP_LEN;
}
#endif
/* (try to) map the frame for DMA */
idx = tx->req & tx->mask;
err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
m, tx->seg_list, &cnt,
BUS_DMA_NOWAIT);
if (__predict_false(err == EFBIG)) {
/* Too many segments in the chain. Try
to defrag */
m_tmp = m_defrag(m, M_NOWAIT);
if (m_tmp == NULL) {
goto drop;
}
ss->tx.defrag++;
m = m_tmp;
err = bus_dmamap_load_mbuf_sg(tx->dmat,
tx->info[idx].map,
m, tx->seg_list, &cnt,
BUS_DMA_NOWAIT);
}
if (__predict_false(err != 0)) {
device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
" packet len = %d\n", err, m->m_pkthdr.len);
goto drop;
}
bus_dmamap_sync(tx->dmat, tx->info[idx].map,
BUS_DMASYNC_PREWRITE);
/* remember the mbuf in the ring slot of the first request */
tx->info[idx].m = m;
#if IFCAP_TSO4
/* TSO is different enough, we handle it in another routine */
if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
mxge_encap_tso(ss, m, cnt, ip_off);
return;
}
#endif
req = tx->req_list;
cksum_offset = 0;
pseudo_hdr_offset = 0;
flags = MXGEFW_FLAGS_NO_TSO;
/* checksum offloading? */
if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
/* ensure ip header is in first mbuf, copy
it to a scratch buffer if not */
if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
m_copydata(m, 0, ip_off + sizeof (*ip),
ss->scratch);
ip = (struct ip *)(ss->scratch + ip_off);
} else {
ip = (struct ip *)(mtod(m, char *) + ip_off);
}
/* checksumming starts right after the IP header; csum_data is added
   to that offset and the sum is passed to the firmware big-endian */
cksum_offset = ip_off + (ip->ip_hl << 2);
pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
req->cksum_offset = cksum_offset;
flags |= MXGEFW_FLAGS_CKSUM;
odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
} else {
odd_flag = 0;
}
if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
flags |= MXGEFW_FLAGS_SMALL;
/* convert segments into a request list */
cum_len = 0;
seg = tx->seg_list;
req->flags = MXGEFW_FLAGS_FIRST;
for (i = 0; i < cnt; i++) {
req->addr_low =
htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
req->addr_high =
htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
req->length = htobe16(seg->ds_len);
req->cksum_offset = cksum_offset;
/* the checksum offset is relative to each request: consume this
   segment's length from it, bottoming out at 0 */
if (cksum_offset > seg->ds_len)
cksum_offset -= seg->ds_len;
else
cksum_offset = 0;
req->pseudo_hdr_offset = pseudo_hdr_offset;
req->pad = 0; /* complete solid 16-byte block */
req->rdma_count = 1;
req->flags |= flags | ((cum_len & 1) * odd_flag);
cum_len += seg->ds_len;
seg++;
req++;
/* pre-clear the next request's flags so the |= above (and in the runt
   padding below) starts from 0 */
req->flags = 0;
}
req--;
/* pad runts to 60 bytes */
if (cum_len < 60) {
/* append one extra request pointing at the driver's zero-pad DMA
   buffer for the missing bytes */
req++;
req->addr_low =
htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
req->addr_high =
htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
req->length = htobe16(60 - cum_len);
req->cksum_offset = 0;
req->pseudo_hdr_offset = pseudo_hdr_offset;
req->pad = 0; /* complete solid 16-byte block */
req->rdma_count = 1;
req->flags |= flags | ((cum_len & 1) * odd_flag);
cnt++;
}
/* the first request carries the request count of the whole packet */
tx->req_list[0].rdma_count = cnt;
#if 0
/* print what the firmware will see */
for (i = 0; i < cnt; i++) {
printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
"cso:%d, flags:0x%x, rdma:%d\n",
i, (int)ntohl(tx->req_list[i].addr_high),
(int)ntohl(tx->req_list[i].addr_low),
(int)ntohs(tx->req_list[i].length),
(int)ntohs(tx->req_list[i].pseudo_hdr_offset),
tx->req_list[i].cksum_offset, tx->req_list[i].flags,
tx->req_list[i].rdma_count);
}
printf("--------------\n");
#endif
/* flag the ring slot of the packet's last request */
tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
/* tell the NIC to start polling this slice */
*tx->send_go = 1;
tx->queue_active = 1;
tx->activate++;
wmb();
}
#endif
return;
drop:
/* m may be NULL here (failed vlan tag insertion) */
m_freem(m);
ss->oerrors++;
return;
}
#ifdef IFNET_BUF_RING
/*
 * Discard everything queued for transmission: drain each slice's
 * buf_ring (under that slice's tx mutex), freeing the mbufs, and then
 * call if_qflush() on the interface.
 */
static void
mxge_qflush(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *ring;
	struct mbuf *pkt;
	int i;

	for (i = 0; i < sc->num_slices; i++) {
		ring = &sc->ss[i].tx;
		mtx_lock(&ring->mtx);
		for (;;) {
			pkt = buf_ring_dequeue_sc(ring->br);
			if (pkt == NULL)
				break;
			m_freem(pkt);
		}
		mtx_unlock(&ring->mtx);
	}
	if_qflush(ifp);
}
/*
 * Push queued packets from the slice's buf_ring to the NIC for as long
 * as more than tx->max_desc ring slots are free.
 *
 * Returns as soon as the buf_ring is empty.  If the ring fills up while
 * packets are still queued, the slice is marked IFF_DRV_OACTIVE (once)
 * and the stall counter is bumped.
 */
static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_tx_ring_t *ring = &ss->tx;
	struct ifnet *ifp = ss->sc->ifp;
	struct mbuf *pkt;

	for (;;) {
		/* stop once the free slot count drops to max_desc */
		if ((ring->mask - (ring->req - ring->done)) <= ring->max_desc)
			break;
		pkt = drbr_dequeue(ifp, ring->br);
		if (pkt == NULL)
			return;
		/* let BPF see it */
		BPF_MTAP(ifp, pkt);
		/* give it to the nic */
		mxge_encap(ss, pkt);
	}

	/* ran out of transmit slots */
	if ((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0 &&
	    !drbr_empty(ifp, ring->br)) {
		ss->if_drv_flags |= IFF_DRV_OACTIVE;
		ring->stall++;
	}
}
/*
 * Transmit or queue one packet on a slice.  Called by mxge_transmit()
 * with the slice's tx mutex held.
 *
 * If the slice is not running, or is marked IFF_DRV_OACTIVE, the packet
 * is only enqueued on the buf_ring.  Otherwise it is handed straight to
 * the NIC when nothing is queued ahead of it and more than tx->max_desc
 * ring slots are free, else it is enqueued; any backlog is then drained
 * with mxge_start_locked().
 *
 * Returns 0, or the error from drbr_enqueue().
 *
 * NOTE: the "-"/"+" prefixed lines below are unified-diff markers from
 * the change this text was extracted from (the "-" line is replaced by
 * the "+" line); they are not C.
 */
static int
mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
{
mxge_softc_t *sc;
struct ifnet *ifp;
mxge_tx_ring_t *tx;
int err;
sc = ss->sc;
ifp = sc->ifp;
tx = &ss->tx;
/* anything other than "running and not stalled": just queue it */
if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING) {
err = drbr_enqueue(ifp, tx->br, m);
return (err);
}
- if (drbr_empty(ifp, tx->br) &&
+ if (!drbr_needs_enqueue(ifp, tx->br) &&
((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
/* let BPF see it */
BPF_MTAP(ifp, m);
/* give it to the nic */
mxge_encap(ss, m);
} else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
return (err);
}
/* drain whatever is (now) sitting in the buf_ring */
if (!drbr_empty(ifp, tx->br))
mxge_start_locked(ss);
return (0);
}
/*
 * if_transmit entry point: pick a slice from the packet's flow id and
 * send on it.
 *
 * If the slice's tx mutex can be taken without blocking, the packet
 * goes through mxge_transmit_locked(); otherwise it is just enqueued on
 * the slice's buf_ring.  Returns the error from whichever
 * path was taken.
 */
static int
mxge_transmit(struct ifnet *ifp, struct mbuf *m)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;
	mxge_tx_ring_t *ring;
	int rc = 0;
	int which;

	which = m->m_pkthdr.flowid;
	which &= (sc->num_slices - 1);	/* num_slices always power of 2 */

	ss = &sc->ss[which];
	ring = &ss->tx;

	if (!mtx_trylock(&ring->mtx)) {
		/* lock is busy: just queue the packet on the ring */
		rc = drbr_enqueue(ifp, ring->br, m);
	} else {
		rc = mxge_transmit_locked(ss, m);
		mtx_unlock(&ring->mtx);
	}
	return (rc);
}
#else
/*
 * Push packets from the interface send queue to the NIC for as long as
 * more than tx->max_desc ring slots are free.
 *
 * Returns as soon as the send queue is empty.  If the ring fills up
 * first, the interface is marked IFF_DRV_OACTIVE (once) and the stall
 * counter is bumped.
 */
static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc = ss->sc;
	mxge_tx_ring_t *ring = &ss->tx;
	struct ifnet *ifp = sc->ifp;
	struct mbuf *pkt;

	for (;;) {
		/* stop once the free slot count drops to max_desc */
		if ((ring->mask - (ring->req - ring->done)) <= ring->max_desc)
			break;
		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
		if (pkt == NULL)
			return;
		/* let BPF see it */
		BPF_MTAP(ifp, pkt);
		/* give it to the nic */
		mxge_encap(ss, pkt);
	}

	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0)
		return;
	sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	ring->stall++;
}
#endif
/*
 * if_start entry point.  Only the first slice is used here: take its
 * tx mutex and run mxge_start_locked() on it.
 */
static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *first = &sc->ss[0];

	mtx_lock(&first->tx.mtx);
	mxge_start_locked(first);
	mtx_unlock(&first->tx.mtx);
}
/*
* copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
* at most 32 bytes at a time, so as to avoid involving the software
* pio handler in the nic. We re-write the first segment's low
* DMA address to mark it valid only after we write the entire chunk
* in a burst
*
* dst: first of eight receive descriptors in the NIC's ring.
* src: the matching eight descriptors in host memory; src[0].addr_low
*      is temporarily overwritten and restored before returning.
*/
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
mcp_kreq_ether_recv_t *src)
{
uint32_t low;
/* save the real low address and copy a 0xffffffff placeholder first */
low = src->addr_low;
src->addr_low = 0xffffffff;
/* eight descriptors go out as two copies of four, each followed by a
   write barrier */
mxge_pio_copy(dst, src, 4 * sizeof (*src));
wmb();
mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
wmb();
/* restore the host copy, then store the real low address into the
   first NIC descriptor last */
src->addr_low = low;
dst->addr_low = low;
wmb();
}
/*
 * Allocate and DMA-map a small receive buffer for ring slot idx.
 *
 * On success the mbuf is recorded in rx->info[idx] and its bus address
 * (big-endian) in rx->shadow[idx].  Whether or not the allocation
 * succeeded, when idx is the last slot of a group of eight that group
 * of shadow descriptors is pushed to the NIC.
 *
 * Returns 0, ENOBUFS if no mbuf was available (alloc_fail is bumped),
 * or the error from bus_dmamap_load_mbuf_sg().
 */
static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	mxge_rx_ring_t *ring = &ss->rx_small;
	bus_dma_segment_t dma_seg;
	struct mbuf *mb;
	int nsegs, rc;

	mb = m_gethdr(M_DONTWAIT, MT_DATA);
	if (mb == NULL) {
		ring->alloc_fail++;
		rc = ENOBUFS;
	} else {
		mb->m_len = MHLEN;
		rc = bus_dmamap_load_mbuf_sg(ring->dmat, map, mb,
		    &dma_seg, &nsegs, BUS_DMA_NOWAIT);
		if (rc != 0) {
			m_free(mb);
		} else {
			ring->info[idx].m = mb;
			ring->shadow[idx].addr_low =
			    htobe32(MXGE_LOWPART_TO_U32(dma_seg.ds_addr));
			ring->shadow[idx].addr_high =
			    htobe32(MXGE_HIGHPART_TO_U32(dma_seg.ds_addr));
		}
	}

	/* descriptors are handed to the NIC eight at a time */
	if ((idx & 7) == 7)
		mxge_submit_8rx(&ring->lanai[idx - 7],
		    &ring->shadow[idx - 7]);
	return rc;
}
/*
 * Allocate and DMA-map a big receive buffer starting at ring slot idx.
 *
 * A standard cluster is used when rx->cl_size is MCLBYTES, a jumbo
 * cluster of rx->cl_size otherwise.  On success the mbuf is recorded in
 * rx->info[idx] and the bus address of the first segment (big-endian)
 * in rx->shadow[idx]; with MXGE_VIRT_JUMBOS the remaining segments fill
 * the following shadow slots.  Whether or not the allocation succeeded,
 * the rx->nbufs slots starting at idx are walked and every completed
 * group of eight shadow descriptors is pushed to the NIC.
 *
 * Returns 0, ENOBUFS if no mbuf was available (alloc_fail is bumped),
 * or the error from bus_dmamap_load_mbuf_sg().
 */
static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	mxge_rx_ring_t *ring = &ss->rx_big;
	bus_dma_segment_t segs[3];
	struct mbuf *mb;
	int nsegs, rc, i;

	if (ring->cl_size != MCLBYTES)
		mb = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, ring->cl_size);
	else
		mb = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);

	if (mb == NULL) {
		ring->alloc_fail++;
		rc = ENOBUFS;
	} else {
		mb->m_len = ring->mlen;
		rc = bus_dmamap_load_mbuf_sg(ring->dmat, map, mb,
		    segs, &nsegs, BUS_DMA_NOWAIT);
		if (rc != 0) {
			m_free(mb);
		} else {
			ring->info[idx].m = mb;
			ring->shadow[idx].addr_low =
			    htobe32(MXGE_LOWPART_TO_U32(segs[0].ds_addr));
			ring->shadow[idx].addr_high =
			    htobe32(MXGE_HIGHPART_TO_U32(segs[0].ds_addr));
#if MXGE_VIRT_JUMBOS
			for (i = 1; i < nsegs; i++) {
				ring->shadow[idx + i].addr_low =
				    htobe32(MXGE_LOWPART_TO_U32(segs[i].ds_addr));
				ring->shadow[idx + i].addr_high =
				    htobe32(MXGE_HIGHPART_TO_U32(segs[i].ds_addr));
			}
#endif
		}
	}

	/* descriptors are handed to the NIC eight at a time */
	for (i = 0; i < ring->nbufs; i++, idx++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&ring->lanai[idx - 7],
			    &ring->shadow[idx - 7]);
		}
	}
	return rc;
}
/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding.  This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes.  It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good.
 *
 * Returns 1 for anything that is not IPv4 TCP or UDP; otherwise the
 * pseudo-header-adjusted checksum XOR 0xffff.
 */
static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eth_hdr = mtod(m, struct ether_header *);
	struct ip *ip_hdr;
	uint16_t folded;

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eth_hdr->ether_type != htons(ETHERTYPE_IP)))
		return 1;

	ip_hdr = (struct ip *)(eth_hdr + 1);
	if (__predict_false(!(ip_hdr->ip_p == IPPROTO_TCP ||
	    ip_hdr->ip_p == IPPROTO_UDP)))
		return 1;

#ifdef INET
	/* firmware sum + IP payload length + protocol, folded together
	   with the source and destination addresses */
	folded = in_pseudo(ip_hdr->ip_src.s_addr, ip_hdr->ip_dst.s_addr,
	    htonl(ntohs(csum) + ntohs(ip_hdr->ip_len) -
	    (ip_hdr->ip_hl << 2) + ip_hdr->ip_p));
#else
	folded = 1;
#endif
	return (folded ^ 0xffff);
}
/*
 * Strip the 802.1q encapsulation from the front of mbuf m, record the
 * VLAN tag on the mbuf, and adjust *csum so that it no longer covers
 * the four bytes that followed the plain ethernet header.
 * *csum is in network byte order on entry and on return.
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *vhdr;
	struct ether_header *eh;
	uint32_t sum, encap, inv;

	vhdr = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* work in host byte order */
	sum = ntohs(*csum);
	encap = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	inv = ~encap;

	/* one's complement subtract: add the inverse, then the carry */
	sum += inv;
	sum += (sum < inv);

	/* fold to 16 bits; twice, since the first fold may carry */
	sum = (sum >> 16) + (sum & 0xFFFF);
	sum = (sum >> 16) + (sum & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(sum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(vhdr->evl_tag);
#else
	{
		struct m_tag *tag;

		tag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
		    M_NOWAIT);
		if (tag == NULL)
			return;
		VLAN_TAG_VALUE(tag) = ntohs(vhdr->evl_tag);
		m_tag_prepend(m, tag);
	}
#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)vhdr, (char *)vhdr + ETHER_VLAN_ENCAP_LEN,
	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}
/*
 * Handle one completed receive on the big-buffer ring: replace the
 * ring slot with a fresh buffer, finish up the received mbuf (length,
 * VLAN stripping, checksum flags, flow id) and pass it to LRO or to
 * the stack.  If no replacement buffer can be obtained the frame is
 * dropped and the slot keeps its old mbuf.
 */
static inline void
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc = ss->sc;
	struct ifnet *ifp = sc->ifp;
	mxge_rx_ring_t *ring = &ss->rx_big;
	struct ether_header *ehdr;
	struct mbuf *pkt;
	bus_dmamap_t spent_map;
	uint16_t tcpudp_csum;
	int slot;

	slot = ring->cnt & ring->mask;
	ring->cnt += ring->nbufs;

	/* remember the received mbuf before the slot is refilled */
	pkt = ring->info[slot].m;

	/* try to replace the received mbuf */
	if (mxge_get_buf_big(ss, ring->extra_map, slot) != 0) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	spent_map = ring->info[slot].map;
	bus_dmamap_sync(ring->dmat, spent_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(ring->dmat, spent_map);

	/* the spare map now backs the slot; the old one becomes the spare */
	ring->info[slot].map = ring->extra_map;
	ring->extra_map = spent_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	pkt->m_data += MXGEFW_PAD;

	pkt->m_pkthdr.rcvif = ifp;
	pkt->m_len = pkt->m_pkthdr.len = len;
	ss->ipackets++;

	ehdr = mtod(pkt, struct ether_header *);
	if (ehdr->ether_type == htons(ETHERTYPE_VLAN))
		mxge_vlan_tag_remove(pkt, &csum);

	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag) {
		tcpudp_csum = mxge_rx_csum(pkt, csum);
		if (tcpudp_csum == 0) {
			if (sc->lro_cnt && mxge_lro_rx(ss, pkt, csum) == 0)
				return;
			/* otherwise, it was a UDP frame, or a TCP frame which
			   we could not do LRO on.  Tell the stack that the
			   checksum is good */
			pkt->m_pkthdr.csum_data = 0xffff;
			pkt->m_pkthdr.csum_flags =
			    CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
		}
	}

	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		pkt->m_pkthdr.flowid = (ss - sc->ss);
		pkt->m_flags |= M_FLOWID;
	}

	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, pkt);
}
/*
 * Handle one completed receive on the small-buffer ring: replace the
 * ring slot with a fresh buffer, finish up the received mbuf (length,
 * VLAN stripping, checksum flags, flow id) and pass it to LRO or to
 * the stack.  If no replacement buffer can be obtained the frame is
 * dropped and the slot keeps its old mbuf.
 */
static inline void
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc = ss->sc;
	struct ifnet *ifp = sc->ifp;
	mxge_rx_ring_t *ring = &ss->rx_small;
	struct ether_header *ehdr;
	struct mbuf *pkt;
	bus_dmamap_t spent_map;
	uint16_t tcpudp_csum;
	int slot;

	slot = ring->cnt & ring->mask;
	ring->cnt++;

	/* remember the received mbuf before the slot is refilled */
	pkt = ring->info[slot].m;

	/* try to replace the received mbuf */
	if (mxge_get_buf_small(ss, ring->extra_map, slot) != 0) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	spent_map = ring->info[slot].map;
	bus_dmamap_sync(ring->dmat, spent_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(ring->dmat, spent_map);

	/* the spare map now backs the slot; the old one becomes the spare */
	ring->info[slot].map = ring->extra_map;
	ring->extra_map = spent_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	pkt->m_data += MXGEFW_PAD;

	pkt->m_pkthdr.rcvif = ifp;
	pkt->m_len = pkt->m_pkthdr.len = len;
	ss->ipackets++;

	ehdr = mtod(pkt, struct ether_header *);
	if (ehdr->ether_type == htons(ETHERTYPE_VLAN))
		mxge_vlan_tag_remove(pkt, &csum);

	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag) {
		tcpudp_csum = mxge_rx_csum(pkt, csum);
		if (tcpudp_csum == 0) {
			if (sc->lro_cnt && mxge_lro_rx(ss, pkt, csum) == 0)
				return;
			/* otherwise, it was a UDP frame, or a TCP frame which
			   we could not do LRO on.  Tell the stack that the
			   checksum is good */
			pkt->m_pkthdr.csum_data = 0xffff;
			pkt->m_pkthdr.csum_flags =
			    CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
		}
	}

	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		pkt->m_pkthdr.flowid = (ss - sc->ss);
		pkt->m_flags |= M_FLOWID;
	}

	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, pkt);
}
/*
 * Drain the slice's receive-completion ring.  Each entry with a
 * non-zero length is consumed, cleared, and dispatched to the small or
 * big receive handler depending on its length.  At most about half a
 * ring's worth of entries is handled per call.  Afterwards (INET
 * kernels only) every active LRO entry is flushed.
 */
static inline void
mxge_clean_rx_done(struct mxge_slice_state *ss)
{
	mxge_rx_done_t *done = &ss->rx_done;
	uint16_t len, csum;
	int handled = 0;

	for (;;) {
		if (done->entry[done->idx].length == 0)
			break;

		len = ntohs(done->entry[done->idx].length);
		/* zero the length so the entry is not seen as pending again */
		done->entry[done->idx].length = 0;
		csum = done->entry[done->idx].checksum;

		if (len > (MHLEN - MXGEFW_PAD))
			mxge_rx_done_big(ss, len, csum);
		else
			mxge_rx_done_small(ss, len, csum);

		done->cnt++;
		done->idx = done->cnt & done->mask;

		/* limit potential for livelock */
		if (__predict_false(++handled > done->mask / 2))
			break;
	}
#ifdef INET
	while (!SLIST_EMPTY(&ss->lro_active)) {
		struct lro_entry *pending = SLIST_FIRST(&ss->lro_active);

		SLIST_REMOVE_HEAD(&ss->lro_active, next);
		mxge_lro_flush(ss, pending);
	}
#endif
}
/*
 * Reclaim transmit descriptors until the driver's completed-packet
 * count (tx->pkt_done) reaches mcp_idx: update output counters, unload
 * the DMA map and free the mbuf of each finished packet.  Then, under
 * the tx mutex, clear IFF_DRV_OACTIVE and restart transmission if the
 * ring has drained enough, and (IFNET_BUF_RING with several slices)
 * tell the NIC to stop polling an idle queue.
 */
static inline void
mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct mbuf *sent;
	bus_dmamap_t map;
	int slot;
	int *flags;

	tx = &ss->tx;
	ifp = ss->sc->ifp;

	while (tx->pkt_done != mcp_idx) {
		slot = tx->done & tx->mask;
		tx->done++;
		sent = tx->info[slot].m;

		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (sent != NULL) {
			ss->obytes += sent->m_pkthdr.len;
			if (sent->m_flags & M_MCAST)
				ss->omcasts++;
			ss->opackets++;
			tx->info[slot].m = NULL;
			map = tx->info[slot].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(sent);
		}

		/* a set flag marks a descriptor that completes a packet */
		if (tx->info[slot].flag) {
			tx->info[slot].flag = 0;
			tx->pkt_done++;
		}
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
	   its OK to send packets */
#ifdef IFNET_BUF_RING
	flags = &ss->if_drv_flags;
#else
	flags = &ifp->if_drv_flags;
#endif
	mtx_lock(&ss->tx.mtx);
	if (((*flags) & IFF_DRV_OACTIVE) &&
	    (tx->req - tx->done < (tx->mask + 1) / 4)) {
		*flags &= ~IFF_DRV_OACTIVE;
		ss->tx.wake++;
		mxge_start_locked(ss);
	}
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
		/* let the NIC stop polling this queue, since there
		 * are no more transmits pending */
		if (tx->req == tx->done) {
			*tx->send_stop = 1;
			tx->queue_active = 0;
			tx->deactivate++;
			wmb();
		}
	}
#endif
	mtx_unlock(&ss->tx.mtx);
}
/*
 * Media table used by mxge_media_probe() for NICs with an XFP cage.
 * mxge_media_probe() compares the byte it read against entry 0's
 * bitmask for equality and tests the remaining entries' bitmasks bit
 * by bit, in table order.
 * NOTE(review): initializers are positional and appear to be
 * (ifmedia flag, bitmask, name) -- confirm against the declaration of
 * struct mxge_media_type.  A first field of 0 presumably means there
 * is no matching IFM_* media type.
 */
static struct mxge_media_type mxge_xfp_media_types[] =
{
{IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
{IFM_10G_SR, (1 << 7), "10GBASE-SR"},
{IFM_10G_LR, (1 << 6), "10GBASE-LR"},
{0, (1 << 5), "10GBASE-ER"},
{IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
{0, (1 << 3), "10GBASE-SW"},
{0, (1 << 2), "10GBASE-LW"},
{0, (1 << 1), "10GBASE-EW"},
{0, (1 << 0), "Reserved"}
};
/*
 * Media table used by mxge_media_probe() for NICs with an SFP+ cage.
 * Entry 0 is matched when the byte read equals its bitmask (0 here);
 * the remaining entries are matched by testing their bitmask bits, in
 * table order.
 * NOTE(review): initializers are positional and appear to be
 * (ifmedia flag, bitmask, name) -- confirm against the declaration of
 * struct mxge_media_type.
 */
static struct mxge_media_type mxge_sfp_media_types[] =
{
{IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
{0, (1 << 7), "Reserved"},
{IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
{IFM_10G_LR, (1 << 5), "10GBASE-LR"},
{IFM_10G_SR, (1 << 4), "10GBASE-SR"}
};
/*
 * OR the given media type into the softc's media flags, then register
 * the combined value with ifmedia and select it as the current media.
 */
static void
mxge_set_media(mxge_softc_t *sc, int type)
{
	struct ifmedia *ifm = &sc->media;

	sc->media_flags |= type;
	ifmedia_add(ifm, sc->media_flags, 0, NULL);
	ifmedia_set(ifm, sc->media_flags);
}
/*
* Determine the media type for a NIC. Some XFPs will identify
* themselves only when their link is up, so this is initiated via a
* link up interrupt. However, this can potentially take up to
* several milliseconds, so it is run via the watchdog routine, rather
* than in the interrupt handler itself. This need only be done
* once, not each time the link is up.
*/
static void
mxge_media_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *cage_type = NULL;
	char *ptr;
	struct mxge_media_type *mxge_media_types = NULL;
	int i, err, ms, mxge_media_type_entries = 0;
	uint32_t byte = 0;

	sc->need_media_probe = 0;

	/* if we've already set a media type, we're done */
	if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
		return;

	/*
	 * parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
	 */
	ptr = sc->product_code_string;
	if (ptr == NULL) {
		device_printf(sc->dev, "Missing product code\n");
		/* nothing to parse; index() must not be handed NULL */
		return;
	}

	for (i = 0; i < 3; i++, ptr++) {
		ptr = index(ptr, '-');
		if (ptr == NULL) {
			device_printf(sc->dev,
			    "only %d dashes in PC?!?\n", i);
			return;
		}
	}
	if (*ptr == 'C') {
		/* -C is CX4 */
		mxge_set_media(sc, IFM_10G_CX4);
		return;
	}
	else if (*ptr == 'Q') {
		/* -Q is Quad Ribbon Fiber */
		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
		/* FreeBSD has no media type for Quad ribbon fiber */
		return;
	}

	if (*ptr == 'R') {
		/* -R is XFP */
		mxge_media_types = mxge_xfp_media_types;
		mxge_media_type_entries =
		    sizeof (mxge_xfp_media_types) /
		    sizeof (mxge_xfp_media_types[0]);
		byte = MXGE_XFP_COMPLIANCE_BYTE;
		cage_type = "XFP";
	}

	/*
	 * Only look at the following character when the string has not
	 * already ended, so we never read past the terminating NUL.
	 */
	if (*ptr == 'S' || (*ptr != '\0' && *(ptr + 1) == 'S')) {
		/* -S or -2S is SFP+ */
		mxge_media_types = mxge_sfp_media_types;
		mxge_media_type_entries =
		    sizeof (mxge_sfp_media_types) /
		    sizeof (mxge_sfp_media_types[0]);
		cage_type = "SFP+";
		byte = 3;
	}

	if (mxge_media_types == NULL) {
		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
		return;
	}

	/*
	 * At this point we know the NIC has an XFP or SFP+ cage, so now
	 * we try to determine what is in the cage by using the
	 * firmware's I2C commands to read the module's 10GbE compliance
	 * register.  We read just one byte, which may take over
	 * a millisecond
	 */

	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
	cmd.data1 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
	if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
		device_printf(sc->dev, "failed to read XFP\n");
	}
	if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
		device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
	}
	if (err != MXGEFW_CMD_OK) {
		return;
	}

	/* now we wait for the data to be cached */
	cmd.data0 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
		DELAY(1000);
		cmd.data0 = byte;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	}
	if (err != MXGEFW_CMD_OK) {
		device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
		    cage_type, err, ms);
		return;
	}

	/* entry 0 is matched by equality, the rest by individual bits */
	if (cmd.data0 == mxge_media_types[0].bitmask) {
		if (mxge_verbose)
			device_printf(sc->dev, "%s:%s\n", cage_type,
			    mxge_media_types[0].name);
		mxge_set_media(sc, mxge_media_types[0].flag);
		return;
	}
	for (i = 1; i < mxge_media_type_entries; i++) {
		if (cmd.data0 & mxge_media_types[i].bitmask) {
			if (mxge_verbose)
				device_printf(sc->dev, "%s:%s\n",
				    cage_type,
				    mxge_media_types[i].name);

			mxge_set_media(sc, mxge_media_types[i].flag);
			return;
		}
	}
	device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
	    cmd.data0);

	return;
}
/*
 * Per-slice interrupt service routine; arg is the slice's
 * struct mxge_slice_state.  (Presumably installed as the interrupt
 * handler -- the registration is not in this part of the file.)
 *
 * Processes transmit completions and received frames for the slice,
 * picks up link/error statistics on the first slice, and finally
 * writes to the slice's irq_claim words.
 */
static void
mxge_intr(void *arg)
{
struct mxge_slice_state *ss = arg;
mxge_softc_t *sc = ss->sc;
mcp_irq_data_t *stats = ss->fw_stats;
mxge_tx_ring_t *tx = &ss->tx;
mxge_rx_done_t *rx_done = &ss->rx_done;
uint32_t send_done_count;
uint8_t valid;
#ifndef IFNET_BUF_RING
/* an interrupt on a non-zero slice is implicitly valid
since MSI-X irqs are not shared */
if (ss != sc->ss) {
mxge_clean_rx_done(ss);
*ss->irq_claim = be32toh(3);
return;
}
#endif
/* make sure the DMA has finished */
if (!stats->valid) {
return;
}
/* keep a private copy: stats->valid itself is cleared just below */
valid = stats->valid;
if (sc->legacy_irq) {
/* lower legacy IRQ */
*sc->irq_deassert = 0;
if (!mxge_deassert_wait)
/* don't wait for conf. that irq is low */
stats->valid = 0;
} else {
stats->valid = 0;
}
/* loop while waiting for legacy irq deassertion */
do {
/* check for transmit completes and receives */
send_done_count = be32toh(stats->send_done_count);
/* keep going until tx is caught up and no rx entry is pending */
while ((send_done_count != tx->pkt_done) ||
(rx_done->entry[rx_done->idx].length != 0)) {
if (send_done_count != tx->pkt_done)
mxge_tx_done(ss, (int)send_done_count);
mxge_clean_rx_done(ss);
/* re-read: more sends may have completed in the meantime */
send_done_count = be32toh(stats->send_done_count);
}
if (sc->legacy_irq && mxge_deassert_wait)
wmb();
/* the volatile cast forces a fresh read of stats->valid each pass */
} while (*((volatile uint8_t *) &stats->valid));
/* fw link & error stats meaningful only on the first slice */
if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
if (sc->link_state != stats->link_up) {
sc->link_state = stats->link_up;
if (sc->link_state) {
if_link_state_change(sc->ifp, LINK_STATE_UP);
if (mxge_verbose)
device_printf(sc->dev, "link up\n");
} else {
if_link_state_change(sc->ifp, LINK_STATE_DOWN);
if (mxge_verbose)
device_printf(sc->dev, "link down\n");
}
/* any link transition requests a media probe (see mxge_watchdog) */
sc->need_media_probe = 1;
}
if (sc->rdma_tags_available !=
be32toh(stats->rdma_tags_available)) {
sc->rdma_tags_available =
be32toh(stats->rdma_tags_available);
device_printf(sc->dev, "RDMA timed out! %d tags "
"left\n", sc->rdma_tags_available);
}
if (stats->link_down) {
sc->down_cnt += stats->link_down;
sc->link_state = 0;
if_link_state_change(sc->ifp, LINK_STATE_DOWN);
}
}
/* check to see if we have rx token to pass back */
/* only the first write is conditional; irq_claim[1] is always written */
if (valid & 0x1)
*ss->irq_claim = be32toh(3);
*(ss->irq_claim + 1) = be32toh(3);
}
/*
 * No-op: the body is empty and the argument is not used.
 */
static void
mxge_init(void *arg)
{
	/* nothing to do */
}
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
struct lro_entry *lro_entry;
int i;
while (!SLIST_EMPTY(&ss->lro_free)) {
lro_entry = SLIST_FIRST(&ss->lro_free);
SLIST_REMOVE_HEAD(&ss->lro_free, next);
free(lro_entry, M_DEVBUF);
}
for (i = 0; i <= ss->rx_big.mask; i++) {
if (ss->rx_big.info[i].m == NULL)
continue;
bus_dmamap_unload(ss->rx_big.dmat,
ss->rx_big.info[i].map);
m_freem(ss->rx_big.info[i].m);
ss->rx_big.info[i].m = NULL;
}
for (i = 0; i <= ss->rx_small.mask; i++) {
if (ss->rx_small.info[i].m == NULL)
continue;
bus_dmamap_unload(ss->rx_small.dmat,
ss->rx_small.info[i].map);
m_freem(ss->rx_small.info[i].m);
ss->rx_small.info[i].m = NULL;
}
/* transmit ring used only on the first slice */
if (ss->tx.info == NULL)
return;
for (i = 0; i <= ss->tx.mask; i++) {
ss->tx.info[i].flag = 0;
if (ss->tx.info[i].m == NULL)
continue;
bus_dmamap_unload(ss->tx.dmat,
ss->tx.info[i].map);
m_freem(ss->tx.info[i].m);
ss->tx.info[i].m = NULL;
}
}
/*
 * Run mxge_free_slice_mbufs() over each of the adapter's slices.
 */
static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int idx = 0;

	while (idx < sc->num_slices) {
		mxge_free_slice_mbufs(&sc->ss[idx]);
		idx++;
	}
}
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
int i;
if (ss->rx_done.entry != NULL)
mxge_dma_free(&ss->rx_done.dma);
ss->rx_done.entry = NULL;
if (ss->tx.req_bytes != NULL)
free(ss->tx.req_bytes, M_DEVBUF);
ss->tx.req_bytes = NULL;
if (ss->tx.seg_list != NULL)
free(ss->tx.seg_list, M_DEVBUF);
ss->tx.seg_list = NULL;
if (ss->rx_small.shadow != NULL)
free(ss->rx_small.shadow, M_DEVBUF);
ss->rx_small.shadow = NULL;
if (ss->rx_big.shadow != NULL)
free(ss->rx_big.shadow, M_DEVBUF);
ss->rx_big.shadow = NULL;
if (ss->tx.info != NULL) {
if (ss->tx.dmat != NULL) {
for (i = 0; i <= ss->tx.mask; i++) {
bus_dmamap_destroy(ss->tx.dmat,
ss->tx.info[i].map);
}
bus_dma_tag_destroy(ss->tx.dmat);
}
free(ss->tx.info, M_DEVBUF);
}
ss->tx.info = NULL;
if (ss->rx_small.info != NULL) {
if (ss->rx_small.dmat != NULL) {
for (i = 0; i <= ss->rx_small.mask; i++) {
bus_dmamap_destroy(ss->rx_small.dmat,
ss->rx_small.info[i].map);
}
bus_dmamap_destroy(ss->rx_small.dmat,
ss->rx_small.extra_map);
bus_dma_tag_destroy(ss->rx_small.dmat);
}
free(ss->rx_small.info, M_DEVBUF);
}
ss->rx_small.info = NULL;
if (ss->rx_big.info != NULL) {
if (ss->rx_big.dmat != NULL) {
for (i = 0; i <= ss->rx_big.mask; i++) {
bus_dmamap_destroy(ss->rx_big.dmat,
ss->rx_big.info[i].map);
}
bus_dmamap_destroy(ss->rx_big.dmat,
ss->rx_big.extra_map);
bus_dma_tag_destroy(ss->rx_big.dmat);
}
free(ss->rx_big.info, M_DEVBUF);
}
ss->rx_big.info = NULL;
}
/*
 * Run mxge_free_slice_rings() over each of the adapter's slices.
 */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int idx = 0;

	while (idx < sc->num_slices) {
		mxge_free_slice_rings(&sc->ss[idx]);
		idx++;
	}
}
/*
 * Allocate the host-side ring state for one slice: the rx shadow and
 * info rings, the rx busdma tags and per-slot maps (plus one spare map
 * per rx ring) and, for a slice that transmits, the tx request block,
 * segment list, info ring, busdma tag and per-slot maps.
 *
 * rx_ring_entries and tx_ring_entries are ring sizes in entries; the
 * ring masks are derived as (entries - 1).
 *
 * Returns 0 on success, ENOMEM if a malloc() fails, or the error from
 * bus_dma_tag_create() / bus_dmamap_create().  Nothing is released
 * here on failure; the caller is expected to clean up.
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
    int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	/* allocate per-slice receive resources */

	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.shadow == NULL)
		return (ENOMEM);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.shadow == NULL)
		return (ENOMEM);

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.info == NULL)
		return (ENOMEM);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.info == NULL)
		return (ENOMEM);

	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
		    err);
		return err;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
		    err);
		return err;
	}

	/* one map per ring slot, plus a spare used when swapping buffers */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
		    &ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
			    err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
	    &ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
		    err);
		return err;
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
		    &ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
			    err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
	    &ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
		    err);
		return err;
	}

	/* now allocate TX resources */

#ifndef IFNET_BUF_RING
	/* only use a single TX ring for now */
	if (ss != ss->sc->ss)
		return 0;
#endif

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);

	/*
	 * From here on err holds 0 from the last successful
	 * bus_dmamap_create(), so allocation failures must report
	 * ENOMEM explicitly rather than returning err.
	 */

	/* allocate the tx request copy block */
	bytes = 8 +
	    sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.req_bytes == NULL)
		return (ENOMEM);
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
	    ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
	    malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.seg_list == NULL)
		return (ENOMEM);

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->tx.info == NULL)
		return (ENOMEM);

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
		    err);
		return err;
	}

	/* now use these tags to setup dmamaps for each slot
	   in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
		    &ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
			    err);
			return err;
		}
	}
	return 0;
}
/*
 * Ask the firmware for the send ring size, size the interface send
 * queue from it, and allocate ring state for every slice.  On any
 * failure all rings are released via mxge_free_rings() and the error
 * is returned; returns 0 on success.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_bytes;
	int n_tx, n_rx;
	int rc, s;

	/* get ring sizes */
	rc = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_bytes = cmd.data0;
	if (rc != 0) {
		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
		mxge_free_rings(sc);
		return rc;
	}

	/* convert ring sizes from bytes to entries */
	n_tx = tx_bytes / sizeof (mcp_kreq_ether_send_t);
	n_rx = sc->rx_ring_size / sizeof (mcp_dma_addr_t);

	IFQ_SET_MAXLEN(&sc->ifp->if_snd, n_tx - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	for (s = 0; s < sc->num_slices; s++) {
		rc = mxge_alloc_slice_rings(&sc->ss[s], n_rx, n_tx);
		if (rc != 0) {
			mxge_free_rings(sc);
			return rc;
		}
	}
	return 0;
}
/*
 * Pick the big receive buffer parameters for a given MTU.
 *
 *   mtu          - interface MTU
 *   big_buf_size - out: buffer size handed to the firmware
 *   cl_size      - out: mbuf cluster size to allocate
 *   nbufs        - out: number of ring slots one buffer occupies
 *
 * The frame size considered is the MTU plus ethernet header, VLAN
 * encapsulation and the firmware's leading pad.
 */
static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
	int frame = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;

	if (frame < MCLBYTES) {
		/* easy, everything fits in a single buffer */
		*big_buf_size = MCLBYTES;
		*cl_size = MCLBYTES;
		*nbufs = 1;
	} else if (frame < MJUMPAGESIZE) {
		/* still easy, everything still fits in a single buffer */
		*big_buf_size = MJUMPAGESIZE;
		*cl_size = MJUMPAGESIZE;
		*nbufs = 1;
	} else {
#if MXGE_VIRT_JUMBOS
		/* now we need to use virtually contiguous buffers */
		*cl_size = MJUM9BYTES;
		*big_buf_size = 4096;
		*nbufs = mtu / 4096 + 1;
		/* needs to be a power of two, so round up */
		if (*nbufs == 3)
			*nbufs = 4;
#else
		*cl_size = MJUM9BYTES;
		*big_buf_size = MJUM9BYTES;
		*nbufs = 1;
#endif
	}
}
/*
 * Prepare one slice for operation: build its free list of LRO
 * entries, obtain the NIC SRAM locations of its send and receive
 * rings from the firmware, and stock the small and big receive rings
 * with buffers.
 *
 * nbufs and cl_size are stored in the big ring (rx_big.nbufs,
 * rx_big.cl_size) and used by mxge_get_buf_big().
 *
 * Returns 0 on success, EIO if a firmware query failed, or ENOMEM if a
 * receive ring could not be fully stocked.  Nothing allocated here is
 * released on failure.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
mxge_softc_t *sc;
mxge_cmd_t cmd;
bus_dmamap_t map;
struct lro_entry *lro_entry;
int err, i, slice;
sc = ss->sc;
/* slice index, derived from the position in the sc->ss array */
slice = ss - sc->ss;
SLIST_INIT(&ss->lro_free);
SLIST_INIT(&ss->lro_active);
for (i = 0; i < sc->lro_cnt; i++) {
lro_entry = (struct lro_entry *)
malloc(sizeof (*lro_entry), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (lro_entry == NULL) {
/* allocation failed: lower the adapter-wide LRO count to what we got */
sc->lro_cnt = i;
break;
}
SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
}
/* get the lanai pointers to the send and receive rings */
err = 0;
#ifndef IFNET_BUF_RING
/* We currently only send from the first slice */
if (slice == 0) {
#endif
/* cmd.data0 carries the slice index in and the SRAM offset out */
cmd.data0 = slice;
err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
ss->tx.lanai =
(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
ss->tx.send_go = (volatile uint32_t *)
(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
ss->tx.send_stop = (volatile uint32_t *)
(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
#ifndef IFNET_BUF_RING
}
#endif
/* errors are OR-ed together and checked once after all three queries */
cmd.data0 = slice;
err |= mxge_send_cmd(sc,
MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
ss->rx_small.lanai =
(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
cmd.data0 = slice;
err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
ss->rx_big.lanai =
(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
if (err != 0) {
device_printf(sc->dev,
"failed to get ring sizes or locations\n");
return EIO;
}
/* stock receive rings */
for (i = 0; i <= ss->rx_small.mask; i++) {
map = ss->rx_small.info[i].map;
err = mxge_get_buf_small(ss, map, i);
if (err) {
device_printf(sc->dev, "alloced %d/%d smalls\n",
i, ss->rx_small.mask + 1);
return ENOMEM;
}
}
/* pre-fill every big shadow entry with all-ones before stocking */
for (i = 0; i <= ss->rx_big.mask; i++) {
ss->rx_big.shadow[i].addr_low = 0xffffffff;
ss->rx_big.shadow[i].addr_high = 0xffffffff;
}
ss->rx_big.nbufs = nbufs;
ss->rx_big.cl_size = cl_size;
ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
/* each big buffer occupies nbufs consecutive ring slots */
for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
map = ss->rx_big.info[i].map;
err = mxge_get_buf_big(ss, map, i);
if (err) {
device_printf(sc->dev, "alloced %d/%d bigs\n",
i, ss->rx_big.mask + 1);
return ENOMEM;
}
}
return 0;
}
/*
 * Bring the interface up: reset the NIC, configure RSS when more than
 * one slice is in use, give the firmware the MTU, buffer sizes and the
 * statistics DMA address, open every slice, start the firmware's
 * ethernet side and mark the interface running.
 *
 * Returns 0 on success.  Failures after the buffer parameters have
 * been chosen go through "abort", which frees all slice mbufs via
 * mxge_free_mbufs(); earlier failures return directly.
 */
static int
mxge_open(mxge_softc_t *sc)
{
mxge_cmd_t cmd;
int err, big_bytes, nbufs, slice, cl_size, i;
bus_addr_t bus;
volatile uint8_t *itable;
struct mxge_slice_state *ss;
/* Copy the MAC address in case it was overridden */
bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
err = mxge_reset(sc, 1);
if (err != 0) {
device_printf(sc->dev, "failed to reset\n");
return EIO;
}
if (sc->num_slices > 1) {
/* setup the indirection table */
cmd.data0 = sc->num_slices;
err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
&cmd);
err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
&cmd);
if (err != 0) {
device_printf(sc->dev,
"failed to setup rss tables\n");
return err;
}
/* just enable an identity mapping */
itable = sc->sram + cmd.data0;
for (i = 0; i < sc->num_slices; i++)
itable[i] = (uint8_t)i;
cmd.data0 = 1;
cmd.data1 = mxge_rss_hash_type;
err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
if (err != 0) {
device_printf(sc->dev, "failed to enable slices\n");
return err;
}
}
mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
cmd.data0 = nbufs;
err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
&cmd);
/* error is only meaningful if we're trying to set
MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
if (err && nbufs > 1) {
device_printf(sc->dev,
"Failed to set alway-use-n to %d\n",
nbufs);
return EIO;
}
/* Give the firmware the mtu and the big and small buffer
sizes. The firmware wants the big buf size to be a power
of two. Luckily, FreeBSD's clusters are powers of two */
cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
/* same small/big threshold that mxge_clean_rx_done() tests against */
cmd.data0 = MHLEN - MXGEFW_PAD;
err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
&cmd);
cmd.data0 = big_bytes;
err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
if (err != 0) {
device_printf(sc->dev, "failed to setup params\n");
goto abort;
}
/* Now give him the pointer to the stats block */
/* err is known to be 0 here, so the |= below accumulates only these calls */
for (slice = 0;
#ifdef IFNET_BUF_RING
slice < sc->num_slices;
#else
slice < 1;
#endif
slice++) {
ss = &sc->ss[slice];
cmd.data0 =
MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
cmd.data1 =
MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
cmd.data2 = sizeof(struct mcp_irq_data);
cmd.data2 |= (slice << 16);
err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
}
if (err != 0) {
/* V2 command failed: fall back to the obsolete command, first slice only */
bus = sc->ss->fw_stats_dma.bus_addr;
bus += offsetof(struct mcp_irq_data, send_done_count);
cmd.data0 = MXGE_LOWPART_TO_U32(bus);
cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
err = mxge_send_cmd(sc,
MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
&cmd);
/* Firmware cannot support multicast without STATS_DMA_V2 */
sc->fw_multicast_support = 0;
} else {
sc->fw_multicast_support = 1;
}
if (err != 0) {
device_printf(sc->dev, "failed to setup params\n");
goto abort;
}
for (slice = 0; slice < sc->num_slices; slice++) {
err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
if (err != 0) {
device_printf(sc->dev, "couldn't open slice %d\n",
slice);
goto abort;
}
}
/* Finally, start the firmware running */
err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
if (err) {
device_printf(sc->dev, "Couldn't bring up link\n");
goto abort;
}
#ifdef IFNET_BUF_RING
for (slice = 0; slice < sc->num_slices; slice++) {
ss = &sc->ss[slice];
ss->if_drv_flags |= IFF_DRV_RUNNING;
ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
}
#endif
sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
return 0;
abort:
/* release whatever receive/transmit mbufs the slices already hold */
mxge_free_mbufs(sc);
return err;
}
/*
 * Stop the interface: clear the RUNNING flag(s), and, unless the
 * caller says the NIC is already down (down != 0), ask the firmware to
 * take the link down and wait briefly for the corresponding interrupt
 * to bump sc->down_cnt.  All slice mbufs are freed afterwards.
 * Always returns 0.
 */
static int
mxge_close(mxge_softc_t *sc, int down)
{
	mxge_cmd_t cmd;
	int err, down_cnt_before;
#ifdef IFNET_BUF_RING
	struct mxge_slice_state *ss;
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags &= ~IFF_DRV_RUNNING;
	}
#endif
	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;

	if (down == 0) {
		down_cnt_before = sc->down_cnt;
		wmb();
		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
		if (err != 0)
			device_printf(sc->dev,
			    "Couldn't bring down link\n");

		/* wait for down irq */
		if (sc->down_cnt == down_cnt_before)
			DELAY(10 * sc->intr_coal_delay);
		wmb();

		/* still unchanged after the delay: report it and carry on */
		if (sc->down_cnt == down_cnt_before)
			device_printf(sc->dev, "never got down irq\n");
	}

	mxge_free_mbufs(sc);
	return 0;
}
/*
 * Apply the driver's PCI configuration-space settings: record the
 * PCIe link width, program (or restore) the device-control value kept
 * in sc->pectl, and enable bus mastering and memory-space access.
 */
static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t cmd, lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		if (sc->pectl == 0) {
			/* first call: compute the value and cache it */
			pectl = pci_read_config(dev, reg + 0x8, 2);
			pectl = (pectl & ~0x7000) | (5 << 12);
			pci_write_config(dev, reg + 0x8, pectl, 2);
			sc->pectl = pectl;
		} else {
			/* restore saved pectl after watchdog reset */
			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
		}
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}
/*
 * Read a 32-bit status word from the NIC through its vendor-specific
 * PCI capability.  Returns the value read, or (uint32_t)-1 when the
 * capability cannot be found.
 */
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) == 0) {
		/* enable read32 mode */
		pci_write_config(dev, vs + 0x10, 0x3, 1);
		/* tell NIC which register to read */
		pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
		return (pci_read_config(dev, vs + 0x14, 4));
	}

	device_printf(sc->dev,
	    "could not find vendor specific offset\n");
	return (uint32_t)-1;
}
/*
 * Recover from a suspected NIC hang.  If the PCI command register
 * shows that bus mastering has been turned off, the NIC is taken to
 * have rebooted: the interface is quiesced (when it was running), PCI
 * config space is restored, the firmware is reloaded and the interface
 * is reopened.  Otherwise nothing is reset.  Unless the recovery
 * failed, the periodic callout is re-armed.
 *
 * Called with sc->driver_mtx held by mxge_watchdog_task().
 */
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
struct pci_devinfo *dinfo;
struct mxge_slice_state *ss;
int err, running, s, num_tx_slices = 1;
uint32_t reboot;
uint16_t cmd;
err = ENXIO;
device_printf(sc->dev, "Watchdog reset!\n");
/*
* check to see if the NIC rebooted. If it did, then all of
* PCI config space has been reset, and things like the
* busmaster bit will be zero. If this is the case, then we
* must restore PCI config space before the NIC can be used
* again
*/
cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
if (cmd == 0xffff) {
/*
* maybe the watchdog caught the NIC rebooting; wait
* up to 100ms for it to finish. If it does not come
* back, then give up
*/
DELAY(1000*100);
cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
if (cmd == 0xffff) {
device_printf(sc->dev, "NIC disappeared!\n");
/* NOTE(review): no return here; execution continues with cmd == 0xffff */
}
}
if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
/* print the reboot status */
reboot = mxge_read_reboot(sc);
device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
reboot);
/* running is only assigned and read inside this branch */
running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
if (running) {
/*
* quiesce NIC so that TX routines will not try to
* xmit after restoration of BAR
*/
/* Mark the link as down */
if (sc->link_state) {
sc->link_state = 0;
if_link_state_change(sc->ifp,
LINK_STATE_DOWN);
}
#ifdef IFNET_BUF_RING
num_tx_slices = sc->num_slices;
#endif
/* grab all TX locks to ensure no tx */
for (s = 0; s < num_tx_slices; s++) {
ss = &sc->ss[s];
mtx_lock(&ss->tx.mtx);
}
/* down=1: mxge_close() skips the firmware "ethernet down" command */
mxge_close(sc, 1);
}
/* restore PCI configuration space */
dinfo = device_get_ivars(sc->dev);
pci_cfg_restore(sc->dev, dinfo);
/* and redo any changes we made to our config space */
mxge_setup_cfg_space(sc);
/* reload f/w */
err = mxge_load_firmware(sc, 0);
if (err) {
device_printf(sc->dev,
"Unable to re-load f/w\n");
}
if (running) {
if (!err)
err = mxge_open(sc);
/* release all TX locks */
/* the locks taken above are dropped even if reload or reopen failed */
for (s = 0; s < num_tx_slices; s++) {
ss = &sc->ss[s];
#ifdef IFNET_BUF_RING
mxge_start_locked(ss);
#endif
mtx_unlock(&ss->tx.mtx);
}
}
sc->watchdog_resets++;
} else {
device_printf(sc->dev,
"NIC did not reboot, not resetting\n");
err = 0;
}
if (err) {
device_printf(sc->dev, "watchdog reset failed\n");
} else {
/* NOTE(review): the meaning of dying == 2 is defined outside this function */
if (sc->dying == 2)
sc->dying = 0;
callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
}
}
/*
 * Taskqueue handler behind sc->watchdog_task: performs the watchdog
 * reset with the driver mutex held.  The "pending" count is not used.
 */
static void
mxge_watchdog_task(void *arg, int pending)
{
	mxge_softc_t *softc;

	softc = arg;

	mtx_lock(&softc->driver_mtx);
	mxge_watchdog_reset(softc);
	mtx_unlock(&softc->driver_mtx);
}
/*
 * Log the transmit ring state of a slice that appears to be stuck.
 *
 * sc:    driver softc
 * tx:    ignored on entry; it is re-derived from "slice" (kept for
 *        interface compatibility with existing callers)
 * slice: index into sc->ss[] of the slice to report
 *
 * The firmware "send done" counter is taken from the reported slice's
 * own stats block.  Slices without a stats block (fw_stats is only
 * allocated for slice 0 unless IFNET_BUF_RING is defined) fall back to
 * slice 0's counter.
 */
static void
mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
{
	struct mxge_slice_state *ss = &sc->ss[slice];
	mcp_irq_data_t *stats;

	tx = &ss->tx;
	stats = (ss->fw_stats != NULL) ? ss->fw_stats : sc->ss->fw_stats;

	device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
	device_printf(sc->dev,
		      "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
		      tx->req, tx->done, tx->queue_active);
	device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
		      tx->activate, tx->deactivate);
	device_printf(sc->dev, "pkt_done=%d fw=%d\n",
		      tx->pkt_done,
		      be32toh(stats->send_done_count));
}
/*
 * Periodic transmit watchdog, called from mxge_tick().
 *
 * For each transmitting slice (all slices with IFNET_BUF_RING, slice 0
 * otherwise) the current req/done counters are compared with the
 * snapshot taken on the previous call.  A ring that has outstanding
 * requests and has made no progress since the snapshot is considered
 * stuck, unless the firmware's dropped_pause counter changed in the
 * meantime, in which case only a flow-control warning is printed.
 *
 * Returns ENXIO after queueing the watchdog task for a stuck ring
 * (in that case the remaining slices are not examined and the media
 * probe is skipped), otherwise 0.
 */
static int
mxge_watchdog(mxge_softc_t *sc)
{
mxge_tx_ring_t *tx;
/* pause counter is read from slice 0's firmware stats */
uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
int i, err = 0;
/* see if we have outstanding transmits, which
have been pending for more than mxge_ticks */
/* err is never set inside the loop, so "err == 0" always holds here */
for (i = 0;
#ifdef IFNET_BUF_RING
(i < sc->num_slices) && (err == 0);
#else
(i < 1) && (err == 0);
#endif
i++) {
tx = &sc->ss[i].tx;
if (tx->req != tx->done &&
tx->watchdog_req != tx->watchdog_done &&
tx->done == tx->watchdog_done) {
/* check for pause blocking before resetting */
if (tx->watchdog_rx_pause == rx_pause) {
mxge_warn_stuck(sc, tx, i);
taskqueue_enqueue(sc->tq, &sc->watchdog_task);
return (ENXIO);
}
else
device_printf(sc->dev, "Flow control blocking "
"xmits, check link partner\n");
}
/* take a fresh snapshot for the next invocation */
tx->watchdog_req = tx->req;
tx->watchdog_done = tx->done;
tx->watchdog_rx_pause = rx_pause;
}
if (sc->need_media_probe)
mxge_media_probe(sc);
return (err);
}
/*
 * Sum the per-slice counters and publish the totals in the ifnet.
 *
 * Returns the number of packets (received plus transmitted) counted
 * since the previous call, i.e. the difference between the new totals
 * and the values previously stored in the ifnet.
 */
static u_long
mxge_update_stats(mxge_softc_t *sc)
{
	struct mxge_slice_state *cur;
	u_long rx_total, tx_total, err_total, delta;
#ifdef IFNET_BUF_RING
	u_long byte_total, mcast_total, drop_total;
#endif
	int idx;

	rx_total = 0;
	tx_total = 0;
	err_total = 0;
#ifdef IFNET_BUF_RING
	byte_total = 0;
	mcast_total = 0;
	drop_total = 0;
#endif

	idx = 0;
	while (idx < sc->num_slices) {
		cur = &sc->ss[idx];
		rx_total += cur->ipackets;
		tx_total += cur->opackets;
#ifdef IFNET_BUF_RING
		byte_total += cur->obytes;
		mcast_total += cur->omcasts;
		drop_total += cur->tx.br->br_drops;
#endif
		err_total += cur->oerrors;
		idx++;
	}

	/* traffic seen since the totals were last stored */
	delta = (rx_total - sc->ifp->if_ipackets);
	delta += (tx_total - sc->ifp->if_opackets);

	sc->ifp->if_ipackets = rx_total;
	sc->ifp->if_opackets = tx_total;
#ifdef IFNET_BUF_RING
	sc->ifp->if_obytes = byte_total;
	sc->ifp->if_omcasts = mcast_total;
	sc->ifp->if_snd.ifq_drops = drop_total;
#endif
	sc->ifp->if_oerrors = err_total;

	return (delta);
}
/*
 * Periodic callout handler.
 *
 * While the interface is running it refreshes the interface statistics
 * and runs the transmit watchdog on every fourth invocation.  When no
 * packets moved since the last tick it also checks the PCI busmaster
 * bit to catch a hardware fault on an idle NIC, and stretches the next
 * interval by a factor of four.  The callout re-arms itself unless a
 * problem was detected, in which case the watchdog task takes over.
 */
static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc;
	u_long moved;
	uint16_t cmd;
	int interval, rc;

	sc = arg;
	moved = 0;
	rc = 0;
	interval = mxge_ticks;

	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
		/* aggregate stats from different slices */
		moved = mxge_update_stats(sc);
		if (sc->watchdog_countdown == 0) {
			rc = mxge_watchdog(sc);
			sc->watchdog_countdown = 4;
		}
		sc->watchdog_countdown--;
	}

	if (moved == 0) {
		/* ensure NIC did not suffer h/w fault while idle */
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (!(cmd & PCIM_CMD_BUSMASTEREN)) {
			sc->dying = 2;
			taskqueue_enqueue(sc->tq, &sc->watchdog_task);
			rc = ENXIO;
		}
		/* look less often if NIC is idle */
		interval *= 4;
	}

	if (rc != 0)
		return;
	callout_reset(&sc->co_hdl, interval, mxge_tick, sc);
}
/*
 * ifmedia "change" callback.  Every request to change the media is
 * rejected with EINVAL.
 */
static int
mxge_media_change(struct ifnet *ifp)
{
	(void)ifp;

	return (EINVAL);
}
/*
 * Change the interface MTU.
 *
 * The requested MTU plus Ethernet and VLAN encapsulation overhead must
 * lie between 60 bytes and sc->max_mtu, otherwise EINVAL is returned.
 * If the interface is running it is closed and re-opened so the new
 * size takes effect; should the re-open fail, the previous MTU is put
 * back, the interface is cycled once more, and the original error is
 * returned.
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp;
	int frame_len, saved_mtu, rc;

	ifp = sc->ifp;
	rc = 0;

	frame_len = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if (frame_len > sc->max_mtu || frame_len < 60)
		return (EINVAL);

	mtx_lock(&sc->driver_mtx);
	saved_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
		mxge_close(sc, 0);
		rc = mxge_open(sc);
		if (rc != 0) {
			/* roll back to the MTU that was working before */
			ifp->if_mtu = saved_mtu;
			mxge_close(sc, 0);
			(void)mxge_open(sc);
		}
	}
	mtx_unlock(&sc->driver_mtx);

	return (rc);
}
/*
 * ifmedia "status" callback: reports auto-selected Ethernet media and,
 * when the driver's link_state is non-zero, an active full-duplex link.
 * Does nothing if the interface has no softc attached.
 */
static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *softc;

	softc = ifp->if_softc;
	if (softc == NULL)
		return;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	if (softc->link_state) {
		ifmr->ifm_status |= IFM_ACTIVE;
		ifmr->ifm_active |= IFM_FDX;
	}
}
/*
 * Interface ioctl handler.
 *
 * ifp:     interface the request is for
 * command: SIOC* request code
 * data:    request argument; interpreted as a struct ifreq for the
 *          requests that need one
 *
 * Returns 0 on success or an errno value.  Requests that are not
 * handled here yield ENOTTY.  SIOCSIFFLAGS is refused with EINVAL
 * while sc->dying is set.
 *
 * For SIOCSIFCAP each capability bit that differs between the request
 * and the currently enabled set is processed independently, so a
 * single request may toggle TX checksum and RX checksum together.
 */
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc, 0);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		/* bits that differ between the request and current state */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				/* TSO depends on tx checksum; drop both */
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		/*
		 * Handled independently of TXCSUM: a request that flips
		 * both bits must not lose the RXCSUM change.
		 */
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc, mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);
		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}
/*
 * Read the hw.mxge.* loader tunables into the driver globals (and
 * sc->lro_cnt), then sanitize them: out-of-range values are replaced
 * by defaults or clamped.  Finally the per-device pause and throttle
 * settings in the softc are seeded from the globals.
 */
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{
	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
	    &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware", &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait", &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose", &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	/* both spellings are fetched into the same variable, in this order */
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);

	if (sc->lro_cnt != 0) {
		mxge_lro_cnt = sc->lro_cnt;
	}

	if (bootverbose) {
		mxge_verbose = 1;
	}

	/* interrupt coalescing delay must lie within [0, 10000] */
	if (mxge_intr_coal_delay > 10*1000 || mxge_intr_coal_delay < 0) {
		mxge_intr_coal_delay = 30;
	}

	/* zero ticks selects the default period of half a second */
	if (mxge_ticks == 0) {
		mxge_ticks = hz / 2;
	}

	sc->pause = mxge_flow_control;

	if (mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX ||
	    mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}

	if (mxge_initial_mtu < ETHER_MIN_LEN ||
	    mxge_initial_mtu > ETHERMTU_JUMBO) {
		mxge_initial_mtu = ETHERMTU_JUMBO;
	}

	/* a throttle of zero is left alone; anything else is clamped */
	if (mxge_throttle != 0 && mxge_throttle > MXGE_MAX_THROTTLE) {
		mxge_throttle = MXGE_MAX_THROTTLE;
	}
	if (mxge_throttle != 0 && mxge_throttle < MXGE_MIN_THROTTLE) {
		mxge_throttle = MXGE_MIN_THROTTLE;
	}
	sc->throttle = mxge_throttle;
}
/*
 * Release everything mxge_alloc_slices() set up: per-slice firmware
 * stats DMA memory, the transmit buf_ring (IFNET_BUF_RING only), the
 * transmit mutex, the rx-done interrupt queue, and finally the slice
 * array itself.  Safe to call when no slice array exists.
 */
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *slice;
	int idx;

	if (sc->ss == NULL)
		return;

	idx = 0;
	while (idx < sc->num_slices) {
		slice = &sc->ss[idx++];

		/*
		 * The tx mutex (and buf_ring) only exist on slices whose
		 * firmware stats were allocated, so they are torn down
		 * together.
		 */
		if (slice->fw_stats != NULL) {
			mxge_dma_free(&slice->fw_stats_dma);
			slice->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (slice->tx.br != NULL) {
				drbr_free(slice->tx.br, M_DEVBUF);
				slice->tx.br = NULL;
			}
#endif
			mtx_destroy(&slice->tx.mtx);
		}

		if (slice->rx_done.entry != NULL) {
			mxge_dma_free(&slice->rx_done.dma);
			slice->rx_done.entry = NULL;
		}
	}

	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}
/*
 * Allocate and initialize the per-slice state array.
 *
 * Queries the firmware for the rx ring size, allocates sc->ss[] with
 * sc->num_slices zeroed entries, and gives each slice an rx-done
 * interrupt queue.  Firmware stats, the tx mutex and (with
 * IFNET_BUF_RING) the tx buf_ring are set up for every slice when
 * IFNET_BUF_RING is defined, otherwise for slice 0 only.
 *
 * Returns 0 on success, the mxge_send_cmd() error if the ring size
 * cannot be read, or ENOMEM if any allocation fails (everything
 * allocated so far is released first).
 */
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *slice;
	size_t len;
	int rc, idx, slots;

	rc = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (rc != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return (rc);
	}
	sc->rx_ring_size = cmd.data0;
	slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	len = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);

	for (idx = 0; idx < sc->num_slices; idx++) {
		slice = &sc->ss[idx];
		slice->sc = sc;

		/* allocate per-slice rx interrupt queues */
		len = slots * sizeof (*slice->rx_done.entry);
		rc = mxge_dma_alloc(sc, &slice->rx_done.dma, len, 4096);
		if (rc != 0) {
			mxge_free_slices(sc);
			return (ENOMEM);
		}
		slice->rx_done.entry = slice->rx_done.dma.addr;
		bzero(slice->rx_done.entry, len);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (idx > 0)
			continue;
#endif

		len = sizeof (*slice->fw_stats);
		rc = mxge_dma_alloc(sc, &slice->fw_stats_dma, len, 64);
		if (rc != 0) {
			mxge_free_slices(sc);
			return (ENOMEM);
		}
		slice->fw_stats = (mcp_irq_data_t *)slice->fw_stats_dma.addr;

		snprintf(slice->tx.mtx_name, sizeof(slice->tx.mtx_name),
		    "%s:tx(%d)", device_get_nameunit(sc->dev), idx);
		mtx_init(&slice->tx.mtx, slice->tx.mtx_name, NULL, MTX_DEF);
#ifdef IFNET_BUF_RING
		slice->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
		    &slice->tx.mtx);
#endif
	}

	return (0);
}
/*
 * Decide how many slices (RSS queues) to use and store the result in
 * sc->num_slices.
 *
 * Multiple slices are only attempted when the hw.mxge.max_slices
 * tunable allows it, the system has more than one CPU and at least two
 * MSI-X messages are available.  The slice-aware (RSS) firmware is
 * loaded and queried; the slice count is then capped by the number of
 * MSI-X messages and by either the CPU count (max_slices == -1) or the
 * tunable, and rounded down to a power of two.
 *
 * On any failure the driver stays with a single slice and sc->fw_name
 * is put back to the firmware that was selected before the probe, so
 * that later firmware reloads do not retry the image that just failed.
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 *  don't enable multiple slices if they are not enabled,
	 *  or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		/* do not leave fw_name naming an image that failed to load */
		sc->fw_name = old_fw;
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	/* go back to the previously selected firmware and reload it */
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
/*
 * Set up one MSI-X interrupt per slice.
 *
 * Maps the MSI-X table BAR, allocates sc->num_slices MSI-X messages,
 * allocates an IRQ resource for each message and installs mxge_intr()
 * as the handler with the matching slice as its argument.
 *
 * Returns 0 on success.  On failure everything acquired so far is
 * released and an errno value is returned (ENXIO, ENOSPC, ENOMEM, or
 * the error from pci_alloc_msix()/bus_setup_intr()).
 */
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		/* "need" is what we asked for, "got" is what was granted */
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		/* MSI-X resource ids start at 1 */
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							  SYS_RES_IRQ,
							  &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		/* M_NOWAIT allocations may fail; do not index a NULL array */
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld",  rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	/* handlers that were never installed are still NULL (M_ZERO) */
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
/*
 * Set up the single interrupt used when only one slice is active.
 *
 * A single MSI message is used when the device offers exactly one and
 * it can be allocated; otherwise the driver falls back to a shared
 * legacy INTx interrupt and records that in sc->legacy_irq.
 * mxge_intr() is installed with slice 0 as its argument.
 *
 * Returns 0 on success, ENXIO if no IRQ resource could be allocated,
 * or the bus_setup_intr() error.  On failure the IRQ resource and any
 * MSI message allocated here are released again.
 */
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		/* give back the MSI message allocated above, if any */
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}
/*
 * Undo mxge_add_msix_irqs(): remove every installed interrupt handler,
 * release the per-slice IRQ resources, free both bookkeeping arrays,
 * release the MSI-X table BAR and give the MSI-X messages back.
 */
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int slot;

	/* first detach all handlers ... */
	for (slot = 0; slot < sc->num_slices; slot++) {
		if (sc->msix_ih[slot] == NULL)
			continue;
		bus_teardown_intr(sc->dev, sc->msix_irq_res[slot],
		    sc->msix_ih[slot]);
		sc->msix_ih[slot] = NULL;
	}
	free(sc->msix_ih, M_DEVBUF);

	/* ... then drop the IRQ resources (resource id is slot + 1) */
	for (slot = 0; slot < sc->num_slices; slot++) {
		if (sc->msix_irq_res[slot] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ, slot + 1,
			    sc->msix_irq_res[slot]);
		}
		sc->msix_irq_res[slot] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
	    sc->msix_table_res);

	pci_release_msi(sc->dev);
}
/*
 * Undo mxge_add_single_irq(): remove the handler, release the IRQ
 * resource (id 0 for legacy INTx, 1 for MSI) and, for MSI, release the
 * message as well.
 */
static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	int irq_rid;

	irq_rid = sc->legacy_irq ? 0 : 1;

	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ, irq_rid, sc->irq_res);
	if (irq_rid != 0)
		pci_release_msi(sc->dev);
}
/*
 * Tear down interrupts using whichever scheme matches the slice count:
 * the single-interrupt path for one slice, MSI-X for several.
 */
static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices <= 1) {
		mxge_rem_single_irq(sc);
		return;
	}
	mxge_rem_msix_irqs(sc);
}
/*
 * Set up interrupts: MSI-X (one message per slice) when more than one
 * slice is in use, otherwise a single MSI or legacy interrupt.
 *
 * Returns 0 on success or the errno value from the setup routine.
 */
static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	return err;
}
/*
 * Device attach method.
 *
 * Brings the NIC up far enough to be usable as a network interface:
 * fetches tunables, creates the watchdog taskqueue and parent DMA tag,
 * allocates the ifnet and locks, maps the board's memory, copies out
 * the EEPROM strings, allocates DMA scratch areas, selects and loads
 * firmware, probes and allocates slices and rings, hooks up
 * interrupts, fills in the ifnet and attaches it to the Ethernet
 * layer, and finally starts the periodic mxge_tick callout.
 *
 * Returns 0 on success.  On failure every resource acquired up to that
 * point is released (the abort_* labels unwind in reverse order of
 * acquisition) and an errno value is returned.
 */
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK,
				       taskqueue_thread_enqueue,
				       &sc->tq);
	if (sc->tq == NULL) {
		err = ENOMEM;
		goto abort_with_nothing;
	}
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
				device_get_nameunit(sc->dev));

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC, 	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	/* the tick callout runs with driver_mtx held */
	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		/*
		 * The slices are allocated at this point and must be
		 * released.  NOTE(review): this assumes mxge_alloc_rings()
		 * cleans up its own partial allocations on failure --
		 * confirm against its definition.
		 */
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	/* LRO stays advertised but is disabled unless lro_cnt is set */
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
abort_with_nothing:
	return err;
}
/*
 * Device detach method.
 *
 * Refuses with EBUSY while mxge_vlans_active() reports vlans on the
 * interface.  Otherwise marks the softc as dying (sc->dying = 1, which
 * makes mxge_ioctl() reject SIOCSIFFLAGS), closes the interface if it
 * is running, detaches it from the Ethernet layer and then releases
 * the resources acquired in mxge_attach().  Returns 0 on success.
 */
static int
mxge_detach(device_t dev)
{
mxge_softc_t *sc = device_get_softc(dev);
if (mxge_vlans_active(sc)) {
device_printf(sc->dev,
"Detach vlans before removing module\n");
return EBUSY;
}
mtx_lock(&sc->driver_mtx);
sc->dying = 1;
if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
mxge_close(sc, 0);
mtx_unlock(&sc->driver_mtx);
ether_ifdetach(sc->ifp);
/*
 * NOTE(review): the taskqueue is freed before the callout is drained,
 * while mxge_tick() can enqueue onto sc->tq and mxge_watchdog_reset()
 * can re-arm the callout -- confirm nothing can still reference
 * sc->tq once it has been set to NULL here.
 */
if (sc->tq != NULL) {
taskqueue_drain(sc->tq, &sc->watchdog_task);
taskqueue_free(sc->tq);
sc->tq = NULL;
}
callout_drain(&sc->co_hdl);
ifmedia_removeall(&sc->media);
mxge_dummy_rdma(sc, 0);
mxge_rem_sysctls(sc);
mxge_rem_irq(sc);
mxge_free_rings(sc);
mxge_free_slices(sc);
/* DMA areas are freed in the reverse order of their allocation in attach */
mxge_dma_free(&sc->dmabench_dma);
mxge_dma_free(&sc->zeropad_dma);
mxge_dma_free(&sc->cmd_dma);
bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
pci_disable_busmaster(dev);
mtx_destroy(&sc->cmd_mtx);
mtx_destroy(&sc->driver_mtx);
if_free(sc->ifp);
bus_dma_tag_destroy(sc->parent_dmat);
return 0;
}
/*
 * Device shutdown method.  Nothing is done for this device; success is
 * always reported.
 */
static int
mxge_shutdown(device_t dev)
{
	(void)dev;

	return (0);
}
/*
This file uses Myri10GE driver indentation.
Local Variables:
c-file-style:"linux"
tab-width:8
End:
*/
Index: stable/8/sys/dev/xen/xenpci
===================================================================
--- stable/8/sys/dev/xen/xenpci (revision 205282)
+++ stable/8/sys/dev/xen/xenpci (revision 205283)
Property changes on: stable/8/sys/dev/xen/xenpci
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /head/sys/dev/xen/xenpci:r203834,205197
Index: stable/8/sys/net/if_var.h
===================================================================
--- stable/8/sys/net/if_var.h (revision 205282)
+++ stable/8/sys/net/if_var.h (revision 205283)
@@ -1,891 +1,901 @@
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* From: @(#)if.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NET_IF_VAR_H_
#define _NET_IF_VAR_H_
/*
* Structures defining a network interface, providing a packet
* transport mechanism (ala level 0 of the PUP protocols).
*
* Each interface accepts output datagrams of a specified maximum
* length, and provides higher level routines with input datagrams
* received from its medium.
*
* Output occurs when the routine if_output is called, with three parameters:
* (*ifp->if_output)(ifp, m, dst, rt)
* Here m is the mbuf chain to be sent and dst is the destination address.
* The output routine encapsulates the supplied datagram if necessary,
* and then transmits it on its medium.
*
* On input, each interface unwraps the data received by it, and either
* places it on the input queue of an internetwork datagram routine
* and posts the associated software interrupt, or passes the datagram to a raw
* packet input routine.
*
* Routines exist for locating interfaces by their addresses
* or for locating an interface on a certain network, as well as more general
* routing and gateway routines maintaining information used to locate
* interfaces. These routines live in the files if.c and route.c
*/
#ifdef __STDC__
/*
* Forward structure declarations for function prototypes [sic].
*/
struct mbuf;
struct thread;
struct rtentry;
struct rt_addrinfo;
struct socket;
struct ether_header;
struct carp_if;
struct ifvlantrunk;
struct route;
struct vnet;
#endif
#include <sys/queue.h> /* get TAILQ macros */
#ifdef _KERNEL
#include <sys/mbuf.h>
#include <sys/eventhandler.h>
#include <sys/buf_ring.h>
#include <net/vnet.h>
#endif /* _KERNEL */
#include <sys/lock.h> /* XXX */
#include <sys/mutex.h> /* XXX */
#include <sys/rwlock.h> /* XXX */
#include <sys/sx.h> /* XXX */
#include <sys/event.h> /* XXX */
#include <sys/_task.h>
#define IF_DUNIT_NONE -1
#include <altq/if_altq.h>
TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */
TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */
TAILQ_HEAD(ifprefixhead, ifprefix);
TAILQ_HEAD(ifmultihead, ifmultiaddr);
TAILQ_HEAD(ifgrouphead, ifg_group);
/*
* Structure defining a queue for a network interface.
*/
struct ifqueue {
/* presumably the first and last mbuf of the queued chain -- confirm */
struct mbuf *ifq_head;
struct mbuf *ifq_tail;
/* by name: current length, length limit and drop counter -- confirm */
int ifq_len;
int ifq_maxlen;
int ifq_drops;
/* mutex embedded in the queue; presumably guards the fields above */
struct mtx ifq_mtx;
};
/*
* Structure defining a network interface.
*
* (Would like to call this struct ``if'', but C isn't PL/1.)
*/
struct ifnet {
void *if_softc; /* pointer to driver state */
void *if_l2com; /* pointer to protocol bits */
struct vnet *if_vnet; /* pointer to network stack instance */
TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
char if_xname[IFNAMSIZ]; /* external name (name + unit) */
const char *if_dname; /* driver name */
int if_dunit; /* unit or IF_DUNIT_NONE */
u_int if_refcount; /* reference count */
struct ifaddrhead if_addrhead; /* linked list of addresses per if */
/*
* if_addrhead is the list of all addresses associated to
* an interface.
* Some code in the kernel assumes that first element
* of the list has type AF_LINK, and contains sockaddr_dl
* addresses which store the link-level address and the name
* of the interface.
* However, access to the AF_LINK address through this
* field is deprecated. Use if_addr or ifaddr_byindex() instead.
*/
int if_pcount; /* number of promiscuous listeners */
struct carp_if *if_carp; /* carp interface structure */
struct bpf_if *if_bpf; /* packet filter structure */
u_short if_index; /* numeric abbreviation for this if */
short if_timer; /* time 'til if_watchdog called */
struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
int if_flags; /* up/down, broadcast, etc. */
int if_capabilities; /* interface features & capabilities */
int if_capenable; /* enabled features & capabilities */
void *if_linkmib; /* link-type-specific MIB data */
size_t if_linkmiblen; /* length of above data */
/*
 * Embedded struct if_data; its ifi_* members are normally reached
 * through the if_* alias macros defined after this structure
 * (if_mtu, if_ipackets, ...).
 */
struct if_data if_data;
struct ifmultihead if_multiaddrs; /* multicast addresses configured */
int if_amcount; /* number of all-multicast requests */
/* procedure handles */
int (*if_output) /* output routine (enqueue) */
(struct ifnet *, struct mbuf *, struct sockaddr *,
struct route *);
void (*if_input) /* input routine (from h/w driver) */
(struct ifnet *, struct mbuf *);
void (*if_start) /* initiate output routine */
(struct ifnet *);
int (*if_ioctl) /* ioctl routine */
(struct ifnet *, u_long, caddr_t);
void (*if_watchdog) /* timer routine */
(struct ifnet *);
void (*if_init) /* Init routine */
(void *);
int (*if_resolvemulti) /* validate/resolve multicast */
(struct ifnet *, struct sockaddr **, struct sockaddr *);
void (*if_qflush) /* flush any queues */
(struct ifnet *);
int (*if_transmit) /* initiate output routine */
(struct ifnet *, struct mbuf *);
void (*if_reassign) /* reassign to vnet routine */
(struct ifnet *, struct vnet *, char *);
struct vnet *if_home_vnet; /* where this ifnet originates from */
struct ifaddr *if_addr; /* pointer to link-level address */
void *if_llsoftc; /* link layer softc */
int if_drv_flags; /* driver-managed status flags */
struct ifaltq if_snd; /* output queue (includes altq) */
const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */
void *if_bridge; /* bridge glue */
struct label *if_label; /* interface MAC label */
/* these are only used by IPv6 */
struct ifprefixhead if_prefixhead; /* list of prefixes per if */
/* AF_MAX opaque pointers; presumably one slot per address family -- confirm */
void *if_afdata[AF_MAX];
int if_afdata_initialized;
/* rwlock stored next to if_afdata; presumably protects it -- confirm */
struct rwlock if_afdata_lock;
struct task if_linktask; /* task for link change events */
struct mtx if_addr_mtx; /* mutex to protect address lists */
LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
/* protected by if_addr_mtx */
/* opaque pointer; by name presumably pf's per-interface state -- confirm */
void *if_pf_kif;
void *if_lagg; /* lagg glue */
u_char if_alloctype; /* if_type at time of allocation */
/*
* Spare fields are added so that we can modify sensitive data
* structures without changing the kernel binary interface, and must
* be used with care where binary compatibility is required.
*/
char if_cspare[3];
char *if_description; /* interface description */
void *if_pspare[7];
int if_ispare[4];
};
typedef void if_init_f_t(void *);
/*
 * Shorthand for the members of the if_data structure embedded in
 * struct ifnet: each if_foo below expands to if_data.ifi_foo, so that
 * ifp->if_mtu means ifp->if_data.ifi_mtu.
 *
 * XXX These aliases are terribly dangerous because they could apply
 * to anything: being plain macros, they rewrite every identifier with
 * one of these names, not only members of struct ifnet.
 */
#define if_mtu if_data.ifi_mtu
#define if_type if_data.ifi_type
#define if_physical if_data.ifi_physical
#define if_addrlen if_data.ifi_addrlen
#define if_hdrlen if_data.ifi_hdrlen
#define if_metric if_data.ifi_metric
#define if_link_state if_data.ifi_link_state
#define if_baudrate if_data.ifi_baudrate
#define if_hwassist if_data.ifi_hwassist
#define if_ipackets if_data.ifi_ipackets
#define if_ierrors if_data.ifi_ierrors
#define if_opackets if_data.ifi_opackets
#define if_oerrors if_data.ifi_oerrors
#define if_collisions if_data.ifi_collisions
#define if_ibytes if_data.ifi_ibytes
#define if_obytes if_data.ifi_obytes
#define if_imcasts if_data.ifi_imcasts
#define if_omcasts if_data.ifi_omcasts
#define if_iqdrops if_data.ifi_iqdrops
#define if_noproto if_data.ifi_noproto
#define if_lastchange if_data.ifi_lastchange
/* for compatibility with other BSDs */
#define if_addrlist if_addrhead
#define if_list if_link
/* yields the interface's external name string (if_xname) */
#define if_name(ifp) ((ifp)->if_xname)
/*
* Locks for address lists on the network interface.
*/
/*
 * Thin wrappers around the mutex stored in ifp->if_addr_mtx: set it up,
 * tear it down, acquire it, release it, and assert that the caller owns it.
 */
#define	IF_ADDR_LOCK_INIT(ifp) \
	mtx_init(&(ifp)->if_addr_mtx, "if_addr_mtx", NULL, MTX_DEF)
#define	IF_ADDR_LOCK_DESTROY(ifp) \
	mtx_destroy(&(ifp)->if_addr_mtx)
#define	IF_ADDR_LOCK(ifp) \
	mtx_lock(&(ifp)->if_addr_mtx)
#define	IF_ADDR_UNLOCK(ifp) \
	mtx_unlock(&(ifp)->if_addr_mtx)
#define	IF_ADDR_LOCK_ASSERT(ifp) \
	mtx_assert(&(ifp)->if_addr_mtx, MA_OWNED)
/*
* Function variations on locking macros intended to be used by loadable
* kernel modules in order to divorce them from the internals of address list
* locking.
*/
void if_addr_rlock(struct ifnet *ifp); /* if_addrhead */
void if_addr_runlock(struct ifnet *ifp); /* if_addrhead */
void if_maddr_rlock(struct ifnet *ifp); /* if_multiaddrs */
void if_maddr_runlock(struct ifnet *ifp); /* if_multiaddrs */
/*
* Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
* are queues of messages stored on ifqueue structures
* (defined above). Entries are added to and deleted from these structures
* by these macros, which should be called with ipl raised to splimp().
*/
/*
 * Naming convention used below: a macro with a leading underscore does not
 * touch ifq_mtx itself; the IF_* macro of the same name brackets it with
 * IF_LOCK()/IF_UNLOCK().  All of these are macros, so their arguments may be
 * evaluated more than once; m must be an lvalue where it is assigned.
 */
/* Acquire / release the queue mutex, or assert that the caller owns it. */
#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
/* True when the queue length has reached (or exceeded) ifq_maxlen. */
#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
/* Account for one dropped packet. */
#define _IF_DROP(ifq) ((ifq)->ifq_drops++)
/* Number of packets currently on the queue. */
#define _IF_QLEN(ifq) ((ifq)->ifq_len)
/* Append m at the tail of the queue; no full-queue check is made here. */
#define _IF_ENQUEUE(ifq, m) do { \
(m)->m_nextpkt = NULL; \
if ((ifq)->ifq_tail == NULL) \
(ifq)->ifq_head = m; \
else \
(ifq)->ifq_tail->m_nextpkt = m; \
(ifq)->ifq_tail = m; \
(ifq)->ifq_len++; \
} while (0)
#define IF_ENQUEUE(ifq, m) do { \
IF_LOCK(ifq); \
_IF_ENQUEUE(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
/* Insert m at the head of the queue. */
#define _IF_PREPEND(ifq, m) do { \
(m)->m_nextpkt = (ifq)->ifq_head; \
if ((ifq)->ifq_tail == NULL) \
(ifq)->ifq_tail = (m); \
(ifq)->ifq_head = (m); \
(ifq)->ifq_len++; \
} while (0)
#define IF_PREPEND(ifq, m) do { \
IF_LOCK(ifq); \
_IF_PREPEND(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
/*
 * Unlink the head packet and store it in m; m is set to NULL when the
 * queue is empty.  The removed packet's m_nextpkt is cleared.
 */
#define _IF_DEQUEUE(ifq, m) do { \
(m) = (ifq)->ifq_head; \
if (m) { \
if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \
(ifq)->ifq_tail = NULL; \
(m)->m_nextpkt = NULL; \
(ifq)->ifq_len--; \
} \
} while (0)
#define IF_DEQUEUE(ifq, m) do { \
IF_LOCK(ifq); \
_IF_DEQUEUE(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
/*
 * Peek: set m to the head packet without removing it.  Note that, unlike
 * the other IF_* wrappers, IF_POLL does not take the queue mutex.
 */
#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
#define IF_POLL(ifq, m) _IF_POLL(ifq, m)
/* Dequeue every packet and release each one with m_freem(). */
#define _IF_DRAIN(ifq) do { \
struct mbuf *m; \
for (;;) { \
_IF_DEQUEUE(ifq, m); \
if (m == NULL) \
break; \
m_freem(m); \
} \
} while (0)
#define IF_DRAIN(ifq) do { \
IF_LOCK(ifq); \
_IF_DRAIN(ifq); \
IF_UNLOCK(ifq); \
} while(0)
#ifdef _KERNEL
/* interface address change event */
typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t);
/* new interface arrival event */
typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t);
/* interface departure event */
typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
/*
* interface groups
*/
/*
 * One interface group.  Its members hang off ifg_members as
 * struct ifg_member entries.
 */
struct ifg_group {
char ifg_group[IFNAMSIZ]; /* presumably the group's name -- confirm */
u_int ifg_refcnt; /* reference count; maintained outside this header */
void *ifg_pf_kif; /* opaque pointer; presumably pf glue -- confirm */
TAILQ_HEAD(, ifg_member) ifg_members; /* list of struct ifg_member */
TAILQ_ENTRY(ifg_group) ifg_next; /* linkage in a list of groups */
};
/* One entry on a group's ifg_members list, pointing at an interface. */
struct ifg_member {
TAILQ_ENTRY(ifg_member) ifgm_next; /* linkage in ifg_group.ifg_members */
struct ifnet *ifgm_ifp; /* the interface this entry refers to */
};
/*
 * One entry on an interface's if_groups list (struct ifnet), pointing at a
 * group.
 */
struct ifg_list {
struct ifg_group *ifgl_group; /* the group this entry refers to */
TAILQ_ENTRY(ifg_list) ifgl_next; /* linkage in ifnet.if_groups */
};
/* group attach event */
typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
/* group detach event */
typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
/* group change event */
typedef void (*group_change_event_handler_t)(void *, const char *);
EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
/*
 * Wrappers around the rwlock stored in ifp->if_afdata_lock.
 *
 * The W/R variants take or drop the lock for writing or reading.  The
 * unqualified IF_AFDATA_LOCK()/IF_AFDATA_UNLOCK() names are aliases for the
 * write variants, and IF_AFDATA_TRYLOCK() attempts the write lock via
 * rw_try_wlock().
 */
#define IF_AFDATA_LOCK_INIT(ifp) \
rw_init(&(ifp)->if_afdata_lock, "if_afdata")
#define IF_AFDATA_WLOCK(ifp) rw_wlock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_RLOCK(ifp) rw_rlock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_WUNLOCK(ifp) rw_wunlock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_RUNLOCK(ifp) rw_runlock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp)
#define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp)
#define IF_AFDATA_TRYLOCK(ifp) rw_try_wlock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_DESTROY(ifp) rw_destroy(&(ifp)->if_afdata_lock)
/* Assert that the lock is held (RA_LOCKED) or not held (RA_UNLOCKED). */
#define IF_AFDATA_LOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED)
#define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED)
/*
 * if_handoff() is implemented outside this header.  The two macros below
 * are convenience front ends for it:
 *
 *   IF_HANDOFF(ifq, m, ifp)          passes an adjust value of 0;
 *   IF_HANDOFF_ADJ(ifq, m, ifp, adj) passes the caller's adj.
 *
 * Both cast ifq to struct ifqueue *, presumably so that a pointer to a
 * compatible queue type (e.g. &ifp->if_snd) can be handed in -- confirm
 * against if_handoff()'s implementation.  Every argument is parenthesised
 * so that the cast applies to the whole ifq expression rather than to its
 * first operand only.
 */
int	if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
	    int adjust);
#define	IF_HANDOFF(ifq, m, ifp) \
	if_handoff((struct ifqueue *)(ifq), (m), (ifp), 0)
#define	IF_HANDOFF_ADJ(ifq, m, ifp, adj) \
	if_handoff((struct ifqueue *)(ifq), (m), (ifp), (adj))
void if_start(struct ifnet *);
/*
 * IFQ_ENQUEUE(ifq, m, err): queue packet m on ifq while holding the queue
 * mutex, and report the outcome through err (which must be an lvalue).
 *
 * - When ALTQ is enabled on ifq, the packet is handed to ALTQ_ENQUEUE(),
 *   which sets err.
 * - Otherwise, a full queue causes m to be freed with m_freem() and err to
 *   be set to ENOBUFS; if there is room, m is appended and err is set to 0.
 *
 * Any non-zero err increments ifq_drops.  Because m may already have been
 * freed on return, callers must not touch it after a failure.
 */
#define IFQ_ENQUEUE(ifq, m, err) \
do { \
IF_LOCK(ifq); \
if (ALTQ_IS_ENABLED(ifq)) \
ALTQ_ENQUEUE(ifq, m, NULL, err); \
else { \
if (_IF_QFULL(ifq)) { \
m_freem(m); \
(err) = ENOBUFS; \
} else { \
_IF_ENQUEUE(ifq, m); \
(err) = 0; \
} \
} \
if (err) \
(ifq)->ifq_drops++; \
IF_UNLOCK(ifq); \
} while (0)
/*
 * The *_NOLOCK macros below do not touch the queue mutex; each has a
 * same-named wrapper without the suffix that brackets it with
 * IF_LOCK()/IF_UNLOCK().
 */
/*
 * Remove the next packet into m.  The source is chosen in this order:
 * tbr_dequeue_ptr() when TBR_IS_ENABLED(ifq), ALTQ_DEQUEUE() when ALTQ is
 * enabled, and the plain _IF_DEQUEUE() otherwise.
 */
#define IFQ_DEQUEUE_NOLOCK(ifq, m) \
do { \
if (TBR_IS_ENABLED(ifq)) \
(m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \
else if (ALTQ_IS_ENABLED(ifq)) \
ALTQ_DEQUEUE(ifq, m); \
else \
_IF_DEQUEUE(ifq, m); \
} while (0)
#define IFQ_DEQUEUE(ifq, m) \
do { \
IF_LOCK(ifq); \
IFQ_DEQUEUE_NOLOCK(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
/*
 * Same selection order as IFQ_DEQUEUE_NOLOCK, but using the polling
 * variants (ALTDQ_POLL, ALTQ_POLL(), _IF_POLL()), so m is set to the next
 * packet without removing it in the plain-queue case.
 */
#define IFQ_POLL_NOLOCK(ifq, m) \
do { \
if (TBR_IS_ENABLED(ifq)) \
(m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \
else if (ALTQ_IS_ENABLED(ifq)) \
ALTQ_POLL(ifq, m); \
else \
_IF_POLL(ifq, m); \
} while (0)
#define IFQ_POLL(ifq, m) \
do { \
IF_LOCK(ifq); \
IFQ_POLL_NOLOCK(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
/*
 * Discard everything queued: ALTQ_PURGE() when ALTQ is enabled, otherwise
 * _IF_DRAIN(), which frees each packet.
 */
#define IFQ_PURGE_NOLOCK(ifq) \
do { \
if (ALTQ_IS_ENABLED(ifq)) { \
ALTQ_PURGE(ifq); \
} else \
_IF_DRAIN(ifq); \
} while (0)
#define IFQ_PURGE(ifq) \
do { \
IF_LOCK(ifq); \
IFQ_PURGE_NOLOCK(ifq); \
IF_UNLOCK(ifq); \
} while (0)
/* Set the ALTQF_READY bit in the queue's altq_flags. */
#define IFQ_SET_READY(ifq) \
do { ((ifq)->altq_flags |= ALTQF_READY); } while (0)
/* IFQ_* spellings of the IF_* queue-mutex macros. */
#define IFQ_LOCK(ifq) IF_LOCK(ifq)
#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq)
#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq)
/*
 * Direct accessors for the queue counters.  None of these take the queue
 * mutex.  Note that IFQ_INC_LEN yields the old length (post-increment)
 * while IFQ_DEC_LEN yields the new length (pre-decrement).
 */
#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++)
#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len)
#define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++)
#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len))
/*
 * The IFF_DRV_OACTIVE test should really occur in the device driver, not in
 * the handoff logic, as that flag is locked by the device driver.
 */
/*
 * IFQ_HANDOFF_ADJ(ifp, m, adj, err): enqueue m on ifp->if_snd and, on
 * success, update the output statistics and kick the driver.
 *
 *   ifp  interface whose send queue receives the packet
 *   m    packet to enqueue; must not be used by the caller afterwards
 *   adj  byte count added to the packet length when crediting if_obytes
 *   err  lvalue that receives the IFQ_ENQUEUE() result (0 on success)
 *
 * The packet length and flags are copied before IFQ_ENQUEUE() runs because
 * that macro may free m (queue full), after which m cannot be inspected.
 * On success if_omcasts is bumped for M_MCAST packets and if_start() is
 * called unless the driver has IFF_DRV_OACTIVE set.
 *
 * The temporaries carry an ifqh_ prefix so that they cannot capture an
 * identifier appearing in a caller-supplied argument such as adj.
 *
 * IFQ_HANDOFF(ifp, m, err) is the same operation with adj fixed at 0.
 */
#define	IFQ_HANDOFF_ADJ(ifp, m, adj, err) \
do { \
	int ifqh_len; \
	int ifqh_mflags; \
 \
	ifqh_len = (m)->m_pkthdr.len; \
	ifqh_mflags = (m)->m_flags; \
	IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \
	if ((err) == 0) { \
		(ifp)->if_obytes += ifqh_len + (adj); \
		if (ifqh_mflags & M_MCAST) \
			(ifp)->if_omcasts++; \
		if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \
			if_start(ifp); \
	} \
} while (0)
#define	IFQ_HANDOFF(ifp, m, err) \
	IFQ_HANDOFF_ADJ(ifp, m, 0, err)
/*
 * IFQ_DRV_DEQUEUE(ifq, m): fetch the next packet for a driver into m.
 *
 * Fast path: if the driver-private list (ifq_drv_head/ifq_drv_tail) is
 * non-empty, its head is unlinked and returned without taking the queue
 * mutex.
 *
 * Slow path: with the queue mutex held, one packet is dequeued from the
 * main queue into m, and then further packets are moved from the main
 * queue onto the driver-private list until that list holds ifq_drv_maxlen
 * packets or the main queue yields NULL.
 *
 * m is NULL on return when nothing was available.
 */
#define IFQ_DRV_DEQUEUE(ifq, m) \
do { \
(m) = (ifq)->ifq_drv_head; \
if (m) { \
if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \
(ifq)->ifq_drv_tail = NULL; \
(m)->m_nextpkt = NULL; \
(ifq)->ifq_drv_len--; \
} else { \
IFQ_LOCK(ifq); \
IFQ_DEQUEUE_NOLOCK(ifq, m); \
while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \
struct mbuf *m0; \
IFQ_DEQUEUE_NOLOCK(ifq, m0); \
if (m0 == NULL) \
break; \
m0->m_nextpkt = NULL; \
if ((ifq)->ifq_drv_tail == NULL) \
(ifq)->ifq_drv_head = m0; \
else \
(ifq)->ifq_drv_tail->m_nextpkt = m0; \
(ifq)->ifq_drv_tail = m0; \
(ifq)->ifq_drv_len++; \
} \
IFQ_UNLOCK(ifq); \
} \
} while (0)
/*
 * Put m back at the head of the driver-private list.  The queue mutex is
 * not taken.
 */
#define IFQ_DRV_PREPEND(ifq, m) \
do { \
(m)->m_nextpkt = (ifq)->ifq_drv_head; \
if ((ifq)->ifq_drv_tail == NULL) \
(ifq)->ifq_drv_tail = (m); \
(ifq)->ifq_drv_head = (m); \
(ifq)->ifq_drv_len++; \
} while (0)
/*
 * True only when both the driver-private list and the main queue report a
 * length of zero.  Both counters are read without taking the queue mutex.
 */
#define IFQ_DRV_IS_EMPTY(ifq) \
(((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))
/*
 * Free every packet on the driver-private list, reset that list to empty,
 * and then purge the main queue with IFQ_PURGE() (which takes the queue
 * mutex itself).
 */
#define IFQ_DRV_PURGE(ifq) \
do { \
struct mbuf *m, *n = (ifq)->ifq_drv_head; \
while((m = n) != NULL) { \
n = m->m_nextpkt; \
m_freem(m); \
} \
(ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \
(ifq)->ifq_drv_len = 0; \
IFQ_PURGE(ifq); \
} while (0)
#ifdef _KERNEL
/*
 * Credit one transmitted packet to the interface's output counters: len
 * bytes go to if_obytes and, for packets flagged M_MCAST, if_omcasts is
 * incremented.  Compiles to nothing when NO_SLOW_STATS is defined.
 */
static __inline void
drbr_stats_update(struct ifnet *ifp, int len, int mflags)
{
#ifndef NO_SLOW_STATS
	if ((mflags & M_MCAST) != 0)
		ifp->if_omcasts++;
	ifp->if_obytes += len;
#endif
}
/*
 * Queue m for transmission on ifp.
 *
 * With ALTQ compiled in and enabled on ifp->if_snd the packet goes through
 * IFQ_ENQUEUE() and that macro's error code is returned.  Otherwise the
 * packet is pushed onto br: if the ring reports ENOBUFS the drop is counted
 * in br_drops and m is freed, else the output statistics are updated.
 *
 * Returns the enqueue result.  The caller must not use m afterwards.
 */
static __inline int
drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
{
	/* Captured up front: m is not ours to inspect once it is queued. */
	int pktlen = m->m_pkthdr.len;
	int pktflags = m->m_flags;
	int rc = 0;

#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		IFQ_ENQUEUE(&ifp->if_snd, m, rc);
		return (rc);
	}
#endif
	rc = buf_ring_enqueue_bytes(br, m, pktlen);
	if (rc != ENOBUFS) {
		drbr_stats_update(ifp, pktlen, pktflags);
		return (rc);
	}
	br->br_drops++;
	m_freem(m);
	return (rc);
}
/*
 * Discard every packet that is still waiting to be transmitted.
 *
 * When ALTQ is compiled in, ifp is non-NULL and ALTQ is enabled on
 * ifp->if_snd, that send queue is emptied first.  After that every mbuf
 * remaining in br is dequeued and freed.  ifp may be NULL, in which case
 * only the ring is drained.
 */
static __inline void
drbr_flush(struct ifnet *ifp, struct buf_ring *br)
{
struct mbuf *m;
#ifdef ALTQ
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
- while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
- m_freem(m);
- }
- }
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
+ IFQ_PURGE(&ifp->if_snd);
#endif
while ((m = buf_ring_dequeue_sc(br)) != NULL)
m_freem(m);
}
/*
 * Release a buf_ring: free any mbufs still sitting in it, then free the
 * ring itself with buf_ring_free(br, type).
 */
static __inline void
drbr_free(struct buf_ring *br, struct malloc_type *type)
{
/* No ifnet is passed, so drbr_flush() drains only the ring. */
drbr_flush(NULL, br);
buf_ring_free(br, type);
}
/*
 * Remove and return the next packet to transmit, or NULL when there is
 * none.  With ALTQ compiled in and enabled on ifp->if_snd the packet is
 * taken from that send queue; otherwise it is taken from br.
 */
static __inline struct mbuf *
drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
{
#ifdef ALTQ
struct mbuf *m;
if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+ IFQ_DEQUEUE(&ifp->if_snd, m);
return (m);
}
#endif
return (buf_ring_dequeue_sc(br));
}
/*
 * Conditionally dequeue the next packet.
 *
 * The head packet is looked at without being removed and passed to
 * func(m, arg).  If func returns 0 the packet stays queued and NULL is
 * returned; otherwise the packet is dequeued and returned.
 *
 * ALTQ path (ALTQ compiled in and enabled on ifp->if_snd): the peek, the
 * call to func and the dequeue all happen with the send-queue lock held.
 * Non-ALTQ path: the head of br is peeked; an empty ring yields NULL
 * without calling func.
 */
static __inline struct mbuf *
drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
int (*func) (struct mbuf *, void *), void *arg)
{
struct mbuf *m;
#ifdef ALTQ
- /*
- * XXX need to evaluate / requeue
- */
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, m);
+ if (m != NULL && func(m, arg) == 0) {
+ IFQ_UNLOCK(&ifp->if_snd);
+ return (NULL);
+ }
+ IFQ_DEQUEUE_NOLOCK(&ifp->if_snd, m);
+ IFQ_UNLOCK(&ifp->if_snd);
return (m);
}
#endif
m = buf_ring_peek(br);
if (m == NULL || func(m, arg) == 0)
return (NULL);
return (buf_ring_dequeue_sc(br));
}
/*
 * Report whether nothing is waiting to be transmitted.  With ALTQ compiled
 * in and enabled on ifp->if_snd the answer comes from that send queue;
 * otherwise it comes from buf_ring_empty(br).
 */
static __inline int
drbr_empty(struct ifnet *ifp, struct buf_ring *br)
{
#ifdef ALTQ
if (ALTQ_IS_ENABLED(&ifp->if_snd))
- return (IFQ_DRV_IS_EMPTY(&ifp->if_snd));
+ return (IFQ_IS_EMPTY(&ifp->if_snd));
#endif
return (buf_ring_empty(br));
+}
+
/*
 * Returns 1 whenever ALTQ is compiled in and enabled on ifp->if_snd;
 * otherwise returns non-zero exactly when br already holds at least one
 * packet.  Judging by the name, a non-zero result tells the caller to
 * enqueue a new packet instead of transmitting it directly -- confirm
 * against callers.
 */
+static __inline int
+drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (1);
+#endif
+ return (!buf_ring_empty(br));
}
/*
 * Number of packets currently waiting to be transmitted: the length of
 * ifp->if_snd when ALTQ is compiled in and enabled on it, otherwise the
 * number of entries in br.
 */
static __inline int
drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
{
#ifdef ALTQ
	struct ifaltq *sendq = &ifp->if_snd;

	if (ALTQ_IS_ENABLED(sendq))
		return (sendq->ifq_len);
#endif
	return (buf_ring_count(br));
}
#endif
/*
* 72 was chosen below because it is the size of a TCP/IP
* header (40) + the minimum mss (32).
*/
#define IF_MINMTU 72
#define IF_MAXMTU 65535
#endif /* _KERNEL */
/*
* The ifaddr structure contains information about one address
* of an interface. They are maintained by the different address families,
* are allocated and attached when an address is set, and are linked
* together so all addresses for an interface can be located.
*
* NOTE: a 'struct ifaddr' is always at the beginning of a larger
* chunk of malloc'ed memory, where we store the three addresses
* (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here.
*/
struct ifaddr {
struct sockaddr *ifa_addr; /* address of interface */
struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */
#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
struct sockaddr *ifa_netmask; /* used to determine subnet */
struct if_data if_data; /* not all members are meaningful */
struct ifnet *ifa_ifp; /* back-pointer to interface */
TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */
(int, struct rtentry *, struct rt_addrinfo *);
u_short ifa_flags; /* mostly rt_flags for cloning */
u_int ifa_refcnt; /* references to this structure */
int ifa_metric; /* cost of going out this interface */
int (*ifa_claim_addr) /* check if an addr goes to this if */
(struct ifaddr *, struct sockaddr *);
struct mtx ifa_mtx; /* mutex taken by IFA_LOCK()/IFA_UNLOCK() */
};
#define IFA_ROUTE RTF_UP /* route installed */
#define IFA_RTSELF RTF_HOST /* loopback route to self installed */
/* for compatibility with other BSDs */
#define ifa_list ifa_link
#ifdef _KERNEL
#define IFA_LOCK(ifa) mtx_lock(&(ifa)->ifa_mtx)
#define IFA_UNLOCK(ifa) mtx_unlock(&(ifa)->ifa_mtx)
void ifa_free(struct ifaddr *ifa);
void ifa_init(struct ifaddr *ifa);
void ifa_ref(struct ifaddr *ifa);
#endif
/*
* The prefix structure contains information about one prefix
* of an interface. They are maintained by the different address families,
* are allocated and attached when a prefix or an address is set,
* and are linked together so all prefixes for an interface can be located.
*/
struct ifprefix {
struct sockaddr *ifpr_prefix; /* prefix of interface */
struct ifnet *ifpr_ifp; /* back-pointer to interface */
TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
u_char ifpr_plen; /* prefix length in bits */
u_char ifpr_type; /* protocol dependent prefix type */
};
/*
* Multicast address structure. This is analogous to the ifaddr
* structure except that it keeps track of multicast addresses.
*/
struct ifmultiaddr {
TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
struct sockaddr *ifma_addr; /* address this membership is for */
struct sockaddr *ifma_lladdr; /* link-layer translation, if any */
struct ifnet *ifma_ifp; /* back-pointer to interface */
u_int ifma_refcount; /* reference count */
void *ifma_protospec; /* protocol-specific state, if any */
struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
};
#ifdef _KERNEL
extern struct rwlock ifnet_rwlock;
extern struct sx ifnet_sxlock;
/*
 * The ifnet list is guarded by a pair of locks: ifnet_sxlock (an sx lock)
 * and ifnet_rwlock (an rwlock).  Both are initialised as recursable.
 *
 * Writers take both: IFNET_WLOCK() acquires the sx lock exclusively and
 * then the rwlock for writing, and IFNET_WUNLOCK() releases them in the
 * reverse order.  Readers take only one of the two -- see the read-side
 * macros further down.
 */
#define IFNET_LOCK_INIT() do { \
rw_init_flags(&ifnet_rwlock, "ifnet_rw", RW_RECURSE); \
sx_init_flags(&ifnet_sxlock, "ifnet_sx", SX_RECURSE); \
} while(0)
#define IFNET_WLOCK() do { \
sx_xlock(&ifnet_sxlock); \
rw_wlock(&ifnet_rwlock); \
} while (0)
#define IFNET_WUNLOCK() do { \
rw_wunlock(&ifnet_rwlock); \
sx_xunlock(&ifnet_sxlock); \
} while (0)
/*
 * To assert the ifnet lock, you must know not only whether it's for read or
 * write, but also whether it was acquired with sleep support or not.
 */
#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
#define IFNET_RLOCK_NOSLEEP_ASSERT() rw_assert(&ifnet_rwlock, RA_RLOCKED)
#define IFNET_WLOCK_ASSERT() do { \
sx_assert(&ifnet_sxlock, SA_XLOCKED); \
rw_assert(&ifnet_rwlock, RA_WLOCKED); \
} while (0)
/*
 * Read side: IFNET_RLOCK() takes only the sx lock (shared), while
 * IFNET_RLOCK_NOSLEEP() takes only the rwlock (read).  Each must be paired
 * with the matching unlock macro.
 */
#define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
#define IFNET_RLOCK_NOSLEEP() rw_rlock(&ifnet_rwlock)
#define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)
#define IFNET_RUNLOCK_NOSLEEP() rw_runlock(&ifnet_rwlock)
/*
* Look up an ifnet given its index; the _ref variant also acquires a
* reference that must be freed using if_rele(). It is almost always a bug
* to call ifnet_byindex() instead of ifnet_byindex_ref().
*/
struct ifnet *ifnet_byindex(u_short idx);
struct ifnet *ifnet_byindex_locked(u_short idx);
struct ifnet *ifnet_byindex_ref(u_short idx);
/*
* Given the index, ifaddr_byindex() returns the one and only
* link-level ifaddr for the interface. You are not supposed to use
* it to traverse the list of addresses associated to the interface.
*/
struct ifaddr *ifaddr_byindex(u_short idx);
VNET_DECLARE(struct ifnethead, ifnet);
VNET_DECLARE(struct ifgrouphead, ifg_head);
VNET_DECLARE(int, if_index);
VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */
VNET_DECLARE(int, useloopback);
#define V_ifnet VNET(ifnet)
#define V_ifg_head VNET(ifg_head)
#define V_if_index VNET(if_index)
#define V_loif VNET(loif)
#define V_useloopback VNET(useloopback)
extern int ifqmaxlen;
int if_addgroup(struct ifnet *, const char *);
int if_delgroup(struct ifnet *, const char *);
int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **);
int if_allmulti(struct ifnet *, int);
struct ifnet* if_alloc(u_char);
void if_attach(struct ifnet *);
void if_dead(struct ifnet *);
int if_delmulti(struct ifnet *, struct sockaddr *);
void if_delmulti_ifma(struct ifmultiaddr *);
void if_detach(struct ifnet *);
void if_vmove(struct ifnet *, struct vnet *);
void if_purgeaddrs(struct ifnet *);
void if_delallmulti(struct ifnet *);
void if_down(struct ifnet *);
struct ifmultiaddr *
if_findmulti(struct ifnet *, struct sockaddr *);
void if_free(struct ifnet *);
void if_free_type(struct ifnet *, u_char);
void if_initname(struct ifnet *, const char *, int);
void if_link_state_change(struct ifnet *, int);
int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
void if_qflush(struct ifnet *);
void if_ref(struct ifnet *);
void if_rele(struct ifnet *);
int if_setlladdr(struct ifnet *, const u_char *, int);
void if_up(struct ifnet *);
int ifioctl(struct socket *, u_long, caddr_t, struct thread *);
int ifpromisc(struct ifnet *, int);
struct ifnet *ifunit(const char *);
struct ifnet *ifunit_ref(const char *);
void ifq_init(struct ifaltq *, struct ifnet *ifp);
void ifq_delete(struct ifaltq *);
int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *);
int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);
struct ifaddr *ifa_ifwithaddr(struct sockaddr *);
int ifa_ifwithaddr_check(struct sockaddr *);
struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithnet(struct sockaddr *);
struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);
struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);
typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp);
typedef void if_com_free_t(void *com, u_char type);
void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
void if_deregister_com_alloc(u_char type);
/*
 * Yield the link-level address bytes of ifp: the interface's if_addr entry
 * supplies ifa_addr, which is treated as a struct sockaddr_dl and passed
 * to LLADDR().  ifp->if_addr is dereferenced without a NULL check.
 */
#define IF_LLADDR(ifp) \
LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))
#ifdef DEVICE_POLLING
enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };
typedef int poll_handler_t(struct ifnet *ifp, enum poll_cmd cmd, int count);
int ether_poll_register(poll_handler_t *h, struct ifnet *ifp);
int ether_poll_deregister(struct ifnet *ifp);
#endif /* DEVICE_POLLING */
#endif /* _KERNEL */
#endif /* !_NET_IF_VAR_H_ */
Index: stable/8/sys
===================================================================
--- stable/8/sys (revision 205282)
+++ stable/8/sys (revision 205283)
Property changes on: stable/8/sys
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /head/sys:r203834,205197
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Mon, Apr 13, 4:46 AM (2 d)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31329384
Default Alt Text
(715 KB)
Attached To
Mode
rS FreeBSD src repository - subversion
Attached
Detach File
Event Timeline
Log In to Comment