diff --git a/sys/dev/beri/virtio/network/if_vtbe.c b/sys/dev/beri/virtio/network/if_vtbe.c
index 03853435a9de..5975a4a1c7f3 100644
--- a/sys/dev/beri/virtio/network/if_vtbe.c
+++ b/sys/dev/beri/virtio/network/if_vtbe.c
@@ -1,651 +1,651 @@
/*-
* Copyright (c) 2014 Ruslan Bukin
* All rights reserved.
*
* This software was developed by SRI International and the University of
* Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
* ("CTSRD"), as part of the DARPA CRASH research programme.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* BERI Virtio Networking Frontend
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "pio_if.h"
#define DPRINTF(fmt, args...) printf(fmt, ##args)
#define READ4(_sc, _reg) \
bus_read_4((_sc)->res[0], _reg)
#define WRITE4(_sc, _reg, _val) \
bus_write_4((_sc)->res[0], _reg, _val)
#define VTBE_LOCK(sc) mtx_lock(&(sc)->mtx)
#define VTBE_UNLOCK(sc) mtx_unlock(&(sc)->mtx)
#define VTBE_ASSERT_LOCKED(sc) mtx_assert(&(sc)->mtx, MA_OWNED);
#define VTBE_ASSERT_UNLOCKED(sc) mtx_assert(&(sc)->mtx, MA_NOTOWNED);
/*
* Driver data and defines.
*/
#define DESC_COUNT 256
struct vtbe_softc {
struct resource *res[2];
bus_space_tag_t bst;
bus_space_handle_t bsh;
device_t dev;
struct ifnet *ifp;
int if_flags;
struct mtx mtx;
boolean_t is_attached;
int beri_mem_offset;
device_t pio_send;
device_t pio_recv;
int opened;
struct vqueue_info vs_queues[2];
int vs_curq;
int hdrsize;
};
static struct resource_spec vtbe_spec[] = {
{ SYS_RES_MEMORY, 0, RF_ACTIVE },
{ -1, 0 }
};
static void vtbe_txfinish_locked(struct vtbe_softc *sc);
static void vtbe_rxfinish_locked(struct vtbe_softc *sc);
static void vtbe_stop_locked(struct vtbe_softc *sc);
static int pio_enable_irq(struct vtbe_softc *sc, int enable);
static void
vtbe_txstart_locked(struct vtbe_softc *sc)
{
struct iovec iov[DESC_COUNT];
struct virtio_net_hdr *vnh;
struct vqueue_info *vq;
struct iovec *tiov;
struct ifnet *ifp;
struct mbuf *m;
struct uio uio;
int enqueued;
int iolen;
int error;
int reg;
int len;
int n;
VTBE_ASSERT_LOCKED(sc);
/* RX queue */
vq = &sc->vs_queues[0];
if (!vq_has_descs(vq)) {
return;
}
ifp = sc->ifp;
if (ifp->if_drv_flags & IFF_DRV_OACTIVE) {
return;
}
enqueued = 0;
if (!vq_ring_ready(vq))
return;
vq->vq_save_used = be16toh(vq->vq_used->idx);
for (;;) {
if (!vq_has_descs(vq)) {
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
break;
}
IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
if (m == NULL) {
break;
}
n = vq_getchain(sc->beri_mem_offset, vq, iov,
DESC_COUNT, NULL);
KASSERT(n == 2,
("Unexpected amount of descriptors (%d)", n));
tiov = getcopy(iov, n);
vnh = iov[0].iov_base;
memset(vnh, 0, sc->hdrsize);
len = iov[1].iov_len;
uio.uio_resid = len;
uio.uio_iov = &tiov[1];
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_iovcnt = 1;
uio.uio_offset = 0;
uio.uio_rw = UIO_READ;
error = m_mbuftouio(&uio, m, 0);
if (error)
panic("m_mbuftouio failed\n");
iolen = (len - uio.uio_resid + sc->hdrsize);
free(tiov, M_DEVBUF);
vq_relchain(vq, iov, n, iolen);
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
BPF_MTAP(ifp, m);
m_freem(m);
++enqueued;
}
if (enqueued != 0) {
reg = htobe32(VIRTIO_MMIO_INT_VRING);
WRITE4(sc, VIRTIO_MMIO_INTERRUPT_STATUS, reg);
PIO_SET(sc->pio_send, Q_INTR, 1);
}
}
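/*
 * Shape of a single TX chain consumed above, implied by the
 * KASSERT(n == 2) (a reading of the code, not documented elsewhere):
 *
 *   iov[0] -> struct virtio_net_hdr (sc->hdrsize bytes, zeroed here)
 *   iov[1] -> payload buffer the mbuf chain is copied into via uio
 *
 * so the length handed back in vq_relchain() is the copied payload
 * plus the header: iolen = (len - uio.uio_resid) + sc->hdrsize.
 */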
static void
vtbe_txstart(struct ifnet *ifp)
{
struct vtbe_softc *sc = ifp->if_softc;
VTBE_LOCK(sc);
vtbe_txstart_locked(sc);
VTBE_UNLOCK(sc);
}
static void
vtbe_stop_locked(struct vtbe_softc *sc)
{
struct ifnet *ifp;
VTBE_ASSERT_LOCKED(sc);
ifp = sc->ifp;
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
}
static void
vtbe_init_locked(struct vtbe_softc *sc)
{
struct ifnet *ifp = sc->ifp;
VTBE_ASSERT_LOCKED(sc);
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
return;
ifp->if_drv_flags |= IFF_DRV_RUNNING;
}
static void
vtbe_init(void *if_softc)
{
struct vtbe_softc *sc = if_softc;
VTBE_LOCK(sc);
vtbe_init_locked(sc);
VTBE_UNLOCK(sc);
}
static int
vtbe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct ifmediareq *ifmr;
struct vtbe_softc *sc;
struct ifreq *ifr;
int mask, error;
sc = ifp->if_softc;
ifr = (struct ifreq *)data;
error = 0;
switch (cmd) {
case SIOCSIFFLAGS:
VTBE_LOCK(sc);
if (ifp->if_flags & IFF_UP) {
pio_enable_irq(sc, 1);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
vtbe_init_locked(sc);
}
} else {
pio_enable_irq(sc, 0);
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
vtbe_stop_locked(sc);
}
}
sc->if_flags = ifp->if_flags;
VTBE_UNLOCK(sc);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
break;
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
ifmr = (struct ifmediareq *)data;
ifmr->ifm_count = 1;
ifmr->ifm_status = (IFM_AVALID | IFM_ACTIVE);
ifmr->ifm_active = (IFM_ETHER | IFM_10G_T | IFM_FDX);
ifmr->ifm_current = ifmr->ifm_active;
break;
case SIOCSIFCAP:
mask = ifp->if_capenable ^ ifr->ifr_reqcap;
if (mask & IFCAP_VLAN_MTU) {
ifp->if_capenable ^= IFCAP_VLAN_MTU;
}
break;
case SIOCSIFADDR:
pio_enable_irq(sc, 1);
default:
error = ether_ioctl(ifp, cmd, data);
break;
}
return (error);
}
static void
vtbe_txfinish_locked(struct vtbe_softc *sc)
{
struct ifnet *ifp;
VTBE_ASSERT_LOCKED(sc);
ifp = sc->ifp;
}
static int
vq_init(struct vtbe_softc *sc)
{
struct vqueue_info *vq;
uint8_t *base;
int size;
int reg;
int pfn;
vq = &sc->vs_queues[sc->vs_curq];
vq->vq_qsize = DESC_COUNT;
reg = READ4(sc, VIRTIO_MMIO_QUEUE_PFN);
pfn = be32toh(reg);
vq->vq_pfn = pfn;
size = vring_size(vq->vq_qsize, VRING_ALIGN);
base = paddr_map(sc->beri_mem_offset,
(pfn << PAGE_SHIFT), size);
/* First pages are descriptors */
vq->vq_desc = (struct vring_desc *)base;
base += vq->vq_qsize * sizeof(struct vring_desc);
/* Then avail ring */
vq->vq_avail = (struct vring_avail *)base;
base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
/* Then it's rounded up to the next page */
base = (uint8_t *)roundup2((uintptr_t)base, VRING_ALIGN);
/* And the last pages are the used ring */
vq->vq_used = (struct vring_used *)base;
/* Mark queue as allocated, and start at 0 when we use it. */
vq->vq_flags = VQ_ALLOC;
vq->vq_last_avail = 0;
return (0);
}
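/*
 * Worked layout for DESC_COUNT == 256, assuming VRING_ALIGN equals the
 * 4 KB page size and the standard split-ring sizes from the virtio
 * spec (16-byte struct vring_desc, 8-byte used-ring elements):
 *
 *   offset    0: 256 * 16          = 4096 bytes of descriptors
 *   offset 4096: (2 + 256 + 1) * 2 =  518 bytes of avail ring
 *   offset 8192: used ring (avail end rounded up to VRING_ALIGN)
 *
 * which is exactly the pointer arithmetic performed above.
 */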
static void
vtbe_proc_rx(struct vtbe_softc *sc, struct vqueue_info *vq)
{
struct iovec iov[DESC_COUNT];
struct iovec *tiov;
struct ifnet *ifp;
struct uio uio;
struct mbuf *m;
int iolen;
int i;
int n;
ifp = sc->ifp;
n = vq_getchain(sc->beri_mem_offset, vq, iov,
DESC_COUNT, NULL);
KASSERT(n >= 1 && n <= DESC_COUNT,
("wrong n %d", n));
tiov = getcopy(iov, n);
iolen = 0;
for (i = 1; i < n; i++) {
iolen += iov[i].iov_len;
}
uio.uio_resid = iolen;
uio.uio_iov = &tiov[1];
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_iovcnt = (n - 1);
uio.uio_rw = UIO_WRITE;
if ((m = m_uiotombuf(&uio, M_NOWAIT, 0, ETHER_ALIGN,
M_PKTHDR)) == NULL) {
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
goto done;
}
m->m_pkthdr.rcvif = ifp;
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
CURVNET_SET(ifp->if_vnet);
VTBE_UNLOCK(sc);
(*ifp->if_input)(ifp, m);
VTBE_LOCK(sc);
CURVNET_RESTORE();
done:
free(tiov, M_DEVBUF);
vq_relchain(vq, iov, n, iolen + sc->hdrsize);
}
static void
vtbe_rxfinish_locked(struct vtbe_softc *sc)
{
struct vqueue_info *vq;
int reg;
/* TX queue */
vq = &sc->vs_queues[1];
if (!vq_ring_ready(vq))
return;
/* Process new descriptors */
vq->vq_save_used = be16toh(vq->vq_used->idx);
while (vq_has_descs(vq)) {
vtbe_proc_rx(sc, vq);
}
/* Interrupt the other side */
reg = htobe32(VIRTIO_MMIO_INT_VRING);
WRITE4(sc, VIRTIO_MMIO_INTERRUPT_STATUS, reg);
PIO_SET(sc->pio_send, Q_INTR, 1);
}
static void
vtbe_intr(void *arg)
{
struct vtbe_softc *sc;
int pending;
uint32_t reg;
sc = arg;
VTBE_LOCK(sc);
reg = PIO_READ(sc->pio_recv);
/* Ack */
PIO_SET(sc->pio_recv, reg, 0);
pending = htobe32(reg);
if (pending & Q_SEL) {
reg = READ4(sc, VIRTIO_MMIO_QUEUE_SEL);
sc->vs_curq = be32toh(reg);
}
if (pending & Q_PFN) {
vq_init(sc);
}
if (pending & Q_NOTIFY) {
/* beri rx / arm tx notify */
vtbe_txfinish_locked(sc);
}
if (pending & Q_NOTIFY1) {
vtbe_rxfinish_locked(sc);
}
VTBE_UNLOCK(sc);
}
static int
vtbe_get_hwaddr(struct vtbe_softc *sc, uint8_t *hwaddr)
{
int rnd;
/*
* Generate MAC address, use 'bsd' + 24 random low-order bits.
*/
rnd = arc4random() & 0x00ffffff;
hwaddr[0] = 'b';
hwaddr[1] = 's';
hwaddr[2] = 'd';
hwaddr[3] = rnd >> 16;
hwaddr[4] = rnd >> 8;
hwaddr[5] = rnd >> 0;
return (0);
}
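/*
 * The generated address is 62:73:64:xx:xx:xx ('b' = 0x62, 's' = 0x73,
 * 'd' = 0x64). Conveniently, 0x62 has the locally-administered bit
 * (0x02) set and the multicast bit (0x01) clear, so the result is a
 * valid unicast, locally-administered MAC address.
 */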
static int
pio_enable_irq(struct vtbe_softc *sc, int enable)
{
/*
* IRQ lines should be disabled while reprogramming the FPGA core.
*/
if (enable) {
if (sc->opened == 0) {
sc->opened = 1;
PIO_SETUP_IRQ(sc->pio_recv, vtbe_intr, sc);
}
} else {
if (sc->opened == 1) {
PIO_TEARDOWN_IRQ(sc->pio_recv);
sc->opened = 0;
}
}
return (0);
}
static int
vtbe_probe(device_t dev)
{
if (!ofw_bus_status_okay(dev))
return (ENXIO);
if (!ofw_bus_is_compatible(dev, "sri-cambridge,beri-vtnet"))
return (ENXIO);
device_set_desc(dev, "Virtio BERI Ethernet Controller");
return (BUS_PROBE_DEFAULT);
}
static int
vtbe_attach(device_t dev)
{
uint8_t macaddr[ETHER_ADDR_LEN];
struct vtbe_softc *sc;
struct ifnet *ifp;
int reg;
sc = device_get_softc(dev);
sc->dev = dev;
sc->hdrsize = sizeof(struct virtio_net_hdr);
if (bus_alloc_resources(dev, vtbe_spec, sc->res)) {
device_printf(dev, "could not allocate resources\n");
return (ENXIO);
}
/* Memory interface */
sc->bst = rman_get_bustag(sc->res[0]);
sc->bsh = rman_get_bushandle(sc->res[0]);
mtx_init(&sc->mtx, device_get_nameunit(sc->dev),
MTX_NETWORK_LOCK, MTX_DEF);
if (setup_offset(dev, &sc->beri_mem_offset) != 0)
return (ENXIO);
if (setup_pio(dev, "pio-send", &sc->pio_send) != 0)
return (ENXIO);
if (setup_pio(dev, "pio-recv", &sc->pio_recv) != 0)
return (ENXIO);
/* Setup MMIO */
/* Specify that we provide network device */
reg = htobe32(VIRTIO_ID_NETWORK);
WRITE4(sc, VIRTIO_MMIO_DEVICE_ID, reg);
/* The number of desc we support */
reg = htobe32(DESC_COUNT);
WRITE4(sc, VIRTIO_MMIO_QUEUE_NUM_MAX, reg);
/* Our features */
reg = htobe32(VIRTIO_NET_F_MAC |
VIRTIO_F_NOTIFY_ON_EMPTY);
WRITE4(sc, VIRTIO_MMIO_HOST_FEATURES, reg);
/* Get MAC */
if (vtbe_get_hwaddr(sc, macaddr)) {
device_printf(sc->dev, "can't get mac\n");
return (ENXIO);
}
/* Set up the ethernet interface. */
sc->ifp = ifp = if_alloc(IFT_ETHER);
ifp->if_baudrate = IF_Gbps(10);
ifp->if_softc = sc;
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX |
- IFF_MULTICAST | IFF_PROMISC | IFF_NEEDSEPOCH);
+ IFF_MULTICAST | IFF_PROMISC);
ifp->if_capabilities = IFCAP_VLAN_MTU;
ifp->if_capenable = ifp->if_capabilities;
ifp->if_start = vtbe_txstart;
ifp->if_ioctl = vtbe_ioctl;
ifp->if_init = vtbe_init;
IFQ_SET_MAXLEN(&ifp->if_snd, DESC_COUNT - 1);
ifp->if_snd.ifq_drv_maxlen = DESC_COUNT - 1;
IFQ_SET_READY(&ifp->if_snd);
ifp->if_hdrlen = sizeof(struct ether_vlan_header);
/* All ready to run, attach the ethernet interface. */
ether_ifattach(ifp, macaddr);
sc->is_attached = true;
return (0);
}
static device_method_t vtbe_methods[] = {
DEVMETHOD(device_probe, vtbe_probe),
DEVMETHOD(device_attach, vtbe_attach),
{ 0, 0 }
};
static driver_t vtbe_driver = {
"vtbe",
vtbe_methods,
sizeof(struct vtbe_softc),
};
static devclass_t vtbe_devclass;
DRIVER_MODULE(vtbe, simplebus, vtbe_driver, vtbe_devclass, 0, 0);
MODULE_DEPEND(vtbe, ether, 1, 1, 1);
diff --git a/sys/dev/dpaa/if_dtsec.c b/sys/dev/dpaa/if_dtsec.c
index 704aa22eda54..2c6291b07e34 100644
--- a/sys/dev/dpaa/if_dtsec.c
+++ b/sys/dev/dpaa/if_dtsec.c
@@ -1,856 +1,856 @@
/*-
* Copyright (c) 2011-2012 Semihalf.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "miibus_if.h"
#include
#include
#include
#include
#include "fman.h"
#include "if_dtsec.h"
#include "if_dtsec_im.h"
#include "if_dtsec_rm.h"
#define DTSEC_MIN_FRAME_SIZE 64
#define DTSEC_MAX_FRAME_SIZE 9600
#define DTSEC_REG_MAXFRM 0x110
/**
* @group dTSEC private defines.
* @{
*/
/**
* dTSEC FMan MAC exceptions info struct.
*/
struct dtsec_fm_mac_ex_str {
const int num;
const char *str;
};
/** @} */
/**
* @group FMan MAC routines.
* @{
*/
#define DTSEC_MAC_EXCEPTIONS_END (-1)
/**
* FMan MAC exceptions.
*/
static const struct dtsec_fm_mac_ex_str dtsec_fm_mac_exceptions[] = {
{ e_FM_MAC_EX_10G_MDIO_SCAN_EVENTMDIO, "MDIO scan event" },
{ e_FM_MAC_EX_10G_MDIO_CMD_CMPL, "MDIO command completion" },
{ e_FM_MAC_EX_10G_REM_FAULT, "Remote fault" },
{ e_FM_MAC_EX_10G_LOC_FAULT, "Local fault" },
{ e_FM_MAC_EX_10G_1TX_ECC_ER, "Transmit frame ECC error" },
{ e_FM_MAC_EX_10G_TX_FIFO_UNFL, "Transmit FIFO underflow" },
{ e_FM_MAC_EX_10G_TX_FIFO_OVFL, "Transmit FIFO overflow" },
{ e_FM_MAC_EX_10G_TX_ER, "Transmit frame error" },
{ e_FM_MAC_EX_10G_RX_FIFO_OVFL, "Receive FIFO overflow" },
{ e_FM_MAC_EX_10G_RX_ECC_ER, "Receive frame ECC error" },
{ e_FM_MAC_EX_10G_RX_JAB_FRM, "Receive jabber frame" },
{ e_FM_MAC_EX_10G_RX_OVRSZ_FRM, "Receive oversized frame" },
{ e_FM_MAC_EX_10G_RX_RUNT_FRM, "Receive runt frame" },
{ e_FM_MAC_EX_10G_RX_FRAG_FRM, "Receive fragment frame" },
{ e_FM_MAC_EX_10G_RX_LEN_ER, "Receive payload length error" },
{ e_FM_MAC_EX_10G_RX_CRC_ER, "Receive CRC error" },
{ e_FM_MAC_EX_10G_RX_ALIGN_ER, "Receive alignment error" },
{ e_FM_MAC_EX_1G_BAB_RX, "Babbling receive error" },
{ e_FM_MAC_EX_1G_RX_CTL, "Receive control (pause frame) interrupt" },
{ e_FM_MAC_EX_1G_GRATEFUL_TX_STP_COMPLET, "Graceful transmit stop "
"complete" },
{ e_FM_MAC_EX_1G_BAB_TX, "Babbling transmit error" },
{ e_FM_MAC_EX_1G_TX_CTL, "Transmit control (pause frame) interrupt" },
{ e_FM_MAC_EX_1G_TX_ERR, "Transmit error" },
{ e_FM_MAC_EX_1G_LATE_COL, "Late collision" },
{ e_FM_MAC_EX_1G_COL_RET_LMT, "Collision retry limit" },
{ e_FM_MAC_EX_1G_TX_FIFO_UNDRN, "Transmit FIFO underrun" },
{ e_FM_MAC_EX_1G_MAG_PCKT, "Magic Packet detected when dTSEC is in "
"Magic Packet detection mode" },
{ e_FM_MAC_EX_1G_MII_MNG_RD_COMPLET, "MII management read completion" },
{ e_FM_MAC_EX_1G_MII_MNG_WR_COMPLET, "MII management write completion" },
{ e_FM_MAC_EX_1G_GRATEFUL_RX_STP_COMPLET, "Graceful receive stop "
"complete" },
{ e_FM_MAC_EX_1G_TX_DATA_ERR, "Internal data error on transmit" },
{ e_FM_MAC_EX_1G_RX_DATA_ERR, "Internal data error on receive" },
{ e_FM_MAC_EX_1G_1588_TS_RX_ERR, "Time-Stamp Receive Error" },
{ e_FM_MAC_EX_1G_RX_MIB_CNT_OVFL, "MIB counter overflow" },
{ DTSEC_MAC_EXCEPTIONS_END, "" }
};
static const char *
dtsec_fm_mac_ex_to_str(e_FmMacExceptions exception)
{
int i;
for (i = 0; dtsec_fm_mac_exceptions[i].num != exception &&
dtsec_fm_mac_exceptions[i].num != DTSEC_MAC_EXCEPTIONS_END; ++i)
;
if (dtsec_fm_mac_exceptions[i].num == DTSEC_MAC_EXCEPTIONS_END)
return ("");
return (dtsec_fm_mac_exceptions[i].str);
}
static void
dtsec_fm_mac_mdio_event_callback(t_Handle h_App,
e_FmMacExceptions exception)
{
struct dtsec_softc *sc;
sc = h_App;
device_printf(sc->sc_dev, "MDIO event %i: %s.\n", exception,
dtsec_fm_mac_ex_to_str(exception));
}
static void
dtsec_fm_mac_exception_callback(t_Handle app, e_FmMacExceptions exception)
{
struct dtsec_softc *sc;
sc = app;
device_printf(sc->sc_dev, "MAC exception %i: %s.\n", exception,
dtsec_fm_mac_ex_to_str(exception));
}
static void
dtsec_fm_mac_free(struct dtsec_softc *sc)
{
if (sc->sc_mach == NULL)
return;
FM_MAC_Disable(sc->sc_mach, e_COMM_MODE_RX_AND_TX);
FM_MAC_Free(sc->sc_mach);
sc->sc_mach = NULL;
}
static int
dtsec_fm_mac_init(struct dtsec_softc *sc, uint8_t *mac)
{
t_FmMacParams params;
t_Error error;
memset(&params, 0, sizeof(params));
memcpy(&params.addr, mac, sizeof(params.addr));
params.baseAddr = rman_get_bushandle(sc->sc_mem);
params.enetMode = sc->sc_mac_enet_mode;
params.macId = sc->sc_eth_id;
params.mdioIrq = sc->sc_mac_mdio_irq;
params.f_Event = dtsec_fm_mac_mdio_event_callback;
params.f_Exception = dtsec_fm_mac_exception_callback;
params.h_App = sc;
params.h_Fm = sc->sc_fmh;
sc->sc_mach = FM_MAC_Config(&params);
if (sc->sc_mach == NULL) {
device_printf(sc->sc_dev, "couldn't configure FM_MAC module.\n"
);
return (ENXIO);
}
error = FM_MAC_ConfigResetOnInit(sc->sc_mach, TRUE);
if (error != E_OK) {
device_printf(sc->sc_dev, "couldn't enable reset on init "
"feature.\n");
dtsec_fm_mac_free(sc);
return (ENXIO);
}
/* Do not inform about pause frames */
error = FM_MAC_ConfigException(sc->sc_mach, e_FM_MAC_EX_1G_RX_CTL,
FALSE);
if (error != E_OK) {
device_printf(sc->sc_dev, "couldn't disable pause frames "
"exception.\n");
dtsec_fm_mac_free(sc);
return (ENXIO);
}
error = FM_MAC_Init(sc->sc_mach);
if (error != E_OK) {
device_printf(sc->sc_dev, "couldn't initialize FM_MAC module."
"\n");
dtsec_fm_mac_free(sc);
return (ENXIO);
}
return (0);
}
/** @} */
/**
* @group FMan PORT routines.
* @{
*/
static const char *
dtsec_fm_port_ex_to_str(e_FmPortExceptions exception)
{
switch (exception) {
case e_FM_PORT_EXCEPTION_IM_BUSY:
return ("IM: RX busy");
default:
return ("");
}
}
void
dtsec_fm_port_rx_exception_callback(t_Handle app,
e_FmPortExceptions exception)
{
struct dtsec_softc *sc;
sc = app;
device_printf(sc->sc_dev, "RX exception: %i: %s.\n", exception,
dtsec_fm_port_ex_to_str(exception));
}
void
dtsec_fm_port_tx_exception_callback(t_Handle app,
e_FmPortExceptions exception)
{
struct dtsec_softc *sc;
sc = app;
device_printf(sc->sc_dev, "TX exception: %i: %s.\n", exception,
dtsec_fm_port_ex_to_str(exception));
}
e_FmPortType
dtsec_fm_port_rx_type(enum eth_dev_type type)
{
switch (type) {
case ETH_DTSEC:
return (e_FM_PORT_TYPE_RX);
case ETH_10GSEC:
return (e_FM_PORT_TYPE_RX_10G);
default:
return (e_FM_PORT_TYPE_DUMMY);
}
}
e_FmPortType
dtsec_fm_port_tx_type(enum eth_dev_type type)
{
switch (type) {
case ETH_DTSEC:
return (e_FM_PORT_TYPE_TX);
case ETH_10GSEC:
return (e_FM_PORT_TYPE_TX_10G);
default:
return (e_FM_PORT_TYPE_DUMMY);
}
}
static void
dtsec_fm_port_free_both(struct dtsec_softc *sc)
{
if (sc->sc_rxph) {
FM_PORT_Free(sc->sc_rxph);
sc->sc_rxph = NULL;
}
if (sc->sc_txph) {
FM_PORT_Free(sc->sc_txph);
sc->sc_txph = NULL;
}
}
/** @} */
/**
* @group IFnet routines.
* @{
*/
static int
dtsec_set_mtu(struct dtsec_softc *sc, unsigned int mtu)
{
mtu += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
DTSEC_LOCK_ASSERT(sc);
if (mtu >= DTSEC_MIN_FRAME_SIZE && mtu <= DTSEC_MAX_FRAME_SIZE) {
bus_write_4(sc->sc_mem, DTSEC_REG_MAXFRM, mtu);
return (mtu);
}
return (0);
}
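/*
 * Example of the arithmetic above: the caller passes the IP-layer MTU,
 * to which 14 (ETHER_HDR_LEN) + 4 (ETHER_VLAN_ENCAP_LEN) +
 * 4 (ETHER_CRC_LEN) = 22 bytes of Ethernet overhead are added before
 * MAXFRM is programmed, so the largest MTU this accepts is
 * DTSEC_MAX_FRAME_SIZE - 22 = 9578 bytes.
 */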
static int
dtsec_if_enable_locked(struct dtsec_softc *sc)
{
int error;
DTSEC_LOCK_ASSERT(sc);
error = FM_MAC_Enable(sc->sc_mach, e_COMM_MODE_RX_AND_TX);
if (error != E_OK)
return (EIO);
error = FM_PORT_Enable(sc->sc_rxph);
if (error != E_OK)
return (EIO);
error = FM_PORT_Enable(sc->sc_txph);
if (error != E_OK)
return (EIO);
sc->sc_ifnet->if_drv_flags |= IFF_DRV_RUNNING;
/* Refresh link state */
dtsec_miibus_statchg(sc->sc_dev);
return (0);
}
static int
dtsec_if_disable_locked(struct dtsec_softc *sc)
{
int error;
DTSEC_LOCK_ASSERT(sc);
error = FM_MAC_Disable(sc->sc_mach, e_COMM_MODE_RX_AND_TX);
if (error != E_OK)
return (EIO);
error = FM_PORT_Disable(sc->sc_rxph);
if (error != E_OK)
return (EIO);
error = FM_PORT_Disable(sc->sc_txph);
if (error != E_OK)
return (EIO);
sc->sc_ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
return (0);
}
static int
dtsec_if_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
struct dtsec_softc *sc;
struct ifreq *ifr;
int error;
sc = ifp->if_softc;
ifr = (struct ifreq *)data;
error = 0;
/* Basic functionality to achieve media status reports */
switch (command) {
case SIOCSIFMTU:
DTSEC_LOCK(sc);
if (dtsec_set_mtu(sc, ifr->ifr_mtu))
ifp->if_mtu = ifr->ifr_mtu;
else
error = EINVAL;
DTSEC_UNLOCK(sc);
break;
case SIOCSIFFLAGS:
DTSEC_LOCK(sc);
if (sc->sc_ifnet->if_flags & IFF_UP)
error = dtsec_if_enable_locked(sc);
else
error = dtsec_if_disable_locked(sc);
DTSEC_UNLOCK(sc);
break;
case SIOCGIFMEDIA:
case SIOCSIFMEDIA:
error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media,
command);
break;
default:
error = ether_ioctl(ifp, command, data);
}
return (error);
}
static void
dtsec_if_tick(void *arg)
{
struct dtsec_softc *sc;
sc = arg;
/* TODO */
DTSEC_LOCK(sc);
mii_tick(sc->sc_mii);
callout_reset(&sc->sc_tick_callout, hz, dtsec_if_tick, sc);
DTSEC_UNLOCK(sc);
}
static void
dtsec_if_deinit_locked(struct dtsec_softc *sc)
{
DTSEC_LOCK_ASSERT(sc);
DTSEC_UNLOCK(sc);
callout_drain(&sc->sc_tick_callout);
DTSEC_LOCK(sc);
}
static void
dtsec_if_init_locked(struct dtsec_softc *sc)
{
int error;
DTSEC_LOCK_ASSERT(sc);
/* Set MAC address */
error = FM_MAC_ModifyMacAddr(sc->sc_mach,
(t_EnetAddr *)IF_LLADDR(sc->sc_ifnet));
if (error != E_OK) {
device_printf(sc->sc_dev, "couldn't set MAC address.\n");
goto err;
}
/* Start MII polling */
if (sc->sc_mii)
callout_reset(&sc->sc_tick_callout, hz, dtsec_if_tick, sc);
if (sc->sc_ifnet->if_flags & IFF_UP) {
error = dtsec_if_enable_locked(sc);
if (error != 0)
goto err;
} else {
error = dtsec_if_disable_locked(sc);
if (error != 0)
goto err;
}
return;
err:
dtsec_if_deinit_locked(sc);
device_printf(sc->sc_dev, "initialization error.\n");
return;
}
static void
dtsec_if_init(void *data)
{
struct dtsec_softc *sc;
sc = data;
DTSEC_LOCK(sc);
dtsec_if_init_locked(sc);
DTSEC_UNLOCK(sc);
}
static void
dtsec_if_start(struct ifnet *ifp)
{
struct dtsec_softc *sc;
sc = ifp->if_softc;
DTSEC_LOCK(sc);
sc->sc_start_locked(sc);
DTSEC_UNLOCK(sc);
}
static void
dtsec_if_watchdog(struct ifnet *ifp)
{
/* TODO */
}
/** @} */
/**
* @group IFmedia routines.
* @{
*/
static int
dtsec_ifmedia_upd(struct ifnet *ifp)
{
struct dtsec_softc *sc = ifp->if_softc;
DTSEC_LOCK(sc);
mii_mediachg(sc->sc_mii);
DTSEC_UNLOCK(sc);
return (0);
}
static void
dtsec_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
struct dtsec_softc *sc = ifp->if_softc;
DTSEC_LOCK(sc);
mii_pollstat(sc->sc_mii);
ifmr->ifm_active = sc->sc_mii->mii_media_active;
ifmr->ifm_status = sc->sc_mii->mii_media_status;
DTSEC_UNLOCK(sc);
}
/** @} */
/**
* @group dTSEC bus interface.
* @{
*/
static void
dtsec_configure_mode(struct dtsec_softc *sc)
{
char tunable[64];
snprintf(tunable, sizeof(tunable), "%s.independent_mode",
device_get_nameunit(sc->sc_dev));
sc->sc_mode = DTSEC_MODE_REGULAR;
TUNABLE_INT_FETCH(tunable, &sc->sc_mode);
if (sc->sc_mode == DTSEC_MODE_REGULAR) {
sc->sc_port_rx_init = dtsec_rm_fm_port_rx_init;
sc->sc_port_tx_init = dtsec_rm_fm_port_tx_init;
sc->sc_start_locked = dtsec_rm_if_start_locked;
} else {
sc->sc_port_rx_init = dtsec_im_fm_port_rx_init;
sc->sc_port_tx_init = dtsec_im_fm_port_tx_init;
sc->sc_start_locked = dtsec_im_if_start_locked;
}
device_printf(sc->sc_dev, "Configured for %s mode.\n",
(sc->sc_mode == DTSEC_MODE_REGULAR) ? "regular" : "independent");
}
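/*
 * The mode is chosen per instance through a loader tunable named after
 * the device; a sketch for dtsec0, assuming DTSEC_MODE_REGULAR is the
 * zero default so any other value selects independent mode:
 *
 *   # /boot/loader.conf
 *   dtsec0.independent_mode=1
 */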
int
dtsec_attach(device_t dev)
{
struct dtsec_softc *sc;
device_t parent;
int error;
struct ifnet *ifp;
sc = device_get_softc(dev);
parent = device_get_parent(dev);
sc->sc_dev = dev;
sc->sc_mac_mdio_irq = NO_IRQ;
/* Check if MallocSmart allocator is ready */
if (XX_MallocSmartInit() != E_OK)
return (ENXIO);
/* Init locks */
mtx_init(&sc->sc_lock, device_get_nameunit(dev),
"DTSEC Global Lock", MTX_DEF);
mtx_init(&sc->sc_mii_lock, device_get_nameunit(dev),
"DTSEC MII Lock", MTX_DEF);
/* Init callouts */
callout_init(&sc->sc_tick_callout, CALLOUT_MPSAFE);
/* Read configuration */
if ((error = fman_get_handle(parent, &sc->sc_fmh)) != 0)
return (error);
if ((error = fman_get_muram_handle(parent, &sc->sc_muramh)) != 0)
return (error);
if ((error = fman_get_bushandle(parent, &sc->sc_fm_base)) != 0)
return (error);
/* Configure working mode */
dtsec_configure_mode(sc);
/* If we are working in regular mode, configure BMAN and QMAN */
if (sc->sc_mode == DTSEC_MODE_REGULAR) {
/* Create RX buffer pool */
error = dtsec_rm_pool_rx_init(sc);
if (error != 0)
return (EIO);
/* Create RX frame queue range */
error = dtsec_rm_fqr_rx_init(sc);
if (error != 0)
return (EIO);
/* Create frame info pool */
error = dtsec_rm_fi_pool_init(sc);
if (error != 0)
return (EIO);
/* Create TX frame queue range */
error = dtsec_rm_fqr_tx_init(sc);
if (error != 0)
return (EIO);
}
/* Init FMan MAC module. */
error = dtsec_fm_mac_init(sc, sc->sc_mac_addr);
if (error != 0) {
dtsec_detach(dev);
return (ENXIO);
}
/* Init FMan TX port */
error = sc->sc_port_tx_init(sc, device_get_unit(sc->sc_dev));
if (error != 0) {
dtsec_detach(dev);
return (ENXIO);
}
/* Init FMan RX port */
error = sc->sc_port_rx_init(sc, device_get_unit(sc->sc_dev));
if (error != 0) {
dtsec_detach(dev);
return (ENXIO);
}
/* Create network interface for upper layers */
ifp = sc->sc_ifnet = if_alloc(IFT_ETHER);
if (ifp == NULL) {
device_printf(sc->sc_dev, "if_alloc() failed.\n");
dtsec_detach(dev);
return (ENOMEM);
}
ifp->if_softc = sc;
ifp->if_mtu = ETHERMTU; /* TODO: Configure */
- ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_NEEDSEPOCH;
+ ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST;
ifp->if_init = dtsec_if_init;
ifp->if_start = dtsec_if_start;
ifp->if_ioctl = dtsec_if_ioctl;
ifp->if_snd.ifq_maxlen = IFQ_MAXLEN;
if (sc->sc_phy_addr >= 0)
if_initname(ifp, device_get_name(sc->sc_dev),
device_get_unit(sc->sc_dev));
else
if_initname(ifp, "dtsec_phy", device_get_unit(sc->sc_dev));
/* TODO */
#if 0
IFQ_SET_MAXLEN(&ifp->if_snd, TSEC_TX_NUM_DESC - 1);
ifp->if_snd.ifq_drv_maxlen = TSEC_TX_NUM_DESC - 1;
IFQ_SET_READY(&ifp->if_snd);
#endif
ifp->if_capabilities = IFCAP_JUMBO_MTU; /* TODO: HWCSUM */
ifp->if_capenable = ifp->if_capabilities;
/* Attach PHY(s) */
error = mii_attach(sc->sc_dev, &sc->sc_mii_dev, ifp, dtsec_ifmedia_upd,
dtsec_ifmedia_sts, BMSR_DEFCAPMASK, sc->sc_phy_addr,
MII_OFFSET_ANY, 0);
if (error) {
device_printf(sc->sc_dev, "attaching PHYs failed: %d\n", error);
dtsec_detach(sc->sc_dev);
return (error);
}
sc->sc_mii = device_get_softc(sc->sc_mii_dev);
/* Attach to stack */
ether_ifattach(ifp, sc->sc_mac_addr);
return (0);
}
int
dtsec_detach(device_t dev)
{
struct dtsec_softc *sc;
if_t ifp;
sc = device_get_softc(dev);
ifp = sc->sc_ifnet;
if (device_is_attached(dev)) {
ether_ifdetach(ifp);
/* Shutdown interface */
DTSEC_LOCK(sc);
dtsec_if_deinit_locked(sc);
DTSEC_UNLOCK(sc);
}
if (sc->sc_ifnet) {
if_free(sc->sc_ifnet);
sc->sc_ifnet = NULL;
}
if (sc->sc_mode == DTSEC_MODE_REGULAR) {
/* Free RX/TX FQRs */
dtsec_rm_fqr_rx_free(sc);
dtsec_rm_fqr_tx_free(sc);
/* Free frame info pool */
dtsec_rm_fi_pool_free(sc);
/* Free RX buffer pool */
dtsec_rm_pool_rx_free(sc);
}
dtsec_fm_mac_free(sc);
dtsec_fm_port_free_both(sc);
/* Destroy lock */
mtx_destroy(&sc->sc_lock);
return (0);
}
int
dtsec_suspend(device_t dev)
{
return (0);
}
int
dtsec_resume(device_t dev)
{
return (0);
}
int
dtsec_shutdown(device_t dev)
{
return (0);
}
/** @} */
/**
* @group MII bus interface.
* @{
*/
int
dtsec_miibus_readreg(device_t dev, int phy, int reg)
{
struct dtsec_softc *sc;
sc = device_get_softc(dev);
return (MIIBUS_READREG(sc->sc_mdio, phy, reg));
}
int
dtsec_miibus_writereg(device_t dev, int phy, int reg, int value)
{
struct dtsec_softc *sc;
sc = device_get_softc(dev);
return (MIIBUS_WRITEREG(sc->sc_mdio, phy, reg, value));
}
void
dtsec_miibus_statchg(device_t dev)
{
struct dtsec_softc *sc;
e_EnetSpeed speed;
bool duplex;
int error;
sc = device_get_softc(dev);
DTSEC_LOCK_ASSERT(sc);
duplex = ((sc->sc_mii->mii_media_active & IFM_GMASK) == IFM_FDX);
switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) {
case IFM_1000_T:
case IFM_1000_SX:
speed = e_ENET_SPEED_1000;
break;
case IFM_100_TX:
speed = e_ENET_SPEED_100;
break;
case IFM_10_T:
speed = e_ENET_SPEED_10;
break;
default:
speed = e_ENET_SPEED_10;
}
error = FM_MAC_AdjustLink(sc->sc_mach, speed, duplex);
if (error != E_OK)
device_printf(sc->sc_dev, "error while adjusting MAC speed.\n");
}
/** @} */
diff --git a/sys/dev/hyperv/netvsc/if_hn.c b/sys/dev/hyperv/netvsc/if_hn.c
index bc96775ad553..e9d1b9439671 100644
--- a/sys/dev/hyperv/netvsc/if_hn.c
+++ b/sys/dev/hyperv/netvsc/if_hn.c
@@ -1,7575 +1,7574 @@
/*-
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 2004-2006 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_hn.h"
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_rss.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef RSS
#include
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "vmbus_if.h"
#define HN_IFSTART_SUPPORT
#define HN_RING_CNT_DEF_MAX 8
#define HN_VFMAP_SIZE_DEF 8
#define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */
/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT 512
#define HN_RNDIS_PKT_LEN \
(sizeof(struct rndis_packet_msg) + \
HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \
HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \
HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \
HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
#define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE
#define HN_TX_DATA_BOUNDARY PAGE_SIZE
#define HN_TX_DATA_MAXSIZE IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1)
#define HN_DIRECT_TX_SIZE_DEF 128
#define HN_EARLY_TXEOF_THRESH 8
#define HN_PKTBUF_LEN_DEF (16 * 1024)
#define HN_LROENT_CNT_DEF 128
#define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF (25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu)
#define HN_LRO_ACKCNT_DEF 1
#define HN_LOCK_INIT(sc) \
sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc) \
do { \
while (sx_try_xlock(&(sc)->hn_lock) == 0) \
DELAY(1000); \
} while (0)
#define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock)
#define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc) \
((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc) \
((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)
#define HN_PKTSIZE_MIN(align) \
roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
HN_RNDIS_PKT_LEN, (align))
#define HN_PKTSIZE(m, align) \
roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))
#ifdef RSS
#define HN_RING_IDX2CPU(sc, idx) rss_getcpu((idx) % rss_getnumbuckets())
#else
#define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus)
#endif
struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
SLIST_ENTRY(hn_txdesc) link;
#endif
STAILQ_ENTRY(hn_txdesc) agg_link;
/* Aggregated txdescs, in sending order. */
STAILQ_HEAD(, hn_txdesc) agg_list;
/* The oldest packet, if transmission aggregation happens. */
struct mbuf *m;
struct hn_tx_ring *txr;
int refs;
uint32_t flags; /* HN_TXD_FLAG_ */
struct hn_nvs_sendctx send_ctx;
uint32_t chim_index;
int chim_size;
bus_dmamap_t data_dmap;
bus_addr_t rndis_pkt_paddr;
struct rndis_packet_msg *rndis_pkt;
bus_dmamap_t rndis_pkt_dmap;
};
#define HN_TXD_FLAG_ONLIST 0x0001
#define HN_TXD_FLAG_DMAMAP 0x0002
#define HN_TXD_FLAG_ONAGG 0x0004
struct hn_rxinfo {
uint32_t vlan_info;
uint32_t csum_info;
uint32_t hash_info;
uint32_t hash_value;
};
struct hn_rxvf_setarg {
struct hn_rx_ring *rxr;
struct ifnet *vf_ifp;
};
#define HN_RXINFO_VLAN 0x0001
#define HN_RXINFO_CSUM 0x0002
#define HN_RXINFO_HASHINF 0x0004
#define HN_RXINFO_HASHVAL 0x0008
#define HN_RXINFO_ALL \
(HN_RXINFO_VLAN | \
HN_RXINFO_CSUM | \
HN_RXINFO_HASHINF | \
HN_RXINFO_HASHVAL)
#define HN_NDIS_VLAN_INFO_INVALID 0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID 0
#define HN_NDIS_HASH_INFO_INVALID 0
static int hn_probe(device_t);
static int hn_attach(device_t);
static int hn_detach(device_t);
static int hn_shutdown(device_t);
static void hn_chan_callback(struct vmbus_channel *,
void *);
static void hn_init(void *);
static int hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void hn_start(struct ifnet *);
#endif
static int hn_transmit(struct ifnet *, struct mbuf *);
static void hn_xmit_qflush(struct ifnet *);
static int hn_ifmedia_upd(struct ifnet *);
static void hn_ifmedia_sts(struct ifnet *,
struct ifmediareq *);
static void hn_ifnet_event(void *, struct ifnet *, int);
static void hn_ifaddr_event(void *, struct ifnet *);
static void hn_ifnet_attevent(void *, struct ifnet *);
static void hn_ifnet_detevent(void *, struct ifnet *);
static void hn_ifnet_lnkevent(void *, struct ifnet *, int);
static bool hn_ismyvf(const struct hn_softc *,
const struct ifnet *);
static void hn_rxvf_change(struct hn_softc *,
struct ifnet *, bool);
static void hn_rxvf_set(struct hn_softc *, struct ifnet *);
static void hn_rxvf_set_task(void *, int);
static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *);
static int hn_xpnt_vf_iocsetflags(struct hn_softc *);
static int hn_xpnt_vf_iocsetcaps(struct hn_softc *,
struct ifreq *);
static void hn_xpnt_vf_saveifflags(struct hn_softc *);
static bool hn_xpnt_vf_isready(struct hn_softc *);
static void hn_xpnt_vf_setready(struct hn_softc *);
static void hn_xpnt_vf_init_taskfunc(void *, int);
static void hn_xpnt_vf_init(struct hn_softc *);
static void hn_xpnt_vf_setenable(struct hn_softc *);
static void hn_xpnt_vf_setdisable(struct hn_softc *, bool);
static void hn_vf_rss_fixup(struct hn_softc *, bool);
static void hn_vf_rss_restore(struct hn_softc *);
static int hn_rndis_rxinfo(const void *, int,
struct hn_rxinfo *);
static void hn_rndis_rx_data(struct hn_rx_ring *,
const void *, int);
static void hn_rndis_rx_status(struct hn_softc *,
const void *, int);
static void hn_rndis_init_fixat(struct hn_softc *, int);
static void hn_nvs_handle_notify(struct hn_softc *,
const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_comp(struct hn_softc *,
struct vmbus_channel *,
const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_rxbuf(struct hn_rx_ring *,
struct vmbus_channel *,
const struct vmbus_chanpkt_hdr *);
static void hn_nvs_ack_rxbuf(struct hn_rx_ring *,
struct vmbus_channel *, uint64_t);
#if __FreeBSD_version >= 1100099
static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS);
static void hn_stop(struct hn_softc *, bool);
static void hn_init_locked(struct hn_softc *);
static int hn_chan_attach(struct hn_softc *,
struct vmbus_channel *);
static void hn_chan_detach(struct hn_softc *,
struct vmbus_channel *);
static int hn_attach_subchans(struct hn_softc *);
static void hn_detach_allchans(struct hn_softc *);
static void hn_chan_rollup(struct hn_rx_ring *,
struct hn_tx_ring *);
static void hn_set_ring_inuse(struct hn_softc *, int);
static int hn_synth_attach(struct hn_softc *, int);
static void hn_synth_detach(struct hn_softc *);
static int hn_synth_alloc_subchans(struct hn_softc *,
int *);
static bool hn_synth_attachable(const struct hn_softc *);
static void hn_suspend(struct hn_softc *);
static void hn_suspend_data(struct hn_softc *);
static void hn_suspend_mgmt(struct hn_softc *);
static void hn_resume(struct hn_softc *);
static void hn_resume_data(struct hn_softc *);
static void hn_resume_mgmt(struct hn_softc *);
static void hn_suspend_mgmt_taskfunc(void *, int);
static void hn_chan_drain(struct hn_softc *,
struct vmbus_channel *);
static void hn_disable_rx(struct hn_softc *);
static void hn_drain_rxtx(struct hn_softc *, int);
static void hn_polling(struct hn_softc *, u_int);
static void hn_chan_polling(struct vmbus_channel *, u_int);
static void hn_mtu_change_fixup(struct hn_softc *);
static void hn_update_link_status(struct hn_softc *);
static void hn_change_network(struct hn_softc *);
static void hn_link_taskfunc(void *, int);
static void hn_netchg_init_taskfunc(void *, int);
static void hn_netchg_status_taskfunc(void *, int);
static void hn_link_status(struct hn_softc *);
static int hn_create_rx_data(struct hn_softc *, int);
static void hn_destroy_rx_data(struct hn_softc *);
static int hn_check_iplen(const struct mbuf *, int);
static void hn_rxpkt_proto(const struct mbuf *, int *, int *);
static int hn_set_rxfilter(struct hn_softc *, uint32_t);
static int hn_rxfilter_config(struct hn_softc *);
static int hn_rss_reconfig(struct hn_softc *);
static void hn_rss_ind_fixup(struct hn_softc *);
static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t);
static int hn_rxpkt(struct hn_rx_ring *, const void *,
int, const struct hn_rxinfo *);
static uint32_t hn_rss_type_fromndis(uint32_t);
static uint32_t hn_rss_type_tondis(uint32_t);
static int hn_tx_ring_create(struct hn_softc *, int);
static void hn_tx_ring_destroy(struct hn_tx_ring *);
static int hn_create_tx_data(struct hn_softc *, int);
static void hn_fixup_tx_data(struct hn_softc *);
static void hn_fixup_rx_data(struct hn_softc *);
static void hn_destroy_tx_data(struct hn_softc *);
static void hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static void hn_txdesc_gc(struct hn_tx_ring *,
struct hn_txdesc *);
static int hn_encap(struct ifnet *, struct hn_tx_ring *,
struct hn_txdesc *, struct mbuf **);
static int hn_txpkt(struct ifnet *, struct hn_tx_ring *,
struct hn_txdesc *);
static void hn_set_chim_size(struct hn_softc *, int);
static void hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool hn_tx_ring_pending(struct hn_tx_ring *);
static void hn_tx_ring_qflush(struct hn_tx_ring *);
static void hn_resume_tx(struct hn_softc *, int);
static void hn_set_txagg(struct hn_softc *);
static void *hn_try_txagg(struct ifnet *,
struct hn_tx_ring *, struct hn_txdesc *,
int);
static int hn_get_txswq_depth(const struct hn_tx_ring *);
static void hn_txpkt_done(struct hn_nvs_sendctx *,
struct hn_softc *, struct vmbus_channel *,
const void *, int);
static int hn_txpkt_sglist(struct hn_tx_ring *,
struct hn_txdesc *);
static int hn_txpkt_chim(struct hn_tx_ring *,
struct hn_txdesc *);
static int hn_xmit(struct hn_tx_ring *, int);
static void hn_xmit_taskfunc(void *, int);
static void hn_xmit_txeof(struct hn_tx_ring *);
static void hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int hn_start_locked(struct hn_tx_ring *, int);
static void hn_start_taskfunc(void *, int);
static void hn_start_txeof(struct hn_tx_ring *);
static void hn_start_txeof_taskfunc(void *, int);
#endif
SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
"Hyper-V network interface");
/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
&hn_trust_hosttcp, 0,
"Trust tcp segement verification on host side, "
"when csum info is missing (global setting)");
/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
&hn_trust_hostudp, 0,
"Trust udp datagram verification on host side, "
"when csum info is missing (global setting)");
/* Trust ip packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
&hn_trust_hostip, 0,
"Trust ip packet verification on host side, "
"when csum info is missing (global setting)");
/*
* Offload UDP/IPv4 checksum.
*/
static int hn_enable_udp4cs = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp4cs, CTLFLAG_RDTUN,
&hn_enable_udp4cs, 0, "Offload UDP/IPv4 checksum");
/*
* Offload UDP/IPv6 checksum.
*/
static int hn_enable_udp6cs = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, enable_udp6cs, CTLFLAG_RDTUN,
&hn_enable_udp6cs, 0, "Offload UDP/IPv6 checksum");
/* Stats. */
static counter_u64_t hn_udpcs_fixup;
SYSCTL_COUNTER_U64(_hw_hn, OID_AUTO, udpcs_fixup, CTLFLAG_RW,
&hn_udpcs_fixup, "# of UDP checksum fixup");
/*
* See hn_set_hlen().
*
* This value is for Azure. For Hyper-V, set this above
* 65536 to disable UDP datagram checksum fixup.
*/
static int hn_udpcs_fixup_mtu = 1420;
SYSCTL_INT(_hw_hn, OID_AUTO, udpcs_fixup_mtu, CTLFLAG_RWTUN,
&hn_udpcs_fixup_mtu, 0, "UDP checksum fixup MTU threshold");
/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
&hn_tso_maxlen, 0, "TSO burst limit");
/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
&hn_tx_chimney_size, 0, "Chimney send packet size limit");
/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
&hn_direct_tx_size, 0, "Size of the packet for direct transmission");
/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
&hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif
static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
&hn_tx_taskq_cnt, 0, "# of TX taskqueues");
#define HN_TX_TASKQ_M_INDEP 0
#define HN_TX_TASKQ_M_GLOBAL 1
#define HN_TX_TASKQ_M_EVTTQ 2
static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
&hn_tx_taskq_mode, 0, "TX taskqueue modes: "
"0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");
#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
&hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");
#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
&hn_use_if_start, 0, "Use if_start TX method");
#endif
/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
&hn_chan_cnt, 0,
"# of channels to use; each channel has one RX ring and one TX ring");
/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
&hn_tx_ring_cnt, 0, "# of TX rings to use");
/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
&hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");
/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
&hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif
/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
&hn_tx_agg_size, 0, "Packet transmission aggregation size limit");
/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
&hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");
/* VF list */
SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTLTYPE_STRING,
0, 0, hn_vflist_sysctl, "A", "VF list");
/* VF mapping */
SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING,
0, 0, hn_vfmap_sysctl, "A", "VF mapping");
/* Transparent VF */
static int hn_xpnt_vf = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN,
&hn_xpnt_vf, 0, "Transparent VF mode");
/* Accurate BPF support for Transparent VF */
static int hn_xpnt_vf_accbpf = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN,
&hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF");
/* Extra wait for transparent VF attach routine; unit seconds. */
static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN,
&hn_xpnt_vf_attwait, 0,
"Extra wait for transparent VF attach routing; unit: seconds");
static u_int hn_cpu_index; /* next CPU for channel */
static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */
static struct rmlock hn_vfmap_lock;
static int hn_vfmap_size;
static struct ifnet **hn_vfmap;
#ifndef RSS
static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
#endif /* !RSS */
static const struct hyperv_guid hn_guid = {
.hv_guid = {
0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e }
};
static device_method_t hn_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, hn_probe),
DEVMETHOD(device_attach, hn_attach),
DEVMETHOD(device_detach, hn_detach),
DEVMETHOD(device_shutdown, hn_shutdown),
DEVMETHOD_END
};
static driver_t hn_driver = {
"hn",
hn_methods,
sizeof(struct hn_softc)
};
static devclass_t hn_devclass;
DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);
#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
int i;
for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif
static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
txd->chim_size == 0, ("invalid rndis sglist txd"));
return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
&txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}
static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
struct hn_nvs_rndis rndis;
KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
txd->chim_size > 0, ("invalid rndis chim txd"));
rndis.nvs_type = HN_NVS_TYPE_RNDIS;
rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
rndis.nvs_chim_idx = txd->chim_index;
rndis.nvs_chim_sz = txd->chim_size;
return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
&rndis, sizeof(rndis), &txd->send_ctx));
}
static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
int i, bmap_cnt = sc->hn_chim_bmap_cnt;
u_long *bmap = sc->hn_chim_bmap;
uint32_t ret = HN_NVS_CHIM_IDX_INVALID;
for (i = 0; i < bmap_cnt; ++i) {
int idx;
idx = ffsl(~bmap[i]);
if (idx == 0)
continue;
--idx; /* ffsl is 1-based */
KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
("invalid i %d and idx %d", i, idx));
if (atomic_testandset_long(&bmap[i], idx))
continue;
ret = i * LONG_BIT + idx;
break;
}
return (ret);
}
static __inline void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
u_long mask;
uint32_t idx;
idx = chim_idx / LONG_BIT;
KASSERT(idx < sc->hn_chim_bmap_cnt,
("invalid chimney index 0x%x", chim_idx));
mask = 1UL << (chim_idx % LONG_BIT);
KASSERT(sc->hn_chim_bmap[idx] & mask,
("index bitmap 0x%lx, chimney index %u, "
"bitmap idx %d, bitmask 0x%lx",
sc->hn_chim_bmap[idx], chim_idx, idx, mask));
atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}
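/*
 * A minimal user-space sketch of the lock-free bitmap allocator used by
 * hn_chim_alloc()/hn_chim_free() above, substituting C11 atomics for the
 * kernel's atomic_testandset_long()/atomic_clear_long(); BMAP_WORDS and
 * the helper names are hypothetical. Each word is scanned for a clear
 * bit with ffsl(), and the bit is then claimed atomically so that two
 * concurrent allocators can never return the same index.
 */
#include <limits.h>
#include <stdatomic.h>
#include <strings.h>
#define BMAP_WORDS 4 /* 4 * LONG_BIT allocatable indices */
static _Atomic unsigned long bmap[BMAP_WORDS];
static int
bmap_alloc(void)
{
int i, idx;
unsigned long mask;
for (i = 0; i < BMAP_WORDS; ++i) {
idx = ffsl(~atomic_load(&bmap[i]));
if (idx == 0)
continue; /* no clear bit in this word */
--idx; /* ffsl() is 1-based */
mask = 1UL << idx;
/* Claim the bit; if we raced and lost, try the next word. */
if (atomic_fetch_or(&bmap[i], mask) & mask)
continue;
return (i * LONG_BIT + idx);
}
return (-1); /* every index is in use */
}
static void
bmap_free(int idx)
{
atomic_fetch_and(&bmap[idx / LONG_BIT], ~(1UL << (idx % LONG_BIT)));
}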
#if defined(INET6) || defined(INET)
#define PULLUP_HDR(m, len) \
do { \
if (__predict_false((m)->m_len < (len))) { \
(m) = m_pullup((m), (len)); \
if ((m) == NULL) \
return (NULL); \
} \
} while (0)
/*
* NOTE: If this function failed, the m_head would be freed.
*/
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
{
struct ether_vlan_header *evl;
struct tcphdr *th;
int ehlen;
KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));
PULLUP_HDR(m_head, sizeof(*evl));
evl = mtod(m_head, struct ether_vlan_header *);
if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
else
ehlen = ETHER_HDR_LEN;
m_head->m_pkthdr.l2hlen = ehlen;
#ifdef INET
if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
struct ip *ip;
int iphlen;
PULLUP_HDR(m_head, ehlen + sizeof(*ip));
ip = mtodo(m_head, ehlen);
iphlen = ip->ip_hl << 2;
m_head->m_pkthdr.l3hlen = iphlen;
PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
th = mtodo(m_head, ehlen + iphlen);
ip->ip_len = 0;
ip->ip_sum = 0;
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(IPPROTO_TCP));
}
#endif
#if defined(INET6) && defined(INET)
else
#endif
#ifdef INET6
{
struct ip6_hdr *ip6;
PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
ip6 = mtodo(m_head, ehlen);
if (ip6->ip6_nxt != IPPROTO_TCP) {
m_freem(m_head);
return (NULL);
}
m_head->m_pkthdr.l3hlen = sizeof(*ip6);
PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
th = mtodo(m_head, ehlen + sizeof(*ip6));
ip6->ip6_plen = 0;
th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
}
#endif
return (m_head);
}
/*
* NOTE: If this function failed, the m_head would be freed.
*/
static __inline struct mbuf *
hn_set_hlen(struct mbuf *m_head)
{
const struct ether_vlan_header *evl;
int ehlen;
PULLUP_HDR(m_head, sizeof(*evl));
evl = mtod(m_head, const struct ether_vlan_header *);
if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
else
ehlen = ETHER_HDR_LEN;
m_head->m_pkthdr.l2hlen = ehlen;
#ifdef INET
if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_UDP)) {
const struct ip *ip;
int iphlen;
PULLUP_HDR(m_head, ehlen + sizeof(*ip));
ip = mtodo(m_head, ehlen);
iphlen = ip->ip_hl << 2;
m_head->m_pkthdr.l3hlen = iphlen;
/*
* UDP checksum offload does not work in Azure, if the
* following conditions meet:
* - sizeof(IP hdr + UDP hdr + payload) > 1420.
* - IP_DF is not set in the IP hdr.
*
* Fallback to software checksum for these UDP datagrams.
*/
if ((m_head->m_pkthdr.csum_flags & CSUM_IP_UDP) &&
m_head->m_pkthdr.len > hn_udpcs_fixup_mtu + ehlen &&
(ntohs(ip->ip_off) & IP_DF) == 0) {
uint16_t off = ehlen + iphlen;
counter_u64_add(hn_udpcs_fixup, 1);
PULLUP_HDR(m_head, off + sizeof(struct udphdr));
*(uint16_t *)(m_head->m_data + off +
m_head->m_pkthdr.csum_data) = in_cksum_skip(
m_head, m_head->m_pkthdr.len, off);
m_head->m_pkthdr.csum_flags &= ~CSUM_IP_UDP;
}
}
#endif
#if defined(INET6) && defined(INET)
else
#endif
#ifdef INET6
{
const struct ip6_hdr *ip6;
PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
ip6 = mtodo(m_head, ehlen);
if (ip6->ip6_nxt != IPPROTO_TCP &&
ip6->ip6_nxt != IPPROTO_UDP) {
m_freem(m_head);
return (NULL);
}
m_head->m_pkthdr.l3hlen = sizeof(*ip6);
}
#endif
return (m_head);
}
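/*
 * Numeric example of the Azure fallback above, with the default
 * hn_udpcs_fixup_mtu of 1420 and a plain Ethernet header (ehlen == 14):
 * a UDP datagram gets its checksum computed in software whenever the
 * whole frame exceeds 1420 + 14 = 1434 bytes and IP_DF is clear, i.e.
 * whenever sizeof(IP hdr + UDP hdr + payload) > 1420 for a packet that
 * the network may fragment.
 */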
/*
* NOTE: If this function failed, the m_head would be freed.
*/
static __inline struct mbuf *
hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
{
const struct tcphdr *th;
int ehlen, iphlen;
*tcpsyn = 0;
ehlen = m_head->m_pkthdr.l2hlen;
iphlen = m_head->m_pkthdr.l3hlen;
PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
th = mtodo(m_head, ehlen + iphlen);
if (th->th_flags & TH_SYN)
*tcpsyn = 1;
return (m_head);
}
#undef PULLUP_HDR
#endif /* INET6 || INET */
static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
int error = 0;
HN_LOCK_ASSERT(sc);
if (sc->hn_rx_filter != filter) {
error = hn_rndis_set_rxfilter(sc, filter);
if (!error)
sc->hn_rx_filter = filter;
}
return (error);
}
static int
hn_rxfilter_config(struct hn_softc *sc)
{
struct ifnet *ifp = sc->hn_ifp;
uint32_t filter;
HN_LOCK_ASSERT(sc);
/*
* If the non-transparent mode VF is activated, we don't know how
* its RX filter is configured, so stick the synthetic device in
* the promiscuous mode.
*/
if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) {
filter = NDIS_PACKET_TYPE_PROMISCUOUS;
} else {
filter = NDIS_PACKET_TYPE_DIRECTED;
if (ifp->if_flags & IFF_BROADCAST)
filter |= NDIS_PACKET_TYPE_BROADCAST;
/* TODO: support multicast list */
if ((ifp->if_flags & IFF_ALLMULTI) ||
!CK_STAILQ_EMPTY(&ifp->if_multiaddrs))
filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
}
return (hn_set_rxfilter(sc, filter));
}
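/*
 * Compute the effective TX aggregation limits as the minimum of the
 * requested settings (sc->hn_agg_size/sc->hn_agg_pkts, -1 for auto),
 * the limits offered by the host-side RNDIS and the chimney sending
 * buffer size, then propagate the results to all TX rings.
 * Aggregation is disabled whenever a limit becomes too small to fit
 * more than one packet.
 */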
static void
hn_set_txagg(struct hn_softc *sc)
{
uint32_t size, pkts;
int i;
/*
* Setup aggregation size.
*/
if (sc->hn_agg_size < 0)
size = UINT32_MAX;
else
size = sc->hn_agg_size;
if (sc->hn_rndis_agg_size < size)
size = sc->hn_rndis_agg_size;
/* NOTE: We only aggregate packets using chimney sending buffers. */
if (size > (uint32_t)sc->hn_chim_szmax)
size = sc->hn_chim_szmax;
if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
/* Disable */
size = 0;
pkts = 0;
goto done;
}
/* NOTE: Type of the per TX ring setting is 'int'. */
if (size > INT_MAX)
size = INT_MAX;
/*
* Setup aggregation packet count.
*/
if (sc->hn_agg_pkts < 0)
pkts = UINT32_MAX;
else
pkts = sc->hn_agg_pkts;
if (sc->hn_rndis_agg_pkts < pkts)
pkts = sc->hn_rndis_agg_pkts;
if (pkts <= 1) {
/* Disable */
size = 0;
pkts = 0;
goto done;
}
/* NOTE: Type of the per TX ring setting is 'short'. */
if (pkts > SHRT_MAX)
pkts = SHRT_MAX;
done:
/* NOTE: Type of the per TX ring setting is 'short'. */
if (sc->hn_rndis_agg_align > SHRT_MAX) {
/* Disable */
size = 0;
pkts = 0;
}
if (bootverbose) {
if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
size, pkts, sc->hn_rndis_agg_align);
}
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
mtx_lock(&txr->hn_tx_lock);
txr->hn_agg_szmax = size;
txr->hn_agg_pktmax = pkts;
txr->hn_agg_align = sc->hn_rndis_agg_align;
mtx_unlock(&txr->hn_tx_lock);
}
}
static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{
KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
return txr->hn_txdesc_cnt;
return hn_tx_swq_depth;
}
static int
hn_rss_reconfig(struct hn_softc *sc)
{
int error;
HN_LOCK_ASSERT(sc);
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
return (ENXIO);
/*
* Disable RSS first.
*
* NOTE:
* Direct reconfiguration by setting the UNCHG flags does
* _not_ work properly.
*/
if (bootverbose)
if_printf(sc->hn_ifp, "disable RSS\n");
error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
if (error) {
if_printf(sc->hn_ifp, "RSS disable failed\n");
return (error);
}
/*
* Reenable the RSS w/ the updated RSS key or indirect
* table.
*/
if (bootverbose)
if_printf(sc->hn_ifp, "reconfig RSS\n");
error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
if (error) {
if_printf(sc->hn_ifp, "RSS reconfig failed\n");
return (error);
}
return (0);
}
static void
hn_rss_ind_fixup(struct hn_softc *sc)
{
struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
int i, nchan;
nchan = sc->hn_rx_ring_inuse;
KASSERT(nchan > 1, ("invalid # of channels %d", nchan));
/*
* Check indirect table to make sure that all channels in it
* can be used.
*/
for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
if (rss->rss_ind[i] >= nchan) {
if_printf(sc->hn_ifp,
"RSS indirect table %d fixup: %u -> %d\n",
i, rss->rss_ind[i], nchan - 1);
rss->rss_ind[i] = nchan - 1;
}
}
}
static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{
return EOPNOTSUPP;
}
static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
struct hn_softc *sc = ifp->if_softc;
ifmr->ifm_status = IFM_AVALID;
ifmr->ifm_active = IFM_ETHER;
if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
ifmr->ifm_active |= IFM_NONE;
return;
}
ifmr->ifm_status |= IFM_ACTIVE;
ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}
static void
hn_rxvf_set_task(void *xarg, int pending __unused)
{
struct hn_rxvf_setarg *arg = xarg;
arg->rxr->hn_rxvf_ifp = arg->vf_ifp;
}
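/*
 * Update hn_rxvf_ifp on each RX ring.  For rings that are in use,
 * run the update as a task on the ring's channel, so that it is
 * serialized with the RX path; idle rings are updated directly.
 */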
static void
hn_rxvf_set(struct hn_softc *sc, struct ifnet *vf_ifp)
{
struct hn_rx_ring *rxr;
struct hn_rxvf_setarg arg;
struct task task;
int i;
HN_LOCK_ASSERT(sc);
TASK_INIT(&task, 0, hn_rxvf_set_task, &arg);
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
if (i < sc->hn_rx_ring_inuse) {
arg.rxr = rxr;
arg.vf_ifp = vf_ifp;
vmbus_chan_run_task(rxr->hn_chan, &task);
} else {
rxr->hn_rxvf_ifp = vf_ifp;
}
}
}
static bool
hn_ismyvf(const struct hn_softc *sc, const struct ifnet *ifp)
{
const struct ifnet *hn_ifp;
hn_ifp = sc->hn_ifp;
if (ifp == hn_ifp)
return (false);
if (ifp->if_alloctype != IFT_ETHER)
return (false);
/* Ignore lagg/vlan interfaces */
if (strcmp(ifp->if_dname, "lagg") == 0 ||
strcmp(ifp->if_dname, "vlan") == 0)
return (false);
/*
* During detach events ifp->if_addr might be NULL.
* Make sure the bcmp() below doesn't panic on that:
*/
if (ifp->if_addr == NULL || hn_ifp->if_addr == NULL)
return (false);
if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
return (false);
return (true);
}
static void
hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp, bool rxvf)
{
struct ifnet *hn_ifp;
HN_LOCK(sc);
if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
goto out;
if (!hn_ismyvf(sc, ifp))
goto out;
hn_ifp = sc->hn_ifp;
if (rxvf) {
if (sc->hn_flags & HN_FLAG_RXVF)
goto out;
sc->hn_flags |= HN_FLAG_RXVF;
hn_rxfilter_config(sc);
} else {
if (!(sc->hn_flags & HN_FLAG_RXVF))
goto out;
sc->hn_flags &= ~HN_FLAG_RXVF;
if (hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
hn_rxfilter_config(sc);
else
hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
}
hn_nvs_set_datapath(sc,
rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH);
hn_rxvf_set(sc, rxvf ? ifp : NULL);
if (rxvf) {
hn_vf_rss_fixup(sc, true);
hn_suspend_mgmt(sc);
sc->hn_link_flags &=
~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
if_link_state_change(hn_ifp, LINK_STATE_DOWN);
} else {
hn_vf_rss_restore(sc);
hn_resume_mgmt(sc);
}
devctl_notify("HYPERV_NIC_VF", hn_ifp->if_xname,
rxvf ? "VF_UP" : "VF_DOWN", NULL);
if (bootverbose) {
if_printf(hn_ifp, "datapath is switched %s %s\n",
rxvf ? "to" : "from", ifp->if_xname);
}
out:
HN_UNLOCK(sc);
}
static void
hn_ifnet_event(void *arg, struct ifnet *ifp, int event)
{
if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN)
return;
hn_rxvf_change(arg, ifp, event == IFNET_EVENT_UP);
}
static void
hn_ifaddr_event(void *arg, struct ifnet *ifp)
{
hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP);
}
static int
hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr)
{
struct ifnet *ifp, *vf_ifp;
uint64_t tmp;
int error;
HN_LOCK_ASSERT(sc);
ifp = sc->hn_ifp;
vf_ifp = sc->hn_vf_ifp;
/*
* Fix up requested capabilities w/ supported capabilities,
* since the supported capabilities could have been changed.
*/
ifr->ifr_reqcap &= ifp->if_capabilities;
/* Pass SIOCSIFCAP to VF. */
error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr);
/*
* NOTE:
* The error will be propagated to the callers; however, it
* is _not_ useful here.
*/
/*
* Merge VF's enabled capabilities.
*/
ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities;
tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc);
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= tmp;
else
ifp->if_hwassist &= ~tmp;
tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc);
if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
ifp->if_hwassist |= tmp;
else
ifp->if_hwassist &= ~tmp;
tmp = vf_ifp->if_hwassist & CSUM_IP_TSO;
if (ifp->if_capenable & IFCAP_TSO4)
ifp->if_hwassist |= tmp;
else
ifp->if_hwassist &= ~tmp;
tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO;
if (ifp->if_capenable & IFCAP_TSO6)
ifp->if_hwassist |= tmp;
else
ifp->if_hwassist &= ~tmp;
return (error);
}
static int
hn_xpnt_vf_iocsetflags(struct hn_softc *sc)
{
struct ifnet *vf_ifp;
struct ifreq ifr;
HN_LOCK_ASSERT(sc);
vf_ifp = sc->hn_vf_ifp;
memset(&ifr, 0, sizeof(ifr));
strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
ifr.ifr_flags = vf_ifp->if_flags & 0xffff;
ifr.ifr_flagshigh = vf_ifp->if_flags >> 16;
return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
}
static void
hn_xpnt_vf_saveifflags(struct hn_softc *sc)
{
struct ifnet *ifp = sc->hn_ifp;
int allmulti = 0;
HN_LOCK_ASSERT(sc);
/* XXX vlan(4) style mcast addr maintenance */
if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs))
allmulti = IFF_ALLMULTI;
/* Always set the VF's if_flags */
sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti;
}
static void
hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m)
{
struct rm_priotracker pt;
struct ifnet *hn_ifp = NULL;
struct mbuf *mn;
/*
* XXX racy, if hn(4) is ever detached.
*/
rm_rlock(&hn_vfmap_lock, &pt);
if (vf_ifp->if_index < hn_vfmap_size)
hn_ifp = hn_vfmap[vf_ifp->if_index];
rm_runlock(&hn_vfmap_lock, &pt);
if (hn_ifp != NULL) {
for (mn = m; mn != NULL; mn = mn->m_nextpkt) {
/*
* Allow tapping on the VF.
*/
ETHER_BPF_MTAP(vf_ifp, mn);
/*
* Update VF stats.
*/
if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) {
if_inc_counter(vf_ifp, IFCOUNTER_IBYTES,
mn->m_pkthdr.len);
}
/*
* XXX IFCOUNTER_IMCAST
* This stat updating is somewhat invasive, since it
* requires two checks on the mbuf: the length check
* and the ethernet header check. As of this writing,
* all multicast packets go directly to hn(4), which
* makes imcast stat updating in the VF a vain attempt.
*/
/*
* Fix up rcvif and increase hn(4)'s ipackets.
*/
mn->m_pkthdr.rcvif = hn_ifp;
if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
}
/*
* Go through hn(4)'s if_input.
*/
hn_ifp->if_input(hn_ifp, m);
} else {
/*
* In the middle of the transition; free this
* mbuf chain.
*/
while (m != NULL) {
mn = m->m_nextpkt;
m->m_nextpkt = NULL;
m_freem(m);
m = mn;
}
}
}
static void
hn_mtu_change_fixup(struct hn_softc *sc)
{
struct ifnet *ifp;
HN_LOCK_ASSERT(sc);
ifp = sc->hn_ifp;
hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
#if __FreeBSD_version >= 1100099
if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp))
hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
#endif
}
static uint32_t
hn_rss_type_fromndis(uint32_t rss_hash)
{
uint32_t types = 0;
if (rss_hash & NDIS_HASH_IPV4)
types |= RSS_TYPE_IPV4;
if (rss_hash & NDIS_HASH_TCP_IPV4)
types |= RSS_TYPE_TCP_IPV4;
if (rss_hash & NDIS_HASH_IPV6)
types |= RSS_TYPE_IPV6;
if (rss_hash & NDIS_HASH_IPV6_EX)
types |= RSS_TYPE_IPV6_EX;
if (rss_hash & NDIS_HASH_TCP_IPV6)
types |= RSS_TYPE_TCP_IPV6;
if (rss_hash & NDIS_HASH_TCP_IPV6_EX)
types |= RSS_TYPE_TCP_IPV6_EX;
if (rss_hash & NDIS_HASH_UDP_IPV4_X)
types |= RSS_TYPE_UDP_IPV4;
return (types);
}
static uint32_t
hn_rss_type_tondis(uint32_t types)
{
uint32_t rss_hash = 0;
KASSERT((types & (RSS_TYPE_UDP_IPV6 | RSS_TYPE_UDP_IPV6_EX)) == 0,
("UDP6 and UDP6EX are not supported"));
if (types & RSS_TYPE_IPV4)
rss_hash |= NDIS_HASH_IPV4;
if (types & RSS_TYPE_TCP_IPV4)
rss_hash |= NDIS_HASH_TCP_IPV4;
if (types & RSS_TYPE_IPV6)
rss_hash |= NDIS_HASH_IPV6;
if (types & RSS_TYPE_IPV6_EX)
rss_hash |= NDIS_HASH_IPV6_EX;
if (types & RSS_TYPE_TCP_IPV6)
rss_hash |= NDIS_HASH_TCP_IPV6;
if (types & RSS_TYPE_TCP_IPV6_EX)
rss_hash |= NDIS_HASH_TCP_IPV6_EX;
if (types & RSS_TYPE_UDP_IPV4)
rss_hash |= NDIS_HASH_UDP_IPV4_X;
return (rss_hash);
}
static void
hn_rss_mbuf_hash(struct hn_softc *sc, uint32_t mbuf_hash)
{
int i;
HN_LOCK_ASSERT(sc);
for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
sc->hn_rx_ring[i].hn_mbuf_hash = mbuf_hash;
}
static void
hn_vf_rss_fixup(struct hn_softc *sc, bool reconf)
{
struct ifnet *ifp, *vf_ifp;
struct ifrsshash ifrh;
struct ifrsskey ifrk;
int error;
uint32_t my_types, diff_types, mbuf_types = 0;
HN_LOCK_ASSERT(sc);
KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
("%s: synthetic parts are not attached", sc->hn_ifp->if_xname));
if (sc->hn_rx_ring_inuse == 1) {
/* No RSS on synthetic parts; done. */
return;
}
if ((sc->hn_rss_hcap & NDIS_HASH_FUNCTION_TOEPLITZ) == 0) {
/* Synthetic parts do not support Toeplitz; done. */
return;
}
ifp = sc->hn_ifp;
vf_ifp = sc->hn_vf_ifp;
/*
* Extract the VF's RSS key. Only a 40-byte Toeplitz key is
* supported.
*/
memset(&ifrk, 0, sizeof(ifrk));
strlcpy(ifrk.ifrk_name, vf_ifp->if_xname, sizeof(ifrk.ifrk_name));
error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSKEY, (caddr_t)&ifrk);
if (error) {
if_printf(ifp, "%s SIOCGIFRSSKEY failed: %d\n",
vf_ifp->if_xname, error);
goto done;
}
if (ifrk.ifrk_func != RSS_FUNC_TOEPLITZ) {
if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
vf_ifp->if_xname, ifrk.ifrk_func);
goto done;
}
if (ifrk.ifrk_keylen != NDIS_HASH_KEYSIZE_TOEPLITZ) {
if_printf(ifp, "%s invalid RSS Toeplitz key length %d\n",
vf_ifp->if_xname, ifrk.ifrk_keylen);
goto done;
}
/*
* Extract VF's RSS hash. Only Toeplitz is supported.
*/
memset(&ifrh, 0, sizeof(ifrh));
strlcpy(ifrh.ifrh_name, vf_ifp->if_xname, sizeof(ifrh.ifrh_name));
error = vf_ifp->if_ioctl(vf_ifp, SIOCGIFRSSHASH, (caddr_t)&ifrh);
if (error) {
if_printf(ifp, "%s SIOCGRSSHASH failed: %d\n",
vf_ifp->if_xname, error);
goto done;
}
if (ifrh.ifrh_func != RSS_FUNC_TOEPLITZ) {
if_printf(ifp, "%s RSS function %u is not Toeplitz\n",
vf_ifp->if_xname, ifrh.ifrh_func);
goto done;
}
my_types = hn_rss_type_fromndis(sc->hn_rss_hcap);
if ((ifrh.ifrh_types & my_types) == 0) {
/* This disables RSS; ignore it then */
if_printf(ifp, "%s intersection of RSS types failed. "
"VF %#x, mine %#x\n", vf_ifp->if_xname,
ifrh.ifrh_types, my_types);
goto done;
}
diff_types = my_types ^ ifrh.ifrh_types;
my_types &= ifrh.ifrh_types;
mbuf_types = my_types;
/*
* Detect RSS hash value/type conflicts.
*
* NOTE:
* We don't disable the hash type, but stop delivering the hash
* value/type through mbufs on the RX path.
*
* XXX If HN_CAP_UDPHASH is set in hn_caps, then the UDP 4-tuple
* hash is delivered with the type TCP_IPV4. This means if
* UDP_IPV4 is enabled, then TCP_IPV4 should be forced, at
* least to hn_mbuf_hash. However, given that _all_ of the
* NICs implement TCP_IPV4, this will _not_ impose any issues
* here.
*/
if ((my_types & RSS_TYPE_IPV4) &&
(diff_types & ifrh.ifrh_types &
(RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4))) {
/* Conflict; disable IPV4 hash type/value delivery. */
if_printf(ifp, "disable IPV4 mbuf hash delivery\n");
mbuf_types &= ~RSS_TYPE_IPV4;
}
if ((my_types & RSS_TYPE_IPV6) &&
(diff_types & ifrh.ifrh_types &
(RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
RSS_TYPE_IPV6_EX))) {
/* Conflict; disable IPV6 hash type/value delivery. */
if_printf(ifp, "disable IPV6 mbuf hash delivery\n");
mbuf_types &= ~RSS_TYPE_IPV6;
}
if ((my_types & RSS_TYPE_IPV6_EX) &&
(diff_types & ifrh.ifrh_types &
(RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6 |
RSS_TYPE_TCP_IPV6_EX | RSS_TYPE_UDP_IPV6_EX |
RSS_TYPE_IPV6))) {
/* Conflict; disable IPV6_EX hash type/value delivery. */
if_printf(ifp, "disable IPV6_EX mbuf hash delivery\n");
mbuf_types &= ~RSS_TYPE_IPV6_EX;
}
if ((my_types & RSS_TYPE_TCP_IPV6) &&
(diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6_EX)) {
/* Conflict; disable TCP_IPV6 hash type/value delivery. */
if_printf(ifp, "disable TCP_IPV6 mbuf hash delivery\n");
mbuf_types &= ~RSS_TYPE_TCP_IPV6;
}
if ((my_types & RSS_TYPE_TCP_IPV6_EX) &&
(diff_types & ifrh.ifrh_types & RSS_TYPE_TCP_IPV6)) {
/* Conflict; disable TCP_IPV6_EX hash type/value delivery. */
if_printf(ifp, "disable TCP_IPV6_EX mbuf hash delivery\n");
mbuf_types &= ~RSS_TYPE_TCP_IPV6_EX;
}
if ((my_types & RSS_TYPE_UDP_IPV6) &&
(diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6_EX)) {
/* Conflict; disable UDP_IPV6 hash type/value delivery. */
if_printf(ifp, "disable UDP_IPV6 mbuf hash delivery\n");
mbuf_types &= ~RSS_TYPE_UDP_IPV6;
}
if ((my_types & RSS_TYPE_UDP_IPV6_EX) &&
(diff_types & ifrh.ifrh_types & RSS_TYPE_UDP_IPV6)) {
/* Conflict; disable UDP_IPV6_EX hash type/value delivery. */
if_printf(ifp, "disable UDP_IPV6_EX mbuf hash delivery\n");
mbuf_types &= ~RSS_TYPE_UDP_IPV6_EX;
}
/*
* Indirect table does not matter.
*/
sc->hn_rss_hash = (sc->hn_rss_hcap & NDIS_HASH_FUNCTION_MASK) |
hn_rss_type_tondis(my_types);
memcpy(sc->hn_rss.rss_key, ifrk.ifrk_key, sizeof(sc->hn_rss.rss_key));
sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
if (reconf) {
error = hn_rss_reconfig(sc);
if (error) {
/* XXX roll-back? */
if_printf(ifp, "hn_rss_reconfig failed: %d\n", error);
/* XXX keep going. */
}
}
done:
/* Hash deliverability for mbufs. */
hn_rss_mbuf_hash(sc, hn_rss_type_tondis(mbuf_types));
}
static void
hn_vf_rss_restore(struct hn_softc *sc)
{
HN_LOCK_ASSERT(sc);
KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
("%s: synthetic parts are not attached", sc->hn_ifp->if_xname));
if (sc->hn_rx_ring_inuse == 1)
goto done;
/*
* Restore hash types. Key does _not_ matter.
*/
if (sc->hn_rss_hash != sc->hn_rss_hcap) {
int error;
sc->hn_rss_hash = sc->hn_rss_hcap;
error = hn_rss_reconfig(sc);
if (error) {
if_printf(sc->hn_ifp, "hn_rss_reconfig failed: %d\n",
error);
/* XXX keep going. */
}
}
done:
/* Hash deliverability for mbufs. */
hn_rss_mbuf_hash(sc, NDIS_HASH_ALL);
}
static void
hn_xpnt_vf_setready(struct hn_softc *sc)
{
struct ifnet *ifp, *vf_ifp;
struct ifreq ifr;
HN_LOCK_ASSERT(sc);
ifp = sc->hn_ifp;
vf_ifp = sc->hn_vf_ifp;
/*
* Mark the VF ready.
*/
sc->hn_vf_rdytick = 0;
/*
* Save information for restoration.
*/
sc->hn_saved_caps = ifp->if_capabilities;
sc->hn_saved_tsomax = ifp->if_hw_tsomax;
sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount;
sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize;
/*
* Intersect supported/enabled capabilities.
*
* NOTE:
* if_hwassist is not changed here.
*/
ifp->if_capabilities &= vf_ifp->if_capabilities;
ifp->if_capenable &= ifp->if_capabilities;
/*
* Fix TSO settings.
*/
if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax)
ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax;
if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount)
ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount;
if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize)
ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize;
/*
* Change VF's enabled capabilities.
*/
memset(&ifr, 0, sizeof(ifr));
strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
ifr.ifr_reqcap = ifp->if_capenable;
hn_xpnt_vf_iocsetcaps(sc, &ifr);
if (ifp->if_mtu != ETHERMTU) {
int error;
/*
* Change VF's MTU.
*/
memset(&ifr, 0, sizeof(ifr));
strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
ifr.ifr_mtu = ifp->if_mtu;
error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr);
if (error) {
if_printf(ifp, "%s SIOCSIFMTU %u failed\n",
vf_ifp->if_xname, ifp->if_mtu);
if (ifp->if_mtu > ETHERMTU) {
if_printf(ifp, "change MTU to %d\n", ETHERMTU);
/*
* XXX
* No need to adjust the synthetic parts' MTU;
* failure of the adjustment would cause us
* an infinite headache.
*/
ifp->if_mtu = ETHERMTU;
hn_mtu_change_fixup(sc);
}
}
}
}
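/*
 * VF readiness convention: hn_vf_rdytick == 0 means the VF is ready;
 * a non-zero value is the tick count after which the VF may be
 * marked ready.
 */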
static bool
hn_xpnt_vf_isready(struct hn_softc *sc)
{
HN_LOCK_ASSERT(sc);
if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL)
return (false);
if (sc->hn_vf_rdytick == 0)
return (true);
if (sc->hn_vf_rdytick > ticks)
return (false);
/* Mark VF as ready. */
hn_xpnt_vf_setready(sc);
return (true);
}
static void
hn_xpnt_vf_setenable(struct hn_softc *sc)
{
int i;
HN_LOCK_ASSERT(sc);
/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
rm_wlock(&sc->hn_vf_lock);
sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED;
rm_wunlock(&sc->hn_vf_lock);
for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF;
}
static void
hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf)
{
int i;
HN_LOCK_ASSERT(sc);
/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
rm_wlock(&sc->hn_vf_lock);
sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
if (clear_vf)
sc->hn_vf_ifp = NULL;
rm_wunlock(&sc->hn_vf_lock);
for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF;
}
static void
hn_xpnt_vf_init(struct hn_softc *sc)
{
int error;
HN_LOCK_ASSERT(sc);
KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
if (bootverbose) {
if_printf(sc->hn_ifp, "try bringing up %s\n",
sc->hn_vf_ifp->if_xname);
}
/*
* Bring the VF up.
*/
hn_xpnt_vf_saveifflags(sc);
sc->hn_vf_ifp->if_flags |= IFF_UP;
error = hn_xpnt_vf_iocsetflags(sc);
if (error) {
if_printf(sc->hn_ifp, "bringing up %s failed: %d\n",
sc->hn_vf_ifp->if_xname, error);
return;
}
/*
* NOTE:
* Datapath setting must happen _after_ bringing the VF up.
*/
hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
/*
* NOTE:
* Fixup RSS related bits _after_ the VF is brought up, since
* many VFs generate the RSS key during their initialization.
*/
hn_vf_rss_fixup(sc, true);
/* Mark transparent mode VF as enabled. */
hn_xpnt_vf_setenable(sc);
}
static void
hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused)
{
struct hn_softc *sc = xsc;
HN_LOCK(sc);
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
goto done;
if (sc->hn_vf_ifp == NULL)
goto done;
if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
goto done;
if (sc->hn_vf_rdytick != 0) {
/* Mark VF as ready. */
hn_xpnt_vf_setready(sc);
}
if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) {
/*
* Delayed VF initialization.
*/
if (bootverbose) {
if_printf(sc->hn_ifp, "delayed initialize %s\n",
sc->hn_vf_ifp->if_xname);
}
hn_xpnt_vf_init(sc);
}
done:
HN_UNLOCK(sc);
}
static void
hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
{
struct hn_softc *sc = xsc;
HN_LOCK(sc);
if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
goto done;
if (!hn_ismyvf(sc, ifp))
goto done;
if (sc->hn_vf_ifp != NULL) {
if_printf(sc->hn_ifp, "%s was attached as VF\n",
sc->hn_vf_ifp->if_xname);
goto done;
}
if (hn_xpnt_vf && ifp->if_start != NULL) {
/*
* ifnet.if_start is _not_ supported by transparent
* mode VF; mainly due to the IFF_DRV_OACTIVE flag.
*/
if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported "
"in transparent VF mode.\n", ifp->if_xname);
goto done;
}
rm_wlock(&hn_vfmap_lock);
if (ifp->if_index >= hn_vfmap_size) {
struct ifnet **newmap;
int newsize;
newsize = ifp->if_index + HN_VFMAP_SIZE_DEF;
newmap = malloc(sizeof(struct ifnet *) * newsize, M_DEVBUF,
M_WAITOK | M_ZERO);
memcpy(newmap, hn_vfmap,
sizeof(struct ifnet *) * hn_vfmap_size);
free(hn_vfmap, M_DEVBUF);
hn_vfmap = newmap;
hn_vfmap_size = newsize;
}
KASSERT(hn_vfmap[ifp->if_index] == NULL,
("%s: ifindex %d was mapped to %s",
ifp->if_xname, ifp->if_index, hn_vfmap[ifp->if_index]->if_xname));
hn_vfmap[ifp->if_index] = sc->hn_ifp;
rm_wunlock(&hn_vfmap_lock);
/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
rm_wlock(&sc->hn_vf_lock);
KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
sc->hn_vf_ifp = ifp;
rm_wunlock(&sc->hn_vf_lock);
if (hn_xpnt_vf) {
int wait_ticks;
/*
* Install if_input for vf_ifp, which does vf_ifp -> hn_ifp.
* Save vf_ifp's current if_input for later restoration.
*/
sc->hn_vf_input = ifp->if_input;
ifp->if_input = hn_xpnt_vf_input;
/*
* Stop link status management; use the VF's.
*/
hn_suspend_mgmt(sc);
/*
* Give the VF some time to complete its attach routine.
*/
wait_ticks = hn_xpnt_vf_attwait * hz;
sc->hn_vf_rdytick = ticks + wait_ticks;
taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init,
wait_ticks);
}
done:
HN_UNLOCK(sc);
}
static void
hn_ifnet_detevent(void *xsc, struct ifnet *ifp)
{
struct hn_softc *sc = xsc;
HN_LOCK(sc);
if (sc->hn_vf_ifp == NULL)
goto done;
if (!hn_ismyvf(sc, ifp))
goto done;
if (hn_xpnt_vf) {
/*
* Make sure that the delayed initialization is not running.
*
* NOTE:
* - This lock _must_ be released, since the hn_vf_init task
* will try holding this lock.
* - It is safe to release this lock here, since the
* hn_ifnet_attevent() is interlocked by the hn_vf_ifp.
*
* XXX racy, if hn(4) is ever detached.
*/
HN_UNLOCK(sc);
taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init);
HN_LOCK(sc);
KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved",
sc->hn_ifp->if_xname));
ifp->if_input = sc->hn_vf_input;
sc->hn_vf_input = NULL;
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) &&
(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED))
hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
if (sc->hn_vf_rdytick == 0) {
/*
* The VF was ready; restore some settings.
*/
sc->hn_ifp->if_capabilities = sc->hn_saved_caps;
/*
* NOTE:
* There is _no_ need to fixup if_capenable and
* if_hwassist, since the if_capabilities before
* restoration was an intersection of the VF's
* if_capabilities and the synthetic device's
* if_capabilities.
*/
sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax;
sc->hn_ifp->if_hw_tsomaxsegcount =
sc->hn_saved_tsosegcnt;
sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz;
}
if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
/*
* Restore RSS settings.
*/
hn_vf_rss_restore(sc);
/*
* Resume link status management, which was suspended
* by hn_ifnet_attevent().
*/
hn_resume_mgmt(sc);
}
}
/* Mark transparent mode VF as disabled. */
hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */);
rm_wlock(&hn_vfmap_lock);
KASSERT(ifp->if_index < hn_vfmap_size,
("ifindex %d, vfmapsize %d", ifp->if_index, hn_vfmap_size));
if (hn_vfmap[ifp->if_index] != NULL) {
KASSERT(hn_vfmap[ifp->if_index] == sc->hn_ifp,
("%s: ifindex %d was mapped to %s",
ifp->if_xname, ifp->if_index,
hn_vfmap[ifp->if_index]->if_xname));
hn_vfmap[ifp->if_index] = NULL;
}
rm_wunlock(&hn_vfmap_lock);
done:
HN_UNLOCK(sc);
}
static void
hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state)
{
struct hn_softc *sc = xsc;
if (sc->hn_vf_ifp == ifp)
if_link_state_change(sc->hn_ifp, link_state);
}
static int
hn_probe(device_t dev)
{
if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) {
device_set_desc(dev, "Hyper-V Network Interface");
return BUS_PROBE_DEFAULT;
}
return ENXIO;
}
static int
hn_attach(device_t dev)
{
struct hn_softc *sc = device_get_softc(dev);
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
uint8_t eaddr[ETHER_ADDR_LEN];
struct ifnet *ifp = NULL;
int error, ring_cnt, tx_ring_cnt;
uint32_t mtu;
sc->hn_dev = dev;
sc->hn_prichan = vmbus_get_channel(dev);
HN_LOCK_INIT(sc);
rm_init(&sc->hn_vf_lock, "hnvf");
if (hn_xpnt_vf && hn_xpnt_vf_accbpf)
sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
/*
* Initialize these tunables once.
*/
sc->hn_agg_size = hn_tx_agg_size;
sc->hn_agg_pkts = hn_tx_agg_pkts;
/*
* Setup taskqueue for transmission.
*/
if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
int i;
sc->hn_tx_taskqs =
malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
M_DEVBUF, M_WAITOK);
for (i = 0; i < hn_tx_taskq_cnt; ++i) {
sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
M_WAITOK, taskqueue_thread_enqueue,
&sc->hn_tx_taskqs[i]);
taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
"%s tx%d", device_get_nameunit(dev), i);
}
} else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
sc->hn_tx_taskqs = hn_tx_taskque;
}
/*
* Setup taskqueue for management tasks, e.g. link status.
*/
sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
device_get_nameunit(dev));
TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
hn_netchg_status_taskfunc, sc);
if (hn_xpnt_vf) {
/*
* Setup taskqueue for VF tasks, e.g. delayed VF bring-up.
*/
sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK,
taskqueue_thread_enqueue, &sc->hn_vf_taskq);
taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf",
device_get_nameunit(dev));
TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0,
hn_xpnt_vf_init_taskfunc, sc);
}
/*
* Allocate ifnet and setup its name earlier, so that if_printf
* can be used by functions that will be called after
* ether_ifattach().
*/
ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
ifp->if_softc = sc;
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
/*
* Initialize ifmedia earlier so that it can be unconditionally
* destroyed, if an error happens later on.
*/
ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
/*
* Figure out the # of RX rings (ring_cnt) and the # of TX rings
* to use (tx_ring_cnt).
*
* NOTE:
* The # of RX rings to use is same as the # of channels to use.
*/
ring_cnt = hn_chan_cnt;
if (ring_cnt <= 0) {
/* Default */
ring_cnt = mp_ncpus;
if (ring_cnt > HN_RING_CNT_DEF_MAX)
ring_cnt = HN_RING_CNT_DEF_MAX;
} else if (ring_cnt > mp_ncpus) {
ring_cnt = mp_ncpus;
}
#ifdef RSS
if (ring_cnt > rss_getnumbuckets())
ring_cnt = rss_getnumbuckets();
#endif
tx_ring_cnt = hn_tx_ring_cnt;
if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
if (hn_use_if_start) {
/* ifnet.if_start only needs one TX ring. */
tx_ring_cnt = 1;
}
#endif
/*
* Set the leader CPU for channels.
*/
sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
/*
* Create enough TX/RX rings, even if only limited number of
* channels can be allocated.
*/
error = hn_create_tx_data(sc, tx_ring_cnt);
if (error)
goto failed;
error = hn_create_rx_data(sc, ring_cnt);
if (error)
goto failed;
/*
* Create transaction context for NVS and RNDIS transactions.
*/
sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
if (sc->hn_xact == NULL) {
error = ENXIO;
goto failed;
}
/*
* Install orphan handler for the revocation of this device's
* primary channel.
*
* NOTE:
* The processing order is critical here:
* Install the orphan handler, _before_ testing whether this
* device's primary channel has been revoked or not.
*/
vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
if (vmbus_chan_is_revoked(sc->hn_prichan)) {
error = ENXIO;
goto failed;
}
/*
* Attach the synthetic parts, i.e. NVS and RNDIS.
*/
error = hn_synth_attach(sc, ETHERMTU);
if (error)
goto failed;
error = hn_rndis_get_eaddr(sc, eaddr);
if (error)
goto failed;
error = hn_rndis_get_mtu(sc, &mtu);
if (error)
mtu = ETHERMTU;
else if (bootverbose)
device_printf(dev, "RNDIS mtu %u\n", mtu);
#if __FreeBSD_version >= 1100099
if (sc->hn_rx_ring_inuse > 1) {
/*
* Reduce TCP segment aggregation limit for multiple
* RX rings to increase ACK timeliness.
*/
hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
}
#endif
/*
* Fix up TX/RX settings after the synthetic parts are attached.
*/
hn_fixup_tx_data(sc);
hn_fixup_rx_data(sc);
ctx = device_get_sysctl_ctx(dev);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
&sc->hn_nvs_ver, 0, "NVS version");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_ndis_version_sysctl, "A", "NDIS version");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_caps_sysctl, "A", "capabilities");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_hwassist_sysctl, "A", "hwassist");
SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max",
CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size");
SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt",
CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0,
"max # of TSO segments");
SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz",
CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0,
"max size of TSO segment");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_rxfilter_sysctl, "A", "rxfilter");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_rss_hash_sysctl, "A", "RSS hash");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hashcap",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_rss_hcap_sysctl, "A", "RSS hash capabilities");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "mbuf_hash",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_rss_mbuf_sysctl, "A", "RSS hash for mbufs");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
#ifndef RSS
/*
* Don't allow RSS key/indirect table changes when the RSS kernel
* option is defined.
*/
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_rss_key_sysctl, "IU", "RSS key");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_rss_ind_sysctl, "IU", "RSS indirect table");
#endif
SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
"RNDIS offered packet transmission aggregation size limit");
SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
"RNDIS offered packet transmission aggregation count limit");
SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
"RNDIS packet transmission aggregation alignment");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_txagg_size_sysctl, "I",
"Packet transmission aggregation size, 0 -- disable, -1 -- auto");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_txagg_pkts_sysctl, "I",
"Packet transmission aggregation packets, "
"0 -- disable, -1 -- auto");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_polling_sysctl, "I",
"Polling frequency: [100,1000000], 0 disable polling");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_vf_sysctl, "A", "Virtual Function's name");
if (!hn_xpnt_vf) {
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_rxvf_sysctl, "A", "activated Virtual Function's name");
} else {
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled",
CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_xpnt_vf_enabled_sysctl, "I",
"Transparent VF enabled");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_xpnt_vf_accbpf_sysctl, "I",
"Accurate BPF for transparent VF");
}
/*
* Setup the ifmedia, which has been initialized earlier.
*/
ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
/* XXX ifmedia_set really should do this for us */
sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
/*
* Setup the ifnet for this interface.
*/
ifp->if_baudrate = IF_Gbps(10);
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
- IFF_NEEDSEPOCH;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = hn_ioctl;
ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
if (hn_use_if_start) {
int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);
ifp->if_start = hn_start;
IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
IFQ_SET_READY(&ifp->if_snd);
} else
#endif
{
ifp->if_transmit = hn_transmit;
ifp->if_qflush = hn_xmit_qflush;
}
ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE;
#ifdef foo
/* We can't differentiate IPv6 packets from IPv4 packets on the RX path. */
ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
if (sc->hn_caps & HN_CAP_VLAN) {
/* XXX not sure about VLAN_MTU. */
ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
}
ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
if (ifp->if_hwassist & HN_CSUM_IP_MASK)
ifp->if_capabilities |= IFCAP_TXCSUM;
if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
if (sc->hn_caps & HN_CAP_TSO4) {
ifp->if_capabilities |= IFCAP_TSO4;
ifp->if_hwassist |= CSUM_IP_TSO;
}
if (sc->hn_caps & HN_CAP_TSO6) {
ifp->if_capabilities |= IFCAP_TSO6;
ifp->if_hwassist |= CSUM_IP6_TSO;
}
/* Enable all available capabilities by default. */
ifp->if_capenable = ifp->if_capabilities;
/*
* Disable IPv6 TSO and TXCSUM by default; they can still
* be enabled through SIOCSIFCAP.
*/
ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);
if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
/*
* Lock hn_set_tso_maxsize() to simplify its
* internal logic.
*/
HN_LOCK(sc);
hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
HN_UNLOCK(sc);
ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
}
ether_ifattach(ifp, eaddr);
if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
if_printf(ifp, "TSO segcnt %u segsz %u\n",
ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
}
if (mtu < ETHERMTU) {
if_printf(ifp, "fixup mtu %u -> %u\n", ifp->if_mtu, mtu);
ifp->if_mtu = mtu;
}
/* Inform the upper layer about the long frame support. */
ifp->if_hdrlen = sizeof(struct ether_vlan_header);
/*
* Kick off link status check.
*/
sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
hn_update_link_status(sc);
if (!hn_xpnt_vf) {
sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);
} else {
sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event,
hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY);
}
/*
* NOTE:
* Subscribe to the ether_ifattach event, instead of the ifnet_arrival
* event, since the interface's LLADDR is needed; the LLADDR is not
* available when the ifnet_arrival event is triggered.
*/
sc->hn_ifnet_atthand = EVENTHANDLER_REGISTER(ether_ifattach_event,
hn_ifnet_attevent, sc, EVENTHANDLER_PRI_ANY);
sc->hn_ifnet_dethand = EVENTHANDLER_REGISTER(ifnet_departure_event,
hn_ifnet_detevent, sc, EVENTHANDLER_PRI_ANY);
return (0);
failed:
if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
hn_synth_detach(sc);
hn_detach(dev);
return (error);
}
static int
hn_detach(device_t dev)
{
struct hn_softc *sc = device_get_softc(dev);
struct ifnet *ifp = sc->hn_ifp, *vf_ifp;
if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
/*
* In case that the vmbus missed the orphan handler
* installation.
*/
vmbus_xact_ctx_orphan(sc->hn_xact);
}
if (sc->hn_ifaddr_evthand != NULL)
EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
if (sc->hn_ifnet_evthand != NULL)
EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);
if (sc->hn_ifnet_atthand != NULL) {
EVENTHANDLER_DEREGISTER(ether_ifattach_event,
sc->hn_ifnet_atthand);
}
if (sc->hn_ifnet_dethand != NULL) {
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
sc->hn_ifnet_dethand);
}
if (sc->hn_ifnet_lnkhand != NULL)
EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand);
vf_ifp = sc->hn_vf_ifp;
__compiler_membar();
if (vf_ifp != NULL)
hn_ifnet_detevent(sc, vf_ifp);
if (device_is_attached(dev)) {
HN_LOCK(sc);
if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
hn_stop(sc, true);
/*
* NOTE:
* hn_stop() only suspends the data path, so the management
* tasks have to be suspended manually here.
*/
hn_suspend_mgmt(sc);
hn_synth_detach(sc);
}
HN_UNLOCK(sc);
ether_ifdetach(ifp);
}
ifmedia_removeall(&sc->hn_media);
hn_destroy_rx_data(sc);
hn_destroy_tx_data(sc);
if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
int i;
for (i = 0; i < hn_tx_taskq_cnt; ++i)
taskqueue_free(sc->hn_tx_taskqs[i]);
free(sc->hn_tx_taskqs, M_DEVBUF);
}
taskqueue_free(sc->hn_mgmt_taskq0);
if (sc->hn_vf_taskq != NULL)
taskqueue_free(sc->hn_vf_taskq);
if (sc->hn_xact != NULL) {
/*
* Uninstall the orphan handler _before_ the xact is
* destructed.
*/
vmbus_chan_unset_orphan(sc->hn_prichan);
vmbus_xact_ctx_destroy(sc->hn_xact);
}
if_free(ifp);
HN_LOCK_DESTROY(sc);
rm_destroy(&sc->hn_vf_lock);
return (0);
}
static int
hn_shutdown(device_t dev)
{
return (0);
}
static void
hn_link_status(struct hn_softc *sc)
{
uint32_t link_status;
int error;
error = hn_rndis_get_linkstatus(sc, &link_status);
if (error) {
/* XXX what to do? */
return;
}
if (link_status == NDIS_MEDIA_STATE_CONNECTED)
sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
else
sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
if_link_state_change(sc->hn_ifp,
(sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
LINK_STATE_UP : LINK_STATE_DOWN);
}
static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
struct hn_softc *sc = xsc;
if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
return;
hn_link_status(sc);
}
static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
struct hn_softc *sc = xsc;
/* Prevent any link status checks from running. */
sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;
/*
* Fake up a [link down --> link up] state change; a 5 second
* delay is used, which closely simulates the miibus reaction
* to a link down event.
*/
sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
&sc->hn_netchg_status, 5 * hz);
}
static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
struct hn_softc *sc = xsc;
/* Re-allow link status checks. */
sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
hn_link_status(sc);
}
static void
hn_update_link_status(struct hn_softc *sc)
{
if (sc->hn_mgmt_taskq != NULL)
taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}
static void
hn_change_network(struct hn_softc *sc)
{
if (sc->hn_mgmt_taskq != NULL)
taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}
static __inline int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
struct mbuf *m = *m_head;
int error;
KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));
error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
m, segs, nsegs, BUS_DMA_NOWAIT);
if (error == EFBIG) {
struct mbuf *m_new;
m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
if (m_new == NULL)
return ENOBUFS;
else
*m_head = m = m_new;
txr->hn_tx_collapsed++;
error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
}
if (!error) {
bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
BUS_DMASYNC_PREWRITE);
txd->flags |= HN_TXD_FLAG_DMAMAP;
}
return error;
}
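/*
 * Release a reference on the txdesc.  Returns 1 if this was the last
 * reference, in which case any aggregated txdescs, the chimney
 * sending buffer, the DMA map load and the mbuf are all reclaimed
 * and the txdesc is put back onto the free list; returns 0 otherwise.
 */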
static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
("put an onlist txd %#x", txd->flags));
KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
("put an onagg txd %#x", txd->flags));
KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
if (atomic_fetchadd_int(&txd->refs, -1) != 1)
return 0;
if (!STAILQ_EMPTY(&txd->agg_list)) {
struct hn_txdesc *tmp_txd;
while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
int freed;
KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
("resursive aggregation on aggregated txdesc"));
KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
("not aggregated txdesc"));
KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
("aggregated txdesc uses dmamap"));
KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
("aggregated txdesc consumes "
"chimney sending buffer"));
KASSERT(tmp_txd->chim_size == 0,
("aggregated txdesc has non-zero "
"chimney sending size"));
STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
freed = hn_txdesc_put(txr, tmp_txd);
KASSERT(freed, ("failed to free aggregated txdesc"));
}
}
if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
("chim txd uses dmamap"));
hn_chim_free(txr->hn_sc, txd->chim_index);
txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
txd->chim_size = 0;
} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
bus_dmamap_sync(txr->hn_tx_data_dtag,
txd->data_dmap, BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(txr->hn_tx_data_dtag,
txd->data_dmap);
txd->flags &= ~HN_TXD_FLAG_DMAMAP;
}
if (txd->m != NULL) {
m_freem(txd->m);
txd->m = NULL;
}
txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
mtx_lock_spin(&txr->hn_txlist_spin);
KASSERT(txr->hn_txdesc_avail >= 0 &&
txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
txr->hn_txdesc_avail++;
SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
mtx_unlock_spin(&txr->hn_txlist_spin);
#else /* HN_USE_TXDESC_BUFRING */
#ifdef HN_DEBUG
atomic_add_int(&txr->hn_txdesc_avail, 1);
#endif
buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif /* !HN_USE_TXDESC_BUFRING */
return 1;
}
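/*
 * Allocate a txdesc from the TX ring's free list (or buf_ring).
 * Returns NULL if the ring is exhausted; a freshly allocated txdesc
 * starts with a single reference.
 */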
static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
struct hn_txdesc *txd;
#ifndef HN_USE_TXDESC_BUFRING
mtx_lock_spin(&txr->hn_txlist_spin);
txd = SLIST_FIRST(&txr->hn_txlist);
if (txd != NULL) {
KASSERT(txr->hn_txdesc_avail > 0,
("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
txr->hn_txdesc_avail--;
SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
}
mtx_unlock_spin(&txr->hn_txlist_spin);
#else
txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
#endif
if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
#ifdef HN_DEBUG
atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
#endif /* HN_USE_TXDESC_BUFRING */
KASSERT(txd->m == NULL && txd->refs == 0 &&
STAILQ_EMPTY(&txd->agg_list) &&
txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
txd->chim_size == 0 &&
(txd->flags & HN_TXD_FLAG_ONLIST) &&
(txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
(txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
txd->flags &= ~HN_TXD_FLAG_ONLIST;
txd->refs = 1;
}
return txd;
}
static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{
/* 0->1 transition will never work */
KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
atomic_add_int(&txd->refs, 1);
}
static __inline void
hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
{
KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
("recursive aggregation on aggregating txdesc"));
KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
("already aggregated"));
KASSERT(STAILQ_EMPTY(&txd->agg_list),
("recursive aggregation on to-be-aggregated txdesc"));
txd->flags |= HN_TXD_FLAG_ONAGG;
STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
}
static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
bool pending = false;
#ifndef HN_USE_TXDESC_BUFRING
mtx_lock_spin(&txr->hn_txlist_spin);
if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
pending = true;
mtx_unlock_spin(&txr->hn_txlist_spin);
#else
if (!buf_ring_full(txr->hn_txdesc_br))
pending = true;
#endif
return (pending);
}
static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
txr->hn_has_txeof = 0;
txr->hn_txeof(txr);
}
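/*
 * NVS send-done callback.  Releases the transmission's reference on
 * the txdesc and, once enough completions have accumulated on an
 * oactive TX ring, kicks the txeof processing early.
 */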
static void
hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
{
struct hn_txdesc *txd = sndc->hn_cbarg;
struct hn_tx_ring *txr;
txr = txd->txr;
KASSERT(txr->hn_chan == chan,
("channel mismatch, on chan%u, should be chan%u",
vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));
txr->hn_has_txeof = 1;
hn_txdesc_put(txr, txd);
++txr->hn_txdone_cnt;
if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
txr->hn_txdone_cnt = 0;
if (txr->hn_oactive)
hn_txeof(txr);
}
}
static void
hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
{
#if defined(INET) || defined(INET6)
tcp_lro_flush_all(&rxr->hn_lro);
#endif
/*
* NOTE:
* 'txr' could be NULL, if multiple channels are used
* along with the ifnet.if_start method.
*/
if (txr == NULL || !txr->hn_has_txeof)
return;
txr->hn_txdone_cnt = 0;
hn_txeof(txr);
}
static __inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{
KASSERT(ofs >= sizeof(struct rndis_packet_msg),
("invalid RNDIS packet msg offset %u", ofs));
return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
}
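/*
 * Rough layout of an RNDIS packet message while it is being built
 * (in this phase rm_pktinfooffset counts from the start of the
 * message):
 *
 *   +--------------------------+ <- pkt (struct rndis_packet_msg)
 *   | fixed message header     |
 *   +--------------------------+ <- pkt + rm_pktinfooffset
 *   | per-packet-info entries  | (rm_pktinfolen bytes, grow-only)
 *   +--------------------------+
 *   | packet data              | (for chimney sending, copied here
 *   +--------------------------+  by hn_encap())
 */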
static __inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
size_t pi_dlen, uint32_t pi_type)
{
const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
struct rndis_pktinfo *pi;
KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));
/*
* Per-packet-info does not move; it only grows.
*
* NOTE:
* rm_pktinfooffset in this phase counts from the beginning
* of rndis_packet_msg.
*/
KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
("%u pktinfo overflows RNDIS packet msg", pi_type));
pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
pkt->rm_pktinfolen);
pkt->rm_pktinfolen += pi_size;
pi->rm_size = pi_size;
pi->rm_type = pi_type;
pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;
return (pi->rm_data);
}
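/*
 * Transmit the TX ring's aggregating txdesc and reset all of the
 * ring's aggregation state.  On failure the aggregating txdesc's
 * mbuf is freed here and if_oerrors is charged with the number of
 * aggregated packets.
 */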
static __inline int
hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
{
struct hn_txdesc *txd;
struct mbuf *m;
int error, pkts;
txd = txr->hn_agg_txd;
KASSERT(txd != NULL, ("no aggregate txdesc"));
/*
* Since hn_txpkt() will reset this temporary stat, save
* it now, so that oerrors can be updated properly, if
* hn_txpkt() ever fails.
*/
pkts = txr->hn_stat_pkts;
/*
* Since txd's mbuf will _not_ be freed upon hn_txpkt()
* failure, save it for later freeing, if hn_txpkt() ever
* fails.
*/
m = txd->m;
error = hn_txpkt(ifp, txr, txd);
if (__predict_false(error)) {
/* txd is freed, but m is not. */
m_freem(m);
txr->hn_flush_failed++;
if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);
}
/* Reset all aggregation states. */
txr->hn_agg_txd = NULL;
txr->hn_agg_szleft = 0;
txr->hn_agg_pktleft = 0;
txr->hn_agg_prevpkt = NULL;
return (error);
}
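/*
 * Try chimney sending.  If an aggregating txdesc is in flight and
 * both the packet count and size budgets allow, append this packet
 * to it; otherwise allocate a fresh chimney sending buffer and, if
 * aggregation is enabled, make this txdesc the new aggregating
 * txdesc.  Returns the location for the RNDIS packet message within
 * the chimney sending buffer, or NULL if chimney sending cannot be
 * used.
 */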
static void *
hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
int pktsize)
{
void *chim;
if (txr->hn_agg_txd != NULL) {
if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
struct hn_txdesc *agg_txd = txr->hn_agg_txd;
struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
int olen;
/*
* Update the previous RNDIS packet's total length, since
* it can increase due to the mandatory alignment padding
* for this RNDIS packet, and update the aggregating
* txdesc's chimney sending buffer size accordingly.
*
* XXX
* Zero-out the padding, as required by the RNDIS spec.
*/
olen = pkt->rm_len;
pkt->rm_len = roundup2(olen, txr->hn_agg_align);
agg_txd->chim_size += pkt->rm_len - olen;
/* Link this txdesc to the parent. */
hn_txdesc_agg(agg_txd, txd);
chim = (uint8_t *)pkt + pkt->rm_len;
/* Save the current packet for later fixup. */
txr->hn_agg_prevpkt = chim;
txr->hn_agg_pktleft--;
txr->hn_agg_szleft -= pktsize;
if (txr->hn_agg_szleft <=
HN_PKTSIZE_MIN(txr->hn_agg_align)) {
/*
* Probably can't aggregate more packets,
* flush this aggregating txdesc proactively.
*/
txr->hn_agg_pktleft = 0;
}
/* Done! */
return (chim);
}
hn_flush_txagg(ifp, txr);
}
KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
txr->hn_tx_chimney_tried++;
txd->chim_index = hn_chim_alloc(txr->hn_sc);
if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
return (NULL);
txr->hn_tx_chimney++;
chim = txr->hn_sc->hn_chim +
(txd->chim_index * txr->hn_sc->hn_chim_szmax);
if (txr->hn_agg_pktmax > 1 &&
txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
txr->hn_agg_txd = txd;
txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
txr->hn_agg_prevpkt = chim;
}
return (chim);
}
/*
* NOTE:
* If this function fails, then both txd and m_head0 will be freed.
*/
static int
hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
struct mbuf **m_head0)
{
bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
int error, nsegs, i;
struct mbuf *m_head = *m_head0;
struct rndis_packet_msg *pkt;
uint32_t *pi_data;
void *chim = NULL;
int pkt_hlen, pkt_size;
pkt = txd->rndis_pkt;
pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
if (pkt_size < txr->hn_chim_size) {
chim = hn_try_txagg(ifp, txr, txd, pkt_size);
if (chim != NULL)
pkt = chim;
} else {
if (txr->hn_agg_txd != NULL)
hn_flush_txagg(ifp, txr);
}
pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
pkt->rm_len = m_head->m_pkthdr.len;
pkt->rm_dataoffset = 0;
pkt->rm_datalen = m_head->m_pkthdr.len;
pkt->rm_oobdataoffset = 0;
pkt->rm_oobdatalen = 0;
pkt->rm_oobdataelements = 0;
pkt->rm_pktinfooffset = sizeof(*pkt);
pkt->rm_pktinfolen = 0;
pkt->rm_vchandle = 0;
pkt->rm_reserved = 0;
if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
/*
* Set the hash value for this packet, so that the host could
* dispatch the TX done event for this packet back to this TX
* ring's channel.
*/
pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
*pi_data = txr->hn_tx_idx;
}
if (m_head->m_flags & M_VLANTAG) {
pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
*pi_data = NDIS_VLAN_INFO_MAKE(
EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
}
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
#if defined(INET6) || defined(INET)
pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
#ifdef INET
if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
*pi_data = NDIS_LSO2_INFO_MAKEIPV4(
m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen,
m_head->m_pkthdr.tso_segsz);
}
#endif
#if defined(INET6) && defined(INET)
else
#endif
#ifdef INET6
{
*pi_data = NDIS_LSO2_INFO_MAKEIPV6(
m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen,
m_head->m_pkthdr.tso_segsz);
}
#endif
#endif /* INET6 || INET */
} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
if (m_head->m_pkthdr.csum_flags &
(CSUM_IP6_TCP | CSUM_IP6_UDP)) {
*pi_data = NDIS_TXCSUM_INFO_IPV6;
} else {
*pi_data = NDIS_TXCSUM_INFO_IPV4;
if (m_head->m_pkthdr.csum_flags & CSUM_IP)
*pi_data |= NDIS_TXCSUM_INFO_IPCS;
}
if (m_head->m_pkthdr.csum_flags &
(CSUM_IP_TCP | CSUM_IP6_TCP)) {
*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(
m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen);
} else if (m_head->m_pkthdr.csum_flags &
(CSUM_IP_UDP | CSUM_IP6_UDP)) {
*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(
m_head->m_pkthdr.l2hlen + m_head->m_pkthdr.l3hlen);
}
}
pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
/* Fixup RNDIS packet message total length */
pkt->rm_len += pkt_hlen;
/* Convert RNDIS packet message offsets */
pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);
/*
* Fast path: Chimney sending.
*/
if (chim != NULL) {
struct hn_txdesc *tgt_txd = txd;
if (txr->hn_agg_txd != NULL) {
tgt_txd = txr->hn_agg_txd;
#ifdef INVARIANTS
*m_head0 = NULL;
#endif
}
KASSERT(pkt == chim,
("RNDIS pkt not in chimney sending buffer"));
KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
("chimney sending buffer is not used"));
tgt_txd->chim_size += pkt->rm_len;
m_copydata(m_head, 0, m_head->m_pkthdr.len,
((uint8_t *)chim) + pkt_hlen);
txr->hn_gpa_cnt = 0;
txr->hn_sendpkt = hn_txpkt_chim;
goto done;
}
KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
("chimney buffer is used"));
KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));
error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
if (__predict_false(error)) {
int freed;
/*
* This mbuf is not linked w/ the txd yet, so free it now.
*/
m_freem(m_head);
*m_head0 = NULL;
freed = hn_txdesc_put(txr, txd);
KASSERT(freed != 0,
("fail to free txd upon txdma error"));
txr->hn_txdma_failed++;
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return error;
}
*m_head0 = m_head;
/* +1 RNDIS packet message */
txr->hn_gpa_cnt = nsegs + 1;
/* send packet with page buffer */
txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
txr->hn_gpa[0].gpa_len = pkt_hlen;
/*
* Fill the page buffers with mbuf info after the page
* buffer for RNDIS packet message.
*/
for (i = 0; i < nsegs; ++i) {
struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];
gpa->gpa_page = atop(segs[i].ds_addr);
gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
gpa->gpa_len = segs[i].ds_len;
}
txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
txd->chim_size = 0;
txr->hn_sendpkt = hn_txpkt_sglist;
done:
txd->m = m_head;
/* Set the completion routine */
hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);
/* Update temporary stats for later use. */
txr->hn_stat_pkts++;
txr->hn_stat_size += m_head->m_pkthdr.len;
if (m_head->m_flags & M_MCAST)
txr->hn_stat_mcasts++;
return 0;
}
/*
* NOTE:
* If this function fails, then txd will be freed, but the mbuf
* associated w/ the txd will _not_ be freed.
*/
static int
hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
int error, send_failed = 0, has_bpf;
again:
has_bpf = bpf_peers_present(ifp->if_bpf);
if (has_bpf) {
/*
* Make sure that this txd and any aggregated txds are not
* freed before ETHER_BPF_MTAP.
*/
hn_txdesc_hold(txd);
}
error = txr->hn_sendpkt(txr, txd);
if (!error) {
if (has_bpf) {
const struct hn_txdesc *tmp_txd;
ETHER_BPF_MTAP(ifp, txd->m);
STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
ETHER_BPF_MTAP(ifp, tmp_txd->m);
}
if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
#ifdef HN_IFSTART_SUPPORT
if (!hn_use_if_start)
#endif
{
if_inc_counter(ifp, IFCOUNTER_OBYTES,
txr->hn_stat_size);
if (txr->hn_stat_mcasts != 0) {
if_inc_counter(ifp, IFCOUNTER_OMCASTS,
txr->hn_stat_mcasts);
}
}
txr->hn_pkts += txr->hn_stat_pkts;
txr->hn_sends++;
}
if (has_bpf)
hn_txdesc_put(txr, txd);
if (__predict_false(error)) {
int freed;
/*
* This should "really rarely" happen.
*
* XXX Too many RX to be acked or too many sideband
* commands to run? Ask netvsc_channel_rollup()
* to kick start later.
*/
txr->hn_has_txeof = 1;
if (!send_failed) {
txr->hn_send_failed++;
send_failed = 1;
/*
* Try sending again after setting hn_has_txeof,
* in case we missed the last
* netvsc_channel_rollup().
*/
goto again;
}
if_printf(ifp, "send failed\n");
/*
* Caller will perform further processing on the
* associated mbuf, so don't free it in hn_txdesc_put();
* only unload it from the DMA map in hn_txdesc_put(),
* if it was loaded.
*/
txd->m = NULL;
freed = hn_txdesc_put(txr, txd);
KASSERT(freed != 0,
("fail to free txd upon send error"));
txr->hn_send_failed++;
}
/* Reset temporary stats, after this sending is done. */
txr->hn_stat_size = 0;
txr->hn_stat_pkts = 0;
txr->hn_stat_mcasts = 0;
return (error);
}
/*
* Append the specified data to the indicated mbuf chain.
* Extend the mbuf chain if the new data does not fit in
* existing space.
*
* This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
* There should be an equivalent in the kernel mbuf code,
* but there does not appear to be one yet.
*
* Differs from m_append() in that additional mbufs are
* allocated with cluster size MJUMPAGESIZE, and filled
* accordingly.
*
* Return 1 if able to complete the job; otherwise 0.
*/
static int
hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
struct mbuf *m, *n;
int remainder, space;
for (m = m0; m->m_next != NULL; m = m->m_next)
;
remainder = len;
space = M_TRAILINGSPACE(m);
if (space > 0) {
/*
* Copy into available space.
*/
if (space > remainder)
space = remainder;
bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
m->m_len += space;
cp += space;
remainder -= space;
}
while (remainder > 0) {
/*
* Allocate a new mbuf, backed by an
* MJUMPAGESIZE cluster.
*/
n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE);
if (n == NULL)
break;
n->m_len = min(MJUMPAGESIZE, remainder);
bcopy(cp, mtod(n, caddr_t), n->m_len);
cp += n->m_len;
remainder -= n->m_len;
m->m_next = n;
m = n;
}
if (m0->m_flags & M_PKTHDR)
m0->m_pkthdr.len += len - remainder;
return (remainder == 0);
}
#if defined(INET) || defined(INET6)
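/*
 * Hand a received mbuf to LRO.  When mbuf queueing is available
 * (FreeBSD >= 1100095) and enabled, queue the mbuf for deferred
 * processing; otherwise run tcp_lro_rx() inline.
 */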
static __inline int
hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
{
#if __FreeBSD_version >= 1100095
if (hn_lro_mbufq_depth) {
tcp_lro_queue_mbuf(lc, m);
return 0;
}
#endif
return tcp_lro_rx(lc, m, 0);
}
#endif
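/*
 * Receive path: wrap the RNDIS data in a freshly allocated mbuf,
 * apply the host-supplied checksum/VLAN/RSS metadata, and hand the
 * result to LRO or to the receiving ifnet (hn(4) itself, or the VF
 * in non-transparent mode).
 */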
static int
hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
const struct hn_rxinfo *info)
{
struct ifnet *ifp, *hn_ifp = rxr->hn_ifp;
struct mbuf *m_new;
int size, do_lro = 0, do_csum = 1, is_vf = 0;
int hash_type = M_HASHTYPE_NONE;
int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE;
ifp = hn_ifp;
if (rxr->hn_rxvf_ifp != NULL) {
/*
* Non-transparent mode VF; pretend this packet is from
* the VF.
*/
ifp = rxr->hn_rxvf_ifp;
is_vf = 1;
} else if (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF) {
/* Transparent mode VF. */
is_vf = 1;
}
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
/*
* NOTE:
* See the NOTE of hn_rndis_init_fixat(). This
* function can be reached immediately after the
* RNDIS is initialized but before the ifnet is
* set up on the hn_attach() path; drop the unexpected
* packets.
*/
return (0);
}
if (__predict_false(dlen < ETHER_HDR_LEN)) {
if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1);
return (0);
}
if (dlen <= MHLEN) {
m_new = m_gethdr(M_NOWAIT, MT_DATA);
if (m_new == NULL) {
if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
return (0);
}
memcpy(mtod(m_new, void *), data, dlen);
m_new->m_pkthdr.len = m_new->m_len = dlen;
rxr->hn_small_pkts++;
} else {
/*
* Get an mbuf with a cluster. For packets 2K or less,
* get a standard 2K cluster. For anything larger, get a
* 4K cluster. Any buffers larger than 4K can cause problems
* if looped around to the Hyper-V TX channel, so avoid them.
*/
size = MCLBYTES;
if (dlen > MCLBYTES) {
/* 4096 */
size = MJUMPAGESIZE;
}
m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
if (m_new == NULL) {
if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
return (0);
}
hv_m_append(m_new, dlen, data);
}
m_new->m_pkthdr.rcvif = ifp;
if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0))
do_csum = 0;
/* receive side checksum offload */
if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
/* IP csum offload */
if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID);
rxr->hn_csum_ip++;
}
/* TCP/UDP csum offload */
if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
m_new->m_pkthdr.csum_flags |=
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m_new->m_pkthdr.csum_data = 0xffff;
if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
rxr->hn_csum_tcp++;
else
rxr->hn_csum_udp++;
}
/*
* XXX
* As of this writing (Oct 28th, 2016), the host side will turn
* on only TCPCS_OK and IPCS_OK even for UDP datagrams, so
* the do_lro setting here is actually _not_ accurate. We
* depend on the RSS hash type check to reset do_lro.
*/
if ((info->csum_info &
(NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
(NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
do_lro = 1;
} else {
hn_rxpkt_proto(m_new, &l3proto, &l4proto);
if (l3proto == ETHERTYPE_IP) {
if (l4proto == IPPROTO_TCP) {
if (do_csum &&
(rxr->hn_trust_hcsum &
HN_TRUST_HCSUM_TCP)) {
rxr->hn_csum_trusted++;
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m_new->m_pkthdr.csum_data = 0xffff;
}
do_lro = 1;
} else if (l4proto == IPPROTO_UDP) {
if (do_csum &&
(rxr->hn_trust_hcsum &
HN_TRUST_HCSUM_UDP)) {
rxr->hn_csum_trusted++;
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m_new->m_pkthdr.csum_data = 0xffff;
}
} else if (l4proto != IPPROTO_DONE && do_csum &&
(rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
rxr->hn_csum_trusted++;
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID);
}
}
}
if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
NDIS_VLAN_INFO_ID(info->vlan_info),
NDIS_VLAN_INFO_PRI(info->vlan_info),
NDIS_VLAN_INFO_CFI(info->vlan_info));
m_new->m_flags |= M_VLANTAG;
}
/*
* If VF is activated (transparent/non-transparent mode does not
* matter here).
*
* - Disable LRO
*
* hn(4) will only receive broadcast packets, multicast packets,
* TCP SYN and SYN|ACK (in Azure); LRO is useless for these
* packet types.
*
* For non-transparent, we definitely _cannot_ enable LRO at
* all, since the LRO flush will use hn(4) as the receiving
* interface; i.e. hn_ifp->if_input(hn_ifp, m).
*/
if (is_vf)
do_lro = 0;
/*
* If VF is activated (transparent/non-transparent mode does not
* matter here), do _not_ mess with unsupported hash types or
* functions.
*/
if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
rxr->hn_rss_pkts++;
m_new->m_pkthdr.flowid = info->hash_value;
if (!is_vf)
hash_type = M_HASHTYPE_OPAQUE_HASH;
if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
NDIS_HASH_FUNCTION_TOEPLITZ) {
uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK &
rxr->hn_mbuf_hash);
/*
* NOTE:
* do_lro is reset, if the hash types are not TCP
* related. See the comment in the above csum_flags
* setup section.
*/
switch (type) {
case NDIS_HASH_IPV4:
hash_type = M_HASHTYPE_RSS_IPV4;
do_lro = 0;
break;
case NDIS_HASH_TCP_IPV4:
hash_type = M_HASHTYPE_RSS_TCP_IPV4;
if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) {
int def_htype = M_HASHTYPE_OPAQUE_HASH;
if (is_vf)
def_htype = M_HASHTYPE_NONE;
/*
* UDP 4-tuple hash is delivered as
* TCP 4-tuple hash.
*/
if (l3proto == ETHERTYPE_MAX) {
hn_rxpkt_proto(m_new,
&l3proto, &l4proto);
}
if (l3proto == ETHERTYPE_IP) {
if (l4proto == IPPROTO_UDP &&
(rxr->hn_mbuf_hash &
NDIS_HASH_UDP_IPV4_X)) {
hash_type =
M_HASHTYPE_RSS_UDP_IPV4;
do_lro = 0;
} else if (l4proto !=
IPPROTO_TCP) {
hash_type = def_htype;
do_lro = 0;
}
} else {
hash_type = def_htype;
do_lro = 0;
}
}
break;
case NDIS_HASH_IPV6:
hash_type = M_HASHTYPE_RSS_IPV6;
do_lro = 0;
break;
case NDIS_HASH_IPV6_EX:
hash_type = M_HASHTYPE_RSS_IPV6_EX;
do_lro = 0;
break;
case NDIS_HASH_TCP_IPV6:
hash_type = M_HASHTYPE_RSS_TCP_IPV6;
break;
case NDIS_HASH_TCP_IPV6_EX:
hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
break;
}
}
} else if (!is_vf) {
m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
hash_type = M_HASHTYPE_OPAQUE;
}
M_HASHTYPE_SET(m_new, hash_type);
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
if (hn_ifp != ifp) {
const struct ether_header *eh;
/*
* Non-transparent mode VF is activated.
*/
/*
* Allow tapping on hn(4).
*/
ETHER_BPF_MTAP(hn_ifp, m_new);
/*
* Update hn(4)'s stats.
*/
if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len);
/* Checked at the beginning of this function. */
KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame"));
eh = mtod(m_new, struct ether_header *);
if (ETHER_IS_MULTICAST(eh->ether_dhost))
if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1);
}
rxr->hn_pkts++;
if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) {
#if defined(INET) || defined(INET6)
struct lro_ctrl *lro = &rxr->hn_lro;
if (lro->lro_cnt) {
rxr->hn_lro_tried++;
if (hn_lro_rx(lro, m_new) == 0) {
/* DONE! */
return 0;
}
}
#endif
}
ifp->if_input(ifp, m_new);
return (0);
}
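/*
 * Interface ioctl handler.  Note that SIOCSIFMTU is heavyweight:
 * the synthetic parts (NVS and RNDIS) are detached and reattached
 * with the new MTU, so the interface is suspended across the change.
 */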
static int
hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct hn_softc *sc = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *)data, ifr_vf;
struct ifnet *vf_ifp;
int mask, error = 0;
struct ifrsskey *ifrk;
struct ifrsshash *ifrh;
uint32_t mtu;
switch (cmd) {
case SIOCSIFMTU:
if (ifr->ifr_mtu > HN_MTU_MAX) {
error = EINVAL;
break;
}
HN_LOCK(sc);
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
HN_UNLOCK(sc);
break;
}
if ((sc->hn_caps & HN_CAP_MTU) == 0) {
/* Can't change MTU */
HN_UNLOCK(sc);
error = EOPNOTSUPP;
break;
}
if (ifp->if_mtu == ifr->ifr_mtu) {
HN_UNLOCK(sc);
break;
}
if (hn_xpnt_vf_isready(sc)) {
vf_ifp = sc->hn_vf_ifp;
ifr_vf = *ifr;
strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname,
sizeof(ifr_vf.ifr_name));
error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU,
(caddr_t)&ifr_vf);
if (error) {
HN_UNLOCK(sc);
if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n",
vf_ifp->if_xname, ifr->ifr_mtu, error);
break;
}
}
/*
* Suspend this interface before the synthetic parts
* are torn down.
*/
hn_suspend(sc);
/*
* Detach the synthetic parts, i.e. NVS and RNDIS.
*/
hn_synth_detach(sc);
/*
* Reattach the synthetic parts, i.e. NVS and RNDIS,
* with the new MTU setting.
*/
error = hn_synth_attach(sc, ifr->ifr_mtu);
if (error) {
HN_UNLOCK(sc);
break;
}
error = hn_rndis_get_mtu(sc, &mtu);
if (error)
mtu = ifr->ifr_mtu;
else if (bootverbose)
if_printf(ifp, "RNDIS mtu %u\n", mtu);
/*
* Commit the requested MTU, after the synthetic parts
* have been successfully attached.
*/
if (mtu >= ifr->ifr_mtu) {
mtu = ifr->ifr_mtu;
} else {
if_printf(ifp, "fixup mtu %d -> %u\n",
ifr->ifr_mtu, mtu);
}
ifp->if_mtu = mtu;
/*
* Synthetic parts' reattach may change the chimney
* sending size; update it.
*/
if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
hn_set_chim_size(sc, sc->hn_chim_szmax);
/*
* Make sure that various parameters based on MTU are
* still valid, after the MTU change.
*/
hn_mtu_change_fixup(sc);
/*
* All done! Resume the interface now.
*/
hn_resume(sc);
if ((sc->hn_flags & HN_FLAG_RXVF) ||
(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
/*
* Since we have reattached the NVS part,
* change the datapath to VF again; in case
* that it is lost, after the NVS was detached.
*/
hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
}
HN_UNLOCK(sc);
break;
case SIOCSIFFLAGS:
HN_LOCK(sc);
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
HN_UNLOCK(sc);
break;
}
if (hn_xpnt_vf_isready(sc))
hn_xpnt_vf_saveifflags(sc);
if (ifp->if_flags & IFF_UP) {
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
/*
* Caller might hold a mutex, e.g.
* bpf; use busy-wait for the RNDIS
* reply.
*/
HN_NO_SLEEPING(sc);
hn_rxfilter_config(sc);
HN_SLEEPING_OK(sc);
if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
error = hn_xpnt_vf_iocsetflags(sc);
} else {
hn_init_locked(sc);
}
} else {
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
hn_stop(sc, false);
}
sc->hn_if_flags = ifp->if_flags;
HN_UNLOCK(sc);
break;
case SIOCSIFCAP:
HN_LOCK(sc);
if (hn_xpnt_vf_isready(sc)) {
ifr_vf = *ifr;
strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname,
sizeof(ifr_vf.ifr_name));
error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf);
HN_UNLOCK(sc);
break;
}
/*
* Fix up requested capabilities w/ supported capabilities,
* since the supported capabilities could have been changed.
*/
mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^
ifp->if_capenable;
if (mask & IFCAP_TXCSUM) {
ifp->if_capenable ^= IFCAP_TXCSUM;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
else
ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
}
if (mask & IFCAP_TXCSUM_IPV6) {
ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
else
ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
}
/* TODO: flip RNDIS offload parameters for RXCSUM. */
if (mask & IFCAP_RXCSUM)
ifp->if_capenable ^= IFCAP_RXCSUM;
#ifdef foo
/* We can't diff IPv6 packets from IPv4 packets on RX path. */
if (mask & IFCAP_RXCSUM_IPV6)
ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
#endif
if (mask & IFCAP_LRO)
ifp->if_capenable ^= IFCAP_LRO;
if (mask & IFCAP_TSO4) {
ifp->if_capenable ^= IFCAP_TSO4;
if (ifp->if_capenable & IFCAP_TSO4)
ifp->if_hwassist |= CSUM_IP_TSO;
else
ifp->if_hwassist &= ~CSUM_IP_TSO;
}
if (mask & IFCAP_TSO6) {
ifp->if_capenable ^= IFCAP_TSO6;
if (ifp->if_capenable & IFCAP_TSO6)
ifp->if_hwassist |= CSUM_IP6_TSO;
else
ifp->if_hwassist &= ~CSUM_IP6_TSO;
}
HN_UNLOCK(sc);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
HN_LOCK(sc);
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
HN_UNLOCK(sc);
break;
}
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
/*
* Multicast handling uses a mutex; use busy-wait for
* the RNDIS reply.
*/
HN_NO_SLEEPING(sc);
hn_rxfilter_config(sc);
HN_SLEEPING_OK(sc);
}
/* XXX vlan(4) style mcast addr maintenance */
if (hn_xpnt_vf_isready(sc)) {
int old_if_flags;
old_if_flags = sc->hn_vf_ifp->if_flags;
hn_xpnt_vf_saveifflags(sc);
if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) &&
((old_if_flags ^ sc->hn_vf_ifp->if_flags) &
IFF_ALLMULTI))
error = hn_xpnt_vf_iocsetflags(sc);
}
HN_UNLOCK(sc);
break;
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
HN_LOCK(sc);
if (hn_xpnt_vf_isready(sc)) {
/*
* SIOCGIFMEDIA expects ifmediareq, so don't
* create and pass ifr_vf to the VF here; just
* replace the ifr_name.
*/
vf_ifp = sc->hn_vf_ifp;
strlcpy(ifr->ifr_name, vf_ifp->if_xname,
sizeof(ifr->ifr_name));
error = vf_ifp->if_ioctl(vf_ifp, cmd, data);
/* Restore the ifr_name. */
strlcpy(ifr->ifr_name, ifp->if_xname,
sizeof(ifr->ifr_name));
HN_UNLOCK(sc);
break;
}
HN_UNLOCK(sc);
error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
break;
case SIOCGIFRSSHASH:
ifrh = (struct ifrsshash *)data;
HN_LOCK(sc);
if (sc->hn_rx_ring_inuse == 1) {
HN_UNLOCK(sc);
ifrh->ifrh_func = RSS_FUNC_NONE;
ifrh->ifrh_types = 0;
break;
}
if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ)
ifrh->ifrh_func = RSS_FUNC_TOEPLITZ;
else
ifrh->ifrh_func = RSS_FUNC_PRIVATE;
ifrh->ifrh_types = hn_rss_type_fromndis(sc->hn_rss_hash);
HN_UNLOCK(sc);
break;
case SIOCGIFRSSKEY:
ifrk = (struct ifrsskey *)data;
HN_LOCK(sc);
if (sc->hn_rx_ring_inuse == 1) {
HN_UNLOCK(sc);
ifrk->ifrk_func = RSS_FUNC_NONE;
ifrk->ifrk_keylen = 0;
break;
}
if (sc->hn_rss_hash & NDIS_HASH_FUNCTION_TOEPLITZ)
ifrk->ifrk_func = RSS_FUNC_TOEPLITZ;
else
ifrk->ifrk_func = RSS_FUNC_PRIVATE;
ifrk->ifrk_keylen = NDIS_HASH_KEYSIZE_TOEPLITZ;
memcpy(ifrk->ifrk_key, sc->hn_rss.rss_key,
NDIS_HASH_KEYSIZE_TOEPLITZ);
HN_UNLOCK(sc);
break;
default:
error = ether_ioctl(ifp, cmd, data);
break;
}
return (error);
}
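/*
 * Stop the interface: clear RUNNING, disable polling, bring down the
 * transparent mode VF (if enabled), and suspend data transfers.
 */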
static void
hn_stop(struct hn_softc *sc, bool detaching)
{
struct ifnet *ifp = sc->hn_ifp;
int i;
HN_LOCK_ASSERT(sc);
KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
("synthetic parts were not attached"));
/* Clear RUNNING bit ASAP. */
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
/* Disable polling. */
hn_polling(sc, 0);
if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
KASSERT(sc->hn_vf_ifp != NULL,
("%s: VF is not attached", ifp->if_xname));
/* Mark transparent mode VF as disabled. */
hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */);
/*
* NOTE:
* Datapath setting must happen _before_ bringing
* the VF down.
*/
hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
/*
* Bring the VF down.
*/
hn_xpnt_vf_saveifflags(sc);
sc->hn_vf_ifp->if_flags &= ~IFF_UP;
hn_xpnt_vf_iocsetflags(sc);
}
/* Suspend data transfers. */
hn_suspend_data(sc);
/* Clear OACTIVE bit. */
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
sc->hn_tx_ring[i].hn_oactive = 0;
/*
* If the non-transparent mode VF is active, make sure
* that the RX filter still allows packet reception.
*/
if (!detaching && (sc->hn_flags & HN_FLAG_RXVF))
hn_rxfilter_config(sc);
}
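/*
 * Bring the interface up: configure the RX filter, resume the TX
 * rings, initialize the transparent mode VF (if ready), and re-enable
 * polling if it was requested.
 */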
static void
hn_init_locked(struct hn_softc *sc)
{
struct ifnet *ifp = sc->hn_ifp;
int i;
HN_LOCK_ASSERT(sc);
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
return;
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
return;
/* Configure RX filter */
hn_rxfilter_config(sc);
/* Clear OACTIVE bit. */
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
sc->hn_tx_ring[i].hn_oactive = 0;
/* Clear TX 'suspended' bit. */
hn_resume_tx(sc, sc->hn_tx_ring_inuse);
if (hn_xpnt_vf_isready(sc)) {
/* Initialize transparent VF. */
hn_xpnt_vf_init(sc);
}
/* Everything is ready; unleash! */
atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
/* Re-enable polling if requested. */
if (sc->hn_pollhz > 0)
hn_polling(sc, sc->hn_pollhz);
}
static void
hn_init(void *xsc)
{
struct hn_softc *sc = xsc;
HN_LOCK(sc);
hn_init_locked(sc);
HN_UNLOCK(sc);
}
#if __FreeBSD_version >= 1100099
static int
hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
unsigned int lenlim;
int error;
lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
error = sysctl_handle_int(oidp, &lenlim, 0, req);
if (error || req->newptr == NULL)
return error;
HN_LOCK(sc);
if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
lenlim > TCP_LRO_LENGTH_MAX) {
HN_UNLOCK(sc);
return EINVAL;
}
hn_set_lro_lenlim(sc, lenlim);
HN_UNLOCK(sc);
return 0;
}
static int
hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int ackcnt, error, i;
/*
* lro_ackcnt_lim is the append count limit;
* +1 turns it into the aggregation limit.
*/
ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
error = sysctl_handle_int(oidp, &ackcnt, 0, req);
if (error || req->newptr == NULL)
return error;
if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
return EINVAL;
/*
* Convert aggregation limit back to append
* count limit.
*/
--ackcnt;
HN_LOCK(sc);
for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
HN_UNLOCK(sc);
return 0;
}
#endif
static int
hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int hcsum = arg2;
int on, error, i;
on = 0;
if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
on = 1;
error = sysctl_handle_int(oidp, &on, 0, req);
if (error || req->newptr == NULL)
return error;
HN_LOCK(sc);
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
if (on)
rxr->hn_trust_hcsum |= hcsum;
else
rxr->hn_trust_hcsum &= ~hcsum;
}
HN_UNLOCK(sc);
return 0;
}
static int
hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int chim_size, error;
chim_size = sc->hn_tx_ring[0].hn_chim_size;
error = sysctl_handle_int(oidp, &chim_size, 0, req);
if (error || req->newptr == NULL)
return error;
if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
return EINVAL;
HN_LOCK(sc);
hn_set_chim_size(sc, chim_size);
HN_UNLOCK(sc);
return 0;
}
#if __FreeBSD_version < 1100095
static int
hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int ofs = arg2, i, error;
struct hn_rx_ring *rxr;
uint64_t stat;
stat = 0;
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
stat += *((int *)((uint8_t *)rxr + ofs));
}
error = sysctl_handle_64(oidp, &stat, 0, req);
if (error || req->newptr == NULL)
return error;
/* Zero out this stat. */
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
*((int *)((uint8_t *)rxr + ofs)) = 0;
}
return 0;
}
#else
static int
hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int ofs = arg2, i, error;
struct hn_rx_ring *rxr;
uint64_t stat;
stat = 0;
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
stat += *((uint64_t *)((uint8_t *)rxr + ofs));
}
error = sysctl_handle_64(oidp, &stat, 0, req);
if (error || req->newptr == NULL)
return error;
/* Zero out this stat. */
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
*((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
}
return 0;
}
#endif
static int
hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int ofs = arg2, i, error;
struct hn_rx_ring *rxr;
u_long stat;
stat = 0;
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
stat += *((u_long *)((uint8_t *)rxr + ofs));
}
error = sysctl_handle_long(oidp, &stat, 0, req);
if (error || req->newptr == NULL)
return error;
/* Zero out this stat. */
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
*((u_long *)((uint8_t *)rxr + ofs)) = 0;
}
return 0;
}
static int
hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int ofs = arg2, i, error;
struct hn_tx_ring *txr;
u_long stat;
stat = 0;
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
stat += *((u_long *)((uint8_t *)txr + ofs));
}
error = sysctl_handle_long(oidp, &stat, 0, req);
if (error || req->newptr == NULL)
return error;
/* Zero out this stat. */
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
*((u_long *)((uint8_t *)txr + ofs)) = 0;
}
return 0;
}
static int
hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int ofs = arg2, i, error, conf;
struct hn_tx_ring *txr;
txr = &sc->hn_tx_ring[0];
conf = *((int *)((uint8_t *)txr + ofs));
error = sysctl_handle_int(oidp, &conf, 0, req);
if (error || req->newptr == NULL)
return error;
HN_LOCK(sc);
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
*((int *)((uint8_t *)txr + ofs)) = conf;
}
HN_UNLOCK(sc);
return 0;
}
static int
hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int error, size;
size = sc->hn_agg_size;
error = sysctl_handle_int(oidp, &size, 0, req);
if (error || req->newptr == NULL)
return (error);
HN_LOCK(sc);
sc->hn_agg_size = size;
hn_set_txagg(sc);
HN_UNLOCK(sc);
return (0);
}
static int
hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int error, pkts;
pkts = sc->hn_agg_pkts;
error = sysctl_handle_int(oidp, &pkts, 0, req);
if (error || req->newptr == NULL)
return (error);
HN_LOCK(sc);
sc->hn_agg_pkts = pkts;
hn_set_txagg(sc);
HN_UNLOCK(sc);
return (0);
}
static int
hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int pkts;
pkts = sc->hn_tx_ring[0].hn_agg_pktmax;
return (sysctl_handle_int(oidp, &pkts, 0, req));
}
static int
hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int align;
align = sc->hn_tx_ring[0].hn_agg_align;
return (sysctl_handle_int(oidp, &align, 0, req));
}
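/*
 * Enable or disable polling on a single channel; pollhz == 0 means
 * disable.
 */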
static void
hn_chan_polling(struct vmbus_channel *chan, u_int pollhz)
{
if (pollhz == 0)
vmbus_chan_poll_disable(chan);
else
vmbus_chan_poll_enable(chan, pollhz);
}
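/*
 * Apply the polling rate to all sub-channels first, then to the
 * primary channel.
 */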
static void
hn_polling(struct hn_softc *sc, u_int pollhz)
{
int nsubch = sc->hn_rx_ring_inuse - 1;
HN_LOCK_ASSERT(sc);
if (nsubch > 0) {
struct vmbus_channel **subch;
int i;
subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
for (i = 0; i < nsubch; ++i)
hn_chan_polling(subch[i], pollhz);
vmbus_subchan_rel(subch, nsubch);
}
hn_chan_polling(sc->hn_prichan, pollhz);
}
static int
hn_polling_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int pollhz, error;
pollhz = sc->hn_pollhz;
error = sysctl_handle_int(oidp, &pollhz, 0, req);
if (error || req->newptr == NULL)
return (error);
if (pollhz != 0 &&
(pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX))
return (EINVAL);
HN_LOCK(sc);
if (sc->hn_pollhz != pollhz) {
sc->hn_pollhz = pollhz;
if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) &&
(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
hn_polling(sc, sc->hn_pollhz);
}
HN_UNLOCK(sc);
return (0);
}
static int
hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
char verstr[16];
snprintf(verstr, sizeof(verstr), "%u.%u",
HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
}
static int
hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
char caps_str[128];
uint32_t caps;
HN_LOCK(sc);
caps = sc->hn_caps;
HN_UNLOCK(sc);
snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS);
return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
}
static int
hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
char assist_str[128];
uint32_t hwassist;
HN_LOCK(sc);
hwassist = sc->hn_ifp->if_hwassist;
HN_UNLOCK(sc);
snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
}
static int
hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
char filter_str[128];
uint32_t filter;
HN_LOCK(sc);
filter = sc->hn_rx_filter;
HN_UNLOCK(sc);
snprintf(filter_str, sizeof(filter_str), "%b", filter,
NDIS_PACKET_TYPES);
return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req);
}
#ifndef RSS
static int
hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int error;
HN_LOCK(sc);
error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
if (error || req->newptr == NULL)
goto back;
if ((sc->hn_flags & HN_FLAG_RXVF) ||
(hn_xpnt_vf && sc->hn_vf_ifp != NULL)) {
/*
* RSS key is synchronized w/ the VF's; don't allow users
* to change it.
*/
error = EBUSY;
goto back;
}
error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
if (error)
goto back;
sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
if (sc->hn_rx_ring_inuse > 1) {
error = hn_rss_reconfig(sc);
} else {
/* Not RSS capable, at least for now; just save the RSS key. */
error = 0;
}
back:
HN_UNLOCK(sc);
return (error);
}
static int
hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int error;
HN_LOCK(sc);
error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
if (error || req->newptr == NULL)
goto back;
/*
* Don't allow RSS indirect table changes, if this interface
* is currently not RSS capable.
*/
if (sc->hn_rx_ring_inuse == 1) {
error = EOPNOTSUPP;
goto back;
}
error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
if (error)
goto back;
sc->hn_flags |= HN_FLAG_HAS_RSSIND;
hn_rss_ind_fixup(sc);
error = hn_rss_reconfig(sc);
back:
HN_UNLOCK(sc);
return (error);
}
#endif /* !RSS */
static int
hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
char hash_str[128];
uint32_t hash;
HN_LOCK(sc);
hash = sc->hn_rss_hash;
HN_UNLOCK(sc);
snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
}
static int
hn_rss_hcap_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
char hash_str[128];
uint32_t hash;
HN_LOCK(sc);
hash = sc->hn_rss_hcap;
HN_UNLOCK(sc);
snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
}
static int
hn_rss_mbuf_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
char hash_str[128];
uint32_t hash;
HN_LOCK(sc);
hash = sc->hn_rx_ring[0].hn_mbuf_hash;
HN_UNLOCK(sc);
snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
}
static int
hn_vf_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
char vf_name[IFNAMSIZ + 1];
struct ifnet *vf_ifp;
HN_LOCK(sc);
vf_name[0] = '\0';
vf_ifp = sc->hn_vf_ifp;
if (vf_ifp != NULL)
snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname);
HN_UNLOCK(sc);
return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req);
}
static int
hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
char vf_name[IFNAMSIZ + 1];
struct ifnet *vf_ifp;
HN_LOCK(sc);
vf_name[0] = '\0';
vf_ifp = sc->hn_rx_ring[0].hn_rxvf_ifp;
if (vf_ifp != NULL)
snprintf(vf_name, sizeof(vf_name), "%s", vf_ifp->if_xname);
HN_UNLOCK(sc);
return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req);
}
static int
hn_vflist_sysctl(SYSCTL_HANDLER_ARGS)
{
struct rm_priotracker pt;
struct sbuf *sb;
int error, i;
bool first;
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
if (sb == NULL)
return (ENOMEM);
rm_rlock(&hn_vfmap_lock, &pt);
first = true;
for (i = 0; i < hn_vfmap_size; ++i) {
struct ifnet *ifp;
if (hn_vfmap[i] == NULL)
continue;
ifp = ifnet_byindex(i);
if (ifp != NULL) {
if (first)
sbuf_printf(sb, "%s", ifp->if_xname);
else
sbuf_printf(sb, " %s", ifp->if_xname);
first = false;
}
}
rm_runlock(&hn_vfmap_lock, &pt);
error = sbuf_finish(sb);
sbuf_delete(sb);
return (error);
}
static int
hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS)
{
struct rm_priotracker pt;
struct sbuf *sb;
int error, i;
bool first;
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
if (sb == NULL)
return (ENOMEM);
rm_rlock(&hn_vfmap_lock, &pt);
first = true;
for (i = 0; i < hn_vfmap_size; ++i) {
struct ifnet *ifp, *hn_ifp;
hn_ifp = hn_vfmap[i];
if (hn_ifp == NULL)
continue;
ifp = ifnet_byindex(i);
if (ifp != NULL) {
if (first) {
sbuf_printf(sb, "%s:%s", ifp->if_xname,
hn_ifp->if_xname);
} else {
sbuf_printf(sb, " %s:%s", ifp->if_xname,
hn_ifp->if_xname);
}
first = false;
}
}
rm_runlock(&hn_vfmap_lock, &pt);
error = sbuf_finish(sb);
sbuf_delete(sb);
return (error);
}
static int
hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int error, onoff = 0;
if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF)
onoff = 1;
error = sysctl_handle_int(oidp, &onoff, 0, req);
if (error || req->newptr == NULL)
return (error);
HN_LOCK(sc);
/* NOTE: hn_vf_lock for hn_transmit() */
rm_wlock(&sc->hn_vf_lock);
if (onoff)
sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
else
sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF;
rm_wunlock(&sc->hn_vf_lock);
HN_UNLOCK(sc);
return (0);
}
static int
hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int enabled = 0;
if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
enabled = 1;
return (sysctl_handle_int(oidp, &enabled, 0, req));
}
static int
hn_check_iplen(const struct mbuf *m, int hoff)
{
const struct ip *ip;
int len, iphlen, iplen;
const struct tcphdr *th;
int thoff; /* TCP data offset */
len = hoff + sizeof(struct ip);
/* The packet must be at least the size of an IP header. */
if (m->m_pkthdr.len < len)
return IPPROTO_DONE;
/* The fixed IP header must reside completely in the first mbuf. */
if (m->m_len < len)
return IPPROTO_DONE;
ip = mtodo(m, hoff);
/* Bound check the packet's stated IP header length. */
iphlen = ip->ip_hl << 2;
if (iphlen < sizeof(struct ip)) /* minimum header length */
return IPPROTO_DONE;
/* The full IP header must reside completely in the one mbuf. */
if (m->m_len < hoff + iphlen)
return IPPROTO_DONE;
iplen = ntohs(ip->ip_len);
/*
* Check that the amount of data in the buffers is at
* least as much as the IP header would have us expect.
*/
if (m->m_pkthdr.len < hoff + iplen)
return IPPROTO_DONE;
/*
* Ignore IP fragments.
*/
if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
return IPPROTO_DONE;
/*
* The TCP/IP or UDP/IP header must be entirely contained within
* the first fragment of a packet.
*/
switch (ip->ip_p) {
case IPPROTO_TCP:
if (iplen < iphlen + sizeof(struct tcphdr))
return IPPROTO_DONE;
if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
return IPPROTO_DONE;
th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
thoff = th->th_off << 2;
if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
return IPPROTO_DONE;
if (m->m_len < hoff + iphlen + thoff)
return IPPROTO_DONE;
break;
case IPPROTO_UDP:
if (iplen < iphlen + sizeof(struct udphdr))
return IPPROTO_DONE;
if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
return IPPROTO_DONE;
break;
default:
if (iplen < iphlen)
return IPPROTO_DONE;
break;
}
return ip->ip_p;
}
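/*
 * Determine the L3/L4 protocols of the received packet, skipping an
 * 802.1Q VLAN header if one is present.
 */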
static void
hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto)
{
const struct ether_header *eh;
uint16_t etype;
int hoff;
hoff = sizeof(*eh);
/* Checked at the beginning of the caller, hn_rxpkt(). */
KASSERT(m_new->m_len >= hoff, ("not ethernet frame"));
eh = mtod(m_new, const struct ether_header *);
etype = ntohs(eh->ether_type);
if (etype == ETHERTYPE_VLAN) {
const struct ether_vlan_header *evl;
hoff = sizeof(*evl);
if (m_new->m_len < hoff)
return;
evl = mtod(m_new, const struct ether_vlan_header *);
etype = ntohs(evl->evl_proto);
}
*l3proto = etype;
if (etype == ETHERTYPE_IP)
*l4proto = hn_check_iplen(m_new, hoff);
else
*l4proto = IPPROTO_DONE;
}
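/*
 * Allocate the shared RXBUF, the RX rings and their bufrings, set up
 * per-ring LRO, and create the dev.hn.UNIT.rx sysctl tree.
 */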
static int
hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
{
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
device_t dev = sc->hn_dev;
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
int lroent_cnt;
#endif
#endif
int i;
/*
* Create RXBUF for reception.
*
* NOTE:
* - It is shared by all channels.
* - A large enough buffer is allocated; certain versions of NVS
* may further limit the usable space.
*/
sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma,
BUS_DMA_WAITOK | BUS_DMA_ZERO);
if (sc->hn_rxbuf == NULL) {
device_printf(sc->hn_dev, "allocate rxbuf failed\n");
return (ENOMEM);
}
sc->hn_rx_ring_cnt = ring_cnt;
sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
M_DEVBUF, M_WAITOK | M_ZERO);
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
lroent_cnt = hn_lro_entry_count;
if (lroent_cnt < TCP_LRO_ENTRIES)
lroent_cnt = TCP_LRO_ENTRIES;
if (bootverbose)
device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
#endif
#endif /* INET || INET6 */
ctx = device_get_sysctl_ctx(dev);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
/* Create dev.hn.UNIT.rx sysctl tree */
sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE,
&rxr->hn_br_dma, BUS_DMA_WAITOK);
if (rxr->hn_br == NULL) {
device_printf(dev, "allocate bufring failed\n");
return (ENOMEM);
}
if (hn_trust_hosttcp)
rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
if (hn_trust_hostudp)
rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
if (hn_trust_hostip)
rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
rxr->hn_mbuf_hash = NDIS_HASH_ALL;
rxr->hn_ifp = sc->hn_ifp;
if (i < sc->hn_tx_ring_cnt)
rxr->hn_txr = &sc->hn_tx_ring[i];
rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
rxr->hn_rx_idx = i;
rxr->hn_rxbuf = sc->hn_rxbuf;
/*
* Initialize LRO.
*/
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
hn_lro_mbufq_depth);
#else
tcp_lro_init(&rxr->hn_lro);
rxr->hn_lro.ifp = sc->hn_ifp;
#endif
#if __FreeBSD_version >= 1100099
rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
#endif
#endif /* INET || INET6 */
if (sc->hn_rx_sysctl_tree != NULL) {
char name[16];
/*
* Create per RX ring sysctl tree:
* dev.hn.UNIT.rx.RINGID
*/
snprintf(name, sizeof(name), "%d", i);
rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
if (rxr->hn_rx_sysctl_tree != NULL) {
SYSCTL_ADD_ULONG(ctx,
SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
OID_AUTO, "packets", CTLFLAG_RW,
&rxr->hn_pkts, "# of packets received");
SYSCTL_ADD_ULONG(ctx,
SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
OID_AUTO, "rss_pkts", CTLFLAG_RW,
&rxr->hn_rss_pkts,
"# of packets w/ RSS info received");
SYSCTL_ADD_INT(ctx,
SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
OID_AUTO, "pktbuf_len", CTLFLAG_RD,
&rxr->hn_pktbuf_len, 0,
"Temporary channel packet buffer length");
}
}
}
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_lro.lro_queued),
#if __FreeBSD_version < 1100095
hn_rx_stat_int_sysctl,
#else
hn_rx_stat_u64_sysctl,
#endif
"LU", "LRO queued");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
#if __FreeBSD_version < 1100095
hn_rx_stat_int_sysctl,
#else
hn_rx_stat_u64_sysctl,
#endif
"LU", "LRO flushed");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_lro_tried),
hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
#if __FreeBSD_version >= 1100099
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_lro_lenlim_sysctl, "IU",
"Max # of data bytes to be aggregated by LRO");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_lro_ackcnt_sysctl, "I",
"Max # of ACKs to be aggregated by LRO");
#endif
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
hn_trust_hcsum_sysctl, "I",
"Trust tcp segement verification on host side, "
"when csum info is missing");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
hn_trust_hcsum_sysctl, "I",
"Trust udp datagram verification on host side, "
"when csum info is missing");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
hn_trust_hcsum_sysctl, "I",
"Trust ip packet verification on host side, "
"when csum info is missing");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_ip),
hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_tcp),
hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_udp),
hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_trusted),
hn_rx_stat_ulong_sysctl, "LU",
"# of packets that we trust host's csum verification");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_small_pkts),
hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_ack_failed),
hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
return (0);
}
static void
hn_destroy_rx_data(struct hn_softc *sc)
{
int i;
if (sc->hn_rxbuf != NULL) {
if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0)
hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
else
device_printf(sc->hn_dev, "RXBUF is referenced\n");
sc->hn_rxbuf = NULL;
}
if (sc->hn_rx_ring_cnt == 0)
return;
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
if (rxr->hn_br == NULL)
continue;
if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) {
hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
} else {
device_printf(sc->hn_dev,
"%dth channel bufring is referenced", i);
}
rxr->hn_br = NULL;
#if defined(INET) || defined(INET6)
tcp_lro_free(&rxr->hn_lro);
#endif
free(rxr->hn_pktbuf, M_DEVBUF);
}
free(sc->hn_rx_ring, M_DEVBUF);
sc->hn_rx_ring = NULL;
sc->hn_rx_ring_cnt = 0;
sc->hn_rx_ring_inuse = 0;
}
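/*
 * Create one TX ring: allocate its txdescs, the DMA tags and maps for
 * the RNDIS packet messages and TX data, and the per-ring sysctl
 * nodes.
 */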
static int
hn_tx_ring_create(struct hn_softc *sc, int id)
{
struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
device_t dev = sc->hn_dev;
bus_dma_tag_t parent_dtag;
int error, i;
txr->hn_sc = sc;
txr->hn_tx_idx = id;
#ifndef HN_USE_TXDESC_BUFRING
mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
#endif
mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
M_DEVBUF, M_WAITOK | M_ZERO);
#ifndef HN_USE_TXDESC_BUFRING
SLIST_INIT(&txr->hn_txlist);
#else
txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
M_WAITOK, &txr->hn_tx_lock);
#endif
if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) {
txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ(
device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id));
} else {
txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt];
}
#ifdef HN_IFSTART_SUPPORT
if (hn_use_if_start) {
txr->hn_txeof = hn_start_txeof;
TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
} else
#endif
{
int br_depth;
txr->hn_txeof = hn_xmit_txeof;
TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
br_depth = hn_get_txswq_depth(txr);
txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
M_WAITOK, &txr->hn_tx_lock);
}
txr->hn_direct_tx_size = hn_direct_tx_size;
/*
* Always schedule transmission instead of trying to do direct
* transmission. This gives the best performance so far.
*/
txr->hn_sched_tx = 1;
parent_dtag = bus_get_dma_tag(dev);
/* DMA tag for RNDIS packet messages. */
error = bus_dma_tag_create(parent_dtag, /* parent */
HN_RNDIS_PKT_ALIGN, /* alignment */
HN_RNDIS_PKT_BOUNDARY, /* boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
HN_RNDIS_PKT_LEN, /* maxsize */
1, /* nsegments */
HN_RNDIS_PKT_LEN, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&txr->hn_tx_rndis_dtag);
if (error) {
device_printf(dev, "failed to create rndis dmatag\n");
return error;
}
/* DMA tag for data. */
error = bus_dma_tag_create(parent_dtag, /* parent */
1, /* alignment */
HN_TX_DATA_BOUNDARY, /* boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
HN_TX_DATA_MAXSIZE, /* maxsize */
HN_TX_DATA_SEGCNT_MAX, /* nsegments */
HN_TX_DATA_SEGSIZE, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&txr->hn_tx_data_dtag);
if (error) {
device_printf(dev, "failed to create data dmatag\n");
return error;
}
for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
struct hn_txdesc *txd = &txr->hn_txdesc[i];
txd->txr = txr;
txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
STAILQ_INIT(&txd->agg_list);
/*
* Allocate and load RNDIS packet message.
*/
error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
(void **)&txd->rndis_pkt,
BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
&txd->rndis_pkt_dmap);
if (error) {
device_printf(dev,
"failed to allocate rndis_packet_msg, %d\n", i);
return error;
}
error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
txd->rndis_pkt_dmap,
txd->rndis_pkt, HN_RNDIS_PKT_LEN,
hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
BUS_DMA_NOWAIT);
if (error) {
device_printf(dev,
"failed to load rndis_packet_msg, %d\n", i);
bus_dmamem_free(txr->hn_tx_rndis_dtag,
txd->rndis_pkt, txd->rndis_pkt_dmap);
return error;
}
/* DMA map for TX data. */
error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
&txd->data_dmap);
if (error) {
device_printf(dev,
"failed to allocate tx data dmamap\n");
bus_dmamap_unload(txr->hn_tx_rndis_dtag,
txd->rndis_pkt_dmap);
bus_dmamem_free(txr->hn_tx_rndis_dtag,
txd->rndis_pkt, txd->rndis_pkt_dmap);
return error;
}
/* All set, put it to list */
txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
#else
buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif
}
txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
if (sc->hn_tx_sysctl_tree != NULL) {
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
char name[16];
/*
* Create per TX ring sysctl tree:
* dev.hn.UNIT.tx.RINGID
*/
ctx = device_get_sysctl_ctx(dev);
child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
snprintf(name, sizeof(name), "%d", id);
txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
if (txr->hn_tx_sysctl_tree != NULL) {
child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
#ifdef HN_DEBUG
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
"# of available TX descs");
#endif
#ifdef HN_IFSTART_SUPPORT
if (!hn_use_if_start)
#endif
{
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
CTLFLAG_RD, &txr->hn_oactive, 0,
"over active");
}
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
CTLFLAG_RW, &txr->hn_pkts,
"# of packets transmitted");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
CTLFLAG_RW, &txr->hn_sends, "# of sends");
}
}
return 0;
}
static void
hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
{
struct hn_tx_ring *txr = txd->txr;
KASSERT(txd->m == NULL, ("still has mbuf installed"));
KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
txd->rndis_pkt_dmap);
bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
}
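/*
 * Garbage-collect a txdesc that never got a send-done, e.g. because
 * the channel was revoked.  Aggregated txds are skipped; they will be
 * freed along with their aggregating txd.
 */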
static void
hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
KASSERT(txd->refs == 0 || txd->refs == 1,
("invalid txd refs %d", txd->refs));
/* Aggregated txds will be freed by their aggregating txd. */
if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) {
int freed;
freed = hn_txdesc_put(txr, txd);
KASSERT(freed, ("can't free txdesc"));
}
}
static void
hn_tx_ring_destroy(struct hn_tx_ring *txr)
{
int i;
if (txr->hn_txdesc == NULL)
return;
/*
* NOTE:
* Because the freeing of aggregated txds will be deferred
* to the aggregating txd, two passes are used here:
* - The first pass GCes any pending txds. This GC is necessary,
* since if the channels are revoked, the hypervisor will not
* deliver send-done for all pending txds.
* - The second pass frees the busdma resources, i.e. after all
* txds have been freed.
*/
for (i = 0; i < txr->hn_txdesc_cnt; ++i)
hn_txdesc_gc(txr, &txr->hn_txdesc[i]);
for (i = 0; i < txr->hn_txdesc_cnt; ++i)
hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]);
if (txr->hn_tx_data_dtag != NULL)
bus_dma_tag_destroy(txr->hn_tx_data_dtag);
if (txr->hn_tx_rndis_dtag != NULL)
bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
#ifdef HN_USE_TXDESC_BUFRING
buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
#endif
free(txr->hn_txdesc, M_DEVBUF);
txr->hn_txdesc = NULL;
if (txr->hn_mbuf_br != NULL)
buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
#ifndef HN_USE_TXDESC_BUFRING
mtx_destroy(&txr->hn_txlist_spin);
#endif
mtx_destroy(&txr->hn_tx_lock);
}
static int
hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
{
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
int i;
/*
* Create TXBUF for chimney sending.
*
* NOTE: It is shared by all channels.
*/
sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma,
BUS_DMA_WAITOK | BUS_DMA_ZERO);
if (sc->hn_chim == NULL) {
device_printf(sc->hn_dev, "allocate txbuf failed\n");
return (ENOMEM);
}
sc->hn_tx_ring_cnt = ring_cnt;
sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
M_DEVBUF, M_WAITOK | M_ZERO);
ctx = device_get_sysctl_ctx(sc->hn_dev);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
/* Create dev.hn.UNIT.tx sysctl tree */
sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
int error;
error = hn_tx_ring_create(sc, i);
if (error)
return error;
}
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_no_txdescs),
hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_send_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_txdma_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_flush_failed),
hn_tx_stat_ulong_sysctl, "LU",
"# of packet transmission aggregation flush failure");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_collapsed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_chimney),
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
"# of total TX descs");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
CTLFLAG_RD, &sc->hn_chim_szmax, 0,
"Chimney send packet size upper boundary");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_chim_size_sysctl, "I", "Chimney send packet size limit");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_direct_tx_size),
hn_tx_conf_int_sysctl, "I",
"Size of the packet for direct transmission");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_sched_tx),
hn_tx_conf_int_sysctl, "I",
"Always schedule transmission "
"instead of doing direct transmission");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
"Applied packet transmission aggregation size");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_txagg_pktmax_sysctl, "I",
"Applied packet transmission aggregation packets");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_txagg_align_sysctl, "I",
"Applied packet transmission aggregation alignment");
return 0;
}
static void
hn_set_chim_size(struct hn_softc *sc, int chim_size)
{
int i;
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
sc->hn_tx_ring[i].hn_chim_size = chim_size;
}
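/*
 * Clamp the TSO size limit between the NDIS-imposed minimum
 * (sgmin * MTU) and maximum, never exceeding IP_MAXPACKET, and honor
 * the VF's limit when the transparent mode VF is ready.
 */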
static void
hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
{
struct ifnet *ifp = sc->hn_ifp;
u_int hw_tsomax;
int tso_minlen;
HN_LOCK_ASSERT(sc);
if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
return;
KASSERT(sc->hn_ndis_tso_sgmin >= 2,
("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
tso_minlen = sc->hn_ndis_tso_sgmin * mtu;
KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen &&
sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
if (tso_maxlen < tso_minlen)
tso_maxlen = tso_minlen;
else if (tso_maxlen > IP_MAXPACKET)
tso_maxlen = IP_MAXPACKET;
if (tso_maxlen > sc->hn_ndis_tso_szmax)
tso_maxlen = sc->hn_ndis_tso_szmax;
hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
if (hn_xpnt_vf_isready(sc)) {
if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax)
hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax;
}
ifp->if_hw_tsomax = hw_tsomax;
if (bootverbose)
if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax);
}
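/*
 * Apply the negotiated TX capabilities: chimney sending size, TX
 * checksum offload assistance, and HASHVAL pktinfo support.
 */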
static void
hn_fixup_tx_data(struct hn_softc *sc)
{
uint64_t csum_assist;
int i;
hn_set_chim_size(sc, sc->hn_chim_szmax);
if (hn_tx_chimney_size > 0 &&
hn_tx_chimney_size < sc->hn_chim_szmax)
hn_set_chim_size(sc, hn_tx_chimney_size);
csum_assist = 0;
if (sc->hn_caps & HN_CAP_IPCS)
csum_assist |= CSUM_IP;
if (sc->hn_caps & HN_CAP_TCP4CS)
csum_assist |= CSUM_IP_TCP;
if ((sc->hn_caps & HN_CAP_UDP4CS) && hn_enable_udp4cs)
csum_assist |= CSUM_IP_UDP;
if (sc->hn_caps & HN_CAP_TCP6CS)
csum_assist |= CSUM_IP6_TCP;
if ((sc->hn_caps & HN_CAP_UDP6CS) && hn_enable_udp6cs)
csum_assist |= CSUM_IP6_UDP;
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
if (sc->hn_caps & HN_CAP_HASHVAL) {
/*
* Support HASHVAL pktinfo on TX path.
*/
if (bootverbose)
if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
}
}
static void
hn_fixup_rx_data(struct hn_softc *sc)
{
if (sc->hn_caps & HN_CAP_UDPHASH) {
int i;
for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH;
}
}
static void
hn_destroy_tx_data(struct hn_softc *sc)
{
int i;
if (sc->hn_chim != NULL) {
if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) {
hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
} else {
device_printf(sc->hn_dev,
"chimney sending buffer is referenced");
}
sc->hn_chim = NULL;
}
if (sc->hn_tx_ring_cnt == 0)
return;
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
free(sc->hn_tx_ring, M_DEVBUF);
sc->hn_tx_ring = NULL;
sc->hn_tx_ring_cnt = 0;
sc->hn_tx_ring_inuse = 0;
}
#ifdef HN_IFSTART_SUPPORT
static void
hn_start_taskfunc(void *xtxr, int pending __unused)
{
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
hn_start_locked(txr, 0);
mtx_unlock(&txr->hn_tx_lock);
}
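/*
 * if_start TX path: dequeue packets from if_snd, encapsulate and
 * transmit them.  Return nonzero if the remaining work should be
 * dispatched to the TX taskqueue (packet larger than `len').
 */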
static int
hn_start_locked(struct hn_tx_ring *txr, int len)
{
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
int sched = 0;
KASSERT(hn_use_if_start,
("hn_start_locked is called, when if_start is disabled"));
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
if (__predict_false(txr->hn_suspended))
return (0);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING)
return (0);
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
struct hn_txdesc *txd;
struct mbuf *m_head;
int error;
IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
if (m_head == NULL)
break;
if (len > 0 && m_head->m_pkthdr.len > len) {
/*
* This sending could be time consuming; let callers
* dispatch this packet sending (and sending of any
* follow-up packets) to the tx taskqueue.
*/
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
sched = 1;
break;
}
#if defined(INET6) || defined(INET)
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
m_head = hn_tso_fixup(m_head);
if (__predict_false(m_head == NULL)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
continue;
}
} else if (m_head->m_pkthdr.csum_flags &
(CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
m_head = hn_set_hlen(m_head);
if (__predict_false(m_head == NULL)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
continue;
}
}
#endif
txd = hn_txdesc_get(txr);
if (txd == NULL) {
txr->hn_no_txdescs++;
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
break;
}
error = hn_encap(ifp, txr, txd, &m_head);
if (error) {
/* Both txd and m_head are freed */
KASSERT(txr->hn_agg_txd == NULL,
("encap failed w/ pending aggregating txdesc"));
continue;
}
if (txr->hn_agg_pktleft == 0) {
if (txr->hn_agg_txd != NULL) {
KASSERT(m_head == NULL,
("pending mbuf for aggregating txdesc"));
error = hn_flush_txagg(ifp, txr);
if (__predict_false(error)) {
atomic_set_int(&ifp->if_drv_flags,
IFF_DRV_OACTIVE);
break;
}
} else {
KASSERT(m_head != NULL, ("mbuf was freed"));
error = hn_txpkt(ifp, txr, txd);
if (__predict_false(error)) {
/* txd is freed, but m_head is not */
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
atomic_set_int(&ifp->if_drv_flags,
IFF_DRV_OACTIVE);
break;
}
}
}
#ifdef INVARIANTS
else {
KASSERT(txr->hn_agg_txd != NULL,
("no aggregating txdesc"));
KASSERT(m_head == NULL,
("pending mbuf for aggregating txdesc"));
}
#endif
}
/* Flush pending aggregated transmission. */
if (txr->hn_agg_txd != NULL)
hn_flush_txagg(ifp, txr);
return (sched);
}
static void
hn_start(struct ifnet *ifp)
{
struct hn_softc *sc = ifp->if_softc;
struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
if (txr->hn_sched_tx)
goto do_sched;
if (mtx_trylock(&txr->hn_tx_lock)) {
int sched;
sched = hn_start_locked(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
if (!sched)
return;
}
do_sched:
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
}
static void
hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
{
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
hn_start_locked(txr, 0);
mtx_unlock(&txr->hn_tx_lock);
}
static void
hn_start_txeof(struct hn_tx_ring *txr)
{
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
if (txr->hn_sched_tx)
goto do_sched;
if (mtx_trylock(&txr->hn_tx_lock)) {
int sched;
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
sched = hn_start_locked(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
if (sched) {
taskqueue_enqueue(txr->hn_tx_taskq,
&txr->hn_tx_task);
}
} else {
do_sched:
/*
* Release the OACTIVE flag early, in the hope that
* others can catch up.  The task will clear the flag
* again while holding hn_tx_lock to avoid possible
* races.
*/
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
}
}
#endif /* HN_IFSTART_SUPPORT */
static int
hn_xmit(struct hn_tx_ring *txr, int len)
{
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
struct mbuf *m_head;
int sched = 0;
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
#ifdef HN_IFSTART_SUPPORT
KASSERT(hn_use_if_start == 0,
("hn_xmit is called, when if_start is enabled"));
#endif
KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
if (__predict_false(txr->hn_suspended))
return (0);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
return (0);
while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
struct hn_txdesc *txd;
int error;
if (len > 0 && m_head->m_pkthdr.len > len) {
/*
* This send could be time consuming; let callers
* dispatch this packet (and any subsequent packets)
* to the TX taskqueue.
*/
drbr_putback(ifp, txr->hn_mbuf_br, m_head);
sched = 1;
break;
}
txd = hn_txdesc_get(txr);
if (txd == NULL) {
txr->hn_no_txdescs++;
drbr_putback(ifp, txr->hn_mbuf_br, m_head);
txr->hn_oactive = 1;
break;
}
error = hn_encap(ifp, txr, txd, &m_head);
if (error) {
/* Both txd and m_head are freed; discard */
KASSERT(txr->hn_agg_txd == NULL,
("encap failed w/ pending aggregating txdesc"));
drbr_advance(ifp, txr->hn_mbuf_br);
continue;
}
if (txr->hn_agg_pktleft == 0) {
if (txr->hn_agg_txd != NULL) {
KASSERT(m_head == NULL,
("pending mbuf for aggregating txdesc"));
error = hn_flush_txagg(ifp, txr);
if (__predict_false(error)) {
txr->hn_oactive = 1;
break;
}
} else {
KASSERT(m_head != NULL, ("mbuf was freed"));
error = hn_txpkt(ifp, txr, txd);
if (__predict_false(error)) {
/* txd is freed, but m_head is not */
drbr_putback(ifp, txr->hn_mbuf_br,
m_head);
txr->hn_oactive = 1;
break;
}
}
}
#ifdef INVARIANTS
else {
KASSERT(txr->hn_agg_txd != NULL,
("no aggregating txdesc"));
KASSERT(m_head == NULL,
("pending mbuf for aggregating txdesc"));
}
#endif
/* Sent */
drbr_advance(ifp, txr->hn_mbuf_br);
}
/* Flush pending aggregated transmission. */
if (txr->hn_agg_txd != NULL)
hn_flush_txagg(ifp, txr);
return (sched);
}
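/*
 * NOTE:
 * The loop above relies on the drbr three-step protocol: drbr_peek()
 * returns the head mbuf without consuming it, drbr_advance() consumes
 * it once the send has been handed off, and drbr_putback() restores it
 * for a later retry.  That is why a failed hn_txpkt(), which keeps
 * m_head alive, can put the mbuf back, while a failed hn_encap(),
 * which frees it, must advance instead.
 */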
static int
hn_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct hn_softc *sc = ifp->if_softc;
struct hn_tx_ring *txr;
int error, idx = 0;
if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
struct rm_priotracker pt;
rm_rlock(&sc->hn_vf_lock, &pt);
if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
struct mbuf *m_bpf = NULL;
int obytes, omcast;
obytes = m->m_pkthdr.len;
omcast = (m->m_flags & M_MCAST) != 0;
if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) {
if (bpf_peers_present(ifp->if_bpf)) {
m_bpf = m_copypacket(m, M_NOWAIT);
if (m_bpf == NULL) {
/*
* Failed to grab a shallow
* copy; tap now.
*/
ETHER_BPF_MTAP(ifp, m);
}
}
} else {
ETHER_BPF_MTAP(ifp, m);
}
error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m);
rm_runlock(&sc->hn_vf_lock, &pt);
if (m_bpf != NULL) {
if (!error)
ETHER_BPF_MTAP(ifp, m_bpf);
m_freem(m_bpf);
}
if (error == ENOBUFS) {
if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
} else if (error) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
} else {
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes);
if (omcast) {
if_inc_counter(ifp, IFCOUNTER_OMCASTS,
omcast);
}
}
return (error);
}
rm_runlock(&sc->hn_vf_lock, &pt);
}
#if defined(INET6) || defined(INET)
/*
* Perform TSO packet header fixup or get l2/l3 header length now,
* since packet headers should be cache-hot.
*/
if (m->m_pkthdr.csum_flags & CSUM_TSO) {
m = hn_tso_fixup(m);
if (__predict_false(m == NULL)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (EIO);
}
} else if (m->m_pkthdr.csum_flags &
(CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP6_UDP | CSUM_IP6_TCP)) {
m = hn_set_hlen(m);
if (__predict_false(m == NULL)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (EIO);
}
}
#endif
/*
* Select the TX ring based on flowid
*/
if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
uint32_t bid;
if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
&bid) == 0)
idx = bid % sc->hn_tx_ring_inuse;
else
#endif
{
#if defined(INET6) || defined(INET)
int tcpsyn = 0;
if (m->m_pkthdr.len < 128 &&
(m->m_pkthdr.csum_flags &
(CSUM_IP_TCP | CSUM_IP6_TCP)) &&
(m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
m = hn_check_tcpsyn(m, &tcpsyn);
if (__predict_false(m == NULL)) {
if_inc_counter(ifp,
IFCOUNTER_OERRORS, 1);
return (EIO);
}
}
#else
const int tcpsyn = 0;
#endif
if (tcpsyn)
idx = 0;
else
idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
}
}
txr = &sc->hn_tx_ring[idx];
error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
if (error) {
if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
return (error);
}
if (txr->hn_oactive)
return (0);
if (txr->hn_sched_tx)
goto do_sched;
if (mtx_trylock(&txr->hn_tx_lock)) {
int sched;
sched = hn_xmit(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
if (!sched)
return (0);
}
do_sched:
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
return (0);
}
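/*
 * Illustrative sketch (not driver code) of the non-RSS ring selection
 * done in hn_transmit() above: the mapping is a plain modulo, so with
 * hn_tx_ring_inuse == 4, flowids 5, 6, 7 and 8 land on rings 1, 2, 3
 * and 0 respectively.
 */
#if 0
#include <stdint.h>

static int
pick_ring(uint32_t flowid, int nrings)
{
	/* e.g. pick_ring(5, 4) == 1, pick_ring(8, 4) == 0 */
	return ((int)(flowid % (uint32_t)nrings));
}
#endif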
static void
hn_tx_ring_qflush(struct hn_tx_ring *txr)
{
struct mbuf *m;
mtx_lock(&txr->hn_tx_lock);
while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
m_freem(m);
mtx_unlock(&txr->hn_tx_lock);
}
static void
hn_xmit_qflush(struct ifnet *ifp)
{
struct hn_softc *sc = ifp->if_softc;
struct rm_priotracker pt;
int i;
for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
if_qflush(ifp);
rm_rlock(&sc->hn_vf_lock, &pt);
if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp);
rm_runlock(&sc->hn_vf_lock, &pt);
}
static void
hn_xmit_txeof(struct hn_tx_ring *txr)
{
if (txr->hn_sched_tx)
goto do_sched;
if (mtx_trylock(&txr->hn_tx_lock)) {
int sched;
txr->hn_oactive = 0;
sched = hn_xmit(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
if (sched) {
taskqueue_enqueue(txr->hn_tx_taskq,
&txr->hn_tx_task);
}
} else {
do_sched:
/*
* Release oactive early, in the hope that others can
* catch up.  The task will clear oactive again while
* holding hn_tx_lock to avoid possible races.
*/
txr->hn_oactive = 0;
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
}
}
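/*
 * Illustrative userland sketch (not driver code) of the txeof dispatch
 * pattern used above: try the ring lock for the fast path, and if it
 * is contended, clear oactive early and defer the work to a task that
 * re-clears it under the lock.  defer_to_task() here stands in for
 * taskqueue_enqueue().
 */
#if 0
#include <pthread.h>
#include <stdbool.h>

struct ring {
	pthread_mutex_t	lock;
	bool		oactive;
};

static void do_xmit(struct ring *r) { (void)r; /* transmit under r->lock */ }
static void defer_to_task(struct ring *r) { (void)r; /* queue txeof task */ }

static void
txeof(struct ring *r)
{
	if (pthread_mutex_trylock(&r->lock) == 0) {
		r->oactive = false;	/* cleared under the lock */
		do_xmit(r);
		pthread_mutex_unlock(&r->lock);
	} else {
		r->oactive = false;	/* released early; see above */
		defer_to_task(r);
	}
}
#endif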
static void
hn_xmit_taskfunc(void *xtxr, int pending __unused)
{
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
hn_xmit(txr, 0);
mtx_unlock(&txr->hn_tx_lock);
}
static void
hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
{
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
txr->hn_oactive = 0;
hn_xmit(txr, 0);
mtx_unlock(&txr->hn_tx_lock);
}
static int
hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
{
struct vmbus_chan_br cbr;
struct hn_rx_ring *rxr;
struct hn_tx_ring *txr = NULL;
int idx, error;
idx = vmbus_chan_subidx(chan);
/*
* Link this channel to RX/TX ring.
*/
KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
("invalid channel index %d, should > 0 && < %d",
idx, sc->hn_rx_ring_inuse));
rxr = &sc->hn_rx_ring[idx];
KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
("RX ring %d already attached", idx));
rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
rxr->hn_chan = chan;
if (bootverbose) {
if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
idx, vmbus_chan_id(chan));
}
if (idx < sc->hn_tx_ring_inuse) {
txr = &sc->hn_tx_ring[idx];
KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
("TX ring %d already attached", idx));
txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
txr->hn_chan = chan;
if (bootverbose) {
if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
idx, vmbus_chan_id(chan));
}
}
/* Bind this channel to a proper CPU. */
vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));
/*
* Open this channel
*/
cbr.cbr = rxr->hn_br;
cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
cbr.cbr_txsz = HN_TXBR_SIZE;
cbr.cbr_rxsz = HN_RXBR_SIZE;
error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
if (error) {
if (error == EISCONN) {
if_printf(sc->hn_ifp, "bufring is connected after "
"chan%u open failure\n", vmbus_chan_id(chan));
rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
} else {
if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
vmbus_chan_id(chan), error);
}
}
return (error);
}
static void
hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
{
struct hn_rx_ring *rxr;
int idx, error;
idx = vmbus_chan_subidx(chan);
/*
* Unlink this channel from the RX/TX ring.
*/
KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
("invalid channel index %d, should > 0 && < %d",
idx, sc->hn_rx_ring_inuse));
rxr = &sc->hn_rx_ring[idx];
KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
("RX ring %d is not attached", idx));
rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
if (idx < sc->hn_tx_ring_inuse) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
("TX ring %d is not attached attached", idx));
txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
}
/*
* Close this channel.
*
* NOTE:
* Channel closing does _not_ destroy the target channel.
*/
error = vmbus_chan_close_direct(chan);
if (error == EISCONN) {
if_printf(sc->hn_ifp, "chan%u bufring is connected "
"after being closed\n", vmbus_chan_id(chan));
rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
} else if (error) {
if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
vmbus_chan_id(chan), error);
}
}
static int
hn_attach_subchans(struct hn_softc *sc)
{
struct vmbus_channel **subchans;
int subchan_cnt = sc->hn_rx_ring_inuse - 1;
int i, error = 0;
KASSERT(subchan_cnt > 0, ("no sub-channels"));
/* Attach the sub-channels. */
subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
for (i = 0; i < subchan_cnt; ++i) {
int error1;
error1 = hn_chan_attach(sc, subchans[i]);
if (error1) {
error = error1;
/* Move on; all channels will be detached later. */
}
}
vmbus_subchan_rel(subchans, subchan_cnt);
if (error) {
if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
} else {
if (bootverbose) {
if_printf(sc->hn_ifp, "%d sub-channels attached\n",
subchan_cnt);
}
}
return (error);
}
static void
hn_detach_allchans(struct hn_softc *sc)
{
struct vmbus_channel **subchans;
int subchan_cnt = sc->hn_rx_ring_inuse - 1;
int i;
if (subchan_cnt == 0)
goto back;
/* Detach the sub-channels. */
subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
for (i = 0; i < subchan_cnt; ++i)
hn_chan_detach(sc, subchans[i]);
vmbus_subchan_rel(subchans, subchan_cnt);
back:
/*
* Detach the primary channel, _after_ all sub-channels
* are detached.
*/
hn_chan_detach(sc, sc->hn_prichan);
/* Wait for sub-channels to be destroyed, if any. */
vmbus_subchan_drain(sc->hn_prichan);
#ifdef INVARIANTS
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
HN_RX_FLAG_ATTACHED) == 0,
("%dth RX ring is still attached", i));
}
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
HN_TX_FLAG_ATTACHED) == 0,
("%dth TX ring is still attached", i));
}
#endif
}
static int
hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
{
struct vmbus_channel **subchans;
int nchan, rxr_cnt, error;
nchan = *nsubch + 1;
if (nchan == 1) {
/*
* Multiple RX/TX rings are not requested.
*/
*nsubch = 0;
return (0);
}
/*
* Query RSS capabilities, e.g. # of RX rings, and # of indirect
* table entries.
*/
error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
if (error) {
/* No RSS; this is benign. */
*nsubch = 0;
return (0);
}
if (bootverbose) {
if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
rxr_cnt, nchan);
}
if (nchan > rxr_cnt)
nchan = rxr_cnt;
if (nchan == 1) {
if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
*nsubch = 0;
return (0);
}
/*
* Allocate sub-channels from NVS.
*/
*nsubch = nchan - 1;
error = hn_nvs_alloc_subchans(sc, nsubch);
if (error || *nsubch == 0) {
/* Failed to allocate sub-channels. */
*nsubch = 0;
return (0);
}
/*
* Wait for all sub-channels to become ready before moving on.
*/
subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
vmbus_subchan_rel(subchans, *nsubch);
return (0);
}
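/*
 * NOTE:
 * The vmbus_subchan_get()/vmbus_subchan_rel() pair above is used
 * purely for its blocking side effect: the get does not return until
 * the host has made all *nsubch sub-channels available, so the
 * references it takes can be released immediately.
 */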
static bool
hn_synth_attachable(const struct hn_softc *sc)
{
int i;
if (sc->hn_flags & HN_FLAG_ERRORS)
return (false);
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
return (false);
}
return (true);
}
/*
* Make sure that the RX filter is zero after the successful
* RNDIS initialization.
*
* NOTE:
* Under certain conditions on certain versions of Hyper-V,
* the RNDIS rxfilter is _not_ zero on the hypervisor side
* after the successful RNDIS initialization, which breaks
* the assumption of any following code (well, it breaks the
* RNDIS API contract actually). Clear the RNDIS rxfilter
* explicitly, drain packets sneaking through, and drain the
* interrupt taskqueues scheduled due to the stealth packets.
*/
static void
hn_rndis_init_fixat(struct hn_softc *sc, int nchan)
{
hn_disable_rx(sc);
hn_drain_rxtx(sc, nchan);
}
static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
#define ATTACHED_NVS 0x0002
#define ATTACHED_RNDIS 0x0004
struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
int error, nsubch, nchan = 1, i, rndis_inited;
uint32_t old_caps, attached = 0;
KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
("synthetic parts were attached"));
if (!hn_synth_attachable(sc))
return (ENXIO);
/* Save capabilities for later verification. */
old_caps = sc->hn_caps;
sc->hn_caps = 0;
/* Clear RSS stuffs. */
sc->hn_rss_ind_size = 0;
sc->hn_rss_hash = 0;
sc->hn_rss_hcap = 0;
/*
* Attach the primary channel _before_ attaching NVS and RNDIS.
*/
error = hn_chan_attach(sc, sc->hn_prichan);
if (error)
goto failed;
/*
* Attach NVS.
*/
error = hn_nvs_attach(sc, mtu);
if (error)
goto failed;
attached |= ATTACHED_NVS;
/*
* Attach RNDIS _after_ NVS is attached.
*/
error = hn_rndis_attach(sc, mtu, &rndis_inited);
if (rndis_inited)
attached |= ATTACHED_RNDIS;
if (error)
goto failed;
/*
* Make sure capabilities are not changed.
*/
if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
old_caps, sc->hn_caps);
error = ENXIO;
goto failed;
}
/*
* Allocate sub-channels for multi-TX/RX rings.
*
* NOTE:
* The # of RX rings that can be used is equivalent to the # of
* channels to be requested.
*/
nsubch = sc->hn_rx_ring_cnt - 1;
error = hn_synth_alloc_subchans(sc, &nsubch);
if (error)
goto failed;
/* NOTE: _Full_ synthetic parts detach is required now. */
sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
/*
* Set the # of TX/RX rings that could be used according to
* the # of channels that NVS offered.
*/
nchan = nsubch + 1;
hn_set_ring_inuse(sc, nchan);
if (nchan == 1) {
/* Only the primary channel can be used; done */
goto back;
}
/*
* Attach the sub-channels.
*
* NOTE: hn_set_ring_inuse() _must_ have been called.
*/
error = hn_attach_subchans(sc);
if (error)
goto failed;
/*
* Configure RSS key and indirect table _after_ all sub-channels
* are attached.
*/
if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
/*
* RSS key is not set yet; set it to the default RSS key.
*/
if (bootverbose)
if_printf(sc->hn_ifp, "setup default RSS key\n");
#ifdef RSS
rss_getkey(rss->rss_key);
#else
memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
#endif
sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
}
if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
/*
* RSS indirect table is not set yet; set it up in round-
* robin fashion.
*/
if (bootverbose) {
if_printf(sc->hn_ifp, "setup default RSS indirect "
"table\n");
}
for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
uint32_t subidx;
#ifdef RSS
subidx = rss_get_indirection_to_bucket(i);
#else
subidx = i;
#endif
rss->rss_ind[i] = subidx % nchan;
}
sc->hn_flags |= HN_FLAG_HAS_RSSIND;
} else {
/*
* # of usable channels may be changed, so we have to
* make sure that all entries in RSS indirect table
* are valid.
*
* NOTE: hn_set_ring_inuse() _must_ have been called.
*/
hn_rss_ind_fixup(sc);
}
sc->hn_rss_hash = sc->hn_rss_hcap;
if ((sc->hn_flags & HN_FLAG_RXVF) ||
(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
/* NOTE: Don't reconfigure RSS here; it is done immediately below. */
hn_vf_rss_fixup(sc, false);
}
error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
if (error)
goto failed;
back:
/*
* Fixup transmission aggregation setup.
*/
hn_set_txagg(sc);
hn_rndis_init_fixat(sc, nchan);
return (0);
failed:
if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
hn_rndis_init_fixat(sc, nchan);
hn_synth_detach(sc);
} else {
if (attached & ATTACHED_RNDIS) {
hn_rndis_init_fixat(sc, nchan);
hn_rndis_detach(sc);
}
if (attached & ATTACHED_NVS)
hn_nvs_detach(sc);
hn_chan_detach(sc, sc->hn_prichan);
/* Restore old capabilities. */
sc->hn_caps = old_caps;
}
return (error);
#undef ATTACHED_RNDIS
#undef ATTACHED_NVS
}
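/*
 * Illustrative sketch (not driver code) of the round-robin RSS
 * indirect table fill performed in hn_synth_attach() above: with
 * nchan == 4 and an 8-entry table the result is 0 1 2 3 0 1 2 3,
 * spreading the hash buckets evenly across the channels.
 */
#if 0
#include <stdint.h>

static void
rss_ind_fill(uint32_t *ind, int indcnt, int nchan)
{
	int i;

	for (i = 0; i < indcnt; i++)
		ind[i] = (uint32_t)(i % nchan);
}
#endif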
/*
* NOTE:
* The interface must have been suspended through hn_suspend() before
* this function gets called.
*/
static void
hn_synth_detach(struct hn_softc *sc)
{
KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
("synthetic parts were not attached"));
/* Detach the RNDIS first. */
hn_rndis_detach(sc);
/* Detach NVS. */
hn_nvs_detach(sc);
/* Detach all of the channels. */
hn_detach_allchans(sc);
if (vmbus_current_version >= VMBUS_VERSION_WIN10 &&
sc->hn_rxbuf_gpadl != 0) {
/*
* Host is post-Win2016, disconnect RXBUF from primary channel here.
*/
int error;
error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
sc->hn_rxbuf_gpadl);
if (error) {
if_printf(sc->hn_ifp,
"rxbuf gpadl disconn failed: %d\n", error);
sc->hn_flags |= HN_FLAG_RXBUF_REF;
}
sc->hn_rxbuf_gpadl = 0;
}
if (vmbus_current_version >= VMBUS_VERSION_WIN10 &&
sc->hn_chim_gpadl != 0) {
/*
* Host is post-Win2016, disconnect chimney sending buffer from
* primary channel here.
*/
int error;
error = vmbus_chan_gpadl_disconnect(sc->hn_prichan,
sc->hn_chim_gpadl);
if (error) {
if_printf(sc->hn_ifp,
"chim gpadl disconn failed: %d\n", error);
sc->hn_flags |= HN_FLAG_CHIM_REF;
}
sc->hn_chim_gpadl = 0;
}
sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}
static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
("invalid ring count %d", ring_cnt));
if (sc->hn_tx_ring_cnt > ring_cnt)
sc->hn_tx_ring_inuse = ring_cnt;
else
sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
sc->hn_rx_ring_inuse = ring_cnt;
#ifdef RSS
if (sc->hn_rx_ring_inuse != rss_getnumbuckets()) {
if_printf(sc->hn_ifp, "# of RX rings (%d) does not match "
"# of RSS buckets (%d)\n", sc->hn_rx_ring_inuse,
rss_getnumbuckets());
}
#endif
if (bootverbose) {
if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
}
}
static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{
/*
* NOTE:
* The TX bufring will not be drained by the hypervisor,
* if the primary channel is revoked.
*/
while (!vmbus_chan_rx_empty(chan) ||
(!vmbus_chan_is_revoked(sc->hn_prichan) &&
!vmbus_chan_tx_empty(chan)))
pause("waitch", 1);
vmbus_chan_intr_drain(chan);
}
static void
hn_disable_rx(struct hn_softc *sc)
{
/*
* Disable RX by clearing RX filter forcefully.
*/
sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */
/*
* Give RNDIS enough time (200ms) to flush all pending data packets.
*/
pause("waitrx", (200 * hz) / 1000);
}
/*
* NOTE:
* RX/TX _must_ have been suspended/disabled, before this function
* is called.
*/
static void
hn_drain_rxtx(struct hn_softc *sc, int nchan)
{
struct vmbus_channel **subch = NULL;
int nsubch;
/*
* Drain RX/TX bufrings and interrupts.
*/
nsubch = nchan - 1;
if (nsubch > 0)
subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
if (subch != NULL) {
int i;
for (i = 0; i < nsubch; ++i)
hn_chan_drain(sc, subch[i]);
}
hn_chan_drain(sc, sc->hn_prichan);
if (subch != NULL)
vmbus_subchan_rel(subch, nsubch);
}
static void
hn_suspend_data(struct hn_softc *sc)
{
struct hn_tx_ring *txr;
int i;
HN_LOCK_ASSERT(sc);
/*
* Suspend TX.
*/
for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
txr = &sc->hn_tx_ring[i];
mtx_lock(&txr->hn_tx_lock);
txr->hn_suspended = 1;
mtx_unlock(&txr->hn_tx_lock);
/* No one is able to send more packets now. */
/*
* Wait for all pending sends to finish.
*
* NOTE:
* We will _not_ receive all pending send-dones if the
* primary channel is revoked.
*/
while (hn_tx_ring_pending(txr) &&
!vmbus_chan_is_revoked(sc->hn_prichan))
pause("hnwtx", 1 /* 1 tick */);
}
/*
* Disable RX.
*/
hn_disable_rx(sc);
/*
* Drain RX/TX.
*/
hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);
/*
* Drain any pending TX tasks.
*
* NOTE:
* The above hn_drain_rxtx() can dispatch TX tasks, so the TX
* tasks will have to be drained _after_ the above hn_drain_rxtx().
*/
for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
txr = &sc->hn_tx_ring[i];
taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
}
}
static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{
((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}
static void
hn_suspend_mgmt(struct hn_softc *sc)
{
struct task task;
HN_LOCK_ASSERT(sc);
/*
* Make sure that hn_mgmt_taskq0 can no longer be accessed
* through hn_mgmt_taskq.
*/
TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
vmbus_chan_run_task(sc->hn_prichan, &task);
/*
* Make sure that all pending management tasks are completed.
*/
taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
taskqueue_drain_all(sc->hn_mgmt_taskq0);
}
static void
hn_suspend(struct hn_softc *sc)
{
/* Disable polling. */
hn_polling(sc, 0);
/*
* If the non-transparent mode VF is activated, the synthetic
* device is receiving packets, so the data path of the
* synthetic device must be suspended.
*/
if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
(sc->hn_flags & HN_FLAG_RXVF))
hn_suspend_data(sc);
hn_suspend_mgmt(sc);
}
static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
int i;
KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
("invalid TX ring count %d", tx_ring_cnt));
for (i = 0; i < tx_ring_cnt; ++i) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
mtx_lock(&txr->hn_tx_lock);
txr->hn_suspended = 0;
mtx_unlock(&txr->hn_tx_lock);
}
}
static void
hn_resume_data(struct hn_softc *sc)
{
int i;
HN_LOCK_ASSERT(sc);
/*
* Re-enable RX.
*/
hn_rxfilter_config(sc);
/*
* Make sure to clear suspend status on "all" TX rings,
* since hn_tx_ring_inuse can be changed after
* hn_suspend_data().
*/
hn_resume_tx(sc, sc->hn_tx_ring_cnt);
#ifdef HN_IFSTART_SUPPORT
if (!hn_use_if_start)
#endif
{
/*
* Flush unused drbrs, since hn_tx_ring_inuse may be
* reduced.
*/
for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
}
/*
* Kick start TX.
*/
for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
/*
* Use txeof task, so that any pending oactive can be
* cleared properly.
*/
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
}
}
static void
hn_resume_mgmt(struct hn_softc *sc)
{
sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
/*
* Kick off network change detection, if it was pending.
* If no network change was pending, start link status
* checks, which is more lightweight than network change
* detection.
*/
if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
hn_change_network(sc);
else
hn_update_link_status(sc);
}
static void
hn_resume(struct hn_softc *sc)
{
/*
* If the non-transparent mode VF is activated, the synthetic
* device has to receive packets, so the data path of the
* synthetic device must be resumed.
*/
if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
(sc->hn_flags & HN_FLAG_RXVF))
hn_resume_data(sc);
/*
* Don't resume link status change if VF is attached/activated.
* - In the non-transparent VF mode, the synthetic device marks
* link down until the VF is deactivated; i.e. VF is down.
* - In transparent VF mode, VF's media status is used until
* the VF is detached.
*/
if ((sc->hn_flags & HN_FLAG_RXVF) == 0 &&
!(hn_xpnt_vf && sc->hn_vf_ifp != NULL))
hn_resume_mgmt(sc);
/*
* Re-enable polling if this interface is running and
* the polling is requested.
*/
if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
hn_polling(sc, sc->hn_pollhz);
}
static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
const struct rndis_status_msg *msg;
int ofs;
if (dlen < sizeof(*msg)) {
if_printf(sc->hn_ifp, "invalid RNDIS status\n");
return;
}
msg = data;
switch (msg->rm_status) {
case RNDIS_STATUS_MEDIA_CONNECT:
case RNDIS_STATUS_MEDIA_DISCONNECT:
hn_update_link_status(sc);
break;
case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
case RNDIS_STATUS_LINK_SPEED_CHANGE:
/* Not really useful; ignore. */
break;
case RNDIS_STATUS_NETWORK_CHANGE:
ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
if (dlen < ofs + msg->rm_stbuflen ||
msg->rm_stbuflen < sizeof(uint32_t)) {
if_printf(sc->hn_ifp, "network changed\n");
} else {
uint32_t change;
memcpy(&change, ((const uint8_t *)msg) + ofs,
sizeof(change));
if_printf(sc->hn_ifp, "network changed, change %u\n",
change);
}
hn_change_network(sc);
break;
default:
if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
msg->rm_status);
break;
}
}
static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
const struct rndis_pktinfo *pi = info_data;
uint32_t mask = 0;
while (info_dlen != 0) {
const void *data;
uint32_t dlen;
if (__predict_false(info_dlen < sizeof(*pi)))
return (EINVAL);
if (__predict_false(info_dlen < pi->rm_size))
return (EINVAL);
info_dlen -= pi->rm_size;
if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
return (EINVAL);
if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
return (EINVAL);
dlen = pi->rm_size - pi->rm_pktinfooffset;
data = pi->rm_data;
switch (pi->rm_type) {
case NDIS_PKTINFO_TYPE_VLAN:
if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
return (EINVAL);
info->vlan_info = *((const uint32_t *)data);
mask |= HN_RXINFO_VLAN;
break;
case NDIS_PKTINFO_TYPE_CSUM:
if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
return (EINVAL);
info->csum_info = *((const uint32_t *)data);
mask |= HN_RXINFO_CSUM;
break;
case HN_NDIS_PKTINFO_TYPE_HASHVAL:
if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
return (EINVAL);
info->hash_value = *((const uint32_t *)data);
mask |= HN_RXINFO_HASHVAL;
break;
case HN_NDIS_PKTINFO_TYPE_HASHINF:
if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
return (EINVAL);
info->hash_info = *((const uint32_t *)data);
mask |= HN_RXINFO_HASHINF;
break;
default:
goto next;
}
if (mask == HN_RXINFO_ALL) {
/* All found; done */
break;
}
next:
pi = (const struct rndis_pktinfo *)
((const uint8_t *)pi + pi->rm_size);
}
/*
* Final fixup.
* - If there is no hash value, invalidate the hash info.
*/
if ((mask & HN_RXINFO_HASHVAL) == 0)
info->hash_info = HN_NDIS_HASH_INFO_INVALID;
return (0);
}
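/*
 * Illustrative sketch (not driver code) of the self-sizing record walk
 * used by hn_rndis_rxinfo() above: each record announces its own size,
 * and that size is validated against both the record header and the
 * bytes that remain before the cursor advances.
 */
#if 0
#include <stddef.h>

struct rec {
	unsigned int	size;	/* total record size, header included */
	unsigned int	type;
	/* payload follows */
};

static int
walk_recs(const unsigned char *buf, size_t left)
{
	const struct rec *r;

	while (left != 0) {
		if (left < sizeof(*r))
			return (-1);	/* truncated header */
		r = (const struct rec *)(const void *)buf;
		if (r->size < sizeof(*r) || r->size > left)
			return (-1);	/* bogus record size */
		/* ... dispatch on r->type ... */
		left -= r->size;
		buf += r->size;
	}
	return (0);
}
#endif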
static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{
if (off < check_off) {
if (__predict_true(off + len <= check_off))
return (false);
} else if (off > check_off) {
if (__predict_true(check_off + check_len <= off))
return (false);
}
return (true);
}
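/*
 * Example: with positive lengths this is the usual half-open interval
 * test -- [0, 8) and [4, 8) overlap, while [0, 4) and [4, 8) merely
 * touch and do not.  Equal offsets are always reported as overlapping.
 */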
static void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
const struct rndis_packet_msg *pkt;
struct hn_rxinfo info;
int data_off, pktinfo_off, data_len, pktinfo_len;
/*
* Check length.
*/
if (__predict_false(dlen < sizeof(*pkt))) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
return;
}
pkt = data;
if (__predict_false(dlen < pkt->rm_len)) {
if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
"dlen %d, msglen %u\n", dlen, pkt->rm_len);
return;
}
if (__predict_false(pkt->rm_len <
pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
"msglen %u, data %u, oob %u, pktinfo %u\n",
pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
pkt->rm_pktinfolen);
return;
}
if (__predict_false(pkt->rm_datalen == 0)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
return;
}
/*
* Check offsets.
*/
#define IS_OFFSET_INVALID(ofs) \
((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \
((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))
/* XXX Hyper-V does not meet data offset alignment requirement */
if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
"data offset %u\n", pkt->rm_dataoffset);
return;
}
if (__predict_false(pkt->rm_oobdataoffset > 0 &&
IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
"oob offset %u\n", pkt->rm_oobdataoffset);
return;
}
if (__predict_true(pkt->rm_pktinfooffset > 0) &&
__predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
"pktinfo offset %u\n", pkt->rm_pktinfooffset);
return;
}
#undef IS_OFFSET_INVALID
data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
data_len = pkt->rm_datalen;
pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
pktinfo_len = pkt->rm_pktinfolen;
/*
* Check OOB coverage.
*/
if (__predict_false(pkt->rm_oobdatalen != 0)) {
int oob_off, oob_len;
if_printf(rxr->hn_ifp, "got oobdata\n");
oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
oob_len = pkt->rm_oobdatalen;
if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
"oob overflow, msglen %u, oob abs %d len %d\n",
pkt->rm_len, oob_off, oob_len);
return;
}
/*
* Check against data.
*/
if (hn_rndis_check_overlap(oob_off, oob_len,
data_off, data_len)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
"oob overlaps data, oob abs %d len %d, "
"data abs %d len %d\n",
oob_off, oob_len, data_off, data_len);
return;
}
/*
* Check against pktinfo.
*/
if (pktinfo_len != 0 &&
hn_rndis_check_overlap(oob_off, oob_len,
pktinfo_off, pktinfo_len)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
"oob overlaps pktinfo, oob abs %d len %d, "
"pktinfo abs %d len %d\n",
oob_off, oob_len, pktinfo_off, pktinfo_len);
return;
}
}
/*
* Check per-packet-info coverage and find useful per-packet-info.
*/
info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
info.hash_info = HN_NDIS_HASH_INFO_INVALID;
if (__predict_true(pktinfo_len != 0)) {
bool overlap;
int error;
if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
"pktinfo overflow, msglen %u, "
"pktinfo abs %d len %d\n",
pkt->rm_len, pktinfo_off, pktinfo_len);
return;
}
/*
* Check packet info coverage.
*/
overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
data_off, data_len);
if (__predict_false(overlap)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
"pktinfo overlap data, pktinfo abs %d len %d, "
"data abs %d len %d\n",
pktinfo_off, pktinfo_len, data_off, data_len);
return;
}
/*
* Find useful per-packet-info.
*/
error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
pktinfo_len, &info);
if (__predict_false(error)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
"pktinfo\n");
return;
}
}
if (__predict_false(data_off + data_len > pkt->rm_len)) {
if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
"data overflow, msglen %u, data abs %d len %d\n",
pkt->rm_len, data_off, data_len);
return;
}
hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
}
static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
const struct rndis_msghdr *hdr;
if (__predict_false(dlen < sizeof(*hdr))) {
if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
return;
}
hdr = data;
if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
/* Hot data path. */
hn_rndis_rx_data(rxr, data, dlen);
/* Done! */
return;
}
if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
else
hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}
static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
const struct hn_nvs_hdr *hdr;
if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
if_printf(sc->hn_ifp, "invalid nvs notify\n");
return;
}
hdr = VMBUS_CHANPKT_CONST_DATA(pkt);
if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
/* Useless; ignore */
return;
}
if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}
static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
const struct vmbus_chanpkt_hdr *pkt)
{
struct hn_nvs_sendctx *sndc;
sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
VMBUS_CHANPKT_DATALEN(pkt));
/*
* NOTE:
* 'sndc' CAN NOT be accessed anymore, since it can be freed by
* its callback.
*/
}
static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
const struct vmbus_chanpkt_hdr *pkthdr)
{
const struct vmbus_chanpkt_rxbuf *pkt;
const struct hn_nvs_hdr *nvs_hdr;
int count, i, hlen;
if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
return;
}
nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);
/* Make sure that this is a RNDIS message. */
if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
nvs_hdr->nvs_type);
return;
}
hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
if (__predict_false(hlen < sizeof(*pkt))) {
if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
return;
}
pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;
if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
pkt->cp_rxbuf_id);
return;
}
count = pkt->cp_rxbuf_cnt;
if (__predict_false(hlen <
__offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
return;
}
/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
for (i = 0; i < count; ++i) {
int ofs, len;
ofs = pkt->cp_rxbuf[i].rb_ofs;
len = pkt->cp_rxbuf[i].rb_len;
if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
"ofs %d, len %d\n", i, ofs, len);
continue;
}
hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
}
/*
* Ack the consumed RXBUF associated w/ this channel packet,
* so that this RXBUF can be recycled by the hypervisor.
*/
hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}
static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
uint64_t tid)
{
struct hn_nvs_rndis_ack ack;
int retries, error;
ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
ack.nvs_status = HN_NVS_STATUS_OK;
retries = 0;
again:
error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
if (__predict_false(error == EAGAIN)) {
/*
* NOTE:
* This should _not_ happen in the real world, since the
* consumption of the TX bufring from the TX path is
* controlled.
*/
if (rxr->hn_ack_failed == 0)
if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
rxr->hn_ack_failed++;
retries++;
if (retries < 10) {
DELAY(100);
goto again;
}
/* RXBUF leaks! */
if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
}
}
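/*
 * NOTE:
 * The retry loop above bounds the spin to 10 attempts with a 100us
 * DELAY() between them, i.e. roughly 1ms in total, before giving up
 * and accepting the RXBUF leak.
 */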
static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
struct hn_rx_ring *rxr = xrxr;
struct hn_softc *sc = rxr->hn_ifp->if_softc;
for (;;) {
struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
int error, pktlen;
pktlen = rxr->hn_pktbuf_len;
error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
if (__predict_false(error == ENOBUFS)) {
void *nbuf;
int nlen;
/*
* Expand channel packet buffer.
*
* XXX
* Use M_WAITOK here, since allocation failure
* is fatal.
*/
nlen = rxr->hn_pktbuf_len * 2;
while (nlen < pktlen)
nlen *= 2;
nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);
if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
rxr->hn_pktbuf_len, nlen);
free(rxr->hn_pktbuf, M_DEVBUF);
rxr->hn_pktbuf = nbuf;
rxr->hn_pktbuf_len = nlen;
/* Retry! */
continue;
} else if (__predict_false(error == EAGAIN)) {
/* No more channel packets; done! */
break;
}
KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));
switch (pkt->cph_type) {
case VMBUS_CHANPKT_TYPE_COMP:
hn_nvs_handle_comp(sc, chan, pkt);
break;
case VMBUS_CHANPKT_TYPE_RXBUF:
hn_nvs_handle_rxbuf(rxr, chan, pkt);
break;
case VMBUS_CHANPKT_TYPE_INBAND:
hn_nvs_handle_notify(sc, pkt);
break;
default:
if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
pkt->cph_type);
break;
}
}
hn_chan_rollup(rxr, rxr->hn_txr);
}
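/*
 * Illustrative sketch (not driver code) of the pktbuf growth policy in
 * hn_chan_callback() above: the buffer at least doubles, and keeps
 * doubling until it covers the reported packet length, e.g. a 4KB
 * buffer confronted with a 9KB packet grows 4KB -> 8KB -> 16KB.
 */
#if 0
static int
grow(int curlen, int pktlen)
{
	int nlen = curlen * 2;

	while (nlen < pktlen)
		nlen *= 2;
	return (nlen);	/* grow(4096, 9216) == 16384 */
}
#endif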
static void
hn_sysinit(void *arg __unused)
{
int i;
hn_udpcs_fixup = counter_u64_alloc(M_WAITOK);
#ifdef HN_IFSTART_SUPPORT
/*
* Don't use ifnet.if_start if transparent VF mode is requested;
* mainly due to the IFF_DRV_OACTIVE flag.
*/
if (hn_xpnt_vf && hn_use_if_start) {
hn_use_if_start = 0;
printf("hn: tranparent VF mode, if_transmit will be used, "
"instead of if_start\n");
}
#endif
if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) {
printf("hn: invalid transparent VF attach routing "
"wait timeout %d, reset to %d\n",
hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN);
hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
}
/*
* Initialize VF map.
*/
rm_init_flags(&hn_vfmap_lock, "hn_vfmap", RM_SLEEPABLE);
hn_vfmap_size = HN_VFMAP_SIZE_DEF;
hn_vfmap = malloc(sizeof(struct ifnet *) * hn_vfmap_size, M_DEVBUF,
M_WAITOK | M_ZERO);
/*
* Fix the # of TX taskqueues.
*/
if (hn_tx_taskq_cnt <= 0)
hn_tx_taskq_cnt = 1;
else if (hn_tx_taskq_cnt > mp_ncpus)
hn_tx_taskq_cnt = mp_ncpus;
/*
* Fix the TX taskqueue mode.
*/
switch (hn_tx_taskq_mode) {
case HN_TX_TASKQ_M_INDEP:
case HN_TX_TASKQ_M_GLOBAL:
case HN_TX_TASKQ_M_EVTTQ:
break;
default:
hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
break;
}
if (vm_guest != VM_GUEST_HV)
return;
if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
return;
hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
M_DEVBUF, M_WAITOK);
for (i = 0; i < hn_tx_taskq_cnt; ++i) {
hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
taskqueue_thread_enqueue, &hn_tx_taskque[i]);
taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
"hn tx%d", i);
}
}
SYSINIT(hn_sysinit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysinit, NULL);
static void
hn_sysuninit(void *arg __unused)
{
if (hn_tx_taskque != NULL) {
int i;
for (i = 0; i < hn_tx_taskq_cnt; ++i)
taskqueue_free(hn_tx_taskque[i]);
free(hn_tx_taskque, M_DEVBUF);
}
if (hn_vfmap != NULL)
free(hn_vfmap, M_DEVBUF);
rm_destroy(&hn_vfmap_lock);
counter_u64_free(hn_udpcs_fixup);
}
SYSUNINIT(hn_sysuninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, hn_sysuninit, NULL);
diff --git a/sys/dev/if_ndis/if_ndis.c b/sys/dev/if_ndis/if_ndis.c
index 37cf0e6bf703..1a5e8eeed4be 100644
--- a/sys/dev/if_ndis/if_ndis.c
+++ b/sys/dev/if_ndis/if_ndis.c
@@ -1,3424 +1,3423 @@
/*-
* SPDX-License-Identifier: BSD-4-Clause
*
* Copyright (c) 2003
* Bill Paul . All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Bill Paul.
* 4. Neither the name of the author nor the names of any co-contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*
* WPA support originally contributed by Arvind Srinivasan
* then hacked upon mercilessly by me.
*/
#include
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define NDIS_DEBUG
#ifdef NDIS_DEBUG
#define DPRINTF(x) do { if (ndis_debug > 0) printf x; } while (0)
int ndis_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, ndis, CTLFLAG_RW, &ndis_debug, 0,
"if_ndis debug level");
#else
#define DPRINTF(x)
#endif
SYSCTL_DECL(_hw_ndisusb);
int ndisusb_halt = 1;
SYSCTL_INT(_hw_ndisusb, OID_AUTO, halt, CTLFLAG_RW, &ndisusb_halt, 0,
"Halt NDIS USB driver when it's attached");
/* 0 - 30 dBm to mW conversion table */
static const uint16_t dBm2mW[] = {
1, 1, 1, 1, 2, 2, 2, 2, 3, 3,
3, 4, 4, 4, 5, 6, 6, 7, 8, 9,
10, 11, 13, 14, 16, 18, 20, 22, 25, 28,
32, 35, 40, 45, 50, 56, 63, 71, 79, 89,
100, 112, 126, 141, 158, 178, 200, 224, 251, 282,
316, 355, 398, 447, 501, 562, 631, 708, 794, 891,
1000
};
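/*
 * The table above is indexed in 0.5 dBm steps and follows
 * mW = 10^(dBm / 10), rounded; e.g. dBm2mW[0] == 1 (0 dBm),
 * dBm2mW[40] == 100 (20 dBm) and dBm2mW[60] == 1000 (30 dBm).
 */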
MODULE_DEPEND(ndis, ether, 1, 1, 1);
MODULE_DEPEND(ndis, wlan, 1, 1, 1);
MODULE_DEPEND(ndis, ndisapi, 1, 1, 1);
MODULE_VERSION(ndis, 1);
int ndis_attach (device_t);
int ndis_detach (device_t);
int ndis_suspend (device_t);
int ndis_resume (device_t);
void ndis_shutdown (device_t);
int ndisdrv_modevent (module_t, int, void *);
static void ndis_txeof (ndis_handle, ndis_packet *, ndis_status);
static void ndis_rxeof (ndis_handle, ndis_packet **, uint32_t);
static void ndis_rxeof_eth (ndis_handle, ndis_handle, char *, void *,
uint32_t, void *, uint32_t, uint32_t);
static void ndis_rxeof_done (ndis_handle);
static void ndis_rxeof_xfr (kdpc *, ndis_handle, void *, void *);
static void ndis_rxeof_xfr_done (ndis_handle, ndis_packet *,
uint32_t, uint32_t);
static void ndis_linksts (ndis_handle, ndis_status, void *, uint32_t);
static void ndis_linksts_done (ndis_handle);
/* We need to wrap these functions for amd64. */
static funcptr ndis_txeof_wrap;
static funcptr ndis_rxeof_wrap;
static funcptr ndis_rxeof_eth_wrap;
static funcptr ndis_rxeof_done_wrap;
static funcptr ndis_rxeof_xfr_wrap;
static funcptr ndis_rxeof_xfr_done_wrap;
static funcptr ndis_linksts_wrap;
static funcptr ndis_linksts_done_wrap;
static funcptr ndis_ticktask_wrap;
static funcptr ndis_ifstarttask_wrap;
static funcptr ndis_resettask_wrap;
static funcptr ndis_inputtask_wrap;
static struct ieee80211vap *ndis_vap_create(struct ieee80211com *,
const char [IFNAMSIZ], int, enum ieee80211_opmode, int,
const uint8_t [IEEE80211_ADDR_LEN],
const uint8_t [IEEE80211_ADDR_LEN]);
static void ndis_vap_delete (struct ieee80211vap *);
static void ndis_tick (void *);
static void ndis_ticktask (device_object *, void *);
static int ndis_raw_xmit (struct ieee80211_node *, struct mbuf *,
const struct ieee80211_bpf_params *);
static void ndis_update_mcast (struct ieee80211com *);
static void ndis_update_promisc (struct ieee80211com *);
static void ndis_ifstart (struct ifnet *);
static void ndis_ifstarttask (device_object *, void *);
static void ndis_resettask (device_object *, void *);
static void ndis_inputtask (device_object *, void *);
static int ndis_ifioctl (struct ifnet *, u_long, caddr_t);
static int ndis_newstate (struct ieee80211vap *, enum ieee80211_state,
int);
static int ndis_nettype_chan (uint32_t);
static int ndis_nettype_mode (uint32_t);
static void ndis_scan (void *);
static void ndis_scan_results (struct ndis_softc *);
static void ndis_scan_start (struct ieee80211com *);
static void ndis_scan_end (struct ieee80211com *);
static void ndis_set_channel (struct ieee80211com *);
static void ndis_scan_curchan (struct ieee80211_scan_state *, unsigned long);
static void ndis_scan_mindwell (struct ieee80211_scan_state *);
static void ndis_init (void *);
static void ndis_stop (struct ndis_softc *);
static int ndis_ifmedia_upd (struct ifnet *);
static void ndis_ifmedia_sts (struct ifnet *, struct ifmediareq *);
static int ndis_get_bssid_list (struct ndis_softc *,
ndis_80211_bssid_list_ex **);
static int ndis_get_assoc (struct ndis_softc *, ndis_wlan_bssid_ex **);
static int ndis_probe_offload (struct ndis_softc *);
static int ndis_set_offload (struct ndis_softc *);
static void ndis_getstate_80211 (struct ndis_softc *);
static void ndis_setstate_80211 (struct ndis_softc *);
static void ndis_auth_and_assoc (struct ndis_softc *, struct ieee80211vap *);
static void ndis_media_status (struct ifnet *, struct ifmediareq *);
static int ndis_set_cipher (struct ndis_softc *, int);
static int ndis_set_wpa (struct ndis_softc *, void *, int);
static int ndis_add_key (struct ieee80211vap *,
const struct ieee80211_key *);
static int ndis_del_key (struct ieee80211vap *,
const struct ieee80211_key *);
static void ndis_setmulti (struct ndis_softc *);
static void ndis_map_sclist (void *, bus_dma_segment_t *,
int, bus_size_t, int);
static int ndis_ifattach(struct ndis_softc *);
static int ndis_80211attach(struct ndis_softc *);
static int ndis_80211ioctl(struct ieee80211com *, u_long , void *);
static int ndis_80211transmit(struct ieee80211com *, struct mbuf *);
static void ndis_80211parent(struct ieee80211com *);
static int ndisdrv_loaded = 0;
/*
* This routine should call windrv_load() once for each driver
* image. This will do the relocation and dynamic linking for the
* image, and create a Windows driver object which will be
* saved in our driver database.
*/
int
ndisdrv_modevent(module_t mod, int cmd, void *arg)
{
int error = 0;
switch (cmd) {
case MOD_LOAD:
ndisdrv_loaded++;
if (ndisdrv_loaded > 1)
break;
windrv_wrap((funcptr)ndis_rxeof, &ndis_rxeof_wrap,
3, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_rxeof_eth, &ndis_rxeof_eth_wrap,
8, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_rxeof_done, &ndis_rxeof_done_wrap,
1, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_rxeof_xfr, &ndis_rxeof_xfr_wrap,
4, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_rxeof_xfr_done,
&ndis_rxeof_xfr_done_wrap, 4, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_txeof, &ndis_txeof_wrap,
3, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_linksts, &ndis_linksts_wrap,
4, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_linksts_done,
&ndis_linksts_done_wrap, 1, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_ticktask, &ndis_ticktask_wrap,
2, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_ifstarttask, &ndis_ifstarttask_wrap,
2, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_resettask, &ndis_resettask_wrap,
2, WINDRV_WRAP_STDCALL);
windrv_wrap((funcptr)ndis_inputtask, &ndis_inputtask_wrap,
2, WINDRV_WRAP_STDCALL);
break;
case MOD_UNLOAD:
ndisdrv_loaded--;
if (ndisdrv_loaded > 0)
break;
/* fallthrough */
case MOD_SHUTDOWN:
windrv_unwrap(ndis_rxeof_wrap);
windrv_unwrap(ndis_rxeof_eth_wrap);
windrv_unwrap(ndis_rxeof_done_wrap);
windrv_unwrap(ndis_rxeof_xfr_wrap);
windrv_unwrap(ndis_rxeof_xfr_done_wrap);
windrv_unwrap(ndis_txeof_wrap);
windrv_unwrap(ndis_linksts_wrap);
windrv_unwrap(ndis_linksts_done_wrap);
windrv_unwrap(ndis_ticktask_wrap);
windrv_unwrap(ndis_ifstarttask_wrap);
windrv_unwrap(ndis_resettask_wrap);
windrv_unwrap(ndis_inputtask_wrap);
break;
default:
error = EINVAL;
break;
}
return (error);
}
struct mclist_ctx {
uint8_t *mclist;
int mclistsz;
};
static u_int
ndis_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
{
struct mclist_ctx *ctx = arg;
if (cnt < ctx->mclistsz)
bcopy(LLADDR(sdl), ctx->mclist + (ETHER_ADDR_LEN * cnt),
ETHER_ADDR_LEN);
return (1);
}
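/*
 * NOTE:
 * ndis_copy_maddr() always returns 1, so the value accumulated by
 * if_foreach_llmaddr() is the total number of link-level multicast
 * addresses, while only the first mclistsz of them are copied.
 * ndis_setmulti() below detects the truncated case by comparing that
 * count against mclistsz and falling back to ALL_MULTICAST.
 */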
/*
* Program the 64-bit multicast hash filter.
*/
static void
ndis_setmulti(struct ndis_softc *sc)
{
struct ifnet *ifp;
struct mclist_ctx ctx;
int len, error;
if (!NDIS_INITIALIZED(sc))
return;
if (sc->ndis_80211)
return;
ifp = sc->ifp;
if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
len = sizeof(sc->ndis_filter);
error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER,
&sc->ndis_filter, &len);
if (error)
device_printf(sc->ndis_dev,
"set allmulti failed: %d\n", error);
return;
}
if (if_llmaddr_count(ifp) == 0)
return;
len = sizeof(ctx.mclistsz);
ndis_get_info(sc, OID_802_3_MAXIMUM_LIST_SIZE, &ctx.mclistsz, &len);
ctx.mclist = malloc(ETHER_ADDR_LEN * ctx.mclistsz, M_TEMP,
M_NOWAIT | M_ZERO);
if (ctx.mclist == NULL) {
sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
goto out;
}
sc->ndis_filter |= NDIS_PACKET_TYPE_MULTICAST;
len = if_foreach_llmaddr(ifp, ndis_copy_maddr, &ctx);
if (len > ctx.mclistsz) {
sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST;
goto out;
}
len = len * ETHER_ADDR_LEN;
error = ndis_set_info(sc, OID_802_3_MULTICAST_LIST, ctx.mclist, &len);
if (error) {
device_printf(sc->ndis_dev, "set mclist failed: %d\n", error);
sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST;
}
out:
free(ctx.mclist, M_TEMP);
len = sizeof(sc->ndis_filter);
error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER,
&sc->ndis_filter, &len);
if (error)
device_printf(sc->ndis_dev, "set multi failed: %d\n", error);
}
static int
ndis_set_offload(struct ndis_softc *sc)
{
ndis_task_offload *nto;
ndis_task_offload_hdr *ntoh;
ndis_task_tcpip_csum *nttc;
struct ifnet *ifp;
int len, error;
if (!NDIS_INITIALIZED(sc))
return (EINVAL);
if (sc->ndis_80211)
return (EINVAL);
/* See if there's anything to set. */
ifp = sc->ifp;
error = ndis_probe_offload(sc);
if (error)
return (error);
if (sc->ndis_hwassist == 0 && ifp->if_capabilities == 0)
return (0);
len = sizeof(ndis_task_offload_hdr) + sizeof(ndis_task_offload) +
sizeof(ndis_task_tcpip_csum);
ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO);
if (ntoh == NULL)
return (ENOMEM);
ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION;
ntoh->ntoh_len = sizeof(ndis_task_offload_hdr);
ntoh->ntoh_offset_firsttask = sizeof(ndis_task_offload_hdr);
ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header);
ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3;
ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN;
nto = (ndis_task_offload *)((char *)ntoh +
ntoh->ntoh_offset_firsttask);
nto->nto_vers = NDIS_TASK_OFFLOAD_VERSION;
nto->nto_len = sizeof(ndis_task_offload);
nto->nto_task = NDIS_TASK_TCPIP_CSUM;
nto->nto_offset_nexttask = 0;
nto->nto_taskbuflen = sizeof(ndis_task_tcpip_csum);
nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf;
if (ifp->if_capenable & IFCAP_TXCSUM)
nttc->nttc_v4tx = sc->ndis_v4tx;
if (ifp->if_capenable & IFCAP_RXCSUM)
nttc->nttc_v4rx = sc->ndis_v4rx;
error = ndis_set_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len);
free(ntoh, M_TEMP);
return (error);
}
static int
ndis_probe_offload(struct ndis_softc *sc)
{
ndis_task_offload *nto;
ndis_task_offload_hdr *ntoh;
ndis_task_tcpip_csum *nttc = NULL;
struct ifnet *ifp;
int len, error, dummy;
ifp = sc->ifp;
len = sizeof(dummy);
error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, &dummy, &len);
if (error != ENOSPC)
return (error);
ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO);
if (ntoh == NULL)
return (ENOMEM);
ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION;
ntoh->ntoh_len = sizeof(ndis_task_offload_hdr);
ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header);
ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3;
ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN;
error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len);
if (error) {
free(ntoh, M_TEMP);
return (error);
}
if (ntoh->ntoh_vers != NDIS_TASK_OFFLOAD_VERSION) {
free(ntoh, M_TEMP);
return (EINVAL);
}
nto = (ndis_task_offload *)((char *)ntoh +
ntoh->ntoh_offset_firsttask);
while (1) {
switch (nto->nto_task) {
case NDIS_TASK_TCPIP_CSUM:
nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf;
break;
/* Don't handle these yet. */
case NDIS_TASK_IPSEC:
case NDIS_TASK_TCP_LARGESEND:
default:
break;
}
if (nto->nto_offset_nexttask == 0)
break;
nto = (ndis_task_offload *)((char *)nto +
nto->nto_offset_nexttask);
}
if (nttc == NULL) {
free(ntoh, M_TEMP);
return (ENOENT);
}
sc->ndis_v4tx = nttc->nttc_v4tx;
sc->ndis_v4rx = nttc->nttc_v4rx;
if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_IP_CSUM)
sc->ndis_hwassist |= CSUM_IP;
if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_TCP_CSUM)
sc->ndis_hwassist |= CSUM_TCP;
if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_UDP_CSUM)
sc->ndis_hwassist |= CSUM_UDP;
if (sc->ndis_hwassist)
ifp->if_capabilities |= IFCAP_TXCSUM;
if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_IP_CSUM)
ifp->if_capabilities |= IFCAP_RXCSUM;
if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_TCP_CSUM)
ifp->if_capabilities |= IFCAP_RXCSUM;
if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_UDP_CSUM)
ifp->if_capabilities |= IFCAP_RXCSUM;
free(ntoh, M_TEMP);
return (0);
}
static int
ndis_nettype_chan(uint32_t type)
{
switch (type) {
case NDIS_80211_NETTYPE_11FH: return (IEEE80211_CHAN_FHSS);
case NDIS_80211_NETTYPE_11DS: return (IEEE80211_CHAN_B);
case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_CHAN_A);
case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_CHAN_G);
}
DPRINTF(("unknown channel nettype %d\n", type));
return (IEEE80211_CHAN_B); /* Default to 11B chan */
}
static int
ndis_nettype_mode(uint32_t type)
{
switch (type) {
case NDIS_80211_NETTYPE_11FH: return (IEEE80211_MODE_FH);
case NDIS_80211_NETTYPE_11DS: return (IEEE80211_MODE_11B);
case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_MODE_11A);
case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_MODE_11G);
}
DPRINTF(("unknown mode nettype %d\n", type));
return (IEEE80211_MODE_AUTO);
}
/*
* Attach the interface. Allocate softc structures, do ifmedia
* setup and ethernet/BPF attach.
*/
int
ndis_attach(device_t dev)
{
struct ndis_softc *sc;
driver_object *pdrv;
device_object *pdo;
int error = 0, len;
int i;
sc = device_get_softc(dev);
mtx_init(&sc->ndis_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
MTX_DEF);
KeInitializeSpinLock(&sc->ndis_rxlock);
KeInitializeSpinLock(&sc->ndisusb_tasklock);
KeInitializeSpinLock(&sc->ndisusb_xferdonelock);
InitializeListHead(&sc->ndis_shlist);
InitializeListHead(&sc->ndisusb_tasklist);
InitializeListHead(&sc->ndisusb_xferdonelist);
callout_init(&sc->ndis_stat_callout, 1);
mbufq_init(&sc->ndis_rxqueue, INT_MAX); /* XXXGL: sane maximum */
/* Create sysctl registry nodes */
ndis_create_sysctls(sc);
/* Find the PDO for this device instance. */
if (sc->ndis_iftype == PCIBus)
pdrv = windrv_lookup(0, "PCI Bus");
else if (sc->ndis_iftype == PCMCIABus)
pdrv = windrv_lookup(0, "PCCARD Bus");
else
pdrv = windrv_lookup(0, "USB Bus");
pdo = windrv_find_pdo(pdrv, dev);
/*
* Create a new functional device object for this
* device. This is what creates the miniport block
* for this device instance.
*/
if (NdisAddDevice(sc->ndis_dobj, pdo) != STATUS_SUCCESS) {
device_printf(dev, "failed to create FDO!\n");
error = ENXIO;
goto fail;
}
/* Tell the user what version of the API the driver is using. */
device_printf(dev, "NDIS API version: %d.%d\n",
sc->ndis_chars->nmc_version_major,
sc->ndis_chars->nmc_version_minor);
/* Do resource conversion. */
if (sc->ndis_iftype == PCMCIABus || sc->ndis_iftype == PCIBus)
ndis_convert_res(sc);
else
sc->ndis_block->nmb_rlist = NULL;
/* Install our RX and TX interrupt handlers. */
sc->ndis_block->nmb_senddone_func = ndis_txeof_wrap;
sc->ndis_block->nmb_pktind_func = ndis_rxeof_wrap;
sc->ndis_block->nmb_ethrxindicate_func = ndis_rxeof_eth_wrap;
sc->ndis_block->nmb_ethrxdone_func = ndis_rxeof_done_wrap;
sc->ndis_block->nmb_tdcond_func = ndis_rxeof_xfr_done_wrap;
/* Override the status handler so we can detect link changes. */
sc->ndis_block->nmb_status_func = ndis_linksts_wrap;
sc->ndis_block->nmb_statusdone_func = ndis_linksts_done_wrap;
/* Set up work item handlers. */
sc->ndis_tickitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj);
sc->ndis_startitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj);
sc->ndis_resetitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj);
sc->ndis_inputitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj);
sc->ndisusb_xferdoneitem =
IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj);
sc->ndisusb_taskitem =
IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj);
KeInitializeDpc(&sc->ndis_rxdpc, ndis_rxeof_xfr_wrap, sc->ndis_block);
/* Call driver's init routine. */
if (ndis_init_nic(sc)) {
device_printf(dev, "init handler failed\n");
error = ENXIO;
goto fail;
}
/*
* Figure out how big to make the TX buffer pool.
*/
len = sizeof(sc->ndis_maxpkts);
if (ndis_get_info(sc, OID_GEN_MAXIMUM_SEND_PACKETS,
&sc->ndis_maxpkts, &len)) {
device_printf(dev, "failed to get max TX packets\n");
error = ENXIO;
goto fail;
}
/*
* If this is a deserialized miniport, we don't have
* to honor the OID_GEN_MAXIMUM_SEND_PACKETS result.
*/
if (!NDIS_SERIALIZED(sc->ndis_block))
sc->ndis_maxpkts = NDIS_TXPKTS;
/* Enforce some sanity, just in case. */
if (sc->ndis_maxpkts == 0)
sc->ndis_maxpkts = 10;
sc->ndis_txarray = malloc(sizeof(ndis_packet *) *
sc->ndis_maxpkts, M_DEVBUF, M_NOWAIT|M_ZERO);
/* Allocate a pool of ndis_packets for TX encapsulation. */
NdisAllocatePacketPool(&i, &sc->ndis_txpool,
sc->ndis_maxpkts, PROTOCOL_RESERVED_SIZE_IN_PACKET);
if (i != NDIS_STATUS_SUCCESS) {
sc->ndis_txpool = NULL;
device_printf(dev, "failed to allocate TX packet pool");
error = ENOMEM;
goto fail;
}
sc->ndis_txpending = sc->ndis_maxpkts;
sc->ndis_oidcnt = 0;
/* Get supported oid list. */
ndis_get_supported_oids(sc, &sc->ndis_oids, &sc->ndis_oidcnt);
/* If the NDIS module requested scatter/gather, init maps. */
if (sc->ndis_sc)
ndis_init_dma(sc);
/*
* See if the OID_802_11_CONFIGURATION OID is
* supported by this driver. If it is, then this is an 802.11
* wireless driver, and we should set up media for wireless.
*/
for (i = 0; i < sc->ndis_oidcnt; i++)
if (sc->ndis_oids[i] == OID_802_11_CONFIGURATION) {
sc->ndis_80211 = 1;
break;
}
if (sc->ndis_80211)
error = ndis_80211attach(sc);
else
error = ndis_ifattach(sc);
fail:
if (error) {
ndis_detach(dev);
return (error);
}
if (sc->ndis_iftype == PNPBus && ndisusb_halt == 0)
return (error);
DPRINTF(("attach done.\n"));
/* We're done talking to the NIC for now; halt it. */
ndis_halt_nic(sc);
DPRINTF(("halting done.\n"));
return (error);
}
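/*
* Attach an 802.11 device: query the NDIS driver for its supported
* network types, rates, ciphers and authentication modes, then
* register the device with net80211.
*/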
static int
ndis_80211attach(struct ndis_softc *sc)
{
struct ieee80211com *ic = &sc->ndis_ic;
ndis_80211_rates_ex rates;
struct ndis_80211_nettype_list *ntl;
uint32_t arg;
int mode, i, r, len, nonettypes = 1;
uint8_t bands[IEEE80211_MODE_BYTES] = { 0 };
callout_init(&sc->ndis_scan_callout, 1);
ic->ic_softc = sc;
ic->ic_ioctl = ndis_80211ioctl;
ic->ic_name = device_get_nameunit(sc->ndis_dev);
ic->ic_opmode = IEEE80211_M_STA;
ic->ic_phytype = IEEE80211_T_DS;
ic->ic_caps = IEEE80211_C_8023ENCAP |
IEEE80211_C_STA | IEEE80211_C_IBSS;
setbit(ic->ic_modecaps, IEEE80211_MODE_AUTO);
len = 0;
r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, NULL, &len);
if (r != ENOSPC)
goto nonettypes;
ntl = malloc(len, M_DEVBUF, M_WAITOK | M_ZERO);
r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, ntl, &len);
if (r != 0) {
free(ntl, M_DEVBUF);
goto nonettypes;
}
for (i = 0; i < ntl->ntl_items; i++) {
mode = ndis_nettype_mode(ntl->ntl_type[i]);
if (mode) {
nonettypes = 0;
setbit(ic->ic_modecaps, mode);
setbit(bands, mode);
} else
device_printf(sc->ndis_dev, "Unknown nettype %d\n",
ntl->ntl_type[i]);
}
free(ntl, M_DEVBUF);
nonettypes:
/* Default to 11b channels if the card did not supply any */
if (nonettypes) {
setbit(ic->ic_modecaps, IEEE80211_MODE_11B);
setbit(bands, IEEE80211_MODE_11B);
}
len = sizeof(rates);
bzero((char *)&rates, len);
r = ndis_get_info(sc, OID_802_11_SUPPORTED_RATES, (void *)rates, &len);
if (r != 0)
device_printf(sc->ndis_dev, "get rates failed: 0x%x\n", r);
/*
* Since the driver can only report a small, fixed number of
* supported rates, if this is not 802.11b we're just going
* to have to fake the rest up ourselves.
*/
#define TESTSETRATE(x, y) \
do { \
int i; \
for (i = 0; i < ic->ic_sup_rates[x].rs_nrates; i++) { \
if (ic->ic_sup_rates[x].rs_rates[i] == (y)) \
break; \
} \
if (i == ic->ic_sup_rates[x].rs_nrates) { \
ic->ic_sup_rates[x].rs_rates[i] = (y); \
ic->ic_sup_rates[x].rs_nrates++; \
} \
} while (0)
#define SETRATE(x, y) \
ic->ic_sup_rates[x].rs_rates[ic->ic_sup_rates[x].rs_nrates] = (y)
#define INCRATE(x) \
ic->ic_sup_rates[x].rs_nrates++
ic->ic_curmode = IEEE80211_MODE_AUTO;
if (isset(ic->ic_modecaps, IEEE80211_MODE_11A))
ic->ic_sup_rates[IEEE80211_MODE_11A].rs_nrates = 0;
if (isset(ic->ic_modecaps, IEEE80211_MODE_11B))
ic->ic_sup_rates[IEEE80211_MODE_11B].rs_nrates = 0;
if (isset(ic->ic_modecaps, IEEE80211_MODE_11G))
ic->ic_sup_rates[IEEE80211_MODE_11G].rs_nrates = 0;
for (i = 0; i < len; i++) {
switch (rates[i] & IEEE80211_RATE_VAL) {
case 2:
case 4:
case 11:
case 10:
case 22:
if (isclr(ic->ic_modecaps, IEEE80211_MODE_11B)) {
/* Lazy-init 802.11b. */
setbit(ic->ic_modecaps, IEEE80211_MODE_11B);
ic->ic_sup_rates[IEEE80211_MODE_11B].
rs_nrates = 0;
}
SETRATE(IEEE80211_MODE_11B, rates[i]);
INCRATE(IEEE80211_MODE_11B);
break;
default:
if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) {
SETRATE(IEEE80211_MODE_11A, rates[i]);
INCRATE(IEEE80211_MODE_11A);
}
if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) {
SETRATE(IEEE80211_MODE_11G, rates[i]);
INCRATE(IEEE80211_MODE_11G);
}
break;
}
}
/*
* If the hardware supports 802.11g, it most
* likely supports 802.11b and all of the
* 802.11b and 802.11g speeds, so maybe we can
* just cheat here. Just how in the heck do
* we detect turbo modes, though?
*/
if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) {
TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|2);
TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|4);
TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|11);
TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|22);
}
if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) {
TESTSETRATE(IEEE80211_MODE_11G, 48);
TESTSETRATE(IEEE80211_MODE_11G, 72);
TESTSETRATE(IEEE80211_MODE_11G, 96);
TESTSETRATE(IEEE80211_MODE_11G, 108);
}
if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) {
TESTSETRATE(IEEE80211_MODE_11A, 48);
TESTSETRATE(IEEE80211_MODE_11A, 72);
TESTSETRATE(IEEE80211_MODE_11A, 96);
TESTSETRATE(IEEE80211_MODE_11A, 108);
}
#undef SETRATE
#undef INCRATE
#undef TESTSETRATE
ieee80211_init_channels(ic, NULL, bands);
/*
* To test for WPA support, we need to see if we can
* set AUTHENTICATION_MODE to WPA and read it back
* successfully.
*/
i = sizeof(arg);
arg = NDIS_80211_AUTHMODE_WPA;
r = ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i);
if (r == 0) {
r = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i);
if (r == 0 && arg == NDIS_80211_AUTHMODE_WPA)
ic->ic_caps |= IEEE80211_C_WPA;
}
/*
* To test for supported ciphers, we set each
* available encryption type in descending order.
* If ENC3 works, then we have WEP, TKIP and AES.
* If only ENC2 works, then we have WEP and TKIP.
* If only ENC1 works, then we have just WEP.
*/
i = sizeof(arg);
arg = NDIS_80211_WEPSTAT_ENC3ENABLED;
r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i);
if (r == 0) {
ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP
| IEEE80211_CRYPTO_TKIP
| IEEE80211_CRYPTO_AES_CCM;
goto got_crypto;
}
arg = NDIS_80211_WEPSTAT_ENC2ENABLED;
r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i);
if (r == 0) {
ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP
| IEEE80211_CRYPTO_TKIP;
goto got_crypto;
}
arg = NDIS_80211_WEPSTAT_ENC1ENABLED;
r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i);
if (r == 0)
ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP;
got_crypto:
i = sizeof(arg);
r = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &i);
if (r == 0)
ic->ic_caps |= IEEE80211_C_PMGT;
r = ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &i);
if (r == 0)
ic->ic_caps |= IEEE80211_C_TXPMGT;
/*
* Get station address from the driver.
*/
len = sizeof(ic->ic_macaddr);
ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, &ic->ic_macaddr, &len);
ieee80211_ifattach(ic);
ic->ic_raw_xmit = ndis_raw_xmit;
ic->ic_scan_start = ndis_scan_start;
ic->ic_scan_end = ndis_scan_end;
ic->ic_set_channel = ndis_set_channel;
ic->ic_scan_curchan = ndis_scan_curchan;
ic->ic_scan_mindwell = ndis_scan_mindwell;
ic->ic_bsschan = IEEE80211_CHAN_ANYC;
ic->ic_vap_create = ndis_vap_create;
ic->ic_vap_delete = ndis_vap_delete;
ic->ic_update_mcast = ndis_update_mcast;
ic->ic_update_promisc = ndis_update_promisc;
ic->ic_transmit = ndis_80211transmit;
ic->ic_parent = ndis_80211parent;
if (bootverbose)
ieee80211_announce(ic);
return (0);
}
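/*
* Attach an ethernet device: allocate the ifnet, probe for task
* offload support, set up ifmedia and call ether_ifattach().
*/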
static int
ndis_ifattach(struct ndis_softc *sc)
{
struct ifnet *ifp;
u_char eaddr[ETHER_ADDR_LEN];
int len;
ifp = if_alloc(IFT_ETHER);
if (ifp == NULL)
return (ENOSPC);
sc->ifp = ifp;
ifp->if_softc = sc;
/* Check for task offload support. */
ndis_probe_offload(sc);
/*
* Get station address from the driver.
*/
len = sizeof(eaddr);
ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, eaddr, &len);
if_initname(ifp, device_get_name(sc->ndis_dev),
device_get_unit(sc->ndis_dev));
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
- IFF_NEEDSEPOCH;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = ndis_ifioctl;
ifp->if_start = ndis_ifstart;
ifp->if_init = ndis_init;
ifp->if_baudrate = 10000000;
IFQ_SET_MAXLEN(&ifp->if_snd, 50);
ifp->if_snd.ifq_drv_maxlen = 25;
IFQ_SET_READY(&ifp->if_snd);
ifp->if_capenable = ifp->if_capabilities;
ifp->if_hwassist = sc->ndis_hwassist;
ifmedia_init(&sc->ifmedia, IFM_IMASK, ndis_ifmedia_upd,
ndis_ifmedia_sts);
ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL);
ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL);
ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL);
ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL);
ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL);
ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_AUTO);
ether_ifattach(ifp, eaddr);
return (0);
}
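/*
* Create a vap. NDIS miniports drive a single piece of hardware
* state, so we only allow one vap at a time.
*/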
static struct ieee80211vap *
ndis_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit,
enum ieee80211_opmode opmode, int flags,
const uint8_t bssid[IEEE80211_ADDR_LEN],
const uint8_t mac[IEEE80211_ADDR_LEN])
{
struct ndis_vap *nvp;
struct ieee80211vap *vap;
if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */
return NULL;
nvp = malloc(sizeof(struct ndis_vap), M_80211_VAP, M_WAITOK | M_ZERO);
vap = &nvp->vap;
ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid);
/* override with driver methods */
nvp->newstate = vap->iv_newstate;
vap->iv_newstate = ndis_newstate;
/* complete setup */
ieee80211_vap_attach(vap, ieee80211_media_change, ndis_media_status,
mac);
ic->ic_opmode = opmode;
/* install key handling routines */
vap->iv_key_set = ndis_add_key;
vap->iv_key_delete = ndis_del_key;
return vap;
}
static void
ndis_vap_delete(struct ieee80211vap *vap)
{
struct ndis_vap *nvp = NDIS_VAP(vap);
struct ieee80211com *ic = vap->iv_ic;
struct ndis_softc *sc = ic->ic_softc;
ndis_stop(sc);
callout_drain(&sc->ndis_scan_callout);
ieee80211_vap_detach(vap);
free(nvp, M_80211_VAP);
}
/*
* Shutdown hardware and free up resources. This can be called any
* time after the mutex has been initialized. It is called in both
* the error case in attach and the normal detach case so it needs
* to be careful about only freeing resources that have actually been
* allocated.
*/
int
ndis_detach(device_t dev)
{
struct ifnet *ifp;
struct ndis_softc *sc;
driver_object *drv;
sc = device_get_softc(dev);
NDIS_LOCK(sc);
if (!sc->ndis_80211)
ifp = sc->ifp;
else
ifp = NULL;
if (ifp != NULL)
ifp->if_flags &= ~IFF_UP;
if (device_is_attached(dev)) {
NDIS_UNLOCK(sc);
ndis_stop(sc);
if (sc->ndis_80211)
ieee80211_ifdetach(&sc->ndis_ic);
else if (ifp != NULL)
ether_ifdetach(ifp);
} else
NDIS_UNLOCK(sc);
if (sc->ndis_tickitem != NULL)
IoFreeWorkItem(sc->ndis_tickitem);
if (sc->ndis_startitem != NULL)
IoFreeWorkItem(sc->ndis_startitem);
if (sc->ndis_resetitem != NULL)
IoFreeWorkItem(sc->ndis_resetitem);
if (sc->ndis_inputitem != NULL)
IoFreeWorkItem(sc->ndis_inputitem);
if (sc->ndisusb_xferdoneitem != NULL)
IoFreeWorkItem(sc->ndisusb_xferdoneitem);
if (sc->ndisusb_taskitem != NULL)
IoFreeWorkItem(sc->ndisusb_taskitem);
bus_generic_detach(dev);
ndis_unload_driver(sc);
if (sc->ndis_irq)
bus_release_resource(dev, SYS_RES_IRQ, 0, sc->ndis_irq);
if (sc->ndis_res_io)
bus_release_resource(dev, SYS_RES_IOPORT,
sc->ndis_io_rid, sc->ndis_res_io);
if (sc->ndis_res_mem)
bus_release_resource(dev, SYS_RES_MEMORY,
sc->ndis_mem_rid, sc->ndis_res_mem);
if (sc->ndis_res_altmem)
bus_release_resource(dev, SYS_RES_MEMORY,
sc->ndis_altmem_rid, sc->ndis_res_altmem);
if (ifp != NULL)
if_free(ifp);
if (sc->ndis_sc)
ndis_destroy_dma(sc);
if (sc->ndis_txarray)
free(sc->ndis_txarray, M_DEVBUF);
if (!sc->ndis_80211)
ifmedia_removeall(&sc->ifmedia);
if (sc->ndis_txpool != NULL)
NdisFreePacketPool(sc->ndis_txpool);
/* Destroy the PDO for this device. */
if (sc->ndis_iftype == PCIBus)
drv = windrv_lookup(0, "PCI Bus");
else if (sc->ndis_iftype == PCMCIABus)
drv = windrv_lookup(0, "PCCARD Bus");
else
drv = windrv_lookup(0, "USB Bus");
if (drv == NULL)
panic("couldn't find driver object");
windrv_destroy_pdo(drv, dev);
if (sc->ndis_iftype == PCIBus)
bus_dma_tag_destroy(sc->ndis_parent_tag);
return (0);
}
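/*
* Device suspend/resume hooks. Suspend is effectively a no-op at
* the moment; on resume we simply reinitialize the NIC.
*/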
int
ndis_suspend(dev)
device_t dev;
{
struct ndis_softc *sc;
struct ifnet *ifp;
sc = device_get_softc(dev);
ifp = sc->ifp;
#ifdef notdef
if (NDIS_INITIALIZED(sc))
ndis_stop(sc);
#endif
return (0);
}
int
ndis_resume(dev)
device_t dev;
{
struct ndis_softc *sc;
struct ifnet *ifp;
sc = device_get_softc(dev);
ifp = sc->ifp;
if (NDIS_INITIALIZED(sc))
ndis_init(sc);
return (0);
}
/*
* The following bunch of routines are here to support drivers that
* use the NdisMEthIndicateReceive()/MiniportTransferData() mechanism.
* The NdisMEthIndicateReceive() handler runs at DISPATCH_LEVEL for
* serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized
* miniports.
*/
static void
ndis_rxeof_eth(adapter, ctx, addr, hdr, hdrlen, lookahead, lookaheadlen, pktlen)
ndis_handle adapter;
ndis_handle ctx;
char *addr;
void *hdr;
uint32_t hdrlen;
void *lookahead;
uint32_t lookaheadlen;
uint32_t pktlen;
{
ndis_miniport_block *block;
uint8_t irql = 0;
uint32_t status;
ndis_buffer *b;
ndis_packet *p;
struct mbuf *m;
ndis_ethpriv *priv;
block = adapter;
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
return;
/* Save the data provided to us so far. */
m->m_len = lookaheadlen + hdrlen;
m->m_pkthdr.len = pktlen + hdrlen;
m->m_next = NULL;
m_copyback(m, 0, hdrlen, hdr);
m_copyback(m, hdrlen, lookaheadlen, lookahead);
/* Now create a fake NDIS_PACKET to hold the data */
NdisAllocatePacket(&status, &p, block->nmb_rxpool);
if (status != NDIS_STATUS_SUCCESS) {
m_freem(m);
return;
}
p->np_m0 = m;
b = IoAllocateMdl(m->m_data, m->m_pkthdr.len, FALSE, FALSE, NULL);
if (b == NULL) {
NdisFreePacket(p);
m_freem(m);
return;
}
p->np_private.npp_head = p->np_private.npp_tail = b;
p->np_private.npp_totlen = m->m_pkthdr.len;
/* Save the packet RX context somewhere. */
priv = (ndis_ethpriv *)&p->np_protocolreserved;
priv->nep_ctx = ctx;
if (!NDIS_SERIALIZED(block))
KeAcquireSpinLock(&block->nmb_lock, &irql);
InsertTailList((&block->nmb_packetlist), (&p->np_list));
if (!NDIS_SERIALIZED(block))
KeReleaseSpinLock(&block->nmb_lock, irql);
}
/*
* NdisMEthIndicateReceiveComplete() handler, runs at DISPATCH_LEVEL
* for serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized
* miniports.
*/
static void
ndis_rxeof_done(adapter)
ndis_handle adapter;
{
struct ndis_softc *sc;
ndis_miniport_block *block;
block = adapter;
/* Schedule transfer/RX of queued packets. */
sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
KeInsertQueueDpc(&sc->ndis_rxdpc, NULL, NULL);
}
/*
* MiniportTransferData() handler, runs at DISPATCH_LEVEL.
*/
static void
ndis_rxeof_xfr(dpc, adapter, sysarg1, sysarg2)
kdpc *dpc;
ndis_handle adapter;
void *sysarg1;
void *sysarg2;
{
ndis_miniport_block *block;
struct ndis_softc *sc;
ndis_packet *p;
list_entry *l;
uint32_t status;
ndis_ethpriv *priv;
struct ifnet *ifp;
struct mbuf *m;
block = adapter;
sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
ifp = sc->ifp;
KeAcquireSpinLockAtDpcLevel(&block->nmb_lock);
l = block->nmb_packetlist.nle_flink;
while (!IsListEmpty(&block->nmb_packetlist)) {
l = RemoveHeadList((&block->nmb_packetlist));
p = CONTAINING_RECORD(l, ndis_packet, np_list);
InitializeListHead((&p->np_list));
priv = (ndis_ethpriv *)&p->np_protocolreserved;
m = p->np_m0;
p->np_softc = sc;
p->np_m0 = NULL;
KeReleaseSpinLockFromDpcLevel(&block->nmb_lock);
status = MSCALL6(sc->ndis_chars->nmc_transferdata_func,
p, &p->np_private.npp_totlen, block, priv->nep_ctx,
m->m_len, m->m_pkthdr.len - m->m_len);
KeAcquireSpinLockAtDpcLevel(&block->nmb_lock);
/*
* If status is NDIS_STATUS_PENDING, do nothing and
* wait for a callback to the ndis_rxeof_xfr_done()
* handler.
*/
m->m_len = m->m_pkthdr.len;
m->m_pkthdr.rcvif = ifp;
if (status == NDIS_STATUS_SUCCESS) {
IoFreeMdl(p->np_private.npp_head);
NdisFreePacket(p);
KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock);
mbufq_enqueue(&sc->ndis_rxqueue, m);
KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock);
IoQueueWorkItem(sc->ndis_inputitem,
(io_workitem_func)ndis_inputtask_wrap,
WORKQUEUE_CRITICAL, sc);
}
if (status == NDIS_STATUS_FAILURE)
m_freem(m);
/* Advance to next packet */
l = block->nmb_packetlist.nle_flink;
}
KeReleaseSpinLockFromDpcLevel(&block->nmb_lock);
}
/*
* NdisMTransferDataComplete() handler, runs at DISPATCH_LEVEL.
*/
static void
ndis_rxeof_xfr_done(adapter, packet, status, len)
ndis_handle adapter;
ndis_packet *packet;
uint32_t status;
uint32_t len;
{
ndis_miniport_block *block;
struct ndis_softc *sc;
struct ifnet *ifp;
struct mbuf *m;
block = adapter;
sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
ifp = sc->ifp;
m = packet->np_m0;
IoFreeMdl(packet->np_private.npp_head);
NdisFreePacket(packet);
if (status != NDIS_STATUS_SUCCESS) {
m_freem(m);
return;
}
m->m_len = m->m_pkthdr.len;
m->m_pkthdr.rcvif = ifp;
KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock);
mbufq_enqueue(&sc->ndis_rxqueue, m);
KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock);
IoQueueWorkItem(sc->ndis_inputitem,
(io_workitem_func)ndis_inputtask_wrap,
WORKQUEUE_CRITICAL, sc);
}
/*
* A frame has been uploaded: pass the resulting mbuf chain up to
* the higher level protocols.
*
* When handling received NDIS packets, the 'status' field in the
* out-of-band portion of the ndis_packet has special meaning. In the
* most common case, the underlying NDIS driver will set this field
* to NDIS_STATUS_SUCCESS, which indicates that it's ok for us to
* take possession of it. We then change the status field to
* NDIS_STATUS_PENDING to tell the driver that we now own the packet,
* and that we will return it at some point in the future via the
* return packet handler.
*
* If the driver hands us a packet with a status of NDIS_STATUS_RESOURCES,
* this means the driver is running out of packet/buffer resources and
* wants to maintain ownership of the packet. In this case, we have to
* copy the packet data into local storage and let the driver keep the
* packet.
*/
static void
ndis_rxeof(adapter, packets, pktcnt)
ndis_handle adapter;
ndis_packet **packets;
uint32_t pktcnt;
{
struct ndis_softc *sc;
ndis_miniport_block *block;
ndis_packet *p;
uint32_t s;
ndis_tcpip_csum *csum;
struct ifnet *ifp;
struct mbuf *m0, *m;
int i;
block = (ndis_miniport_block *)adapter;
sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
ifp = sc->ifp;
/*
* There's a slim chance the driver may indicate some packets
* before we're completely ready to handle them. If we detect this,
* we need to return them to the miniport and ignore them.
*/
if (!sc->ndis_running) {
for (i = 0; i < pktcnt; i++) {
p = packets[i];
if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) {
p->np_refcnt++;
ndis_return_packet(p);
}
}
return;
}
for (i = 0; i < pktcnt; i++) {
p = packets[i];
/* Stash the softc here so ptom can use it. */
p->np_softc = sc;
if (ndis_ptom(&m0, p)) {
device_printf(sc->ndis_dev, "ptom failed\n");
if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS)
ndis_return_packet(p);
} else {
#ifdef notdef
if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) {
m = m_dup(m0, M_NOWAIT);
/*
* NOTE: we want to destroy the mbuf here, but
* we don't actually want to return it to the
* driver via the return packet handler. By
* bumping np_refcnt, we can prevent the
* ndis_return_packet() routine from actually
* doing anything.
*/
p->np_refcnt++;
m_freem(m0);
if (m == NULL)
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
else
m0 = m;
} else
p->np_oob.npo_status = NDIS_STATUS_PENDING;
#endif
m = m_dup(m0, M_NOWAIT);
if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES)
p->np_refcnt++;
else
p->np_oob.npo_status = NDIS_STATUS_PENDING;
m_freem(m0);
if (m == NULL) {
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
continue;
}
m0 = m;
m0->m_pkthdr.rcvif = ifp;
/* Deal with checksum offload. */
if (ifp->if_capenable & IFCAP_RXCSUM &&
p->np_ext.npe_info[ndis_tcpipcsum_info] != NULL) {
s = (uintptr_t)
p->np_ext.npe_info[ndis_tcpipcsum_info];
csum = (ndis_tcpip_csum *)&s;
if (csum->u.ntc_rxflags &
NDIS_RXCSUM_IP_PASSED)
m0->m_pkthdr.csum_flags |=
CSUM_IP_CHECKED|CSUM_IP_VALID;
if (csum->u.ntc_rxflags &
(NDIS_RXCSUM_TCP_PASSED |
NDIS_RXCSUM_UDP_PASSED)) {
m0->m_pkthdr.csum_flags |=
CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
m0->m_pkthdr.csum_data = 0xFFFF;
}
}
KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock);
mbufq_enqueue(&sc->ndis_rxqueue, m0);
KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock);
IoQueueWorkItem(sc->ndis_inputitem,
(io_workitem_func)ndis_inputtask_wrap,
WORKQUEUE_CRITICAL, sc);
}
}
}
/*
* This routine is run at PASSIVE_LEVEL. We use this routine to pass
* packets into the stack in order to avoid calling (*ifp->if_input)()
* with any locks held (at DISPATCH_LEVEL, we'll be holding the
* 'dispatch level' per-cpu sleep lock).
*/
static void
ndis_inputtask(device_object *dobj, void *arg)
{
ndis_miniport_block *block;
struct ndis_softc *sc = arg;
struct mbuf *m;
uint8_t irql;
block = dobj->do_devext;
KeAcquireSpinLock(&sc->ndis_rxlock, &irql);
while ((m = mbufq_dequeue(&sc->ndis_rxqueue)) != NULL) {
KeReleaseSpinLock(&sc->ndis_rxlock, irql);
if (sc->ndis_80211 != 0) {
struct ieee80211com *ic = &sc->ndis_ic;
struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
if (vap != NULL)
vap->iv_deliver_data(vap, vap->iv_bss, m);
} else {
struct ifnet *ifp = sc->ifp;
(*ifp->if_input)(ifp, m);
}
KeAcquireSpinLock(&sc->ndis_rxlock, &irql);
}
KeReleaseSpinLock(&sc->ndis_rxlock, irql);
}
/*
* A frame was downloaded to the chip. It's safe for us to clean up
* the list buffers.
*/
static void
ndis_txeof(adapter, packet, status)
ndis_handle adapter;
ndis_packet *packet;
ndis_status status;
{
struct ndis_softc *sc;
ndis_miniport_block *block;
struct ifnet *ifp;
int idx;
struct mbuf *m;
block = (ndis_miniport_block *)adapter;
sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
ifp = sc->ifp;
m = packet->np_m0;
idx = packet->np_txidx;
if (sc->ndis_sc)
bus_dmamap_unload(sc->ndis_ttag, sc->ndis_tmaps[idx]);
ndis_free_packet(packet);
m_freem(m);
NDIS_LOCK(sc);
sc->ndis_txarray[idx] = NULL;
sc->ndis_txpending++;
if (!sc->ndis_80211) {
struct ifnet *ifp = sc->ifp;
if (status == NDIS_STATUS_SUCCESS)
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
else
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}
sc->ndis_tx_timer = 0;
NDIS_UNLOCK(sc);
if (!sc->ndis_80211)
IoQueueWorkItem(sc->ndis_startitem,
(io_workitem_func)ndis_ifstarttask_wrap,
WORKQUEUE_CRITICAL, sc);
DPRINTF(("%s: ndis_ifstarttask_wrap sc=%p\n", __func__, sc));
}
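/*
* NdisMIndicateStatus() handler. Cache the status event and any
* accompanying buffer so userland can collect it later via the
* SIOCGPRIVATE_0 ioctl.
*/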
static void
ndis_linksts(adapter, status, sbuf, slen)
ndis_handle adapter;
ndis_status status;
void *sbuf;
uint32_t slen;
{
ndis_miniport_block *block;
struct ndis_softc *sc;
block = adapter;
sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
sc->ndis_sts = status;
/* Event list is all full up, drop this one. */
NDIS_LOCK(sc);
if (sc->ndis_evt[sc->ndis_evtpidx].ne_sts) {
NDIS_UNLOCK(sc);
return;
}
/* Cache the event. */
if (slen) {
sc->ndis_evt[sc->ndis_evtpidx].ne_buf = malloc(slen,
M_TEMP, M_NOWAIT);
if (sc->ndis_evt[sc->ndis_evtpidx].ne_buf == NULL) {
NDIS_UNLOCK(sc);
return;
}
bcopy((char *)sbuf,
sc->ndis_evt[sc->ndis_evtpidx].ne_buf, slen);
}
sc->ndis_evt[sc->ndis_evtpidx].ne_sts = status;
sc->ndis_evt[sc->ndis_evtpidx].ne_len = slen;
NDIS_EVTINC(sc->ndis_evtpidx);
NDIS_UNLOCK(sc);
}
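/*
* NdisMIndicateStatusComplete() handler. Act on the status cached
* by ndis_linksts(): schedule the tick task to update the link
* state, and kick the start task when the media comes up.
*/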
static void
ndis_linksts_done(adapter)
ndis_handle adapter;
{
ndis_miniport_block *block;
struct ndis_softc *sc;
struct ifnet *ifp;
block = adapter;
sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
ifp = sc->ifp;
if (!NDIS_INITIALIZED(sc))
return;
switch (sc->ndis_sts) {
case NDIS_STATUS_MEDIA_CONNECT:
IoQueueWorkItem(sc->ndis_tickitem,
(io_workitem_func)ndis_ticktask_wrap,
WORKQUEUE_CRITICAL, sc);
if (!sc->ndis_80211)
IoQueueWorkItem(sc->ndis_startitem,
(io_workitem_func)ndis_ifstarttask_wrap,
WORKQUEUE_CRITICAL, sc);
break;
case NDIS_STATUS_MEDIA_DISCONNECT:
if (sc->ndis_link)
IoQueueWorkItem(sc->ndis_tickitem,
(io_workitem_func)ndis_ticktask_wrap,
WORKQUEUE_CRITICAL, sc);
break;
default:
break;
}
}
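/*
* Periodic timer. This drives both the NDIS check-for-hang
* mechanism and our transmit watchdog; the actual work is
* deferred to work items rather than done in the callout itself.
*/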
static void
ndis_tick(xsc)
void *xsc;
{
struct ndis_softc *sc;
sc = xsc;
if (sc->ndis_hang_timer && --sc->ndis_hang_timer == 0) {
IoQueueWorkItem(sc->ndis_tickitem,
(io_workitem_func)ndis_ticktask_wrap,
WORKQUEUE_CRITICAL, sc);
sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs;
}
if (sc->ndis_tx_timer && --sc->ndis_tx_timer == 0) {
if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1);
device_printf(sc->ndis_dev, "watchdog timeout\n");
IoQueueWorkItem(sc->ndis_resetitem,
(io_workitem_func)ndis_resettask_wrap,
WORKQUEUE_CRITICAL, sc);
if (!sc->ndis_80211)
IoQueueWorkItem(sc->ndis_startitem,
(io_workitem_func)ndis_ifstarttask_wrap,
WORKQUEUE_CRITICAL, sc);
}
callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc);
}
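/*
* Tick work item: invoke the miniport's CheckForHang handler if it
* has one (resetting the NIC if a hang is reported), then turn any
* cached media connect/disconnect status into link state changes.
*/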
static void
ndis_ticktask(device_object *d, void *xsc)
{
struct ndis_softc *sc = xsc;
ndis_checkforhang_handler hangfunc;
uint8_t rval;
NDIS_LOCK(sc);
if (!NDIS_INITIALIZED(sc)) {
NDIS_UNLOCK(sc);
return;
}
NDIS_UNLOCK(sc);
hangfunc = sc->ndis_chars->nmc_checkhang_func;
if (hangfunc != NULL) {
rval = MSCALL1(hangfunc,
sc->ndis_block->nmb_miniportadapterctx);
if (rval == TRUE) {
ndis_reset_nic(sc);
return;
}
}
NDIS_LOCK(sc);
if (sc->ndis_link == 0 &&
sc->ndis_sts == NDIS_STATUS_MEDIA_CONNECT) {
sc->ndis_link = 1;
if (sc->ndis_80211 != 0) {
struct ieee80211com *ic = &sc->ndis_ic;
struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
if (vap != NULL) {
NDIS_UNLOCK(sc);
ndis_getstate_80211(sc);
ieee80211_new_state(vap, IEEE80211_S_RUN, -1);
NDIS_LOCK(sc);
if_link_state_change(vap->iv_ifp,
LINK_STATE_UP);
}
} else
if_link_state_change(sc->ifp, LINK_STATE_UP);
}
if (sc->ndis_link == 1 &&
sc->ndis_sts == NDIS_STATUS_MEDIA_DISCONNECT) {
sc->ndis_link = 0;
if (sc->ndis_80211 != 0) {
struct ieee80211com *ic = &sc->ndis_ic;
struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
if (vap != NULL) {
NDIS_UNLOCK(sc);
ieee80211_new_state(vap, IEEE80211_S_SCAN, 0);
NDIS_LOCK(sc);
if_link_state_change(vap->iv_ifp,
LINK_STATE_DOWN);
}
} else
if_link_state_change(sc->ifp, LINK_STATE_DOWN);
}
NDIS_UNLOCK(sc);
}
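/*
* busdma callback: convert the segment list for an outgoing mbuf
* chain into the NDIS scatter/gather list format.
*/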
static void
ndis_map_sclist(arg, segs, nseg, mapsize, error)
void *arg;
bus_dma_segment_t *segs;
int nseg;
bus_size_t mapsize;
int error;
{
struct ndis_sc_list *sclist;
int i;
if (error || arg == NULL)
return;
sclist = arg;
sclist->nsl_frags = nseg;
for (i = 0; i < nseg; i++) {
sclist->nsl_elements[i].nse_addr.np_quad = segs[i].ds_addr;
sclist->nsl_elements[i].nse_len = segs[i].ds_len;
}
}
static int
ndis_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
const struct ieee80211_bpf_params *params)
{
/* no support; just discard */
m_freem(m);
ieee80211_free_node(ni);
return (0);
}
static void
ndis_update_mcast(struct ieee80211com *ic)
{
struct ndis_softc *sc = ic->ic_softc;
ndis_setmulti(sc);
}
static void
ndis_update_promisc(struct ieee80211com *ic)
{
/* not supported */
}
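/*
* Start work item: resume draining the ethernet send queue from
* a sleepable (PASSIVE_LEVEL) context.
*/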
static void
ndis_ifstarttask(device_object *d, void *arg)
{
struct ndis_softc *sc = arg;
DPRINTF(("%s: sc=%p, ifp=%p\n", __func__, sc, sc->ifp));
if (sc->ndis_80211)
return;
struct ifnet *ifp = sc->ifp;
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
ndis_ifstart(ifp);
}
/*
* Main transmit routine. To make NDIS drivers happy, we need to
* transform mbuf chains into NDIS packets and feed them to the
* send packet routines. Most drivers allow you to send several
* packets at once (up to the maxpkts limit). Unfortunately, rather
* than accepting them in the form of a linked list, they expect
* a contiguous array of pointers to packets.
*
* For those drivers which use the NDIS scatter/gather DMA mechanism,
* we need to perform busdma work here. Those that use map registers
* will do the mapping themselves on a buffer by buffer basis.
*/
static void
ndis_ifstart(struct ifnet *ifp)
{
struct ndis_softc *sc;
struct mbuf *m = NULL;
ndis_packet **p0 = NULL, *p = NULL;
ndis_tcpip_csum *csum;
int pcnt = 0, status;
sc = ifp->if_softc;
NDIS_LOCK(sc);
if (!sc->ndis_link || ifp->if_drv_flags & IFF_DRV_OACTIVE) {
NDIS_UNLOCK(sc);
return;
}
p0 = &sc->ndis_txarray[sc->ndis_txidx];
while (sc->ndis_txpending) {
IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
if (m == NULL)
break;
NdisAllocatePacket(&status,
&sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool);
if (status != NDIS_STATUS_SUCCESS)
break;
if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) {
IFQ_DRV_PREPEND(&ifp->if_snd, m);
NDIS_UNLOCK(sc);
return;
}
/*
* Save pointer to original mbuf
* so we can free it later.
*/
p = sc->ndis_txarray[sc->ndis_txidx];
p->np_txidx = sc->ndis_txidx;
p->np_m0 = m;
p->np_oob.npo_status = NDIS_STATUS_PENDING;
/*
* Do scatter/gather processing, if driver requested it.
*/
if (sc->ndis_sc) {
bus_dmamap_load_mbuf(sc->ndis_ttag,
sc->ndis_tmaps[sc->ndis_txidx], m,
ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT);
bus_dmamap_sync(sc->ndis_ttag,
sc->ndis_tmaps[sc->ndis_txidx],
BUS_DMASYNC_PREREAD);
p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist;
}
/* Handle checksum offload. */
if (ifp->if_capenable & IFCAP_TXCSUM &&
m->m_pkthdr.csum_flags) {
csum = (ndis_tcpip_csum *)
&p->np_ext.npe_info[ndis_tcpipcsum_info];
csum->u.ntc_txflags = NDIS_TXCSUM_DO_IPV4;
if (m->m_pkthdr.csum_flags & CSUM_IP)
csum->u.ntc_txflags |= NDIS_TXCSUM_DO_IP;
if (m->m_pkthdr.csum_flags & CSUM_TCP)
csum->u.ntc_txflags |= NDIS_TXCSUM_DO_TCP;
if (m->m_pkthdr.csum_flags & CSUM_UDP)
csum->u.ntc_txflags |= NDIS_TXCSUM_DO_UDP;
p->np_private.npp_flags = NDIS_PROTOCOL_ID_TCP_IP;
}
NDIS_INC(sc);
sc->ndis_txpending--;
pcnt++;
/*
* If there's a BPF listener, bounce a copy of this frame
* to him.
*/
if (!sc->ndis_80211) /* XXX handle 80211 */
BPF_MTAP(ifp, m);
/*
* The array that p0 points to must appear contiguous,
* so we must not wrap past the end of sc->ndis_txarray[].
* If it looks like we're about to wrap, break out here
* so this batch of packets can be transmitted, then
* wait for txeof to ask us to send the rest.
*/
if (sc->ndis_txidx == 0)
break;
}
if (pcnt == 0) {
NDIS_UNLOCK(sc);
return;
}
if (sc->ndis_txpending == 0)
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
/*
* Set a timeout in case the chip goes out to lunch.
*/
sc->ndis_tx_timer = 5;
NDIS_UNLOCK(sc);
/*
* According to NDIS documentation, if a driver exports
* a MiniportSendPackets() routine, we prefer that over
* a MiniportSend() routine (which sends just a single
* packet).
*/
if (sc->ndis_chars->nmc_sendmulti_func != NULL)
ndis_send_packets(sc, p0, pcnt);
else
ndis_send_packet(sc, p);
return;
}
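/*
* 802.11 transmit entry point. Like ndis_ifstart(), but net80211
* hands us a single mbuf at a time, so we encapsulate and send
* exactly one NDIS packet per call.
*/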
static int
ndis_80211transmit(struct ieee80211com *ic, struct mbuf *m)
{
struct ndis_softc *sc = ic->ic_softc;
ndis_packet **p0 = NULL, *p = NULL;
int status;
NDIS_LOCK(sc);
if (!sc->ndis_link || !sc->ndis_running) {
NDIS_UNLOCK(sc);
return (ENXIO);
}
if (sc->ndis_txpending == 0) {
NDIS_UNLOCK(sc);
return (ENOBUFS);
}
p0 = &sc->ndis_txarray[sc->ndis_txidx];
NdisAllocatePacket(&status,
&sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool);
if (status != NDIS_STATUS_SUCCESS) {
NDIS_UNLOCK(sc);
return (ENOBUFS);
}
if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) {
NDIS_UNLOCK(sc);
return (ENOBUFS);
}
/*
* Save pointer to original mbuf
* so we can free it later.
*/
p = sc->ndis_txarray[sc->ndis_txidx];
p->np_txidx = sc->ndis_txidx;
p->np_m0 = m;
p->np_oob.npo_status = NDIS_STATUS_PENDING;
/*
* Do scatter/gather processing, if driver requested it.
*/
if (sc->ndis_sc) {
bus_dmamap_load_mbuf(sc->ndis_ttag,
sc->ndis_tmaps[sc->ndis_txidx], m,
ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT);
bus_dmamap_sync(sc->ndis_ttag,
sc->ndis_tmaps[sc->ndis_txidx],
BUS_DMASYNC_PREREAD);
p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist;
}
NDIS_INC(sc);
sc->ndis_txpending--;
/*
* Set a timeout in case the chip goes out to lunch.
*/
sc->ndis_tx_timer = 5;
NDIS_UNLOCK(sc);
/*
* According to NDIS documentation, if a driver exports
* a MiniportSendPackets() routine, we prefer that over
* a MiniportSend() routine (which sends just a single
* packet).
*/
if (sc->ndis_chars->nmc_sendmulti_func != NULL)
ndis_send_packets(sc, p0, 1);
else
ndis_send_packet(sc, p);
return (0);
}
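/*
* net80211 parent up/down handler: bring the NIC up when the first
* vap starts running and stop it when the last one goes down.
*/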
static void
ndis_80211parent(struct ieee80211com *ic)
{
struct ndis_softc *sc = ic->ic_softc;
/*NDIS_LOCK(sc);*/
if (ic->ic_nrunning > 0) {
if (!sc->ndis_running)
ndis_init(sc);
} else if (sc->ndis_running)
ndis_stop(sc);
/*NDIS_UNLOCK(sc);*/
}
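/*
* Initialize and bring up the NIC: program the packet filter,
* lookahead size, multicast filter and task offload settings,
* then start the periodic timer.
*/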
static void
ndis_init(void *xsc)
{
struct ndis_softc *sc = xsc;
int i, len, error;
/*
* Avoid reinitializing the link unnecessarily.
* This should be dealt with in a better way by
* fixing the upper layer modules so they don't
* call ifp->if_init() quite as often.
*/
if (sc->ndis_link)
return;
/*
* Cancel pending I/O and free all RX/TX buffers.
*/
ndis_stop(sc);
if (!(sc->ndis_iftype == PNPBus && ndisusb_halt == 0)) {
error = ndis_init_nic(sc);
if (error != 0) {
device_printf(sc->ndis_dev,
"failed to initialize the device: %d\n", error);
return;
}
}
/* Program the packet filter */
sc->ndis_filter = NDIS_PACKET_TYPE_DIRECTED |
NDIS_PACKET_TYPE_BROADCAST;
if (sc->ndis_80211) {
struct ieee80211com *ic = &sc->ndis_ic;
if (ic->ic_promisc > 0)
sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS;
} else {
struct ifnet *ifp = sc->ifp;
if (ifp->if_flags & IFF_PROMISC)
sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS;
}
len = sizeof(sc->ndis_filter);
error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER,
&sc->ndis_filter, &len);
if (error)
device_printf(sc->ndis_dev, "set filter failed: %d\n", error);
/*
* Set lookahead.
*/
if (sc->ndis_80211)
i = ETHERMTU;
else
i = sc->ifp->if_mtu;
len = sizeof(i);
ndis_set_info(sc, OID_GEN_CURRENT_LOOKAHEAD, &i, &len);
/*
* Program the multicast filter, if necessary.
*/
ndis_setmulti(sc);
/* Setup task offload. */
ndis_set_offload(sc);
NDIS_LOCK(sc);
sc->ndis_txidx = 0;
sc->ndis_txpending = sc->ndis_maxpkts;
sc->ndis_link = 0;
if (!sc->ndis_80211) {
if_link_state_change(sc->ifp, LINK_STATE_UNKNOWN);
sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}
sc->ndis_tx_timer = 0;
/*
* Some drivers don't set this value. The NDIS spec says
* the default checkforhang timeout is "approximately 2
* seconds." We use 3 seconds, because it seems for some
* drivers, exactly 2 seconds is too fast.
*/
if (sc->ndis_block->nmb_checkforhangsecs == 0)
sc->ndis_block->nmb_checkforhangsecs = 3;
sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs;
callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc);
sc->ndis_running = 1;
NDIS_UNLOCK(sc);
/* XXX force handling */
if (sc->ndis_80211)
ieee80211_start_all(&sc->ndis_ic); /* start all vap's */
}
/*
* Set media options.
*/
static int
ndis_ifmedia_upd(ifp)
struct ifnet *ifp;
{
struct ndis_softc *sc;
sc = ifp->if_softc;
if (NDIS_INITIALIZED(sc))
ndis_init(sc);
return (0);
}
/*
* Report current media status.
*/
static void
ndis_ifmedia_sts(ifp, ifmr)
struct ifnet *ifp;
struct ifmediareq *ifmr;
{
struct ndis_softc *sc;
uint32_t media_info;
ndis_media_state linkstate;
int len;
ifmr->ifm_status = IFM_AVALID;
ifmr->ifm_active = IFM_ETHER;
sc = ifp->if_softc;
if (!NDIS_INITIALIZED(sc))
return;
len = sizeof(linkstate);
ndis_get_info(sc, OID_GEN_MEDIA_CONNECT_STATUS,
(void *)&linkstate, &len);
len = sizeof(media_info);
ndis_get_info(sc, OID_GEN_LINK_SPEED,
(void *)&media_info, &len);
if (linkstate == nmc_connected)
ifmr->ifm_status |= IFM_ACTIVE;
switch (media_info) {
case 100000:
ifmr->ifm_active |= IFM_10_T;
break;
case 1000000:
ifmr->ifm_active |= IFM_100_TX;
break;
case 10000000:
ifmr->ifm_active |= IFM_1000_T;
break;
default:
device_printf(sc->ndis_dev, "unknown speed: %d\n", media_info);
break;
}
}
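/*
* Select the NDIS encryption level matching the requested WPA
* cipher, then read the setting back to verify that the driver
* actually accepted it.
*/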
static int
ndis_set_cipher(struct ndis_softc *sc, int cipher)
{
struct ieee80211com *ic = &sc->ndis_ic;
int rval = 0, len;
uint32_t arg, save;
len = sizeof(arg);
if (cipher == WPA_CSE_WEP40 || cipher == WPA_CSE_WEP104) {
if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_WEP))
return (ENOTSUP);
arg = NDIS_80211_WEPSTAT_ENC1ENABLED;
}
if (cipher == WPA_CSE_TKIP) {
if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP))
return (ENOTSUP);
arg = NDIS_80211_WEPSTAT_ENC2ENABLED;
}
if (cipher == WPA_CSE_CCMP) {
if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_AES_CCM))
return (ENOTSUP);
arg = NDIS_80211_WEPSTAT_ENC3ENABLED;
}
DPRINTF(("Setting cipher to %d\n", arg));
save = arg;
rval = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len);
if (rval)
return (rval);
/* Check that the cipher was set correctly. */
len = sizeof(save);
rval = ndis_get_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len);
if (rval != 0 || arg != save)
return (ENODEV);
return (0);
}
/*
* WPA is hairy to set up. Do the work in a separate routine
* so we don't clutter the setstate function too much.
* Important yet undocumented fact: first we have to set the
* authentication mode, _then_ we enable the ciphers. If one
* of the WPA authentication modes isn't enabled, the driver
* might not permit the TKIP or AES ciphers to be selected.
*/
static int
ndis_set_wpa(sc, ie, ielen)
struct ndis_softc *sc;
void *ie;
int ielen;
{
struct ieee80211_ie_wpa *w;
struct ndis_ie *n;
char *pos;
uint32_t arg;
int i;
/*
* Apparently, the only way for us to know what ciphers
* and key management/authentication mode to use is for
* us to inspect the optional information element (IE)
* stored in the 802.11 state machine. This IE should be
* supplied by the WPA supplicant.
*/
w = (struct ieee80211_ie_wpa *)ie;
/* Check for the right kind of IE. */
if (w->wpa_id != IEEE80211_ELEMID_VENDOR) {
DPRINTF(("Incorrect IE type %d\n", w->wpa_id));
return (EINVAL);
}
/* Skip over the ucast cipher OIDs. */
pos = (char *)&w->wpa_uciphers[0];
pos += w->wpa_uciphercnt * sizeof(struct ndis_ie);
/* Skip over the authmode count. */
pos += sizeof(u_int16_t);
/*
* Check for the authentication modes. I'm
* pretty sure there's only supposed to be one.
*/
n = (struct ndis_ie *)pos;
if (n->ni_val == WPA_ASE_NONE)
arg = NDIS_80211_AUTHMODE_WPANONE;
if (n->ni_val == WPA_ASE_8021X_UNSPEC)
arg = NDIS_80211_AUTHMODE_WPA;
if (n->ni_val == WPA_ASE_8021X_PSK)
arg = NDIS_80211_AUTHMODE_WPAPSK;
DPRINTF(("Setting WPA auth mode to %d\n", arg));
i = sizeof(arg);
if (ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i))
return (ENOTSUP);
i = sizeof(arg);
ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i);
/* Now configure the desired ciphers. */
/* First, set up the multicast group cipher. */
n = (struct ndis_ie *)&w->wpa_mcipher[0];
if (ndis_set_cipher(sc, n->ni_val))
return (ENOTSUP);
/* Now start looking around for the unicast ciphers. */
pos = (char *)&w->wpa_uciphers[0];
n = (struct ndis_ie *)pos;
for (i = 0; i < w->wpa_uciphercnt; i++) {
if (ndis_set_cipher(sc, n->ni_val))
return (ENOTSUP);
n++;
}
return (0);
}
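/*
* 802.11 media status handler. Refresh the current TX rate from
* OID_GEN_LINK_SPEED (units of 100bps; ni_txrate wants 500Kbps
* units, hence the division by 5000), then let net80211 fill in
* the rest.
*/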
static void
ndis_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
struct ieee80211vap *vap = ifp->if_softc;
struct ndis_softc *sc = vap->iv_ic->ic_softc;
uint32_t txrate;
int len;
if (!NDIS_INITIALIZED(sc))
return;
len = sizeof(txrate);
if (ndis_get_info(sc, OID_GEN_LINK_SPEED, &txrate, &len) == 0)
vap->iv_bss->ni_txrate = txrate / 5000;
ieee80211_media_status(ifp, imr);
}
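/*
* Reset the 802.11 state to a known baseline: disassociate, set
* infrastructure mode, power management and TX power, disable
* encryption and authentication, and park the BSSID on our own
* address so the NIC doesn't associate on its own.
*/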
static void
ndis_setstate_80211(struct ndis_softc *sc)
{
struct ieee80211com *ic = &sc->ndis_ic;
struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
ndis_80211_macaddr bssid;
ndis_80211_config config;
int rval = 0, len;
uint32_t arg;
if (!NDIS_INITIALIZED(sc)) {
DPRINTF(("%s: NDIS not initialized\n", __func__));
return;
}
/* Disassociate and turn off radio. */
len = sizeof(arg);
arg = 1;
ndis_set_info(sc, OID_802_11_DISASSOCIATE, &arg, &len);
/* Set network infrastructure mode. */
len = sizeof(arg);
if (ic->ic_opmode == IEEE80211_M_IBSS)
arg = NDIS_80211_NET_INFRA_IBSS;
else
arg = NDIS_80211_NET_INFRA_BSS;
rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len);
if (rval)
device_printf (sc->ndis_dev, "set infra failed: %d\n", rval);
/* Set power management */
len = sizeof(arg);
if (vap->iv_flags & IEEE80211_F_PMGTON)
arg = NDIS_80211_POWERMODE_FAST_PSP;
else
arg = NDIS_80211_POWERMODE_CAM;
ndis_set_info(sc, OID_802_11_POWER_MODE, &arg, &len);
/* Set TX power */
if ((ic->ic_caps & IEEE80211_C_TXPMGT) &&
ic->ic_txpowlimit < nitems(dBm2mW)) {
arg = dBm2mW[ic->ic_txpowlimit];
len = sizeof(arg);
ndis_set_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len);
}
/*
* Default encryption mode to off, authentication
* to open and privacy to 'accept everything.'
*/
len = sizeof(arg);
arg = NDIS_80211_WEPSTAT_DISABLED;
ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len);
len = sizeof(arg);
arg = NDIS_80211_AUTHMODE_OPEN;
ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len);
/*
* Note that OID_802_11_PRIVACY_FILTER is optional:
* not all drivers implement it.
*/
len = sizeof(arg);
arg = NDIS_80211_PRIVFILT_8021XWEP;
ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len);
len = sizeof(config);
bzero((char *)&config, len);
config.nc_length = len;
config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh);
rval = ndis_get_info(sc, OID_802_11_CONFIGURATION, &config, &len);
/*
* Some drivers expect us to initialize these values, so
* provide some defaults.
*/
if (config.nc_beaconperiod == 0)
config.nc_beaconperiod = 100;
if (config.nc_atimwin == 0)
config.nc_atimwin = 100;
if (config.nc_fhconfig.ncf_dwelltime == 0)
config.nc_fhconfig.ncf_dwelltime = 200;
if (rval == 0 && ic->ic_bsschan != IEEE80211_CHAN_ANYC) {
int chan, chanflag;
chan = ieee80211_chan2ieee(ic, ic->ic_bsschan);
chanflag = config.nc_dsconfig > 2500000 ? IEEE80211_CHAN_5GHZ :
IEEE80211_CHAN_2GHZ;
if (chan != ieee80211_mhz2ieee(config.nc_dsconfig / 1000, 0)) {
config.nc_dsconfig =
ic->ic_bsschan->ic_freq * 1000;
len = sizeof(config);
config.nc_length = len;
config.nc_fhconfig.ncf_length =
sizeof(ndis_80211_config_fh);
DPRINTF(("Setting channel to %ukHz\n", config.nc_dsconfig));
rval = ndis_set_info(sc, OID_802_11_CONFIGURATION,
&config, &len);
if (rval)
device_printf(sc->ndis_dev, "couldn't change "
"DS config to %ukHz: %d\n",
config.nc_dsconfig, rval);
}
} else if (rval)
device_printf(sc->ndis_dev, "couldn't retrieve "
"channel info: %d\n", rval);
/* Set the BSSID to our value so the driver doesn't associate */
len = IEEE80211_ADDR_LEN;
bcopy(vap->iv_myaddr, bssid, len);
DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":"));
rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len);
if (rval)
device_printf(sc->ndis_dev,
"setting BSSID failed: %d\n", rval);
}
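/*
* Program everything needed to authenticate and associate:
* infrastructure mode, RTS/fragmentation thresholds, WEP or WPA
* parameters and the desired BSSID, finishing with the SSID,
* which is what actually triggers the association attempt.
*/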
static void
ndis_auth_and_assoc(struct ndis_softc *sc, struct ieee80211vap *vap)
{
struct ieee80211_node *ni = vap->iv_bss;
ndis_80211_ssid ssid;
ndis_80211_macaddr bssid;
ndis_80211_wep wep;
int i, rval = 0, len, error;
uint32_t arg;
if (!NDIS_INITIALIZED(sc)) {
DPRINTF(("%s: NDIS not initialized\n", __func__));
return;
}
/* Initial setup */
ndis_setstate_80211(sc);
/* Set network infrastructure mode. */
len = sizeof(arg);
if (vap->iv_opmode == IEEE80211_M_IBSS)
arg = NDIS_80211_NET_INFRA_IBSS;
else
arg = NDIS_80211_NET_INFRA_BSS;
rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len);
if (rval)
device_printf (sc->ndis_dev, "set infra failed: %d\n", rval);
/* Set RTS threshold */
len = sizeof(arg);
arg = vap->iv_rtsthreshold;
ndis_set_info(sc, OID_802_11_RTS_THRESHOLD, &arg, &len);
/* Set fragmentation threshold */
len = sizeof(arg);
arg = vap->iv_fragthreshold;
ndis_set_info(sc, OID_802_11_FRAGMENTATION_THRESHOLD, &arg, &len);
/* Set WEP */
if (vap->iv_flags & IEEE80211_F_PRIVACY &&
!(vap->iv_flags & IEEE80211_F_WPA)) {
int keys_set = 0;
if (ni->ni_authmode == IEEE80211_AUTH_SHARED) {
len = sizeof(arg);
arg = NDIS_80211_AUTHMODE_SHARED;
DPRINTF(("Setting shared auth\n"));
ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE,
&arg, &len);
}
for (i = 0; i < IEEE80211_WEP_NKID; i++) {
if (vap->iv_nw_keys[i].wk_keylen) {
if (vap->iv_nw_keys[i].wk_cipher->ic_cipher !=
IEEE80211_CIPHER_WEP)
continue;
bzero((char *)&wep, sizeof(wep));
wep.nw_keylen = vap->iv_nw_keys[i].wk_keylen;
/*
* 5, 13 and 16 are the only valid
* key lengths. Anything in between
* will be zero padded out to the
* next highest boundary.
*/
if (vap->iv_nw_keys[i].wk_keylen < 5)
wep.nw_keylen = 5;
else if (vap->iv_nw_keys[i].wk_keylen > 5 &&
vap->iv_nw_keys[i].wk_keylen < 13)
wep.nw_keylen = 13;
else if (vap->iv_nw_keys[i].wk_keylen > 13 &&
vap->iv_nw_keys[i].wk_keylen < 16)
wep.nw_keylen = 16;
wep.nw_keyidx = i;
wep.nw_length = (sizeof(uint32_t) * 3)
+ wep.nw_keylen;
if (i == vap->iv_def_txkey)
wep.nw_keyidx |= NDIS_80211_WEPKEY_TX;
bcopy(vap->iv_nw_keys[i].wk_key,
wep.nw_keydata, wep.nw_length);
len = sizeof(wep);
DPRINTF(("Setting WEP key %d\n", i));
rval = ndis_set_info(sc,
OID_802_11_ADD_WEP, &wep, &len);
if (rval)
device_printf(sc->ndis_dev,
"set wepkey failed: %d\n", rval);
keys_set++;
}
}
if (keys_set) {
DPRINTF(("Setting WEP on\n"));
arg = NDIS_80211_WEPSTAT_ENABLED;
len = sizeof(arg);
rval = ndis_set_info(sc,
OID_802_11_WEP_STATUS, &arg, &len);
if (rval)
device_printf(sc->ndis_dev,
"enable WEP failed: %d\n", rval);
if (vap->iv_flags & IEEE80211_F_DROPUNENC)
arg = NDIS_80211_PRIVFILT_8021XWEP;
else
arg = NDIS_80211_PRIVFILT_ACCEPTALL;
len = sizeof(arg);
ndis_set_info(sc,
OID_802_11_PRIVACY_FILTER, &arg, &len);
}
}
/* Set up WPA. */
if ((vap->iv_flags & IEEE80211_F_WPA) &&
vap->iv_appie_assocreq != NULL) {
struct ieee80211_appie *ie = vap->iv_appie_assocreq;
error = ndis_set_wpa(sc, ie->ie_data, ie->ie_len);
if (error != 0)
device_printf(sc->ndis_dev, "WPA setup failed\n");
}
#ifdef notyet
/* Set network type. */
arg = 0;
switch (vap->iv_curmode) {
case IEEE80211_MODE_11A:
arg = NDIS_80211_NETTYPE_11OFDM5;
break;
case IEEE80211_MODE_11B:
arg = NDIS_80211_NETTYPE_11DS;
break;
case IEEE80211_MODE_11G:
arg = NDIS_80211_NETTYPE_11OFDM24;
break;
default:
device_printf(sc->ndis_dev, "unknown mode: %d\n",
vap->iv_curmode);
}
if (arg) {
DPRINTF(("Setting network type to %d\n", arg));
len = sizeof(arg);
rval = ndis_set_info(sc, OID_802_11_NETWORK_TYPE_IN_USE,
&arg, &len);
if (rval)
device_printf(sc->ndis_dev,
"set nettype failed: %d\n", rval);
}
#endif
/*
* If the user selected a specific BSSID, try
* to use that one. This is useful in the case where
* there are several APs in range with the same network
* name. To delete the BSSID, we use the broadcast
* address as the BSSID.
* Note that some drivers seem to allow setting a BSSID
* in ad-hoc mode, which has the effect of forcing the
* NIC to create an ad-hoc cell with a specific BSSID,
* instead of a randomly chosen one. However, the net80211
* code makes the assumption that the BSSID setting is invalid
* when you're in ad-hoc mode, so we don't allow that here.
*/
len = IEEE80211_ADDR_LEN;
if (vap->iv_flags & IEEE80211_F_DESBSSID &&
vap->iv_opmode != IEEE80211_M_IBSS)
bcopy(ni->ni_bssid, bssid, len);
else
bcopy(ieee80211broadcastaddr, bssid, len);
DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":"));
rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len);
if (rval)
device_printf(sc->ndis_dev,
"setting BSSID failed: %d\n", rval);
/* Set SSID -- always do this last. */
#ifdef NDIS_DEBUG
if (ndis_debug > 0) {
printf("Setting ESSID to ");
ieee80211_print_essid(ni->ni_essid, ni->ni_esslen);
printf("\n");
}
#endif
len = sizeof(ssid);
bzero((char *)&ssid, len);
ssid.ns_ssidlen = ni->ni_esslen;
if (ssid.ns_ssidlen == 0) {
ssid.ns_ssidlen = 1;
} else
bcopy(ni->ni_essid, ssid.ns_ssid, ssid.ns_ssidlen);
rval = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len);
if (rval)
device_printf (sc->ndis_dev, "set ssid failed: %d\n", rval);
return;
}
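/*
* Fetch the driver's BSSID scan list. We guess at an initial
* buffer size and retry once with the driver's requested length
* if the first attempt returns ENOSPC.
*/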
static int
ndis_get_bssid_list(sc, bl)
struct ndis_softc *sc;
ndis_80211_bssid_list_ex **bl;
{
int len, error;
len = sizeof(uint32_t) + (sizeof(ndis_wlan_bssid_ex) * 16);
*bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
if (*bl == NULL)
return (ENOMEM);
error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len);
if (error == ENOSPC) {
free(*bl, M_DEVBUF);
*bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
if (*bl == NULL)
return (ENOMEM);
error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len);
}
if (error) {
DPRINTF(("%s: failed to read\n", __func__));
free(*bl, M_DEVBUF);
return (error);
}
return (0);
}
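/*
* Locate the entry in the BSSID list matching the BSSID we're
* currently associated with and return a malloc()ed copy of it;
* the caller must free the result.
*/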
static int
ndis_get_assoc(struct ndis_softc *sc, ndis_wlan_bssid_ex **assoc)
{
struct ieee80211com *ic = &sc->ndis_ic;
struct ieee80211vap *vap;
struct ieee80211_node *ni;
ndis_80211_bssid_list_ex *bl;
ndis_wlan_bssid_ex *bs;
ndis_80211_macaddr bssid;
int i, len, error;
if (!sc->ndis_link)
return (ENOENT);
len = sizeof(bssid);
error = ndis_get_info(sc, OID_802_11_BSSID, &bssid, &len);
if (error) {
device_printf(sc->ndis_dev, "failed to get bssid\n");
return (ENOENT);
}
vap = TAILQ_FIRST(&ic->ic_vaps);
ni = vap->iv_bss;
error = ndis_get_bssid_list(sc, &bl);
if (error)
return (error);
bs = (ndis_wlan_bssid_ex *)&bl->nblx_bssid[0];
for (i = 0; i < bl->nblx_items; i++) {
if (bcmp(bs->nwbx_macaddr, bssid, sizeof(bssid)) == 0) {
*assoc = malloc(bs->nwbx_len, M_TEMP, M_NOWAIT);
if (*assoc == NULL) {
free(bl, M_DEVBUF);
return (ENOMEM);
}
bcopy((char *)bs, (char *)*assoc, bs->nwbx_len);
free(bl, M_DEVBUF);
if (ic->ic_opmode == IEEE80211_M_STA)
ni->ni_associd = 1 | 0xc000; /* fake associd */
return (0);
}
bs = (ndis_wlan_bssid_ex *)((char *)bs + bs->nwbx_len);
}
free(bl, M_DEVBUF);
return (ENOENT);
}
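/*
* Pull the current association state (BSSID, SSID, channel, power
* management, TX power, authentication and WEP status) out of the
* driver and mirror it into net80211.
*/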
static void
ndis_getstate_80211(struct ndis_softc *sc)
{
struct ieee80211com *ic = &sc->ndis_ic;
struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
struct ieee80211_node *ni = vap->iv_bss;
ndis_wlan_bssid_ex *bs;
int rval, len, i = 0;
int chanflag;
uint32_t arg;
if (!NDIS_INITIALIZED(sc))
return;
if ((rval = ndis_get_assoc(sc, &bs)) != 0)
return;
/* We're associated, retrieve info on the current bssid. */
ic->ic_curmode = ndis_nettype_mode(bs->nwbx_nettype);
chanflag = ndis_nettype_chan(bs->nwbx_nettype);
IEEE80211_ADDR_COPY(ni->ni_bssid, bs->nwbx_macaddr);
/* Get SSID from current association info. */
bcopy(bs->nwbx_ssid.ns_ssid, ni->ni_essid,
bs->nwbx_ssid.ns_ssidlen);
ni->ni_esslen = bs->nwbx_ssid.ns_ssidlen;
if (ic->ic_caps & IEEE80211_C_PMGT) {
len = sizeof(arg);
rval = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &len);
if (rval)
device_printf(sc->ndis_dev,
"get power mode failed: %d\n", rval);
if (arg == NDIS_80211_POWERMODE_CAM)
vap->iv_flags &= ~IEEE80211_F_PMGTON;
else
vap->iv_flags |= IEEE80211_F_PMGTON;
}
/* Get TX power */
if (ic->ic_caps & IEEE80211_C_TXPMGT) {
len = sizeof(arg);
ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len);
for (i = 0; i < nitems(dBm2mW); i++)
if (dBm2mW[i] >= arg)
break;
ic->ic_txpowlimit = i;
}
/*
* Use the current association information to reflect
* what channel we're on.
*/
ic->ic_curchan = ieee80211_find_channel(ic,
bs->nwbx_config.nc_dsconfig / 1000, chanflag);
if (ic->ic_curchan == NULL)
ic->ic_curchan = &ic->ic_channels[0];
ni->ni_chan = ic->ic_curchan;
ic->ic_bsschan = ic->ic_curchan;
free(bs, M_TEMP);
/*
* Determine current authentication mode.
*/
len = sizeof(arg);
rval = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len);
if (rval)
device_printf(sc->ndis_dev,
"get authmode status failed: %d\n", rval);
else {
vap->iv_flags &= ~IEEE80211_F_WPA;
switch (arg) {
case NDIS_80211_AUTHMODE_OPEN:
ni->ni_authmode = IEEE80211_AUTH_OPEN;
break;
case NDIS_80211_AUTHMODE_SHARED:
ni->ni_authmode = IEEE80211_AUTH_SHARED;
break;
case NDIS_80211_AUTHMODE_AUTO:
ni->ni_authmode = IEEE80211_AUTH_AUTO;
break;
case NDIS_80211_AUTHMODE_WPA:
case NDIS_80211_AUTHMODE_WPAPSK:
case NDIS_80211_AUTHMODE_WPANONE:
ni->ni_authmode = IEEE80211_AUTH_WPA;
vap->iv_flags |= IEEE80211_F_WPA1;
break;
case NDIS_80211_AUTHMODE_WPA2:
case NDIS_80211_AUTHMODE_WPA2PSK:
ni->ni_authmode = IEEE80211_AUTH_WPA;
vap->iv_flags |= IEEE80211_F_WPA2;
break;
default:
ni->ni_authmode = IEEE80211_AUTH_NONE;
break;
}
}
len = sizeof(arg);
rval = ndis_get_info(sc, OID_802_11_WEP_STATUS, &arg, &len);
if (rval)
device_printf(sc->ndis_dev,
"get wep status failed: %d\n", rval);
if (arg == NDIS_80211_WEPSTAT_ENABLED)
vap->iv_flags |= IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC;
else
vap->iv_flags &= ~(IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC);
}
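/*
* Ethernet ioctl handler: track promiscuous mode changes in the
* packet filter and reprogram multicast, media and offload
* settings on request.
*/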
static int
ndis_ifioctl(ifp, command, data)
struct ifnet *ifp;
u_long command;
caddr_t data;
{
struct ndis_softc *sc = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *) data;
int i, error = 0;
/*NDIS_LOCK(sc);*/
switch (command) {
case SIOCSIFFLAGS:
if (ifp->if_flags & IFF_UP) {
if (sc->ndis_running &&
ifp->if_flags & IFF_PROMISC &&
!(sc->ndis_if_flags & IFF_PROMISC)) {
sc->ndis_filter |=
NDIS_PACKET_TYPE_PROMISCUOUS;
i = sizeof(sc->ndis_filter);
error = ndis_set_info(sc,
OID_GEN_CURRENT_PACKET_FILTER,
&sc->ndis_filter, &i);
} else if (sc->ndis_running &&
!(ifp->if_flags & IFF_PROMISC) &&
sc->ndis_if_flags & IFF_PROMISC) {
sc->ndis_filter &=
~NDIS_PACKET_TYPE_PROMISCUOUS;
i = sizeof(sc->ndis_filter);
error = ndis_set_info(sc,
OID_GEN_CURRENT_PACKET_FILTER,
&sc->ndis_filter, &i);
} else
ndis_init(sc);
} else {
if (sc->ndis_running)
ndis_stop(sc);
}
sc->ndis_if_flags = ifp->if_flags;
error = 0;
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
ndis_setmulti(sc);
error = 0;
break;
case SIOCGIFMEDIA:
case SIOCSIFMEDIA:
error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command);
break;
case SIOCSIFCAP:
ifp->if_capenable = ifr->ifr_reqcap;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist = sc->ndis_hwassist;
else
ifp->if_hwassist = 0;
ndis_set_offload(sc);
break;
default:
error = ether_ioctl(ifp, command, data);
break;
}
/*NDIS_UNLOCK(sc);*/
return (error);
}
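/*
* Driver-specific 802.11 ioctls: SIOCGDRVSPEC/SIOCSDRVSPEC let
* privileged userland get and set raw NDIS OIDs, while
* SIOCGPRIVATE_0 dequeues cached status events.
*/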
static int
ndis_80211ioctl(struct ieee80211com *ic, u_long cmd, void *data)
{
struct ndis_softc *sc = ic->ic_softc;
struct ifreq *ifr = data;
struct ndis_oid_data oid;
struct ndis_evt evt;
void *oidbuf = NULL;
int error = 0;
if ((error = priv_check(curthread, PRIV_DRIVER)) != 0)
return (error);
switch (cmd) {
case SIOCGDRVSPEC:
case SIOCSDRVSPEC:
error = copyin(ifr_data_get_ptr(ifr), &oid, sizeof(oid));
if (error)
break;
oidbuf = malloc(oid.len, M_TEMP, M_WAITOK | M_ZERO);
error = copyin((caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid),
oidbuf, oid.len);
}
if (error) {
free(oidbuf, M_TEMP);
return (error);
}
switch (cmd) {
case SIOCGDRVSPEC:
error = ndis_get_info(sc, oid.oid, oidbuf, &oid.len);
break;
case SIOCSDRVSPEC:
error = ndis_set_info(sc, oid.oid, oidbuf, &oid.len);
break;
case SIOCGPRIVATE_0:
NDIS_LOCK(sc);
if (sc->ndis_evt[sc->ndis_evtcidx].ne_sts == 0) {
error = ENOENT;
NDIS_UNLOCK(sc);
break;
}
error = copyin(ifr_data_get_ptr(ifr), &evt, sizeof(evt));
if (error) {
NDIS_UNLOCK(sc);
break;
}
if (evt.ne_len < sc->ndis_evt[sc->ndis_evtcidx].ne_len) {
error = ENOSPC;
NDIS_UNLOCK(sc);
break;
}
error = copyout(&sc->ndis_evt[sc->ndis_evtcidx],
ifr_data_get_ptr(ifr), sizeof(uint32_t) * 2);
if (error) {
NDIS_UNLOCK(sc);
break;
}
if (sc->ndis_evt[sc->ndis_evtcidx].ne_len) {
error = copyout(sc->ndis_evt[sc->ndis_evtcidx].ne_buf,
(caddr_t)ifr_data_get_ptr(ifr) +
(sizeof(uint32_t) * 2),
sc->ndis_evt[sc->ndis_evtcidx].ne_len);
if (error) {
NDIS_UNLOCK(sc);
break;
}
free(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, M_TEMP);
sc->ndis_evt[sc->ndis_evtcidx].ne_buf = NULL;
}
sc->ndis_evt[sc->ndis_evtcidx].ne_len = 0;
sc->ndis_evt[sc->ndis_evtcidx].ne_sts = 0;
NDIS_EVTINC(sc->ndis_evtcidx);
NDIS_UNLOCK(sc);
break;
default:
error = ENOTTY;
break;
}
switch (cmd) {
case SIOCGDRVSPEC:
case SIOCSDRVSPEC:
error = copyout(&oid, ifr_data_get_ptr(ifr), sizeof(oid));
if (error)
break;
error = copyout(oidbuf,
(caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid), oid.len);
}
free(oidbuf, M_TEMP);
return (error);
}
int
ndis_del_key(struct ieee80211vap *vap, const struct ieee80211_key *key)
{
struct ndis_softc *sc = vap->iv_ic->ic_softc;
ndis_80211_key rkey;
int len, error = 0;
bzero((char *)&rkey, sizeof(rkey));
len = sizeof(rkey);
rkey.nk_len = len;
rkey.nk_keyidx = key->wk_keyix;
bcopy(vap->iv_ifp->if_broadcastaddr,
rkey.nk_bssid, IEEE80211_ADDR_LEN);
error = ndis_set_info(sc, OID_802_11_REMOVE_KEY, &rkey, &len);
if (error)
return (0);
return (1);
}
/*
* In theory this could be called for any key, but we'll
* only use it for WPA TKIP or AES keys. These need to be
* set after initial authentication with the AP.
*/
static int
ndis_add_key(struct ieee80211vap *vap, const struct ieee80211_key *key)
{
struct ndis_softc *sc = vap->iv_ic->ic_softc;
ndis_80211_key rkey;
int len, error = 0;
switch (key->wk_cipher->ic_cipher) {
case IEEE80211_CIPHER_TKIP:
len = sizeof(ndis_80211_key);
bzero((char *)&rkey, sizeof(rkey));
rkey.nk_len = len;
rkey.nk_keylen = key->wk_keylen;
if (key->wk_flags & IEEE80211_KEY_SWMIC)
rkey.nk_keylen += 16;
/*
 * The NDIS key index is overloaded: the low bits hold the key
 * slot, bit 31 flags the transmit key, bit 30 a pairwise key,
 * and bit 29 that a key RSC value is supplied.
 */
if (key->wk_keyix != IEEE80211_KEYIX_NONE)
rkey.nk_keyidx = key->wk_keyix;
else
rkey.nk_keyidx = 0;
if (key->wk_flags & IEEE80211_KEY_XMIT)
rkey.nk_keyidx |= 1 << 31;
if (key->wk_flags & IEEE80211_KEY_GROUP) {
bcopy(ieee80211broadcastaddr,
rkey.nk_bssid, IEEE80211_ADDR_LEN);
} else {
bcopy(vap->iv_bss->ni_bssid,
rkey.nk_bssid, IEEE80211_ADDR_LEN);
/* pairwise key */
rkey.nk_keyidx |= 1 << 30;
}
/* need to set bit 29 based on keyrsc */
rkey.nk_keyrsc = key->wk_keyrsc[0]; /* XXX need tid */
if (rkey.nk_keyrsc)
rkey.nk_keyidx |= 1 << 29;
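/*
 * net80211 stores the 8-byte TKIP TX MIC key before the RX MIC
 * key; NDIS expects them in the opposite order, so swap the two
 * halves when software MIC is in use.
 */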
if (key->wk_flags & IEEE80211_KEY_SWMIC) {
bcopy(key->wk_key, rkey.nk_keydata, 16);
bcopy(key->wk_key + 24, rkey.nk_keydata + 16, 8);
bcopy(key->wk_key + 16, rkey.nk_keydata + 24, 8);
} else
bcopy(key->wk_key, rkey.nk_keydata, key->wk_keylen);
error = ndis_set_info(sc, OID_802_11_ADD_KEY, &rkey, &len);
break;
case IEEE80211_CIPHER_WEP:
error = 0;
break;
/*
* I don't know how to set up keys for the AES
* cipher yet. Is it the same as TKIP?
*/
case IEEE80211_CIPHER_AES_CCM:
default:
error = ENOTTY;
break;
}
/* We need to return 1 for success, 0 for failure. */
if (error)
return (0);
return (1);
}
static void
ndis_resettask(device_object *d, void *arg)
{
struct ndis_softc *sc;
sc = arg;
ndis_reset_nic(sc);
}
/*
* Stop the adapter and free any mbufs allocated to the
* RX and TX lists.
*/
static void
ndis_stop(struct ndis_softc *sc)
{
int i;
callout_drain(&sc->ndis_stat_callout);
NDIS_LOCK(sc);
sc->ndis_tx_timer = 0;
sc->ndis_link = 0;
if (!sc->ndis_80211)
sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
sc->ndis_running = 0;
NDIS_UNLOCK(sc);
if (sc->ndis_iftype != PNPBus ||
(sc->ndis_iftype == PNPBus &&
!(sc->ndisusb_status & NDISUSB_STATUS_DETACH) &&
ndisusb_halt != 0))
ndis_halt_nic(sc);
NDIS_LOCK(sc);
for (i = 0; i < NDIS_EVENTS; i++) {
if (sc->ndis_evt[i].ne_sts && sc->ndis_evt[i].ne_buf != NULL) {
free(sc->ndis_evt[i].ne_buf, M_TEMP);
sc->ndis_evt[i].ne_buf = NULL;
}
sc->ndis_evt[i].ne_sts = 0;
sc->ndis_evt[i].ne_len = 0;
}
sc->ndis_evtcidx = 0;
sc->ndis_evtpidx = 0;
NDIS_UNLOCK(sc);
}
/*
* Stop all chip I/O so that the kernel's probe routines don't
* get confused by errant DMAs when rebooting.
*/
void
ndis_shutdown(device_t dev)
{
struct ndis_softc *sc;
sc = device_get_softc(dev);
ndis_stop(sc);
}
static int
ndis_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
{
struct ndis_vap *nvp = NDIS_VAP(vap);
struct ieee80211com *ic = vap->iv_ic;
struct ndis_softc *sc = ic->ic_softc;
enum ieee80211_state ostate;
DPRINTF(("%s: %s -> %s\n", __func__,
ieee80211_state_name[vap->iv_state],
ieee80211_state_name[nstate]));
ostate = vap->iv_state;
vap->iv_state = nstate;
switch (nstate) {
/* pass on to net80211 */
case IEEE80211_S_INIT:
case IEEE80211_S_SCAN:
return nvp->newstate(vap, nstate, arg);
case IEEE80211_S_ASSOC:
if (ostate != IEEE80211_S_AUTH) {
IEEE80211_UNLOCK(ic);
ndis_auth_and_assoc(sc, vap);
IEEE80211_LOCK(ic);
}
break;
case IEEE80211_S_AUTH:
IEEE80211_UNLOCK(ic);
ndis_auth_and_assoc(sc, vap);
if (vap->iv_state == IEEE80211_S_AUTH) /* XXX */
ieee80211_new_state(vap, IEEE80211_S_ASSOC, 0);
IEEE80211_LOCK(ic);
break;
default:
break;
}
return (0);
}
static void
ndis_scan(void *arg)
{
struct ieee80211vap *vap = arg;
ieee80211_scan_done(vap);
}
static void
ndis_scan_results(struct ndis_softc *sc)
{
struct ieee80211com *ic = &sc->ndis_ic;
struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
ndis_80211_bssid_list_ex *bl;
ndis_wlan_bssid_ex *wb;
struct ieee80211_scanparams sp;
struct ieee80211_frame wh;
struct ieee80211_channel *saved_chan;
int i, j;
int rssi, noise, freq, chanflag;
uint8_t ssid[2+IEEE80211_NWID_LEN];
uint8_t rates[2+IEEE80211_RATE_MAXSIZE];
uint8_t *frm, *efrm;
saved_chan = ic->ic_curchan;
noise = -96;
if (ndis_get_bssid_list(sc, &bl))
return;
DPRINTF(("%s: %d results\n", __func__, bl->nblx_items));
wb = &bl->nblx_bssid[0];
for (i = 0; i < bl->nblx_items; i++) {
memset(&sp, 0, sizeof(sp));
memcpy(wh.i_addr2, wb->nwbx_macaddr, sizeof(wh.i_addr2));
memcpy(wh.i_addr3, wb->nwbx_macaddr, sizeof(wh.i_addr3));
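/* Map RSSI linearly from the assumed [-96, -32] dBm window to [0, 100]. */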
rssi = 100 * (wb->nwbx_rssi - noise) / (-32 - noise);
rssi = max(0, min(rssi, 100)); /* limit 0 <= rssi <= 100 */
if (wb->nwbx_privacy)
sp.capinfo |= IEEE80211_CAPINFO_PRIVACY;
sp.bintval = wb->nwbx_config.nc_beaconperiod;
switch (wb->nwbx_netinfra) {
case NDIS_80211_NET_INFRA_IBSS:
sp.capinfo |= IEEE80211_CAPINFO_IBSS;
break;
case NDIS_80211_NET_INFRA_BSS:
sp.capinfo |= IEEE80211_CAPINFO_ESS;
break;
}
sp.rates = &rates[0];
for (j = 0; j < IEEE80211_RATE_MAXSIZE; j++) {
/* XXX - check units */
if (wb->nwbx_supportedrates[j] == 0)
break;
rates[2 + j] =
wb->nwbx_supportedrates[j] & 0x7f;
}
rates[1] = j;
sp.ssid = (uint8_t *)&ssid[0];
memcpy(sp.ssid + 2, &wb->nwbx_ssid.ns_ssid,
wb->nwbx_ssid.ns_ssidlen);
sp.ssid[1] = wb->nwbx_ssid.ns_ssidlen;
chanflag = ndis_nettype_chan(wb->nwbx_nettype);
freq = wb->nwbx_config.nc_dsconfig / 1000;
sp.chan = sp.bchan = ieee80211_mhz2ieee(freq, chanflag);
/* Hack ic->ic_curchan to be in sync with the scan result */
ic->ic_curchan = ieee80211_find_channel(ic, freq, chanflag);
if (ic->ic_curchan == NULL)
ic->ic_curchan = &ic->ic_channels[0];
/* Process extended info from AP */
if (wb->nwbx_len > sizeof(ndis_wlan_bssid)) {
frm = (uint8_t *)&wb->nwbx_ies;
efrm = frm + wb->nwbx_ielen;
if (efrm - frm < 12)
goto done;
sp.tstamp = frm; frm += 8;
sp.bintval = le16toh(*(uint16_t *)frm); frm += 2;
sp.capinfo = le16toh(*(uint16_t *)frm); frm += 2;
sp.ies = frm;
sp.ies_len = efrm - frm;
}
done:
DPRINTF(("scan: bssid %s chan %dMHz (%d/%d) rssi %d\n",
ether_sprintf(wb->nwbx_macaddr), freq, sp.bchan, chanflag,
rssi));
ieee80211_add_scan(vap, ic->ic_curchan, &sp, &wh, 0, rssi, noise);
wb = (ndis_wlan_bssid_ex *)((char *)wb + wb->nwbx_len);
}
free(bl, M_DEVBUF);
/* Restore the channel after messing with it */
ic->ic_curchan = saved_chan;
}
static void
ndis_scan_start(struct ieee80211com *ic)
{
struct ndis_softc *sc = ic->ic_softc;
struct ieee80211vap *vap;
struct ieee80211_scan_state *ss;
ndis_80211_ssid ssid;
int error, len;
ss = ic->ic_scan;
vap = TAILQ_FIRST(&ic->ic_vaps);
if (!NDIS_INITIALIZED(sc)) {
DPRINTF(("%s: scan aborted\n", __func__));
ieee80211_cancel_scan(vap);
return;
}
len = sizeof(ssid);
bzero((char *)&ssid, len);
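/* NB: an all-zero SSID of length 1 is assumed to request a broadcast scan. */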
if (ss->ss_nssid == 0)
ssid.ns_ssidlen = 1;
else {
/* Perform a directed scan */
ssid.ns_ssidlen = ss->ss_ssid[0].len;
bcopy(ss->ss_ssid[0].ssid, ssid.ns_ssid, ssid.ns_ssidlen);
}
error = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len);
if (error)
DPRINTF(("%s: set ESSID failed\n", __func__));
len = 0;
error = ndis_set_info(sc, OID_802_11_BSSID_LIST_SCAN, NULL, &len);
if (error) {
DPRINTF(("%s: scan command failed\n", __func__));
ieee80211_cancel_scan(vap);
return;
}
/* Set a timer to collect the results */
callout_reset(&sc->ndis_scan_callout, hz * 3, ndis_scan, vap);
}
static void
ndis_set_channel(struct ieee80211com *ic)
{
/* ignore */
}
static void
ndis_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell)
{
/* ignore */
}
static void
ndis_scan_mindwell(struct ieee80211_scan_state *ss)
{
/* NB: don't try to abort scan; wait for firmware to finish */
}
static void
ndis_scan_end(struct ieee80211com *ic)
{
struct ndis_softc *sc = ic->ic_softc;
ndis_scan_results(sc);
}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index 98f06af5230c..45aa824eae9b 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1,4709 +1,4710 @@
/*-
* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "opt_kern_tls.h"
#include "en.h"
#include
#include
#include
#include
#ifndef ETH_DRIVER_VERSION
#define ETH_DRIVER_VERSION "3.5.2"
#endif
#define DRIVER_RELDATE "September 2019"
static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver "
ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);
struct mlx5e_channel_param {
struct mlx5e_rq_param rq;
struct mlx5e_sq_param sq;
struct mlx5e_cq_param rx_cq;
struct mlx5e_cq_param tx_cq;
};
struct media {
u32 subtype;
u64 baudrate;
};
static const struct media mlx5e_mode_table[MLX5E_LINK_SPEEDS_NUMBER][MLX5E_LINK_MODES_NUMBER] = {
[MLX5E_1000BASE_CX_SGMII][MLX5E_SGMII] = {
.subtype = IFM_1000_CX_SGMII,
.baudrate = IF_Mbps(1000ULL),
},
[MLX5E_1000BASE_KX][MLX5E_KX] = {
.subtype = IFM_1000_KX,
.baudrate = IF_Mbps(1000ULL),
},
[MLX5E_10GBASE_CX4][MLX5E_CX4] = {
.subtype = IFM_10G_CX4,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_KX4][MLX5E_KX4] = {
.subtype = IFM_10G_KX4,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_KR][MLX5E_KR] = {
.subtype = IFM_10G_KR,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_20GBASE_KR2][MLX5E_KR2] = {
.subtype = IFM_20G_KR2,
.baudrate = IF_Gbps(20ULL),
},
[MLX5E_40GBASE_CR4][MLX5E_CR4] = {
.subtype = IFM_40G_CR4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_40GBASE_KR4][MLX5E_KR4] = {
.subtype = IFM_40G_KR4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_56GBASE_R4][MLX5E_R] = {
.subtype = IFM_56G_R4,
.baudrate = IF_Gbps(56ULL),
},
[MLX5E_10GBASE_CR][MLX5E_CR1] = {
.subtype = IFM_10G_CR1,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_SR][MLX5E_SR] = {
.subtype = IFM_10G_SR,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_ER_LR][MLX5E_ER] = {
.subtype = IFM_10G_ER,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_ER_LR][MLX5E_LR] = {
.subtype = IFM_10G_LR,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_40GBASE_SR4][MLX5E_SR4] = {
.subtype = IFM_40G_SR4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_40GBASE_LR4_ER4][MLX5E_LR4] = {
.subtype = IFM_40G_LR4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_40GBASE_LR4_ER4][MLX5E_ER4] = {
.subtype = IFM_40G_ER4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_100GBASE_CR4][MLX5E_CR4] = {
.subtype = IFM_100G_CR4,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_100GBASE_SR4][MLX5E_SR4] = {
.subtype = IFM_100G_SR4,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_100GBASE_KR4][MLX5E_KR4] = {
.subtype = IFM_100G_KR4,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_100GBASE_LR4][MLX5E_LR4] = {
.subtype = IFM_100G_LR4,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_100BASE_TX][MLX5E_TX] = {
.subtype = IFM_100_TX,
.baudrate = IF_Mbps(100ULL),
},
[MLX5E_1000BASE_T][MLX5E_T] = {
.subtype = IFM_1000_T,
.baudrate = IF_Mbps(1000ULL),
},
[MLX5E_10GBASE_T][MLX5E_T] = {
.subtype = IFM_10G_T,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_25GBASE_CR][MLX5E_CR] = {
.subtype = IFM_25G_CR,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GBASE_KR][MLX5E_KR] = {
.subtype = IFM_25G_KR,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GBASE_SR][MLX5E_SR] = {
.subtype = IFM_25G_SR,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_50GBASE_CR2][MLX5E_CR2] = {
.subtype = IFM_50G_CR2,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_50GBASE_KR2][MLX5E_KR2] = {
.subtype = IFM_50G_KR2,
.baudrate = IF_Gbps(50ULL),
},
};
static const struct media mlx5e_ext_mode_table[MLX5E_EXT_LINK_SPEEDS_NUMBER][MLX5E_LINK_MODES_NUMBER] = {
[MLX5E_SGMII_100M][MLX5E_SGMII] = {
.subtype = IFM_100_SGMII,
.baudrate = IF_Mbps(100),
},
[MLX5E_1000BASE_X_SGMII][MLX5E_KX] = {
.subtype = IFM_1000_KX,
.baudrate = IF_Mbps(1000),
},
[MLX5E_1000BASE_X_SGMII][MLX5E_CX_SGMII] = {
.subtype = IFM_1000_CX_SGMII,
.baudrate = IF_Mbps(1000),
},
[MLX5E_1000BASE_X_SGMII][MLX5E_CX] = {
.subtype = IFM_1000_CX,
.baudrate = IF_Mbps(1000),
},
[MLX5E_1000BASE_X_SGMII][MLX5E_LX] = {
.subtype = IFM_1000_LX,
.baudrate = IF_Mbps(1000),
},
[MLX5E_1000BASE_X_SGMII][MLX5E_SX] = {
.subtype = IFM_1000_SX,
.baudrate = IF_Mbps(1000),
},
[MLX5E_1000BASE_X_SGMII][MLX5E_T] = {
.subtype = IFM_1000_T,
.baudrate = IF_Mbps(1000),
},
[MLX5E_5GBASE_R][MLX5E_T] = {
.subtype = IFM_5000_T,
.baudrate = IF_Mbps(5000),
},
[MLX5E_5GBASE_R][MLX5E_KR] = {
.subtype = IFM_5000_KR,
.baudrate = IF_Mbps(5000),
},
[MLX5E_5GBASE_R][MLX5E_KR1] = {
.subtype = IFM_5000_KR1,
.baudrate = IF_Mbps(5000),
},
[MLX5E_5GBASE_R][MLX5E_KR_S] = {
.subtype = IFM_5000_KR_S,
.baudrate = IF_Mbps(5000),
},
[MLX5E_10GBASE_XFI_XAUI_1][MLX5E_ER] = {
.subtype = IFM_10G_ER,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_XFI_XAUI_1][MLX5E_KR] = {
.subtype = IFM_10G_KR,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_XFI_XAUI_1][MLX5E_LR] = {
.subtype = IFM_10G_LR,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_XFI_XAUI_1][MLX5E_SR] = {
.subtype = IFM_10G_SR,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_XFI_XAUI_1][MLX5E_T] = {
.subtype = IFM_10G_T,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_XFI_XAUI_1][MLX5E_AOC] = {
.subtype = IFM_10G_AOC,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_10GBASE_XFI_XAUI_1][MLX5E_CR1] = {
.subtype = IFM_10G_CR1,
.baudrate = IF_Gbps(10ULL),
},
[MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_CR4] = {
.subtype = IFM_40G_CR4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_KR4] = {
.subtype = IFM_40G_KR4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_LR4] = {
.subtype = IFM_40G_LR4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_SR4] = {
.subtype = IFM_40G_SR4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_ER4] = {
.subtype = IFM_40G_ER4,
.baudrate = IF_Gbps(40ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CR] = {
.subtype = IFM_25G_CR,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_KR] = {
.subtype = IFM_25G_KR,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_SR] = {
.subtype = IFM_25G_SR,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_ACC] = {
.subtype = IFM_25G_ACC,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_AOC] = {
.subtype = IFM_25G_AOC,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CR1] = {
.subtype = IFM_25G_CR1,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CR_S] = {
.subtype = IFM_25G_CR_S,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_KR1] = {
.subtype = IFM_5000_KR1,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_KR_S] = {
.subtype = IFM_25G_KR_S,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_LR] = {
.subtype = IFM_25G_LR,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_T] = {
.subtype = IFM_25G_T,
.baudrate = IF_Gbps(25ULL),
},
[MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_CR2] = {
.subtype = IFM_50G_CR2,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_KR2] = {
.subtype = IFM_50G_KR2,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_SR2] = {
.subtype = IFM_50G_SR2,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_LR2] = {
.subtype = IFM_50G_LR2,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_LR] = {
.subtype = IFM_50G_LR,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_SR] = {
.subtype = IFM_50G_SR,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_CP] = {
.subtype = IFM_50G_CP,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_FR] = {
.subtype = IFM_50G_FR,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_KR_PAM4] = {
.subtype = IFM_50G_KR_PAM4,
.baudrate = IF_Gbps(50ULL),
},
[MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_CR4] = {
.subtype = IFM_100G_CR4,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_KR4] = {
.subtype = IFM_100G_KR4,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_LR4] = {
.subtype = IFM_100G_LR4,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_SR4] = {
.subtype = IFM_100G_SR4,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_SR2] = {
.subtype = IFM_100G_SR2,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_CP2] = {
.subtype = IFM_100G_CP2,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_KR2_PAM4] = {
.subtype = IFM_100G_KR2_PAM4,
.baudrate = IF_Gbps(100ULL),
},
[MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_DR4] = {
.subtype = IFM_200G_DR4,
.baudrate = IF_Gbps(200ULL),
},
[MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_LR4] = {
.subtype = IFM_200G_LR4,
.baudrate = IF_Gbps(200ULL),
},
[MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_SR4] = {
.subtype = IFM_200G_SR4,
.baudrate = IF_Gbps(200ULL),
},
[MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_FR4] = {
.subtype = IFM_200G_FR4,
.baudrate = IF_Gbps(200ULL),
},
[MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_CR4_PAM4] = {
.subtype = IFM_200G_CR4_PAM4,
.baudrate = IF_Gbps(200ULL),
},
[MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_KR4_PAM4] = {
.subtype = IFM_200G_KR4_PAM4,
.baudrate = IF_Gbps(200ULL),
},
};
DEBUGNET_DEFINE(mlx5_en);
MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
u32 out[MLX5_ST_SZ_DW(ptys_reg)];
u32 eth_proto_oper;
int error;
u8 port_state;
u8 is_er_type;
u8 i, j;
bool ext;
struct media media_entry = {};
port_state = mlx5_query_vport_state(mdev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
if (port_state == VPORT_STATE_UP) {
priv->media_status_last |= IFM_ACTIVE;
} else {
priv->media_status_last &= ~IFM_ACTIVE;
priv->media_active_last = IFM_ETHER;
if_link_state_change(priv->ifp, LINK_STATE_DOWN);
return;
}
error = mlx5_query_port_ptys(mdev, out, sizeof(out),
MLX5_PTYS_EN, 1);
if (error) {
priv->media_active_last = IFM_ETHER;
priv->ifp->if_baudrate = 1;
mlx5_en_err(priv->ifp, "query port ptys failed: 0x%x\n",
error);
return;
}
ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
eth_proto_oper);
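/*
 * The operational protocol mask normally has a single bit set;
 * its index selects the row in the media table.
 */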
i = ilog2(eth_proto_oper);
for (j = 0; j != MLX5E_LINK_MODES_NUMBER; j++) {
media_entry = ext ? mlx5e_ext_mode_table[i][j] :
mlx5e_mode_table[i][j];
if (media_entry.baudrate != 0)
break;
}
if (media_entry.subtype == 0) {
mlx5_en_err(priv->ifp,
"Could not find operational media subtype\n");
return;
}
switch (media_entry.subtype) {
case IFM_10G_ER:
error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
if (error != 0) {
mlx5_en_err(priv->ifp,
"query port pddr failed: %d\n", error);
}
if (error != 0 || is_er_type == 0)
media_entry.subtype = IFM_10G_LR;
break;
case IFM_40G_LR4:
error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
if (error != 0) {
mlx5_en_err(priv->ifp,
"query port pddr failed: %d\n", error);
}
if (error == 0 && is_er_type != 0)
media_entry.subtype = IFM_40G_ER4;
break;
}
priv->media_active_last = media_entry.subtype | IFM_ETHER | IFM_FDX;
priv->ifp->if_baudrate = media_entry.baudrate;
if_link_state_change(priv->ifp, LINK_STATE_UP);
}
static void
mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
struct mlx5e_priv *priv = dev->if_softc;
ifmr->ifm_status = priv->media_status_last;
ifmr->ifm_active = priv->media_active_last |
(priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
(priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
}
static u32
mlx5e_find_link_mode(u32 subtype, bool ext)
{
u32 i;
u32 j;
u32 link_mode = 0;
u32 speeds_num = 0;
struct media media_entry = {};
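/*
 * The ER/LR variants of these modes share one hardware link mode;
 * map the requested subtype back to the one stored in the media
 * tables (see mlx5e_update_carrier()).
 */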
switch (subtype) {
case IFM_10G_LR:
subtype = IFM_10G_ER;
break;
case IFM_40G_ER4:
subtype = IFM_40G_LR4;
break;
}
speeds_num = ext ? MLX5E_EXT_LINK_SPEEDS_NUMBER :
MLX5E_LINK_SPEEDS_NUMBER;
for (i = 0; i != speeds_num; i++) {
for (j = 0; j < MLX5E_LINK_MODES_NUMBER; ++j) {
media_entry = ext ? mlx5e_ext_mode_table[i][j] :
mlx5e_mode_table[i][j];
if (media_entry.baudrate == 0)
continue;
if (media_entry.subtype == subtype) {
link_mode |= MLX5E_PROT_MASK(i);
}
}
}
return (link_mode);
}
static int
mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
{
return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
priv->params.rx_pauseframe_control,
priv->params.tx_pauseframe_control,
priv->params.rx_priority_flow_control,
priv->params.tx_priority_flow_control));
}
static int
mlx5e_set_port_pfc(struct mlx5e_priv *priv)
{
int error;
if (priv->gone != 0) {
error = -ENXIO;
} else if (priv->params.rx_pauseframe_control ||
priv->params.tx_pauseframe_control) {
mlx5_en_err(priv->ifp,
"Global pauseframes must be disabled before enabling PFC.\n");
error = -EINVAL;
} else {
error = mlx5e_set_port_pause_and_pfc(priv);
}
return (error);
}
static int
mlx5e_media_change(struct ifnet *dev)
{
struct mlx5e_priv *priv = dev->if_softc;
struct mlx5_core_dev *mdev = priv->mdev;
u32 eth_proto_cap;
u32 link_mode;
u32 out[MLX5_ST_SZ_DW(ptys_reg)];
int was_opened;
int locked;
int error;
bool ext;
locked = PRIV_LOCKED(priv);
if (!locked)
PRIV_LOCK(priv);
if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
error = EINVAL;
goto done;
}
error = mlx5_query_port_ptys(mdev, out, sizeof(out),
MLX5_PTYS_EN, 1);
if (error != 0) {
mlx5_en_err(dev, "Query port media capability failed\n");
goto done;
}
ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media), ext);
/* query supported capabilities */
eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
eth_proto_capability);
/* check for autoselect */
if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
link_mode = eth_proto_cap;
if (link_mode == 0) {
mlx5_en_err(dev, "Port media capability is zero\n");
error = EINVAL;
goto done;
}
} else {
link_mode = link_mode & eth_proto_cap;
if (link_mode == 0) {
mlx5_en_err(dev, "Not supported link mode requested\n");
error = EINVAL;
goto done;
}
}
if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
/* check if PFC is enabled */
if (priv->params.rx_priority_flow_control ||
priv->params.tx_priority_flow_control) {
mlx5_en_err(dev, "PFC must be disabled before enabling global pauseframes.\n");
error = EINVAL;
goto done;
}
}
/* update pauseframe control bits */
priv->params.rx_pauseframe_control =
(priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
priv->params.tx_pauseframe_control =
(priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
/* check if device is opened */
was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
/* reconfigure the hardware */
mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN, ext);
error = -mlx5e_set_port_pause_and_pfc(priv);
if (was_opened)
mlx5_set_port_status(mdev, MLX5_PORT_UP);
done:
if (!locked)
PRIV_UNLOCK(priv);
return (error);
}
static void
mlx5e_update_carrier_work(struct work_struct *work)
{
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
update_carrier_work);
PRIV_LOCK(priv);
if (test_bit(MLX5E_STATE_OPENED, &priv->state))
mlx5e_update_carrier(priv);
PRIV_UNLOCK(priv);
}
#define MLX5E_PCIE_PERF_GET_64(a,b,c,d,e,f) \
s_debug->c = MLX5_GET64(mpcnt_reg, out, counter_set.f.c);
#define MLX5E_PCIE_PERF_GET_32(a,b,c,d,e,f) \
s_debug->c = MLX5_GET(mpcnt_reg, out, counter_set.f.c);
static void
mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
const unsigned sz = MLX5_ST_SZ_BYTES(mpcnt_reg);
void *out;
void *in;
int err;
/* allocate firmware request structures */
in = mlx5_vzalloc(sz);
out = mlx5_vzalloc(sz);
if (in == NULL || out == NULL)
goto free_out;
MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP);
err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
if (err != 0)
goto free_out;
MLX5E_PCIE_PERFORMANCE_COUNTERS_64(MLX5E_PCIE_PERF_GET_64)
MLX5E_PCIE_PERFORMANCE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32)
MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP);
err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
if (err != 0)
goto free_out;
MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(MLX5E_PCIE_PERF_GET_32)
MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_LANE_COUNTERS_GROUP);
err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
if (err != 0)
goto free_out;
MLX5E_PCIE_LANE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32)
free_out:
/* free firmware request structures */
kvfree(in);
kvfree(out);
}
/*
* This function reads the physical port counters from the firmware
* using a pre-defined layout defined by various MLX5E_PPORT_XXX()
* macros. The output is converted from big-endian 64-bit values into
* host endian ones and stored in the "priv->stats.pport" structure.
*/
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_pport_stats *s = &priv->stats.pport;
struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
u32 *in;
u32 *out;
const u64 *ptr;
unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
unsigned x;
unsigned y;
unsigned z;
/* allocate firmware request structures */
in = mlx5_vzalloc(sz);
out = mlx5_vzalloc(sz);
if (in == NULL || out == NULL)
goto free_out;
/*
* Get pointer to the 64-bit counter set which is located at a
* fixed offset in the output firmware request structure:
*/
ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
MLX5_SET(ppcnt_reg, in, local_port, 1);
/* read IEEE802_3 counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
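/*
 * The first MLX5E_PPORT_PER_PRIO_STATS_NUM slots of s->arg belong
 * to the per-priority group read at the end; start after them.
 */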
for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
s->arg[y] = be64toh(ptr[x]);
/* read RFC2819 counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
s->arg[y] = be64toh(ptr[x]);
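/* The remaining RFC2819 counters are debug-only and land in s_debug->arg. */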
for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
s_debug->arg[y] = be64toh(ptr[x]);
/* read RFC2863 counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
s_debug->arg[y] = be64toh(ptr[x]);
/* read physical layer stats counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
s_debug->arg[y] = be64toh(ptr[x]);
/* read Extended Ethernet counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
for (x = 0; x != MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM; x++, y++)
s_debug->arg[y] = be64toh(ptr[x]);
/* read Extended Statistical Group */
if (MLX5_CAP_GEN(mdev, pcam_reg) &&
MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) &&
MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) {
/* read Extended Statistical counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
for (x = 0; x != MLX5E_PPORT_STATISTICAL_DEBUG_NUM; x++, y++)
s_debug->arg[y] = be64toh(ptr[x]);
}
/* read PCIE counters */
mlx5e_update_pcie_counters(priv);
/* read per-priority counters */
MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
/* iterate all the priorities */
for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
MLX5_SET(ppcnt_reg, in, prio_tc, z);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
/* read per priority stats counter group using predefined counter layout */
for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
s->arg[y] = be64toh(ptr[x]);
}
free_out:
/* free firmware request structures */
kvfree(in);
kvfree(out);
}
static void
mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv)
{
u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
return;
MLX5_SET(query_vnic_env_in, in, opcode,
MLX5_CMD_OP_QUERY_VNIC_ENV);
MLX5_SET(query_vnic_env_in, in, op_mod, 0);
MLX5_SET(query_vnic_env_in, in, other_vport, 0);
if (mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)) != 0)
return;
priv->stats.vport.rx_steer_missed_packets =
MLX5_GET64(query_vnic_env_out, out,
vport_env.nic_receive_steering_discard);
}
/*
* This function is called regularly to collect all statistics
* counters from the firmware. The values can be viewed through the
* sysctl interface. Execution is serialized using the priv's global
* configuration lock.
*/
static void
mlx5e_update_stats_locked(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_vport_stats *s = &priv->stats.vport;
struct mlx5e_sq_stats *sq_stats;
struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
struct ifnet *ifp = priv->ifp;
#endif
u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
u32 *out;
int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
u64 tso_packets = 0;
u64 tso_bytes = 0;
u64 tx_queue_dropped = 0;
u64 tx_defragged = 0;
u64 tx_offload_none = 0;
u64 lro_packets = 0;
u64 lro_bytes = 0;
u64 sw_lro_queued = 0;
u64 sw_lro_flushed = 0;
u64 rx_csum_none = 0;
u64 rx_wqe_err = 0;
u64 rx_packets = 0;
u64 rx_bytes = 0;
u32 rx_out_of_buffer = 0;
int error;
int i;
int j;
out = mlx5_vzalloc(outlen);
if (out == NULL)
goto free_out;
/* Collect the SW counters first and then the HW counters, for consistency */
for (i = 0; i < priv->params.num_channels; i++) {
struct mlx5e_channel *pch = priv->channel + i;
struct mlx5e_rq *rq = &pch->rq;
struct mlx5e_rq_stats *rq_stats = &pch->rq.stats;
/* collect stats from LRO */
rq_stats->sw_lro_queued = rq->lro.lro_queued;
rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
sw_lro_queued += rq_stats->sw_lro_queued;
sw_lro_flushed += rq_stats->sw_lro_flushed;
lro_packets += rq_stats->lro_packets;
lro_bytes += rq_stats->lro_bytes;
rx_csum_none += rq_stats->csum_none;
rx_wqe_err += rq_stats->wqe_err;
rx_packets += rq_stats->packets;
rx_bytes += rq_stats->bytes;
for (j = 0; j < priv->num_tc; j++) {
sq_stats = &pch->sq[j].stats;
sq_br = pch->sq[j].br;
tso_packets += sq_stats->tso_packets;
tso_bytes += sq_stats->tso_bytes;
tx_queue_dropped += sq_stats->dropped;
if (sq_br != NULL)
tx_queue_dropped += sq_br->br_drops;
tx_defragged += sq_stats->defragged;
tx_offload_none += sq_stats->csum_offload_none;
}
}
/* update counters */
s->tso_packets = tso_packets;
s->tso_bytes = tso_bytes;
s->tx_queue_dropped = tx_queue_dropped;
s->tx_defragged = tx_defragged;
s->lro_packets = lro_packets;
s->lro_bytes = lro_bytes;
s->sw_lro_queued = sw_lro_queued;
s->sw_lro_flushed = sw_lro_flushed;
s->rx_csum_none = rx_csum_none;
s->rx_wqe_err = rx_wqe_err;
s->rx_packets = rx_packets;
s->rx_bytes = rx_bytes;
mlx5e_grp_vnic_env_update_stats(priv);
/* HW counters */
memset(in, 0, sizeof(in));
MLX5_SET(query_vport_counter_in, in, opcode,
MLX5_CMD_OP_QUERY_VPORT_COUNTER);
MLX5_SET(query_vport_counter_in, in, op_mod, 0);
MLX5_SET(query_vport_counter_in, in, other_vport, 0);
memset(out, 0, outlen);
/* get number of out-of-buffer drops first */
if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 &&
mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
&rx_out_of_buffer) == 0) {
s->rx_out_of_buffer = rx_out_of_buffer;
}
/* get port statistics */
if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen) == 0) {
#define MLX5_GET_CTR(out, x) \
MLX5_GET64(query_vport_counter_out, out, x)
s->rx_error_packets =
MLX5_GET_CTR(out, received_errors.packets);
s->rx_error_bytes =
MLX5_GET_CTR(out, received_errors.octets);
s->tx_error_packets =
MLX5_GET_CTR(out, transmit_errors.packets);
s->tx_error_bytes =
MLX5_GET_CTR(out, transmit_errors.octets);
s->rx_unicast_packets =
MLX5_GET_CTR(out, received_eth_unicast.packets);
s->rx_unicast_bytes =
MLX5_GET_CTR(out, received_eth_unicast.octets);
s->tx_unicast_packets =
MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
s->tx_unicast_bytes =
MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
s->rx_multicast_packets =
MLX5_GET_CTR(out, received_eth_multicast.packets);
s->rx_multicast_bytes =
MLX5_GET_CTR(out, received_eth_multicast.octets);
s->tx_multicast_packets =
MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
s->tx_multicast_bytes =
MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
s->rx_broadcast_packets =
MLX5_GET_CTR(out, received_eth_broadcast.packets);
s->rx_broadcast_bytes =
MLX5_GET_CTR(out, received_eth_broadcast.octets);
s->tx_broadcast_packets =
MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
s->tx_broadcast_bytes =
MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
s->tx_packets = s->tx_unicast_packets +
s->tx_multicast_packets + s->tx_broadcast_packets;
s->tx_bytes = s->tx_unicast_bytes + s->tx_multicast_bytes +
s->tx_broadcast_bytes;
/* Update calculated offload counters */
s->tx_csum_offload = s->tx_packets - tx_offload_none;
s->rx_csum_good = s->rx_packets - s->rx_csum_none;
}
/* Get physical port counters */
mlx5e_update_pport_counters(priv);
s->tx_jumbo_packets =
priv->stats.port_stats_debug.tx_stat_p1519to2047octets +
priv->stats.port_stats_debug.tx_stat_p2048to4095octets +
priv->stats.port_stats_debug.tx_stat_p4096to8191octets +
priv->stats.port_stats_debug.tx_stat_p8192to10239octets;
#if (__FreeBSD_version < 1100000)
/* no get_counters interface in fbsd 10 */
ifp->if_ipackets = s->rx_packets;
ifp->if_ierrors = priv->stats.pport.in_range_len_errors +
priv->stats.pport.out_of_range_len +
priv->stats.pport.too_long_errors +
priv->stats.pport.check_seq_err +
priv->stats.pport.alignment_err;
ifp->if_iqdrops = s->rx_out_of_buffer;
ifp->if_opackets = s->tx_packets;
ifp->if_oerrors = priv->stats.port_stats_debug.out_discards;
ifp->if_snd.ifq_drops = s->tx_queue_dropped;
ifp->if_ibytes = s->rx_bytes;
ifp->if_obytes = s->tx_bytes;
ifp->if_collisions =
priv->stats.pport.collisions;
#endif
free_out:
kvfree(out);
/* Update diagnostics, if any */
if (priv->params_ethtool.diag_pci_enable ||
priv->params_ethtool.diag_general_enable) {
error = mlx5_core_get_diagnostics_full(mdev,
priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
if (error != 0)
mlx5_en_err(priv->ifp,
"Failed reading diagnostics: %d\n", error);
}
/* Update FEC, if any */
error = mlx5e_fec_update(priv);
if (error != 0 && error != EOPNOTSUPP) {
mlx5_en_err(priv->ifp,
"Updating FEC failed: %d\n", error);
}
}
static void
mlx5e_update_stats_work(struct work_struct *work)
{
struct mlx5e_priv *priv;
priv = container_of(work, struct mlx5e_priv, update_stats_work);
PRIV_LOCK(priv);
if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 &&
!test_bit(MLX5_INTERFACE_STATE_TEARDOWN, &priv->mdev->intf_state))
mlx5e_update_stats_locked(priv);
PRIV_UNLOCK(priv);
}
static void
mlx5e_update_stats(void *arg)
{
struct mlx5e_priv *priv = arg;
queue_work(priv->wq, &priv->update_stats_work);
callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}
static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
enum mlx5_dev_event event)
{
switch (event) {
case MLX5_DEV_EVENT_PORT_UP:
case MLX5_DEV_EVENT_PORT_DOWN:
queue_work(priv->wq, &priv->update_carrier_work);
break;
default:
break;
}
}
static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
enum mlx5_dev_event event, unsigned long param)
{
struct mlx5e_priv *priv = vpriv;
mtx_lock(&priv->async_events_mtx);
if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
mlx5e_async_event_sub(priv, event);
mtx_unlock(&priv->async_events_mtx);
}
static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}
static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
mtx_lock(&priv->async_events_mtx);
clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
mtx_unlock(&priv->async_events_mtx);
}
static void mlx5e_calibration_callout(void *arg);
static int mlx5e_calibration_duration = 20;
static int mlx5e_fast_calibration = 1;
static int mlx5e_normal_calibration = 30;
static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
"MLX5 timestamp calibration parameteres");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
&mlx5e_calibration_duration, 0,
"Duration of initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
&mlx5e_fast_calibration, 0,
"Recalibration interval during initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
&mlx5e_normal_calibration, 0,
"Recalibration interval during normal operations");
/*
* Starts or re-arms the calibration process.
*/
static void
mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
{
if (priv->clbr_done == 0)
mlx5e_calibration_callout(priv);
else
callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
mlx5e_calibration_duration ? mlx5e_fast_calibration :
mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
priv);
}
static uint64_t
mlx5e_timespec2usec(const struct timespec *ts)
{
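/* NB: despite its name, this returns nanoseconds, not microseconds. */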
return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec);
}
static uint64_t
mlx5e_hw_clock(struct mlx5e_priv *priv)
{
struct mlx5_init_seg *iseg;
uint32_t hw_h, hw_h1, hw_l;
iseg = priv->mdev->iseg;
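/*
 * Re-read the high word until it is stable so the 64-bit sample
 * is consistent across a low-word rollover.
 */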
do {
hw_h = ioread32be(&iseg->internal_timer_h);
hw_l = ioread32be(&iseg->internal_timer_l);
hw_h1 = ioread32be(&iseg->internal_timer_h);
} while (hw_h1 != hw_h);
return (((uint64_t)hw_h << 32) | hw_l);
}
/*
* The calibration callout, it runs either in the context of the
* thread which enables calibration, or in callout. It takes the
* snapshot of system and adapter clocks, then advances the pointers to
* the calibration point to allow rx path to read the consistent data
* lockless.
*/
static void
mlx5e_calibration_callout(void *arg)
{
struct mlx5e_priv *priv;
struct mlx5e_clbr_point *next, *curr;
struct timespec ts;
int clbr_curr_next;
priv = arg;
curr = &priv->clbr_points[priv->clbr_curr];
clbr_curr_next = priv->clbr_curr + 1;
if (clbr_curr_next >= nitems(priv->clbr_points))
clbr_curr_next = 0;
next = &priv->clbr_points[clbr_curr_next];
next->base_prev = curr->base_curr;
next->clbr_hw_prev = curr->clbr_hw_curr;
next->clbr_hw_curr = mlx5e_hw_clock(priv);
if (((next->clbr_hw_curr - curr->clbr_hw_curr) >> MLX5E_TSTMP_PREC) ==
0) {
if (priv->clbr_done != 0) {
mlx5_en_err(priv->ifp,
"HW failed tstmp frozen %#jx %#jx, disabling\n",
next->clbr_hw_curr, curr->clbr_hw_prev);
priv->clbr_done = 0;
}
atomic_store_rel_int(&curr->clbr_gen, 0);
return;
}
nanouptime(&ts);
next->base_curr = mlx5e_timespec2usec(&ts);
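/*
 * Retire the current point before publishing the next one; the
 * release fence and generation counter let lockless readers on
 * the RX path detect a concurrent update and retry.
 */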
curr->clbr_gen = 0;
atomic_thread_fence_rel();
priv->clbr_curr = clbr_curr_next;
atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));
if (priv->clbr_done < mlx5e_calibration_duration)
priv->clbr_done++;
mlx5e_reset_calibration_callout(priv);
}
static const char *mlx5e_rq_stats_desc[] = {
MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};
static int
mlx5e_create_rq(struct mlx5e_channel *c,
struct mlx5e_rq_param *param,
struct mlx5e_rq *rq)
{
struct mlx5e_priv *priv = c->priv;
struct mlx5_core_dev *mdev = priv->mdev;
char buffer[16];
void *rqc = param->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
int wq_sz;
int err;
int i;
u32 nsegs, wqe_sz;
err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
if (err != 0)
goto done;
/* Create DMA descriptor TAG */
if ((err = -bus_dma_tag_create(
bus_get_dma_tag(mdev->pdev->dev.bsddev),
1, /* any alignment */
0, /* no boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */
nsegs, /* nsegments */
nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */
0, /* flags */
NULL, NULL, /* lockfunc, lockfuncarg */
&rq->dma_tag)))
goto done;
err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
&rq->wq_ctrl);
if (err)
goto err_free_dma_tag;
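/* The doorbell record is shared by RQ and SQ; point at the receive half. */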
rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
if (err != 0)
goto err_rq_wq_destroy;
wq_sz = mlx5_wq_ll_get_size(&rq->wq);
err = -tcp_lro_init_args(&rq->lro, priv->ifp, TCP_LRO_ENTRIES, wq_sz);
if (err)
goto err_rq_wq_destroy;
rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
for (i = 0; i != wq_sz; i++) {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
int j;
err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
if (err != 0) {
while (i--)
bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
goto err_rq_mbuf_free;
}
/* set value for constant fields */
for (j = 0; j < rq->nsegs; j++)
wqe->data[j].lkey = cpu_to_be32(priv->mr.key);
}
INIT_WORK(&rq->dim.work, mlx5e_dim_work);
if (priv->params.rx_cq_moderation_mode < 2) {
rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
} else {
void *cqc = container_of(param,
struct mlx5e_channel_param, rq)->rx_cq.cqc;
switch (MLX5_GET(cqc, cqc, cq_period_mode)) {
case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
break;
case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
break;
default:
rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
break;
}
}
rq->ifp = priv->ifp;
rq->channel = c;
rq->ix = c->ix;
snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
rq->stats.arg);
return (0);
err_rq_mbuf_free:
free(rq->mbuf, M_MLX5EN);
tcp_lro_free(&rq->lro);
err_rq_wq_destroy:
mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
bus_dma_tag_destroy(rq->dma_tag);
done:
return (err);
}
static void
mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
int wq_sz;
int i;
/* destroy all sysctl nodes */
sysctl_ctx_free(&rq->stats.ctx);
/* free leftover LRO packets, if any */
tcp_lro_free(&rq->lro);
wq_sz = mlx5_wq_ll_get_size(&rq->wq);
for (i = 0; i != wq_sz; i++) {
if (rq->mbuf[i].mbuf != NULL) {
bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
m_freem(rq->mbuf[i].mbuf);
}
bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
}
free(rq->mbuf, M_MLX5EN);
mlx5_wq_destroy(&rq->wq_ctrl);
bus_dma_tag_destroy(rq->dma_tag);
}
static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
struct mlx5e_channel *c = rq->channel;
struct mlx5e_priv *priv = c->priv;
struct mlx5_core_dev *mdev = priv->mdev;
void *in;
void *rqc;
void *wq;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
sizeof(u64) * rq->wq_ctrl.buf.npages;
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
memcpy(rqc, param->rqc, sizeof(param->rqc));
MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
if (priv->counter_set_id >= 0)
MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
PAGE_SHIFT);
MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
mlx5_fill_page_array(&rq->wq_ctrl.buf,
(__be64 *) MLX5_ADDR_OF(wq, wq, pas));
err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
kvfree(in);
return (err);
}
static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
struct mlx5e_channel *c = rq->channel;
struct mlx5e_priv *priv = c->priv;
struct mlx5_core_dev *mdev = priv->mdev;
void *in;
void *rqc;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
MLX5_SET(modify_rq_in, in, rq_state, curr_state);
MLX5_SET(rqc, rqc, state, next_state);
err = mlx5_core_modify_rq(mdev, in, inlen);
kvfree(in);
return (err);
}
static void
mlx5e_disable_rq(struct mlx5e_rq *rq)
{
struct mlx5e_channel *c = rq->channel;
struct mlx5e_priv *priv = c->priv;
struct mlx5_core_dev *mdev = priv->mdev;
mlx5_core_destroy_rq(mdev, rq->rqn);
}
static int
mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
{
struct mlx5e_channel *c = rq->channel;
struct mlx5e_priv *priv = c->priv;
struct mlx5_wq_ll *wq = &rq->wq;
int i;
for (i = 0; i < 1000; i++) {
if (wq->cur_sz >= priv->params.min_rx_wqes)
return (0);
msleep(4);
}
return (-ETIMEDOUT);
}
static int
mlx5e_open_rq(struct mlx5e_channel *c,
struct mlx5e_rq_param *param,
struct mlx5e_rq *rq)
{
int err;
err = mlx5e_create_rq(c, param, rq);
if (err)
return (err);
err = mlx5e_enable_rq(rq, param);
if (err)
goto err_destroy_rq;
err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
if (err)
goto err_disable_rq;
c->rq.enabled = 1;
return (0);
err_disable_rq:
mlx5e_disable_rq(rq);
err_destroy_rq:
mlx5e_destroy_rq(rq);
return (err);
}
static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
mtx_lock(&rq->mtx);
rq->enabled = 0;
callout_stop(&rq->watchdog);
mtx_unlock(&rq->mtx);
mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}
static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
mlx5e_disable_rq(rq);
mlx5e_close_cq(&rq->cq);
cancel_work_sync(&rq->dim.work);
mlx5e_destroy_rq(rq);
}
void
mlx5e_free_sq_db(struct mlx5e_sq *sq)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
int x;
for (x = 0; x != wq_sz; x++) {
if (unlikely(sq->mbuf[x].p_refcount != NULL)) {
atomic_add_int(sq->mbuf[x].p_refcount, -1);
sq->mbuf[x].p_refcount = NULL;
}
if (sq->mbuf[x].mbuf != NULL) {
bus_dmamap_unload(sq->dma_tag, sq->mbuf[x].dma_map);
m_freem(sq->mbuf[x].mbuf);
}
bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
}
free(sq->mbuf, M_MLX5EN);
}
int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
int err;
int x;
sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
/* Create DMA descriptor MAPs */
for (x = 0; x != wq_sz; x++) {
err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
if (err != 0) {
while (x--)
bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
free(sq->mbuf, M_MLX5EN);
return (err);
}
}
return (0);
}
static const char *mlx5e_sq_stats_desc[] = {
MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};
void
mlx5e_update_sq_inline(struct mlx5e_sq *sq)
{
sq->max_inline = sq->priv->params.tx_max_inline;
sq->min_inline_mode = sq->priv->params.tx_min_inline_mode;
/*
* Check if trust state is DSCP or if inline mode is NONE which
* indicates CX-5 or newer hardware.
*/
if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP ||
sq->min_inline_mode == MLX5_INLINE_MODE_NONE) {
if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert))
sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN;
else
sq->min_insert_caps = MLX5E_INSERT_NON_VLAN;
} else {
sq->min_insert_caps = 0;
}
}
static void
mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
{
int i;
for (i = 0; i != priv->num_tc; i++) {
mtx_lock(&c->sq[i].lock);
mlx5e_update_sq_inline(&c->sq[i]);
mtx_unlock(&c->sq[i].lock);
}
}
void
mlx5e_refresh_sq_inline(struct mlx5e_priv *priv)
{
int i;
/* check if channels are closed */
if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
return;
for (i = 0; i < priv->params.num_channels; i++)
mlx5e_refresh_sq_inline_sub(priv, &priv->channel[i]);
}
static int
mlx5e_create_sq(struct mlx5e_channel *c,
int tc,
struct mlx5e_sq_param *param,
struct mlx5e_sq *sq)
{
struct mlx5e_priv *priv = c->priv;
struct mlx5_core_dev *mdev = priv->mdev;
char buffer[16];
void *sqc = param->sqc;
void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
int err;
/* Create DMA descriptor TAG */
if ((err = -bus_dma_tag_create(
bus_get_dma_tag(mdev->pdev->dev.bsddev),
1, /* any alignment */
0, /* no boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */
MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */
MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */
0, /* flags */
NULL, NULL, /* lockfunc, lockfuncarg */
&sq->dma_tag)))
goto done;
err = mlx5_alloc_map_uar(mdev, &sq->uar);
if (err)
goto err_free_dma_tag;
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
&sq->wq_ctrl);
if (err)
goto err_unmap_free_uar;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
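/* Half of the BlueFlame register size; sends alternate between the two halves. */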
sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
err = mlx5e_alloc_sq_db(sq);
if (err)
goto err_sq_wq_destroy;
sq->mkey_be = cpu_to_be32(priv->mr.key);
sq->ifp = priv->ifp;
sq->priv = priv;
sq->tc = tc;
mlx5e_update_sq_inline(sq);
snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
sq->stats.arg);
return (0);
err_sq_wq_destroy:
mlx5_wq_destroy(&sq->wq_ctrl);
err_unmap_free_uar:
mlx5_unmap_free_uar(mdev, &sq->uar);
err_free_dma_tag:
bus_dma_tag_destroy(sq->dma_tag);
done:
return (err);
}
static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
/* destroy all sysctl nodes */
sysctl_ctx_free(&sq->stats.ctx);
mlx5e_free_sq_db(sq);
mlx5_wq_destroy(&sq->wq_ctrl);
mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
bus_dma_tag_destroy(sq->dma_tag);
}
int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
int tis_num)
{
void *in;
void *sqc;
void *wq;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
sizeof(u64) * sq->wq_ctrl.buf.npages;
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
wq = MLX5_ADDR_OF(sqc, sqc, wq);
memcpy(sqc, param->sqc, sizeof(param->sqc));
MLX5_SET(sqc, sqc, tis_num_0, tis_num);
MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
MLX5_SET(sqc, sqc, tis_lst_sz, 1);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
MLX5_SET(wq, wq, uar_page, sq->uar.index);
MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
PAGE_SHIFT);
MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
mlx5_fill_page_array(&sq->wq_ctrl.buf,
(__be64 *) MLX5_ADDR_OF(wq, wq, pas));
err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);
kvfree(in);
return (err);
}
int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
void *in;
void *sqc;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
MLX5_SET(modify_sq_in, in, sq_state, curr_state);
MLX5_SET(sqc, sqc, state, next_state);
err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);
kvfree(in);
return (err);
}
void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{
mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
}
static int
mlx5e_open_sq(struct mlx5e_channel *c,
int tc,
struct mlx5e_sq_param *param,
struct mlx5e_sq *sq)
{
int err;
sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
/* ensure the TX completion event factor is not zero */
if (sq->cev_factor == 0)
sq->cev_factor = 1;
err = mlx5e_create_sq(c, tc, param, sq);
if (err)
return (err);
err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
if (err)
goto err_destroy_sq;
err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
if (err)
goto err_disable_sq;
WRITE_ONCE(sq->running, 1);
return (0);
err_disable_sq:
mlx5e_disable_sq(sq);
err_destroy_sq:
mlx5e_destroy_sq(sq);
return (err);
}
static void
mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
{
/* fill up remainder with NOPs */
while (sq->cev_counter != 0) {
while (!mlx5e_sq_has_room_for(sq, 1)) {
if (can_sleep != 0) {
mtx_unlock(&sq->lock);
msleep(4);
mtx_lock(&sq->lock);
} else {
goto done;
}
}
/* send a single NOP */
mlx5e_send_nop(sq, 1);
atomic_thread_fence_rel();
}
done:
/* Check if we need to write the doorbell */
if (likely(sq->doorbell.d64 != 0)) {
mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
sq->doorbell.d64 = 0;
}
}
void
mlx5e_sq_cev_timeout(void *arg)
{
struct mlx5e_sq *sq = arg;
mtx_assert(&sq->lock, MA_OWNED);
/* check next state */
switch (sq->cev_next_state) {
case MLX5E_CEV_STATE_SEND_NOPS:
/* fill TX ring with NOPs, if any */
mlx5e_sq_send_nops_locked(sq, 0);
/* check if completed */
if (sq->cev_counter == 0) {
sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
return;
}
break;
default:
/* send NOPs on next timeout */
sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
break;
}
/* restart timer */
callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
}
void
mlx5e_drain_sq(struct mlx5e_sq *sq)
{
int error;
struct mlx5_core_dev *mdev = sq->priv->mdev;
/*
* Check if already stopped.
*
* NOTE: Serialization of this function is managed by the
* caller ensuring the priv's state lock is locked or in case
* of rate limit support, a single thread manages drain and
* resume of SQs. The "running" variable can therefore safely
* be read without any locks.
*/
if (READ_ONCE(sq->running) == 0)
return;
/* don't put more packets into the SQ */
WRITE_ONCE(sq->running, 0);
/* serialize access to DMA rings */
mtx_lock(&sq->lock);
/* teardown event factor timer, if any */
sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
callout_stop(&sq->cev_callout);
/* send dummy NOPs in order to flush the transmit ring */
mlx5e_sq_send_nops_locked(sq, 1);
mtx_unlock(&sq->lock);
/* wait till SQ is empty or link is down */
mtx_lock(&sq->lock);
while (sq->cc != sq->pc &&
(sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
mtx_unlock(&sq->lock);
msleep(1);
sq->cq.mcq.comp(&sq->cq.mcq);
mtx_lock(&sq->lock);
}
mtx_unlock(&sq->lock);
/* error out remaining requests */
error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
if (error != 0) {
mlx5_en_err(sq->ifp,
"mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
}
/* wait till SQ is empty */
mtx_lock(&sq->lock);
while (sq->cc != sq->pc &&
mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
mtx_unlock(&sq->lock);
msleep(1);
sq->cq.mcq.comp(&sq->cq.mcq);
mtx_lock(&sq->lock);
}
mtx_unlock(&sq->lock);
}
static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{
mlx5e_drain_sq(sq);
mlx5e_disable_sq(sq);
mlx5e_destroy_sq(sq);
}
static int
mlx5e_create_cq(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param,
struct mlx5e_cq *cq,
mlx5e_cq_comp_t *comp,
int eq_ix)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_core_cq *mcq = &cq->mcq;
int eqn_not_used;
int irqn;
int err;
u32 i;
param->wq.buf_numa_node = 0;
param->wq.db_numa_node = 0;
err = mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);
if (err)
return (err);
err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
&cq->wq_ctrl);
if (err)
return (err);
mcq->cqe_sz = 64;
mcq->set_ci_db = cq->wq_ctrl.db.db;
mcq->arm_db = cq->wq_ctrl.db.db + 1;
*mcq->set_ci_db = 0;
*mcq->arm_db = 0;
mcq->vector = eq_ix;
mcq->comp = comp;
mcq->event = mlx5e_cq_error_event;
mcq->irqn = irqn;
mcq->uar = &priv->cq_uar;
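/*
 * Mark all CQEs as invalid and hardware-owned (opcode 0xf with the
 * owner bit set) so that stale entries are never processed by
 * software.
 */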
for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
cqe->op_own = 0xf1;
}
cq->priv = priv;
return (0);
}
static void
mlx5e_destroy_cq(struct mlx5e_cq *cq)
{
mlx5_wq_destroy(&cq->wq_ctrl);
}
static int
mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
{
struct mlx5_core_cq *mcq = &cq->mcq;
void *in;
void *cqc;
int inlen;
int irqn_not_used;
int eqn;
int err;
inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
sizeof(u64) * cq->wq_ctrl.buf.npages;
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
memcpy(cqc, param->cqc, sizeof(param->cqc));
mlx5_fill_page_array(&cq->wq_ctrl.buf,
(__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
MLX5_SET(cqc, cqc, c_eqn, eqn);
MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
PAGE_SHIFT);
MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
kvfree(in);
if (err)
return (err);
mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
return (0);
}
static void
mlx5e_disable_cq(struct mlx5e_cq *cq)
{
mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
}
int
mlx5e_open_cq(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param,
struct mlx5e_cq *cq,
mlx5e_cq_comp_t *comp,
int eq_ix)
{
int err;
err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
if (err)
return (err);
err = mlx5e_enable_cq(cq, param, eq_ix);
if (err)
goto err_destroy_cq;
return (0);
err_destroy_cq:
mlx5e_destroy_cq(cq);
return (err);
}
void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
mlx5e_disable_cq(cq);
mlx5e_destroy_cq(cq);
}
static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_channel_param *cparam)
{
int err;
int tc;
for (tc = 0; tc < c->priv->num_tc; tc++) {
/* open completion queue */
err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
&mlx5e_tx_cq_comp, c->ix);
if (err)
goto err_close_tx_cqs;
}
return (0);
err_close_tx_cqs:
for (tc--; tc >= 0; tc--)
mlx5e_close_cq(&c->sq[tc].cq);
return (err);
}
static void
mlx5e_close_tx_cqs(struct mlx5e_channel *c)
{
int tc;
for (tc = 0; tc < c->priv->num_tc; tc++)
mlx5e_close_cq(&c->sq[tc].cq);
}
static int
mlx5e_open_sqs(struct mlx5e_channel *c,
struct mlx5e_channel_param *cparam)
{
int err;
int tc;
for (tc = 0; tc < c->priv->num_tc; tc++) {
err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
if (err)
goto err_close_sqs;
}
return (0);
err_close_sqs:
for (tc--; tc >= 0; tc--)
mlx5e_close_sq_wait(&c->sq[tc]);
return (err);
}
static void
mlx5e_close_sqs_wait(struct mlx5e_channel *c)
{
int tc;
for (tc = 0; tc < c->priv->num_tc; tc++)
mlx5e_close_sq_wait(&c->sq[tc]);
}
static void
mlx5e_chan_static_init(struct mlx5e_priv *priv, struct mlx5e_channel *c, int ix)
{
int tc;
/* setup priv and channel number */
c->priv = priv;
c->ix = ix;
/* setup send tag */
c->tag.type = IF_SND_TAG_TYPE_UNLIMITED;
m_snd_tag_init(&c->tag.m_snd_tag, c->priv->ifp);
init_completion(&c->completion);
mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) {
struct mlx5e_sq *sq = c->sq + tc;
mtx_init(&sq->lock, "mlx5tx",
MTX_NETWORK_LOCK " TX", MTX_DEF);
mtx_init(&sq->comp_lock, "mlx5comp",
MTX_NETWORK_LOCK " TX", MTX_DEF);
callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
}
}
static void
mlx5e_chan_wait_for_completion(struct mlx5e_channel *c)
{
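/*
 * Drop the channel's own send tag reference; the completion below is
 * signalled once the last remaining reference has been released.
 */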
m_snd_tag_rele(&c->tag.m_snd_tag);
wait_for_completion(&c->completion);
}
static void
mlx5e_priv_wait_for_completion(struct mlx5e_priv *priv, const uint32_t channels)
{
uint32_t x;
for (x = 0; x != channels; x++)
mlx5e_chan_wait_for_completion(&priv->channel[x]);
}
static void
mlx5e_chan_static_destroy(struct mlx5e_channel *c)
{
int tc;
callout_drain(&c->rq.watchdog);
mtx_destroy(&c->rq.mtx);
for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) {
callout_drain(&c->sq[tc].cev_callout);
mtx_destroy(&c->sq[tc].lock);
mtx_destroy(&c->sq[tc].comp_lock);
}
}
static int
mlx5e_open_channel(struct mlx5e_priv *priv,
struct mlx5e_channel_param *cparam,
struct mlx5e_channel *c)
{
struct epoch_tracker et;
int i, err;
/* zero non-persistent data */
MLX5E_ZERO(&c->rq, mlx5e_rq_zero_start);
for (i = 0; i != priv->num_tc; i++)
MLX5E_ZERO(&c->sq[i], mlx5e_sq_zero_start);
/* open transmit completion queue */
err = mlx5e_open_tx_cqs(c, cparam);
if (err)
goto err_free;
/* open receive completion queue */
err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
&mlx5e_rx_cq_comp, c->ix);
if (err)
goto err_close_tx_cqs;
err = mlx5e_open_sqs(c, cparam);
if (err)
goto err_close_rx_cq;
err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
if (err)
goto err_close_sqs;
/* poll receive queue initially */
NET_EPOCH_ENTER(et);
c->rq.cq.mcq.comp(&c->rq.cq.mcq);
NET_EPOCH_EXIT(et);
return (0);
err_close_sqs:
mlx5e_close_sqs_wait(c);
err_close_rx_cq:
mlx5e_close_cq(&c->rq.cq);
err_close_tx_cqs:
mlx5e_close_tx_cqs(c);
err_free:
return (err);
}
static void
mlx5e_close_channel(struct mlx5e_channel *c)
{
mlx5e_close_rq(&c->rq);
}
static void
mlx5e_close_channel_wait(struct mlx5e_channel *c)
{
mlx5e_close_rq_wait(&c->rq);
mlx5e_close_sqs_wait(c);
mlx5e_close_tx_cqs(c);
}
static int
mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
{
u32 r, n;
r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
if (r > MJUM16BYTES)
return (-ENOMEM);
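/* round up to the next supported mbuf cluster size */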
if (r > MJUM9BYTES)
r = MJUM16BYTES;
else if (r > MJUMPAGESIZE)
r = MJUM9BYTES;
else if (r > MCLBYTES)
r = MJUMPAGESIZE;
else
r = MCLBYTES;
/*
* n + 1 must be a power of two, because the stride size must be.
* The stride size is 16 * (n + 1), because the first segment is
* used for control.
*/
for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
;
if (n > MLX5E_MAX_BUSDMA_RX_SEGS)
return (-ENOMEM);
*wqe_sz = r;
*nsegs = n;
return (0);
}
static void
mlx5e_build_rq_param(struct mlx5e_priv *priv,
struct mlx5e_rq_param *param)
{
void *rqc = param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
u32 wqe_sz, nsegs;
mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
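/*
 * The WQE stride, one control segment plus "nsegs" data segments,
 * is a power of two by construction in mlx5e_get_wqe_sz().
 */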
MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
nsegs * sizeof(struct mlx5_wqe_data_seg)));
MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
MLX5_SET(wq, wq, pd, priv->pdn);
param->wq.buf_numa_node = 0;
param->wq.db_numa_node = 0;
param->wq.linear = 1;
}
static void
mlx5e_build_sq_param(struct mlx5e_priv *priv,
struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
MLX5_SET(wq, wq, pd, priv->pdn);
param->wq.buf_numa_node = 0;
param->wq.db_numa_node = 0;
param->wq.linear = 1;
}
static void
mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param)
{
void *cqc = param->cqc;
MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
}
static void
mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr)
{
*ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE);
/* apply LRO restrictions */
if (priv->params.hw_lro_en &&
ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) {
ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO;
}
}
static void
mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param)
{
struct net_dim_cq_moder curr;
void *cqc = param->cqc;
/*
* We use MLX5_CQE_FORMAT_HASH because the RX hash mini CQE
* format is more beneficial for the FreeBSD use case.
*
* Adding support for MLX5_CQE_FORMAT_CSUM will require changes
* in mlx5e_decompress_cqe.
*/
if (priv->params.cqe_zipping_en) {
MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_HASH);
MLX5_SET(cqc, cqc, cqe_compression_en, 1);
}
MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
switch (priv->params.rx_cq_moderation_mode) {
case 0:
MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
break;
case 1:
MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
else
MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
break;
case 2:
mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr);
MLX5_SET(cqc, cqc, cq_period, curr.usec);
MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
break;
case 3:
mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr);
MLX5_SET(cqc, cqc, cq_period, curr.usec);
MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
else
MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
break;
default:
break;
}
mlx5e_dim_build_cq_param(priv, param);
mlx5e_build_common_cq_param(priv, param);
}
static void
mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param)
{
void *cqc = param->cqc;
MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
switch (priv->params.tx_cq_moderation_mode) {
case 0:
MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
break;
default:
if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
else
MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
break;
}
mlx5e_build_common_cq_param(priv, param);
}
static void
mlx5e_build_channel_param(struct mlx5e_priv *priv,
struct mlx5e_channel_param *cparam)
{
memset(cparam, 0, sizeof(*cparam));
mlx5e_build_rq_param(priv, &cparam->rq);
mlx5e_build_sq_param(priv, &cparam->sq);
mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
}
static int
mlx5e_open_channels(struct mlx5e_priv *priv)
{
struct mlx5e_channel_param *cparam;
int err;
int i;
int j;
cparam = malloc(sizeof(*cparam), M_MLX5EN, M_WAITOK);
mlx5e_build_channel_param(priv, cparam);
for (i = 0; i < priv->params.num_channels; i++) {
err = mlx5e_open_channel(priv, cparam, &priv->channel[i]);
if (err)
goto err_close_channels;
}
for (j = 0; j < priv->params.num_channels; j++) {
err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j].rq);
if (err)
goto err_close_channels;
}
free(cparam, M_MLX5EN);
return (0);
err_close_channels:
while (i--) {
mlx5e_close_channel(&priv->channel[i]);
mlx5e_close_channel_wait(&priv->channel[i]);
}
free(cparam, M_MLX5EN);
return (err);
}
static void
mlx5e_close_channels(struct mlx5e_priv *priv)
{
int i;
for (i = 0; i < priv->params.num_channels; i++)
mlx5e_close_channel(&priv->channel[i]);
for (i = 0; i < priv->params.num_channels; i++)
mlx5e_close_channel_wait(&priv->channel[i]);
}
static int
mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
{
if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
uint8_t cq_mode;
switch (priv->params.tx_cq_moderation_mode) {
case 0:
case 2:
cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
break;
default:
cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
break;
}
return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
priv->params.tx_cq_moderation_usec,
priv->params.tx_cq_moderation_pkts,
cq_mode));
}
return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
priv->params.tx_cq_moderation_usec,
priv->params.tx_cq_moderation_pkts));
}
static int
mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
{
if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
uint8_t cq_mode;
uint8_t dim_mode;
int retval;
switch (priv->params.rx_cq_moderation_mode) {
case 0:
case 2:
cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
break;
default:
cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
break;
}
/* tear down dynamic interrupt moderation */
mtx_lock(&rq->mtx);
rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
mtx_unlock(&rq->mtx);
/* wait for dynamic interrupt moderation work task, if any */
cancel_work_sync(&rq->dim.work);
if (priv->params.rx_cq_moderation_mode >= 2) {
struct net_dim_cq_moder curr;
mlx5e_get_default_profile(priv, dim_mode, &curr);
retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
curr.usec, curr.pkts, cq_mode);
/* set dynamic interrupt moderation mode and zero defaults */
mtx_lock(&rq->mtx);
rq->dim.mode = dim_mode;
rq->dim.state = 0;
rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE;
mtx_unlock(&rq->mtx);
} else {
retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
priv->params.rx_cq_moderation_usec,
priv->params.rx_cq_moderation_pkts,
cq_mode);
}
return (retval);
}
return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
priv->params.rx_cq_moderation_usec,
priv->params.rx_cq_moderation_pkts));
}
static int
mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
{
int err;
int i;
err = mlx5e_refresh_rq_params(priv, &c->rq);
if (err)
goto done;
for (i = 0; i != priv->num_tc; i++) {
err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
if (err)
goto done;
}
done:
return (err);
}
int
mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
{
int i;
/* check if channels are closed */
if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
return (EINVAL);
for (i = 0; i < priv->params.num_channels; i++) {
int err;
err = mlx5e_refresh_channel_params_sub(priv, &priv->channel[i]);
if (err)
return (err);
}
return (0);
}
static int
mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
{
struct mlx5_core_dev *mdev = priv->mdev;
u32 in[MLX5_ST_SZ_DW(create_tis_in)];
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
memset(in, 0, sizeof(in));
MLX5_SET(tisc, tisc, prio, tc);
MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
}
static void
mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
{
mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
}
static int
mlx5e_open_tises(struct mlx5e_priv *priv)
{
int num_tc = priv->num_tc;
int err;
int tc;
for (tc = 0; tc < num_tc; tc++) {
err = mlx5e_open_tis(priv, tc);
if (err)
goto err_close_tises;
}
return (0);
err_close_tises:
for (tc--; tc >= 0; tc--)
mlx5e_close_tis(priv, tc);
return (err);
}
static void
mlx5e_close_tises(struct mlx5e_priv *priv)
{
int num_tc = priv->num_tc;
int tc;
for (tc = 0; tc < num_tc; tc++)
mlx5e_close_tis(priv, tc);
}
static int
mlx5e_open_rqt(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
u32 *in;
u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
void *rqtc;
int inlen;
int err;
int sz;
int i;
sz = 1 << priv->params.rx_hash_log_tbl_sz;
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
for (i = 0; i < sz; i++) {
int ix = i;
#ifdef RSS
ix = rss_get_indirection_to_bucket(ix);
#endif
/* ensure we don't overflow */
ix %= priv->params.num_channels;
/* apply receive side scaling stride, if any */
ix -= ix % (int)priv->params.channels_rsss;
MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix].rq.rqn);
}
MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
if (!err)
priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
kvfree(in);
return (err);
}
static void
mlx5e_close_rqt(struct mlx5e_priv *priv)
{
u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
}
static void
mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
{
void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
__be32 *hkey;
MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
#define ROUGH_MAX_L2_L3_HDR_SZ 256
#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP)
#define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP |\
MLX5_HASH_FIELD_SEL_L4_SPORT |\
MLX5_HASH_FIELD_SEL_L4_DPORT)
#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP |\
MLX5_HASH_FIELD_SEL_IPSEC_SPI)
if (priv->params.hw_lro_en) {
MLX5_SET(tirc, tirc, lro_enable_mask,
MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
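/*
 * The maximum LRO message size field appears to be expressed in
 * units of 256 bytes, hence the shift by eight below.
 */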
MLX5_SET(tirc, tirc, lro_max_msg_sz,
(priv->params.lro_wqe_sz -
ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
/* TODO: add the option to choose timer value dynamically */
MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
MLX5_CAP_ETH(priv->mdev,
lro_timer_supported_periods[2]));
}
/* setup parameters for hashing TIR type, if any */
switch (tt) {
case MLX5E_TT_ANY:
MLX5_SET(tirc, tirc, disp_type,
MLX5_TIRC_DISP_TYPE_DIRECT);
MLX5_SET(tirc, tirc, inline_rqn,
priv->channel[0].rq.rqn);
break;
default:
MLX5_SET(tirc, tirc, disp_type,
MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table,
priv->rqtn);
MLX5_SET(tirc, tirc, rx_hash_fn,
MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
#ifdef RSS
/*
* The FreeBSD RSS implementation does not currently
* support symmetric Toeplitz hashes:
*/
MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
rss_getkey((uint8_t *)hkey);
#else
MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
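/* static default key for symmetric Toeplitz hashing */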
hkey[0] = cpu_to_be32(0xD181C62C);
hkey[1] = cpu_to_be32(0xF7F4DB5B);
hkey[2] = cpu_to_be32(0x1983A2FC);
hkey[3] = cpu_to_be32(0x943E1ADB);
hkey[4] = cpu_to_be32(0xD9389E6B);
hkey[5] = cpu_to_be32(0xD1039C2C);
hkey[6] = cpu_to_be32(0xA74499AD);
hkey[7] = cpu_to_be32(0x593D56D9);
hkey[8] = cpu_to_be32(0xF3253C06);
hkey[9] = cpu_to_be32(0x2ADC1FFC);
#endif
break;
}
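/* select which packet fields feed the RX hash for this TIR type */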
switch (tt) {
case MLX5E_TT_IPV4_TCP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
} else
#endif
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_ALL);
break;
case MLX5E_TT_IPV6_TCP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
} else
#endif
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_ALL);
break;
case MLX5E_TT_IPV4_UDP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
} else
#endif
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_ALL);
break;
case MLX5E_TT_IPV6_UDP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
} else
#endif
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_ALL);
break;
case MLX5E_TT_IPV4_IPSEC_AH:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP_IPSEC_SPI);
break;
case MLX5E_TT_IPV6_IPSEC_AH:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP_IPSEC_SPI);
break;
case MLX5E_TT_IPV4_IPSEC_ESP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP_IPSEC_SPI);
break;
case MLX5E_TT_IPV6_IPSEC_ESP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP_IPSEC_SPI);
break;
case MLX5E_TT_IPV4:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
break;
case MLX5E_TT_IPV6:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
break;
default:
break;
}
}
static int
mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
{
struct mlx5_core_dev *mdev = priv->mdev;
u32 *in;
void *tirc;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(create_tir_in);
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
mlx5e_build_tir_ctx(priv, tirc, tt);
err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
kvfree(in);
return (err);
}
static void
mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
{
mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
}
static int
mlx5e_open_tirs(struct mlx5e_priv *priv)
{
int err;
int i;
for (i = 0; i < MLX5E_NUM_TT; i++) {
err = mlx5e_open_tir(priv, i);
if (err)
goto err_close_tirs;
}
return (0);
err_close_tirs:
for (i--; i >= 0; i--)
mlx5e_close_tir(priv, i);
return (err);
}
static void
mlx5e_close_tirs(struct mlx5e_priv *priv)
{
int i;
for (i = 0; i < MLX5E_NUM_TT; i++)
mlx5e_close_tir(priv, i);
}
/*
* SW MTU does not include headers,
* HW MTU includes all headers and checksums.
*/
static int
mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
{
struct mlx5e_priv *priv = ifp->if_softc;
struct mlx5_core_dev *mdev = priv->mdev;
int hw_mtu;
int err;
hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
err = mlx5_set_port_mtu(mdev, hw_mtu);
if (err) {
mlx5_en_err(ifp, "mlx5_set_port_mtu failed setting %d, err=%d\n",
sw_mtu, err);
return (err);
}
/* Update vport context MTU */
err = mlx5_set_vport_mtu(mdev, hw_mtu);
if (err) {
mlx5_en_err(ifp,
"Failed updating vport context with MTU size, err=%d\n",
err);
}
ifp->if_mtu = sw_mtu;
err = mlx5_query_vport_mtu(mdev, &hw_mtu);
if (err || !hw_mtu) {
/* fallback to port oper mtu */
err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
}
if (err) {
mlx5_en_err(ifp,
"Query port MTU, after setting new MTU value, failed\n");
return (err);
} else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
err = -E2BIG;
mlx5_en_err(ifp,
"Port MTU %d is smaller than ifp mtu %d\n",
hw_mtu, sw_mtu);
} else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
err = -EINVAL;
mlx5_en_err(ifp,
"Port MTU %d is bigger than ifp mtu %d\n",
hw_mtu, sw_mtu);
}
priv->params_ethtool.hw_mtu = hw_mtu;
/* compute the most significant bit by clearing the lower set bits */
while (hw_mtu & (hw_mtu - 1))
hw_mtu &= (hw_mtu - 1);
priv->params_ethtool.hw_mtu_msb = hw_mtu;
return (err);
}
int
mlx5e_open_locked(struct ifnet *ifp)
{
struct mlx5e_priv *priv = ifp->if_softc;
int err;
u16 set_id;
/* check if already opened */
if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
return (0);
#ifdef RSS
if (rss_getnumbuckets() > priv->params.num_channels) {
mlx5_en_info(ifp,
"NOTE: There are more RSS buckets(%u) than channels(%u) available\n",
rss_getnumbuckets(), priv->params.num_channels);
}
#endif
err = mlx5e_open_tises(priv);
if (err) {
mlx5_en_err(ifp, "mlx5e_open_tises failed, %d\n", err);
return (err);
}
err = mlx5_vport_alloc_q_counter(priv->mdev,
MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
if (err) {
mlx5_en_err(priv->ifp,
"mlx5_vport_alloc_q_counter failed: %d\n", err);
goto err_close_tises;
}
/* store counter set ID */
priv->counter_set_id = set_id;
err = mlx5e_open_channels(priv);
if (err) {
mlx5_en_err(ifp,
"mlx5e_open_channels failed, %d\n", err);
goto err_dalloc_q_counter;
}
err = mlx5e_open_rqt(priv);
if (err) {
mlx5_en_err(ifp, "mlx5e_open_rqt failed, %d\n", err);
goto err_close_channels;
}
err = mlx5e_open_tirs(priv);
if (err) {
mlx5_en_err(ifp, "mlx5e_open_tir failed, %d\n", err);
goto err_close_rqt;
}
err = mlx5e_open_flow_table(priv);
if (err) {
mlx5_en_err(ifp,
"mlx5e_open_flow_table failed, %d\n", err);
goto err_close_tirs;
}
err = mlx5e_add_all_vlan_rules(priv);
if (err) {
mlx5_en_err(ifp,
"mlx5e_add_all_vlan_rules failed, %d\n", err);
goto err_close_flow_table;
}
set_bit(MLX5E_STATE_OPENED, &priv->state);
mlx5e_update_carrier(priv);
mlx5e_set_rx_mode_core(priv);
return (0);
err_close_flow_table:
mlx5e_close_flow_table(priv);
err_close_tirs:
mlx5e_close_tirs(priv);
err_close_rqt:
mlx5e_close_rqt(priv);
err_close_channels:
mlx5e_close_channels(priv);
err_dalloc_q_counter:
mlx5_vport_dealloc_q_counter(priv->mdev,
MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
err_close_tises:
mlx5e_close_tises(priv);
return (err);
}
static void
mlx5e_open(void *arg)
{
struct mlx5e_priv *priv = arg;
PRIV_LOCK(priv);
if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
mlx5_en_err(priv->ifp,
"Setting port status to up failed\n");
mlx5e_open_locked(priv->ifp);
priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
PRIV_UNLOCK(priv);
}
int
mlx5e_close_locked(struct ifnet *ifp)
{
struct mlx5e_priv *priv = ifp->if_softc;
/* check if already closed */
if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
return (0);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
mlx5e_set_rx_mode_core(priv);
mlx5e_del_all_vlan_rules(priv);
if_link_state_change(priv->ifp, LINK_STATE_DOWN);
mlx5e_close_flow_table(priv);
mlx5e_close_tirs(priv);
mlx5e_close_rqt(priv);
mlx5e_close_channels(priv);
mlx5_vport_dealloc_q_counter(priv->mdev,
MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
mlx5e_close_tises(priv);
return (0);
}
#if (__FreeBSD_version >= 1100000)
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
struct mlx5e_priv *priv = ifp->if_softc;
u64 retval;
/* PRIV_LOCK(priv); XXX not allowed */
switch (cnt) {
case IFCOUNTER_IPACKETS:
retval = priv->stats.vport.rx_packets;
break;
case IFCOUNTER_IERRORS:
retval = priv->stats.pport.in_range_len_errors +
priv->stats.pport.out_of_range_len +
priv->stats.pport.too_long_errors +
priv->stats.pport.check_seq_err +
priv->stats.pport.alignment_err;
break;
case IFCOUNTER_IQDROPS:
retval = priv->stats.vport.rx_out_of_buffer;
break;
case IFCOUNTER_OPACKETS:
retval = priv->stats.vport.tx_packets;
break;
case IFCOUNTER_OERRORS:
retval = priv->stats.port_stats_debug.out_discards;
break;
case IFCOUNTER_IBYTES:
retval = priv->stats.vport.rx_bytes;
break;
case IFCOUNTER_OBYTES:
retval = priv->stats.vport.tx_bytes;
break;
case IFCOUNTER_IMCASTS:
retval = priv->stats.vport.rx_multicast_packets;
break;
case IFCOUNTER_OMCASTS:
retval = priv->stats.vport.tx_multicast_packets;
break;
case IFCOUNTER_OQDROPS:
retval = priv->stats.vport.tx_queue_dropped;
break;
case IFCOUNTER_COLLISIONS:
retval = priv->stats.pport.collisions;
break;
default:
retval = if_get_counter_default(ifp, cnt);
break;
}
/* PRIV_UNLOCK(priv); XXX not allowed */
return (retval);
}
#endif
static void
mlx5e_set_rx_mode(struct ifnet *ifp)
{
struct mlx5e_priv *priv = ifp->if_softc;
queue_work(priv->wq, &priv->set_rx_mode_work);
}
static int
mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
struct mlx5e_priv *priv;
struct ifreq *ifr;
struct ifi2creq i2c;
int error = 0;
int mask = 0;
int size_read = 0;
int module_status;
int module_num;
int max_mtu;
uint8_t read_addr;
priv = ifp->if_softc;
/* check if detaching */
if (priv == NULL || priv->gone != 0)
return (ENXIO);
switch (command) {
case SIOCSIFMTU:
ifr = (struct ifreq *)data;
PRIV_LOCK(priv);
mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
int was_opened;
was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
if (was_opened)
mlx5e_close_locked(ifp);
/* set new MTU */
mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
if (was_opened)
mlx5e_open_locked(ifp);
} else {
error = EINVAL;
mlx5_en_err(ifp,
"Invalid MTU value. Min val: %d, Max val: %d\n",
MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
}
PRIV_UNLOCK(priv);
break;
case SIOCSIFFLAGS:
if ((ifp->if_flags & IFF_UP) &&
(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
mlx5e_set_rx_mode(ifp);
break;
}
PRIV_LOCK(priv);
if (ifp->if_flags & IFF_UP) {
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
mlx5e_open_locked(ifp);
ifp->if_drv_flags |= IFF_DRV_RUNNING;
mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
}
} else {
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
mlx5_set_port_status(priv->mdev,
MLX5_PORT_DOWN);
if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
mlx5e_close_locked(ifp);
mlx5e_update_carrier(priv);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
}
}
PRIV_UNLOCK(priv);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
mlx5e_set_rx_mode(ifp);
break;
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
case SIOCGIFXMEDIA:
ifr = (struct ifreq *)data;
error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
break;
case SIOCSIFCAP:
ifr = (struct ifreq *)data;
PRIV_LOCK(priv);
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
if (mask & IFCAP_TXCSUM) {
ifp->if_capenable ^= IFCAP_TXCSUM;
ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
if (IFCAP_TSO4 & ifp->if_capenable &&
!(IFCAP_TXCSUM & ifp->if_capenable)) {
ifp->if_capenable &= ~IFCAP_TSO4;
ifp->if_hwassist &= ~CSUM_IP_TSO;
mlx5_en_err(ifp,
"tso4 disabled due to -txcsum.\n");
}
}
if (mask & IFCAP_TXCSUM_IPV6) {
ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
if (IFCAP_TSO6 & ifp->if_capenable &&
!(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
ifp->if_capenable &= ~IFCAP_TSO6;
ifp->if_hwassist &= ~CSUM_IP6_TSO;
mlx5_en_err(ifp,
"tso6 disabled due to -txcsum6.\n");
}
}
if (mask & IFCAP_NOMAP)
ifp->if_capenable ^= IFCAP_NOMAP;
if (mask & IFCAP_TXTLS4)
ifp->if_capenable ^= IFCAP_TXTLS4;
if (mask & IFCAP_TXTLS6)
ifp->if_capenable ^= IFCAP_TXTLS6;
if (mask & IFCAP_RXCSUM)
ifp->if_capenable ^= IFCAP_RXCSUM;
if (mask & IFCAP_RXCSUM_IPV6)
ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
if (mask & IFCAP_TSO4) {
if (!(IFCAP_TSO4 & ifp->if_capenable) &&
!(IFCAP_TXCSUM & ifp->if_capenable)) {
mlx5_en_err(ifp, "enable txcsum first.\n");
error = EAGAIN;
goto out;
}
ifp->if_capenable ^= IFCAP_TSO4;
ifp->if_hwassist ^= CSUM_IP_TSO;
}
if (mask & IFCAP_TSO6) {
if (!(IFCAP_TSO6 & ifp->if_capenable) &&
!(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
mlx5_en_err(ifp, "enable txcsum6 first.\n");
error = EAGAIN;
goto out;
}
ifp->if_capenable ^= IFCAP_TSO6;
ifp->if_hwassist ^= CSUM_IP6_TSO;
}
if (mask & IFCAP_VLAN_HWFILTER) {
if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
mlx5e_disable_vlan_filter(priv);
else
mlx5e_enable_vlan_filter(priv);
ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
}
if (mask & IFCAP_VLAN_HWTAGGING)
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
if (mask & IFCAP_WOL_MAGIC)
ifp->if_capenable ^= IFCAP_WOL_MAGIC;
VLAN_CAPABILITIES(ifp);
/* turning off LRO also turns off HW LRO, if it is enabled */
if (mask & IFCAP_LRO) {
int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
bool need_restart = false;
ifp->if_capenable ^= IFCAP_LRO;
/* figure out if updating HW LRO is needed */
if (!(ifp->if_capenable & IFCAP_LRO)) {
if (priv->params.hw_lro_en) {
priv->params.hw_lro_en = false;
need_restart = true;
}
} else {
if (priv->params.hw_lro_en == false &&
priv->params_ethtool.hw_lro != 0) {
priv->params.hw_lro_en = true;
need_restart = true;
}
}
if (was_opened && need_restart) {
mlx5e_close_locked(ifp);
mlx5e_open_locked(ifp);
}
}
if (mask & IFCAP_HWRXTSTMP) {
ifp->if_capenable ^= IFCAP_HWRXTSTMP;
if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
if (priv->clbr_done == 0)
mlx5e_reset_calibration_callout(priv);
} else {
callout_drain(&priv->tstmp_clbr);
priv->clbr_done = 0;
}
}
out:
PRIV_UNLOCK(priv);
break;
case SIOCGI2C:
ifr = (struct ifreq *)data;
/*
* Copy from the user-space address ifr_data to the
* kernel-space address i2c
*/
error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
if (error)
break;
if (i2c.len > sizeof(i2c.data)) {
error = EINVAL;
break;
}
PRIV_LOCK(priv);
/* Get module_num which is required for the query_eeprom */
error = mlx5_query_module_num(priv->mdev, &module_num);
if (error) {
mlx5_en_err(ifp,
"Query module num failed, eeprom reading is not supported\n");
error = EINVAL;
goto err_i2c;
}
/* Check if module is present before doing an access */
module_status = mlx5_query_module_status(priv->mdev, module_num);
if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) {
error = EINVAL;
goto err_i2c;
}
/*
* Currently 0xA0 and 0xA2 are the only addresses permitted.
* The internal conversion is as follows:
*/
if (i2c.dev_addr == 0xA0)
read_addr = MLX5_I2C_ADDR_LOW;
else if (i2c.dev_addr == 0xA2)
read_addr = MLX5_I2C_ADDR_HIGH;
else {
mlx5_en_err(ifp,
"Query eeprom failed, Invalid Address: %X\n",
i2c.dev_addr);
error = EINVAL;
goto err_i2c;
}
error = mlx5_query_eeprom(priv->mdev,
read_addr, MLX5_EEPROM_LOW_PAGE,
(uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
(uint32_t *)i2c.data, &size_read);
if (error) {
mlx5_en_err(ifp,
"Query eeprom failed, eeprom reading is not supported\n");
error = EINVAL;
goto err_i2c;
}
if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
error = mlx5_query_eeprom(priv->mdev,
read_addr, MLX5_EEPROM_LOW_PAGE,
(uint32_t)(i2c.offset + size_read),
(uint32_t)(i2c.len - size_read), module_num,
(uint32_t *)(i2c.data + size_read), &size_read);
}
if (error) {
mlx5_en_err(ifp,
"Query eeprom failed, eeprom reading is not supported\n");
error = EINVAL;
goto err_i2c;
}
error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
err_i2c:
PRIV_UNLOCK(priv);
break;
default:
error = ether_ioctl(ifp, command, data);
break;
}
return (error);
}
static int
mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
{
/*
* TODO: uncomment once FW really sets all these bits if
* (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
* !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
* !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
* -ENOTSUPP;
*/
/* TODO: add more must-have features */
if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
return (-ENODEV);
return (0);
}
static u16
mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
{
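/*
 * Presumably the blueflame register is split into two buffers,
 * hence the division by two; the send WQE overhead is subtracted
 * below to obtain the usable inline size.
 */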
uint32_t bf_buf_size = (1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U;
bf_buf_size -= sizeof(struct mlx5e_tx_wqe) - 2;
/* verify against driver hardware limit */
if (bf_buf_size > MLX5E_MAX_TX_INLINE)
bf_buf_size = MLX5E_MAX_TX_INLINE;
return (bf_buf_size);
}
static int
mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
struct mlx5e_priv *priv,
int num_comp_vectors)
{
int err;
/*
* TODO: Consider link speed for setting "log_sq_size",
* "log_rq_size" and "cq_moderation_xxx":
*/
priv->params.log_sq_size =
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
priv->params.log_rq_size =
MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
priv->params.rx_cq_moderation_usec =
MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
priv->params.rx_cq_moderation_mode =
MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
priv->params.rx_cq_moderation_pkts =
MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
priv->params.tx_cq_moderation_usec =
MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
priv->params.tx_cq_moderation_pkts =
MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
priv->params.min_rx_wqes =
MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
priv->params.rx_hash_log_tbl_sz =
(order_base_2(num_comp_vectors) >
MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
order_base_2(num_comp_vectors) :
MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
priv->params.num_tc = 1;
priv->params.default_vlan_prio = 0;
priv->counter_set_id = -1;
priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
if (err)
return (err);
/*
* HW LRO is currently defaulted to off. Once that changes, we
* will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
*/
priv->params.hw_lro_en = false;
priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
/*
* CQE zipping is currently defaulted to off. Once that changes,
* we will consider the HW capability:
* "!!MLX5_CAP_GEN(mdev, cqe_compression)"
*/
priv->params.cqe_zipping_en = false;
priv->mdev = mdev;
priv->params.num_channels = num_comp_vectors;
priv->params.channels_rsss = 1;
priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
priv->queue_mapping_channel_mask =
roundup_pow_of_two(num_comp_vectors) - 1;
priv->num_tc = priv->params.num_tc;
priv->default_vlan_prio = priv->params.default_vlan_prio;
INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
return (0);
}
static int
mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
struct mlx5_core_mr *mkey)
{
struct ifnet *ifp = priv->ifp;
struct mlx5_core_dev *mdev = priv->mdev;
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
void *mkc;
u32 *in;
int err;
in = mlx5_vzalloc(inlen);
if (in == NULL) {
mlx5_en_err(ifp, "failed to allocate inbox\n");
return (-ENOMEM);
}
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
MLX5_SET(mkc, mkc, umr_en, 1); /* used by HW TLS */
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, pd, pdn);
MLX5_SET(mkc, mkc, length64, 1);
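/* the magic QPN 0xffffff means the key is not bound to a single QP */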
MLX5_SET(mkc, mkc, qpn, 0xffffff);
err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
if (err)
mlx5_en_err(ifp, "mlx5_core_create_mkey failed, %d\n",
err);
kvfree(in);
return (err);
}
static const char *mlx5e_vport_stats_desc[] = {
MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
};
static const char *mlx5e_pport_stats_desc[] = {
MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
};
static void
mlx5e_priv_static_init(struct mlx5e_priv *priv, const uint32_t channels)
{
uint32_t x;
mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
sx_init(&priv->state_lock, "mlx5state");
callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
for (x = 0; x != channels; x++)
mlx5e_chan_static_init(priv, &priv->channel[x], x);
}
static void
mlx5e_priv_static_destroy(struct mlx5e_priv *priv, const uint32_t channels)
{
uint32_t x;
for (x = 0; x != channels; x++)
mlx5e_chan_static_destroy(&priv->channel[x]);
callout_drain(&priv->watchdog);
mtx_destroy(&priv->async_events_mtx);
sx_destroy(&priv->state_lock);
}
static int
sysctl_firmware(SYSCTL_HANDLER_ARGS)
{
/*
* The string format is "%d.%d.%d".
* fw_rev_{maj,min,sub} each return a u16; 2^16 = 65536 needs at
* most 5 chars to store. Adding two "." separators and the
* terminating NUL means we need at most 18 (5*3 + 3) chars.
*/
char fw[18];
struct mlx5e_priv *priv = arg1;
int error;
snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
fw_rev_sub(priv->mdev));
error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
return (error);
}
static void
mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
{
int i;
for (i = 0; i < ch->priv->num_tc; i++)
mlx5e_drain_sq(&ch->sq[i]);
}
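/*
 * Reset the doorbell record by ringing a NOP doorbell, so that the
 * hardware sees a consistent doorbell value before the SQ is moved
 * back to the ready state.
 */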
static void
mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
{
sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
sq->doorbell.d64 = 0;
}
void
mlx5e_resume_sq(struct mlx5e_sq *sq)
{
int err;
/* check if already enabled */
if (READ_ONCE(sq->running) != 0)
return;
err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
MLX5_SQC_STATE_RST);
if (err != 0) {
mlx5_en_err(sq->ifp,
"mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
}
sq->cc = 0;
sq->pc = 0;
/* reset doorbell prior to moving from RST to RDY */
mlx5e_reset_sq_doorbell_record(sq);
err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
MLX5_SQC_STATE_RDY);
if (err != 0) {
mlx5_en_err(sq->ifp,
"mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
}
sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
WRITE_ONCE(sq->running, 1);
}
static void
mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
{
int i;
for (i = 0; i < ch->priv->num_tc; i++)
mlx5e_resume_sq(&ch->sq[i]);
}
static void
mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
{
struct mlx5e_rq *rq = &ch->rq;
struct epoch_tracker et;
int err;
mtx_lock(&rq->mtx);
rq->enabled = 0;
callout_stop(&rq->watchdog);
mtx_unlock(&rq->mtx);
err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
if (err != 0) {
mlx5_en_err(rq->ifp,
"mlx5e_modify_rq() from RDY to RST failed: %d\n", err);
}
while (!mlx5_wq_ll_is_empty(&rq->wq)) {
msleep(1);
NET_EPOCH_ENTER(et);
rq->cq.mcq.comp(&rq->cq.mcq);
NET_EPOCH_EXIT(et);
}
/*
* Transitioning into the RST state allows the FW to track fewer
* ERR-state queues, thus reducing the receive queue flushing time.
*/
err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
if (err != 0) {
mlx5_en_err(rq->ifp,
"mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
}
}
static void
mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
{
struct mlx5e_rq *rq = &ch->rq;
struct epoch_tracker et;
int err;
rq->wq.wqe_ctr = 0;
mlx5_wq_ll_update_db_record(&rq->wq);
err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
if (err != 0) {
mlx5_en_err(rq->ifp,
"mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
}
rq->enabled = 1;
NET_EPOCH_ENTER(et);
rq->cq.mcq.comp(&rq->cq.mcq);
NET_EPOCH_EXIT(et);
}
void
mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
{
int i;
if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
return;
for (i = 0; i < priv->params.num_channels; i++) {
if (value)
mlx5e_disable_tx_dma(&priv->channel[i]);
else
mlx5e_enable_tx_dma(&priv->channel[i]);
}
}
void
mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
{
int i;
if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
return;
for (i = 0; i < priv->params.num_channels; i++) {
if (value)
mlx5e_disable_rx_dma(&priv->channel[i]);
else
mlx5e_enable_rx_dma(&priv->channel[i]);
}
}
static void
mlx5e_add_hw_stats(struct mlx5e_priv *priv)
{
SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
sysctl_firmware, "A", "HCA firmware version");
SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
"Board ID");
}
static int
mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
{
struct mlx5e_priv *priv = arg1;
uint8_t temp[MLX5E_MAX_PRIORITY];
uint32_t tx_pfc;
int err;
int i;
PRIV_LOCK(priv);
tx_pfc = priv->params.tx_priority_flow_control;
for (i = 0; i != MLX5E_MAX_PRIORITY; i++)
temp[i] = (tx_pfc >> i) & 1;
err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY);
if (err || !req->newptr)
goto done;
err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY);
if (err)
goto done;
priv->params.tx_priority_flow_control = 0;
/* range check input value */
for (i = 0; i != MLX5E_MAX_PRIORITY; i++) {
if (temp[i] > 1) {
err = ERANGE;
goto done;
}
priv->params.tx_priority_flow_control |= (temp[i] << i);
}
/* check if update is required */
if (tx_pfc != priv->params.tx_priority_flow_control)
err = -mlx5e_set_port_pfc(priv);
done:
if (err != 0)
priv->params.tx_priority_flow_control = tx_pfc;
PRIV_UNLOCK(priv);
return (err);
}
static int
mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
{
struct mlx5e_priv *priv = arg1;
uint8_t temp[MLX5E_MAX_PRIORITY];
uint32_t rx_pfc;
int err;
int i;
PRIV_LOCK(priv);
rx_pfc = priv->params.rx_priority_flow_control;
for (i = 0; i != MLX5E_MAX_PRIORITY; i++)
temp[i] = (rx_pfc >> i) & 1;
err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY);
if (err || !req->newptr)
goto done;
err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY);
if (err)
goto done;
priv->params.rx_priority_flow_control = 0;
/* range check input value */
for (i = 0; i != MLX5E_MAX_PRIORITY; i++) {
if (temp[i] > 1) {
err = ERANGE;
goto done;
}
priv->params.rx_priority_flow_control |= (temp[i] << i);
}
/* check if update is required */
if (rx_pfc != priv->params.rx_priority_flow_control) {
err = -mlx5e_set_port_pfc(priv);
if (err == 0 && priv->sw_is_port_buf_owner)
err = mlx5e_update_buf_lossy(priv);
}
done:
if (err != 0)
priv->params.rx_priority_flow_control = rx_pfc;
PRIV_UNLOCK(priv);
return (err);
}
static void
mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
{
#if (__FreeBSD_version < 1100000)
char path[96];
#endif
int error;
/* enable pauseframes by default */
priv->params.tx_pauseframe_control = 1;
priv->params.rx_pauseframe_control = 1;
/* disable ports flow control, PFC, by default */
priv->params.tx_priority_flow_control = 0;
priv->params.rx_priority_flow_control = 0;
#if (__FreeBSD_version < 1100000)
/* compute path for sysctl */
snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
device_get_unit(priv->mdev->pdev->dev.bsddev));
/* try to fetch tunable, if any */
TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
/* compute path for sysctl */
snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
device_get_unit(priv->mdev->pdev->dev.bsddev));
/* try to fetch tunable, if any */
TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
#endif
/* register pauseframe SYSCTLs */
SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
&priv->params.tx_pauseframe_control, 0,
"Set to enable TX pause frames. Clear to disable.");
SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
&priv->params.rx_pauseframe_control, 0,
"Set to enable RX pause frames. Clear to disable.");
/* register priority flow control, PFC, SYSCTLs */
SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
OID_AUTO, "tx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN |
CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_tx_priority_flow_control, "CU",
"Set to enable TX ports flow control frames for priorities 0..7. Clear to disable.");
SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
OID_AUTO, "rx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN |
CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_rx_priority_flow_control, "CU",
"Set to enable RX ports flow control frames for priorities 0..7. Clear to disable.");
PRIV_LOCK(priv);
/* range check */
priv->params.tx_pauseframe_control =
priv->params.tx_pauseframe_control ? 1 : 0;
priv->params.rx_pauseframe_control =
priv->params.rx_pauseframe_control ? 1 : 0;
/* update firmware */
error = mlx5e_set_port_pause_and_pfc(priv);
if (error == -EINVAL) {
mlx5_en_err(priv->ifp,
"Global pauseframes must be disabled before enabling PFC.\n");
priv->params.rx_priority_flow_control = 0;
priv->params.tx_priority_flow_control = 0;
/* update firmware */
(void) mlx5e_set_port_pause_and_pfc(priv);
}
PRIV_UNLOCK(priv);
}
int
mlx5e_ul_snd_tag_alloc(struct ifnet *ifp,
union if_snd_tag_alloc_params *params,
struct m_snd_tag **ppmt)
{
struct mlx5e_priv *priv;
struct mlx5e_channel *pch;
priv = ifp->if_softc;
if (unlikely(priv->gone || params->hdr.flowtype == M_HASHTYPE_NONE)) {
return (EOPNOTSUPP);
} else {
/* keep this code synced with mlx5e_select_queue() */
u32 ch = priv->params.num_channels;
#ifdef RSS
u32 temp;
if (rss_hash2bucket(params->hdr.flowid,
params->hdr.flowtype, &temp) == 0)
ch = temp % ch;
else
#endif
ch = (params->hdr.flowid % 128) % ch;
/*
* NOTE: The channels array is only freed at detach
* and it is safe to return a pointer to the send tag
* inside the channels structure as long as we
* reference the priv.
*/
pch = priv->channel + ch;
/* check if send queue is not running */
if (unlikely(pch->sq[0].running == 0))
return (ENXIO);
m_snd_tag_ref(&pch->tag.m_snd_tag);
*ppmt = &pch->tag.m_snd_tag;
return (0);
}
}
int
mlx5e_ul_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
{
struct mlx5e_channel *pch =
container_of(pmt, struct mlx5e_channel, tag.m_snd_tag);
params->unlimited.max_rate = -1ULL;
params->unlimited.queue_level = mlx5e_sq_queue_level(&pch->sq[0]);
return (0);
}
void
mlx5e_ul_snd_tag_free(struct m_snd_tag *pmt)
{
struct mlx5e_channel *pch =
container_of(pmt, struct mlx5e_channel, tag.m_snd_tag);
complete(&pch->completion);
}
static int
mlx5e_snd_tag_alloc(struct ifnet *ifp,
union if_snd_tag_alloc_params *params,
struct m_snd_tag **ppmt)
{
switch (params->hdr.type) {
#ifdef RATELIMIT
case IF_SND_TAG_TYPE_RATE_LIMIT:
return (mlx5e_rl_snd_tag_alloc(ifp, params, ppmt));
#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT)
case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt));
#endif
#endif
case IF_SND_TAG_TYPE_UNLIMITED:
return (mlx5e_ul_snd_tag_alloc(ifp, params, ppmt));
#ifdef KERN_TLS
case IF_SND_TAG_TYPE_TLS:
return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt));
#endif
default:
return (EOPNOTSUPP);
}
}
static int
mlx5e_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
{
struct mlx5e_snd_tag *tag =
container_of(pmt, struct mlx5e_snd_tag, m_snd_tag);
switch (tag->type) {
#ifdef RATELIMIT
case IF_SND_TAG_TYPE_RATE_LIMIT:
return (mlx5e_rl_snd_tag_modify(pmt, params));
#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT)
case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
return (mlx5e_tls_snd_tag_modify(pmt, params));
#endif
#endif
case IF_SND_TAG_TYPE_UNLIMITED:
#ifdef KERN_TLS
case IF_SND_TAG_TYPE_TLS:
#endif
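/* unlimited and plain TLS send tags have no modifiable parameters */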
default:
return (EOPNOTSUPP);
}
}
static int
mlx5e_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
{
struct mlx5e_snd_tag *tag =
container_of(pmt, struct mlx5e_snd_tag, m_snd_tag);
switch (tag->type) {
#ifdef RATELIMIT
case IF_SND_TAG_TYPE_RATE_LIMIT:
return (mlx5e_rl_snd_tag_query(pmt, params));
#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT)
case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
return (mlx5e_tls_snd_tag_query(pmt, params));
#endif
#endif
case IF_SND_TAG_TYPE_UNLIMITED:
return (mlx5e_ul_snd_tag_query(pmt, params));
#ifdef KERN_TLS
case IF_SND_TAG_TYPE_TLS:
return (mlx5e_tls_snd_tag_query(pmt, params));
#endif
default:
return (EOPNOTSUPP);
}
}
#ifdef RATELIMIT
#define NUM_HDWR_RATES_MLX 13
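/* fixed send rates in bytes per second; bits per second in comments */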
static const uint64_t adapter_rates_mlx[NUM_HDWR_RATES_MLX] = {
135375, /* 1,083,000 */
180500, /* 1,444,000 */
270750, /* 2,166,000 */
361000, /* 2,888,000 */
541500, /* 4,332,000 */
721875, /* 5,775,000 */
1082875, /* 8,663,000 */
1443875, /* 11,551,000 */
2165750, /* 17,326,000 */
2887750, /* 23,102,000 */
4331625, /* 34,653,000 */
5775500, /* 46,204,000 */
8663125 /* 69,305,000 */
};
static void
mlx5e_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
{
/*
* This function needs updating by the driver maintainer!
* For the MLX card (ConnectX-4?) there are currently 13
* preset rates; other adapters, i.e. ConnectX-5, 6, 7(?),
* may differ.
*
* This will change with later adapters, and this code should
* be updated to look at the ifp and figure out the specific
* adapter type and settings, i.e. how many rates there are
* and whether they are fixed (as shown here) or dynamic
* (e.g. Chelsio T4). Also, if there is a maximum number of
* flows that the adapter can handle, that too needs to be
* reflected in the max_flows field.
*/
q->rate_table = adapter_rates_mlx;
q->flags = RT_IS_FIXED_TABLE;
q->max_flows = 0; /* mlx has no limit */
q->number_of_rates = NUM_HDWR_RATES_MLX;
q->min_segment_burst = 1;
}
#endif
static void
mlx5e_snd_tag_free(struct m_snd_tag *pmt)
{
struct mlx5e_snd_tag *tag =
container_of(pmt, struct mlx5e_snd_tag, m_snd_tag);
switch (tag->type) {
#ifdef RATELIMIT
case IF_SND_TAG_TYPE_RATE_LIMIT:
mlx5e_rl_snd_tag_free(pmt);
break;
#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT)
case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
mlx5e_tls_snd_tag_free(pmt);
break;
#endif
#endif
case IF_SND_TAG_TYPE_UNLIMITED:
mlx5e_ul_snd_tag_free(pmt);
break;
#ifdef KERN_TLS
case IF_SND_TAG_TYPE_TLS:
mlx5e_tls_snd_tag_free(pmt);
break;
#endif
default:
break;
}
}
static void *
mlx5e_create_ifp(struct mlx5_core_dev *mdev)
{
struct ifnet *ifp;
struct mlx5e_priv *priv;
u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
u8 connector_type;
struct sysctl_oid_list *child;
int ncv = mdev->priv.eq_table.num_comp_vectors;
char unit[16];
struct pfil_head_args pa;
int err;
int i, j;
u32 eth_proto_cap;
u32 out[MLX5_ST_SZ_DW(ptys_reg)];
bool ext = false;
u32 speeds_num;
struct media media_entry = {};
if (mlx5e_check_required_hca_cap(mdev)) {
mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
return (NULL);
}
/*
* Try to allocate the priv and make room for worst-case
* number of channel structures:
*/
priv = malloc(sizeof(*priv) +
(sizeof(priv->channel[0]) * mdev->priv.eq_table.num_comp_vectors),
M_MLX5EN, M_WAITOK | M_ZERO);
ifp = priv->ifp = if_alloc_dev(IFT_ETHER, mdev->pdev->dev.bsddev);
if (ifp == NULL) {
mlx5_core_err(mdev, "if_alloc() failed\n");
goto err_free_priv;
}
/* setup all static fields */
mlx5e_priv_static_init(priv, mdev->priv.eq_table.num_comp_vectors);
ifp->if_softc = priv;
if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
ifp->if_mtu = ETHERMTU;
ifp->if_init = mlx5e_open;
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
+ IFF_KNOWSEPOCH;
ifp->if_ioctl = mlx5e_ioctl;
ifp->if_transmit = mlx5e_xmit;
ifp->if_qflush = if_qflush;
#if (__FreeBSD_version >= 1100000)
ifp->if_get_counter = mlx5e_get_counter;
#endif
ifp->if_snd.ifq_maxlen = ifqmaxlen;
/*
* Set driver features
*/
ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
ifp->if_capabilities |= IFCAP_LRO;
ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
ifp->if_capabilities |= IFCAP_NOMAP;
ifp->if_capabilities |= IFCAP_TXTLS4 | IFCAP_TXTLS6;
ifp->if_capabilities |= IFCAP_TXRTLMT;
ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc;
ifp->if_snd_tag_free = mlx5e_snd_tag_free;
ifp->if_snd_tag_modify = mlx5e_snd_tag_modify;
ifp->if_snd_tag_query = mlx5e_snd_tag_query;
#ifdef RATELIMIT
ifp->if_ratelimit_query = mlx5e_ratelimit_query;
#endif
/* set TSO limits so that we don't have to drop TX packets */
ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
ifp->if_capenable = ifp->if_capabilities;
ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TSO)
ifp->if_hwassist |= CSUM_TSO;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
/* ifnet sysctl tree */
sysctl_ctx_init(&priv->sysctl_ctx);
priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
if (priv->sysctl_ifnet == NULL) {
mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
goto err_free_sysctl;
}
snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
if (priv->sysctl_ifnet == NULL) {
mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
goto err_free_sysctl;
}
/* HW sysctl tree */
child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
if (priv->sysctl_hw == NULL) {
mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
goto err_free_sysctl;
}
err = mlx5e_build_ifp_priv(mdev, priv, ncv);
if (err) {
mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err);
goto err_free_sysctl;
}
/* reuse mlx5core's watchdog workqueue */
priv->wq = mdev->priv.health.wq_watchdog;
err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
if (err) {
mlx5_en_err(ifp, "mlx5_alloc_map_uar failed, %d\n", err);
goto err_free_wq;
}
err = mlx5_core_alloc_pd(mdev, &priv->pdn);
if (err) {
mlx5_en_err(ifp, "mlx5_core_alloc_pd failed, %d\n", err);
goto err_unmap_free_uar;
}
err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
if (err) {
mlx5_en_err(ifp,
"mlx5_alloc_transport_domain failed, %d\n", err);
goto err_dealloc_pd;
}
err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
if (err) {
mlx5_en_err(ifp, "mlx5e_create_mkey failed, %d\n", err);
goto err_dealloc_transport_domain;
}
mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
/* check if we should generate a random MAC address */
if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
is_zero_ether_addr(dev_addr)) {
random_ether_addr(dev_addr);
mlx5_en_err(ifp, "Assigned random MAC address\n");
}
err = mlx5e_rl_init(priv);
if (err) {
mlx5_en_err(ifp, "mlx5e_rl_init failed, %d\n", err);
goto err_create_mkey;
}
err = mlx5e_tls_init(priv);
if (err) {
if_printf(ifp, "%s: mlx5e_tls_init failed\n", __func__);
goto err_rl_init;
}
/* set default MTU */
mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
/* Set default media status */
priv->media_status_last = IFM_AVALID;
priv->media_active_last = IFM_ETHER | IFM_AUTO |
IFM_ETH_RXPAUSE | IFM_FDX;
/* setup default pauseframes configuration */
mlx5e_setup_pauseframes(priv);
/* Setup supported medias */
//TODO: If we failed to query ptys is it ok to proceed??
if (!mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1)) {
ext = MLX5_CAP_PCAM_FEATURE(mdev,
ptys_extended_ethernet);
eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
eth_proto_capability);
if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type))
connector_type = MLX5_GET(ptys_reg, out,
connector_type);
} else {
eth_proto_cap = 0;
mlx5_en_err(ifp, "Query port media capability failed, %d\n", err);
}
ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
mlx5e_media_change, mlx5e_media_status);
speeds_num = ext ? MLX5E_EXT_LINK_SPEEDS_NUMBER : MLX5E_LINK_SPEEDS_NUMBER;
for (i = 0; i != speeds_num; i++) {
for (j = 0; j < MLX5E_LINK_MODES_NUMBER ; ++j) {
media_entry = ext ? mlx5e_ext_mode_table[i][j] :
mlx5e_mode_table[i][j];
if (media_entry.baudrate == 0)
continue;
if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
ifmedia_add(&priv->media,
media_entry.subtype |
IFM_ETHER, 0, NULL);
ifmedia_add(&priv->media,
media_entry.subtype |
IFM_ETHER | IFM_FDX |
IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
}
}
}
ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
/* Set autoselect by default */
ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
DEBUGNET_SET(ifp, mlx5_en);
ether_ifattach(ifp, dev_addr);
/* Register for VLAN events */
priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
/* Link is down by default */
if_link_state_change(ifp, LINK_STATE_DOWN);
mlx5e_enable_async_events(priv);
mlx5e_add_hw_stats(priv);
mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
"vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
priv->stats.vport.arg);
mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
"pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
priv->stats.pport.arg);
mlx5e_create_ethtool(priv);
mtx_lock(&priv->async_events_mtx);
mlx5e_update_stats(priv);
mtx_unlock(&priv->async_events_mtx);
SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
&priv->clbr_done, 0,
"RX timestamps calibration state");
callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
mlx5e_reset_calibration_callout(priv);
pa.pa_version = PFIL_VERSION;
pa.pa_flags = PFIL_IN;
pa.pa_type = PFIL_TYPE_ETHERNET;
pa.pa_headname = ifp->if_xname;
priv->pfil = pfil_head_register(&pa);
return (priv);
err_rl_init:
mlx5e_rl_cleanup(priv);
err_create_mkey:
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
err_dealloc_transport_domain:
mlx5_dealloc_transport_domain(mdev, priv->tdn);
err_dealloc_pd:
mlx5_core_dealloc_pd(mdev, priv->pdn);
err_unmap_free_uar:
mlx5_unmap_free_uar(mdev, &priv->cq_uar);
err_free_wq:
flush_workqueue(priv->wq);
err_free_sysctl:
sysctl_ctx_free(&priv->sysctl_ctx);
if (priv->sysctl_debug)
sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
mlx5e_priv_static_destroy(priv, mdev->priv.eq_table.num_comp_vectors);
if_free(ifp);
err_free_priv:
free(priv, M_MLX5EN);
return (NULL);
}
static void
mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
{
struct mlx5e_priv *priv = vpriv;
struct ifnet *ifp = priv->ifp;
/* don't allow more IOCTLs */
priv->gone = 1;
/* XXX wait a bit to allow IOCTL handlers to complete */
pause("W", hz);
#ifdef RATELIMIT
/*
* The kernel can have reference(s) via the m_snd_tag's into
* the ratelimit channels, and these must go away before
* detaching:
*/
while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) {
mlx5_en_err(priv->ifp,
"Waiting for all ratelimit connections to terminate\n");
pause("W", hz);
}
#endif
/* wait for all unlimited send tags to complete */
mlx5e_priv_wait_for_completion(priv, mdev->priv.eq_table.num_comp_vectors);
/* stop watchdog timer */
callout_drain(&priv->watchdog);
callout_drain(&priv->tstmp_clbr);
if (priv->vlan_attach != NULL)
EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
if (priv->vlan_detach != NULL)
EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
/* make sure device gets closed */
PRIV_LOCK(priv);
mlx5e_close_locked(ifp);
PRIV_UNLOCK(priv);
/* deregister pfil */
if (priv->pfil != NULL) {
pfil_head_unregister(priv->pfil);
priv->pfil = NULL;
}
/* unregister device */
ifmedia_removeall(&priv->media);
ether_ifdetach(ifp);
mlx5e_tls_cleanup(priv);
mlx5e_rl_cleanup(priv);
/* destroy all remaining sysctl nodes */
sysctl_ctx_free(&priv->stats.vport.ctx);
sysctl_ctx_free(&priv->stats.pport.ctx);
if (priv->sysctl_debug)
sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
sysctl_ctx_free(&priv->sysctl_ctx);
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
mlx5e_disable_async_events(priv);
flush_workqueue(priv->wq);
mlx5e_priv_static_destroy(priv, mdev->priv.eq_table.num_comp_vectors);
if_free(ifp);
free(priv, M_MLX5EN);
}
#ifdef DEBUGNET
static void
mlx5_en_debugnet_init(struct ifnet *dev, int *nrxr, int *ncl, int *clsize)
{
struct mlx5e_priv *priv = if_getsoftc(dev);
PRIV_LOCK(priv);
*nrxr = priv->params.num_channels;
*ncl = DEBUGNET_MAX_IN_FLIGHT;
*clsize = MLX5E_MAX_RX_BYTES;
PRIV_UNLOCK(priv);
}
static void
mlx5_en_debugnet_event(struct ifnet *dev, enum debugnet_ev event)
{
}
static int
mlx5_en_debugnet_transmit(struct ifnet *dev, struct mbuf *m)
{
struct mlx5e_priv *priv = if_getsoftc(dev);
struct mlx5e_sq *sq;
int err;
if ((if_getdrvflags(dev) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING || (priv->media_status_last & IFM_ACTIVE) == 0)
return (ENOENT);
sq = &priv->channel[0].sq[0];
if (sq->running == 0) {
m_freem(m);
return (ENOENT);
}
if (mlx5e_sq_xmit(sq, &m) != 0) {
m_freem(m);
err = ENOBUFS;
} else {
err = 0;
}
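/*
 * mlx5e_sq_xmit() only queues the work request; ring the doorbell
 * here so the frame actually goes out, since debugnet runs with
 * normal TX completion processing suspended.
 */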
if (likely(sq->doorbell.d64 != 0)) {
mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
sq->doorbell.d64 = 0;
}
return (err);
}
static int
mlx5_en_debugnet_poll(struct ifnet *dev, int count)
{
struct mlx5e_priv *priv = if_getsoftc(dev);
if ((if_getdrvflags(dev) & IFF_DRV_RUNNING) == 0 ||
(priv->media_status_last & IFM_ACTIVE) == 0)
return (ENOENT);
mlx5_poll_interrupts(priv->mdev);
return (0);
}
#endif /* DEBUGNET */
static void *
mlx5e_get_ifp(void *vpriv)
{
struct mlx5e_priv *priv = vpriv;
return (priv->ifp);
}
static struct mlx5_interface mlx5e_interface = {
.add = mlx5e_create_ifp,
.remove = mlx5e_destroy_ifp,
.event = mlx5e_async_event,
.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
.get_dev = mlx5e_get_ifp,
};
void
mlx5e_init(void)
{
mlx5_register_interface(&mlx5e_interface);
}
void
mlx5e_cleanup(void)
{
mlx5_unregister_interface(&mlx5e_interface);
}
static void
mlx5e_show_version(void __unused *arg)
{
printf("%s", mlx5e_version);
}
SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL);
module_init_order(mlx5e_init, SI_ORDER_THIRD);
module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
#if (__FreeBSD_version >= 1100000)
MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
#endif
MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
MODULE_VERSION(mlx5en, 1);
diff --git a/sys/dev/ntb/if_ntb/if_ntb.c b/sys/dev/ntb/if_ntb/if_ntb.c
index 1e1f98a54132..3bae01aae49d 100644
--- a/sys/dev/ntb/if_ntb/if_ntb.c
+++ b/sys/dev/ntb/if_ntb/if_ntb.c
@@ -1,514 +1,513 @@
/*-
* Copyright (c) 2016 Alexander Motin
* Copyright (C) 2013 Intel Corporation
* Copyright (C) 2015 EMC Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* The Non-Transparent Bridge (NTB) is a device that allows you to connect
* two or more systems using PCI-e links, providing remote memory access.
*
* This module contains a driver for a simulated Ethernet device, using
* the underlying NTB Transport device.
*
* NOTE: Much of the code in this module is shared with Linux. Any patches may
* be picked up and redistributed in Linux with a dual GPL/BSD license.
*/
#include
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "../ntb_transport.h"
#define KTR_NTB KTR_SPARE3
#define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX)
#define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
#define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
#define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
CSUM_PSEUDO_HDR | \
CSUM_IP_CHECKED | CSUM_IP_VALID | \
CSUM_SCTP_VALID)
static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb");
static unsigned g_if_ntb_num_queues = UINT_MAX;
SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
&g_if_ntb_num_queues, 0, "Number of queues per interface");
struct ntb_net_queue {
struct ntb_net_ctx *sc;
if_t ifp;
struct ntb_transport_qp *qp;
struct buf_ring *br;
struct task tx_task;
struct taskqueue *tx_tq;
struct mtx tx_lock;
struct callout queue_full;
};
struct ntb_net_ctx {
if_t ifp;
struct ifmedia media;
u_char eaddr[ETHER_ADDR_LEN];
int num_queues;
struct ntb_net_queue *queues;
int mtu;
};
static int ntb_net_probe(device_t dev);
static int ntb_net_attach(device_t dev);
static int ntb_net_detach(device_t dev);
static void ntb_net_init(void *arg);
static int ntb_ifmedia_upd(struct ifnet *);
static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static int ntb_ioctl(if_t ifp, u_long command, caddr_t data);
static int ntb_transmit(if_t ifp, struct mbuf *m);
static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len);
static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len);
static void ntb_net_event_handler(void *data, enum ntb_link_event status);
static void ntb_handle_tx(void *arg, int pending);
static void ntb_qp_full(void *arg);
static void ntb_qflush(if_t ifp);
static void create_random_local_eui48(u_char *eaddr);
static int
ntb_net_probe(device_t dev)
{
device_set_desc(dev, "NTB Network Interface");
return (0);
}
static int
ntb_net_attach(device_t dev)
{
struct ntb_net_ctx *sc = device_get_softc(dev);
struct ntb_net_queue *q;
if_t ifp;
struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
ntb_net_tx_handler, ntb_net_event_handler };
int i;
ifp = sc->ifp = if_gethandle(IFT_ETHER);
if (ifp == NULL) {
printf("ntb: Cannot allocate ifnet structure\n");
return (ENOMEM);
}
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
if_setdev(ifp, dev);
sc->num_queues = min(g_if_ntb_num_queues,
ntb_transport_queue_count(dev));
sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
M_DEVBUF, M_WAITOK | M_ZERO);
sc->mtu = INT_MAX;
for (i = 0; i < sc->num_queues; i++) {
q = &sc->queues[i];
q->sc = sc;
q->ifp = ifp;
q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
if (q->qp == NULL)
break;
sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
taskqueue_thread_enqueue, &q->tx_tq);
taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
device_get_nameunit(dev), i);
callout_init(&q->queue_full, 1);
}
sc->num_queues = i;
device_printf(dev, "%d queue(s)\n", sc->num_queues);
if_setinitfn(ifp, ntb_net_init);
if_setsoftc(ifp, sc);
- if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
- IFF_NEEDSEPOCH);
+ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
if_setioctlfn(ifp, ntb_ioctl);
if_settransmitfn(ifp, ntb_transmit);
if_setqflushfn(ifp, ntb_qflush);
create_random_local_eui48(sc->eaddr);
ether_ifattach(ifp, sc->eaddr);
if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN);
ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
ntb_ifmedia_sts);
ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
ifmedia_set(&sc->media, NTB_MEDIATYPE);
for (i = 0; i < sc->num_queues; i++)
ntb_transport_link_up(sc->queues[i].qp);
return (0);
}
static int
ntb_net_detach(device_t dev)
{
struct ntb_net_ctx *sc = device_get_softc(dev);
struct ntb_net_queue *q;
int i;
for (i = 0; i < sc->num_queues; i++)
ntb_transport_link_down(sc->queues[i].qp);
ether_ifdetach(sc->ifp);
if_free(sc->ifp);
ifmedia_removeall(&sc->media);
for (i = 0; i < sc->num_queues; i++) {
q = &sc->queues[i];
ntb_transport_free_queue(q->qp);
buf_ring_free(q->br, M_DEVBUF);
callout_drain(&q->queue_full);
taskqueue_drain_all(q->tx_tq);
mtx_destroy(&q->tx_lock);
}
free(sc->queues, M_DEVBUF);
return (0);
}
/* Network device interface */
static void
ntb_net_init(void *arg)
{
struct ntb_net_ctx *sc = arg;
if_t ifp = sc->ifp;
if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
if_setbaudrate(ifp, ntb_transport_link_speed(sc->queues[0].qp));
if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
LINK_STATE_UP : LINK_STATE_DOWN);
}
static int
ntb_ioctl(if_t ifp, u_long command, caddr_t data)
{
struct ntb_net_ctx *sc = if_getsoftc(ifp);
struct ifreq *ifr = (struct ifreq *)data;
int error = 0;
switch (command) {
case SIOCSIFFLAGS:
case SIOCADDMULTI:
case SIOCDELMULTI:
break;
case SIOCSIFMTU:
{
if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
error = EINVAL;
break;
}
if_setmtu(ifp, ifr->ifr_mtu);
break;
}
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
break;
case SIOCSIFCAP:
if (ifr->ifr_reqcap & IFCAP_RXCSUM)
if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
else
if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0);
} else {
if_setcapenablebit(ifp, 0, IFCAP_TXCSUM);
if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES);
}
if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
else
if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0);
} else {
if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6);
if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6);
}
break;
default:
error = ether_ioctl(ifp, command, data);
break;
}
return (error);
}
static int
ntb_ifmedia_upd(struct ifnet *ifp)
{
struct ntb_net_ctx *sc = if_getsoftc(ifp);
struct ifmedia *ifm = &sc->media;
if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
return (EINVAL);
return (0);
}
static void
ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
struct ntb_net_ctx *sc = if_getsoftc(ifp);
ifmr->ifm_status = IFM_AVALID;
ifmr->ifm_active = NTB_MEDIATYPE;
if (ntb_transport_link_query(sc->queues[0].qp))
ifmr->ifm_status |= IFM_ACTIVE;
}
static void
ntb_transmit_locked(struct ntb_net_queue *q)
{
if_t ifp = q->ifp;
struct mbuf *m;
int rc, len;
short mflags;
CTR0(KTR_NTB, "TX: ntb_transmit_locked");
while ((m = drbr_peek(ifp, q->br)) != NULL) {
CTR1(KTR_NTB, "TX: start mbuf %p", m);
if_etherbpfmtap(ifp, m);
len = m->m_pkthdr.len;
mflags = m->m_flags;
rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
if (rc != 0) {
CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
if (rc == EAGAIN) {
drbr_putback(ifp, q->br, m);
callout_reset_sbt(&q->queue_full,
SBT_1MS / 4, SBT_1MS / 4,
ntb_qp_full, q, 0);
} else {
m_freem(m);
drbr_advance(ifp, q->br);
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
break;
}
drbr_advance(ifp, q->br);
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
if (mflags & M_MCAST)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
}
}
static int
ntb_transmit(if_t ifp, struct mbuf *m)
{
struct ntb_net_ctx *sc = if_getsoftc(ifp);
struct ntb_net_queue *q;
int error, i;
CTR0(KTR_NTB, "TX: ntb_transmit");
if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
i = m->m_pkthdr.flowid % sc->num_queues;
else
i = curcpu % sc->num_queues;
q = &sc->queues[i];
error = drbr_enqueue(ifp, q->br, m);
if (error)
return (error);
if (mtx_trylock(&q->tx_lock)) {
ntb_transmit_locked(q);
mtx_unlock(&q->tx_lock);
} else
taskqueue_enqueue(q->tx_tq, &q->tx_task);
return (0);
}
static void
ntb_handle_tx(void *arg, int pending)
{
struct ntb_net_queue *q = arg;
mtx_lock(&q->tx_lock);
ntb_transmit_locked(q);
mtx_unlock(&q->tx_lock);
}
static void
ntb_qp_full(void *arg)
{
struct ntb_net_queue *q = arg;
CTR0(KTR_NTB, "TX: qp_full callout");
if (ntb_transport_tx_free_entry(q->qp) > 0)
taskqueue_enqueue(q->tx_tq, &q->tx_task);
else
callout_schedule_sbt(&q->queue_full,
SBT_1MS / 4, SBT_1MS / 4, 0);
}
static void
ntb_qflush(if_t ifp)
{
struct ntb_net_ctx *sc = if_getsoftc(ifp);
struct ntb_net_queue *q;
struct mbuf *m;
int i;
for (i = 0; i < sc->num_queues; i++) {
q = &sc->queues[i];
mtx_lock(&q->tx_lock);
while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
m_freem(m);
mtx_unlock(&q->tx_lock);
}
if_qflush(ifp);
}
/* Network Device Callbacks */
static void
ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
int len)
{
m_freem(data);
CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
}
static void
ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
int len)
{
struct ntb_net_queue *q = qp_data;
struct ntb_net_ctx *sc = q->sc;
struct mbuf *m = data;
if_t ifp = q->ifp;
uint16_t proto;
CTR1(KTR_NTB, "RX: rx handler (%d)", len);
if (len < 0) {
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return;
}
m->m_pkthdr.rcvif = ifp;
if (sc->num_queues > 1) {
m->m_pkthdr.flowid = q - sc->queues;
M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
}
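/*
 * The NTB link is a reliable PCIe channel, so rather than verify
 * checksums we mark them as already validated for the protocols
 * that have RX checksum offload enabled.
 */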
if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
m_copydata(m, 12, 2, (void *)&proto);
switch (ntohs(proto)) {
case ETHERTYPE_IP:
if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
m->m_pkthdr.csum_data = 0xffff;
m->m_pkthdr.csum_flags = NTB_CSUM_SET;
}
break;
case ETHERTYPE_IPV6:
if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) {
m->m_pkthdr.csum_data = 0xffff;
m->m_pkthdr.csum_flags = NTB_CSUM_SET;
}
break;
}
}
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
if_input(ifp, m);
}
static void
ntb_net_event_handler(void *data, enum ntb_link_event status)
{
struct ntb_net_queue *q = data;
if_setbaudrate(q->ifp, ntb_transport_link_speed(q->qp));
if_link_state_change(q->ifp, (status == NTB_LINK_UP) ? LINK_STATE_UP :
LINK_STATE_DOWN);
}
/* Helper functions */
/* TODO: This too should really be part of the kernel */
#define EUI48_MULTICAST (1 << 0)
#define EUI48_LOCALLY_ADMINISTERED (1 << 1)
static void
create_random_local_eui48(u_char *eaddr)
{
static uint8_t counter = 0;
eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
arc4rand(&eaddr[1], 4, 0);
eaddr[5] = counter++;
}
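/*
 * Illustrative example: with counter == 0 the generated address has the
 * form 02:xx:xx:xx:xx:00 -- bit 1 of the first octet marks it as locally
 * administered while bit 0 (the multicast bit) stays clear.
 */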
static device_method_t ntb_net_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, ntb_net_probe),
DEVMETHOD(device_attach, ntb_net_attach),
DEVMETHOD(device_detach, ntb_net_detach),
DEVMETHOD_END
};
devclass_t ntb_net_devclass;
static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
sizeof(struct ntb_net_ctx));
DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass,
NULL, NULL);
MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
MODULE_VERSION(if_ntb, 1);
diff --git a/sys/dev/sbni/if_sbni.c b/sys/dev/sbni/if_sbni.c
index 62b86112b26f..267001f7897d 100644
--- a/sys/dev/sbni/if_sbni.c
+++ b/sys/dev/sbni/if_sbni.c
@@ -1,1278 +1,1277 @@
/*-
* Copyright (c) 1997-2001 Granch, Ltd. All rights reserved.
* Author: Denis I.Timofeev
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include
__FBSDID("$FreeBSD$");
/*
* Device driver for Granch SBNI12 leased line adapters
*
* Revision 2.0.0 1997/08/06
* Initial revision by Alexey Zverev
*
* Revision 2.0.1 1997/08/11
* Additional internal statistics support (tx statistics)
*
* Revision 2.0.2 1997/11/05
* if_bpf bug has been fixed
*
* Revision 2.0.3 1998/12/20
* Memory leakage has been eliminated in
* the sbni_st and sbni_timeout routines.
*
* Revision 3.0 2000/08/10 by Yaroslav Polyakov
* Support for PCI cards. 4.1 modification.
*
* Revision 3.1 2000/09/12
* Removed extra #defines around bpf functions
*
* Revision 4.0 2000/11/23 by Denis Timofeev
* Completely redesigned the buffer management
*
* Revision 4.1 2001/01/21
* Support for PCI Dual cards and new SBNI12D-10, -11 Dual/ISA cards
*
* Written with reference to NE2000 driver developed by David Greenman.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
static void sbni_init(void *);
static void sbni_init_locked(struct sbni_softc *);
static void sbni_start(struct ifnet *);
static void sbni_start_locked(struct ifnet *);
static int sbni_ioctl(struct ifnet *, u_long, caddr_t);
static void sbni_stop(struct sbni_softc *);
static void handle_channel(struct sbni_softc *);
static void card_start(struct sbni_softc *);
static int recv_frame(struct sbni_softc *);
static void send_frame(struct sbni_softc *);
static int upload_data(struct sbni_softc *, u_int, u_int, u_int, u_int32_t);
static int skip_tail(struct sbni_softc *, u_int, u_int32_t);
static void interpret_ack(struct sbni_softc *, u_int);
static void download_data(struct sbni_softc *, u_int32_t *);
static void prepare_to_send(struct sbni_softc *);
static void drop_xmit_queue(struct sbni_softc *);
static int get_rx_buf(struct sbni_softc *);
static void indicate_pkt(struct sbni_softc *);
static void change_level(struct sbni_softc *);
static int check_fhdr(struct sbni_softc *, u_int *, u_int *,
u_int *, u_int *, u_int32_t *);
static int append_frame_to_pkt(struct sbni_softc *, u_int, u_int32_t);
static void timeout_change_level(struct sbni_softc *);
static void send_frame_header(struct sbni_softc *, u_int32_t *);
static void set_initial_values(struct sbni_softc *, struct sbni_flags);
static u_int32_t calc_crc32(u_int32_t, caddr_t, u_int);
static callout_func_t sbni_timeout;
static __inline u_char sbni_inb(struct sbni_softc *, enum sbni_reg);
static __inline void sbni_outb(struct sbni_softc *, enum sbni_reg, u_char);
static __inline void sbni_insb(struct sbni_softc *, u_char *, u_int);
static __inline void sbni_outsb(struct sbni_softc *, u_char *, u_int);
static u_int32_t crc32tab[];
#ifdef SBNI_DUAL_COMPOUND
static struct mtx headlist_lock;
MTX_SYSINIT(headlist_lock, &headlist_lock, "sbni headlist", MTX_DEF);
static struct sbni_softc *sbni_headlist;
#endif
/* -------------------------------------------------------------------------- */
static __inline u_char
sbni_inb(struct sbni_softc *sc, enum sbni_reg reg)
{
return bus_space_read_1(
rman_get_bustag(sc->io_res),
rman_get_bushandle(sc->io_res),
sc->io_off + reg);
}
static __inline void
sbni_outb(struct sbni_softc *sc, enum sbni_reg reg, u_char value)
{
bus_space_write_1(
rman_get_bustag(sc->io_res),
rman_get_bushandle(sc->io_res),
sc->io_off + reg, value);
}
static __inline void
sbni_insb(struct sbni_softc *sc, u_char *to, u_int len)
{
bus_space_read_multi_1(
rman_get_bustag(sc->io_res),
rman_get_bushandle(sc->io_res),
sc->io_off + DAT, to, len);
}
static __inline void
sbni_outsb(struct sbni_softc *sc, u_char *from, u_int len)
{
bus_space_write_multi_1(
rman_get_bustag(sc->io_res),
rman_get_bushandle(sc->io_res),
sc->io_off + DAT, from, len);
}
/*
Valid combinations in CSR0 (for probing):
VALID_DECODER 0000,0011,1011,1010
; 0 ; -
TR_REQ ; 1 ; +
TR_RDY ; 2 ; -
TR_RDY TR_REQ ; 3 ; +
BU_EMP ; 4 ; +
BU_EMP TR_REQ ; 5 ; +
BU_EMP TR_RDY ; 6 ; -
BU_EMP TR_RDY TR_REQ ; 7 ; +
RC_RDY ; 8 ; +
RC_RDY TR_REQ ; 9 ; +
RC_RDY TR_RDY ; 10 ; -
RC_RDY TR_RDY TR_REQ ; 11 ; -
RC_RDY BU_EMP ; 12 ; -
RC_RDY BU_EMP TR_REQ ; 13 ; -
RC_RDY BU_EMP TR_RDY ; 14 ; -
RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; -
*/
#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200)
int
sbni_probe(struct sbni_softc *sc)
{
u_char csr0;
csr0 = sbni_inb(sc, CSR0);
if (csr0 != 0xff && csr0 != 0x00) {
csr0 &= ~EN_INT;
if (csr0 & BU_EMP)
csr0 |= EN_INT;
if (VALID_DECODER & (1 << (csr0 >> 4)))
return (0);
}
return (ENXIO);
}
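/*
 * Worked example: the high nibble of CSR0 indexes the table above, so
 * with csr0 = 0x5X (BU_EMP | TR_REQ) the probe tests bit 5 of
 * VALID_DECODER (0x3ba), which is set, and the combination is valid;
 * with csr0 = 0x2X (TR_RDY alone) bit 2 is clear and the probe fails.
 */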
/*
* Install interface into kernel networking data structures
*/
int
sbni_attach(struct sbni_softc *sc, int unit, struct sbni_flags flags)
{
struct ifnet *ifp;
u_char csr0;
ifp = sc->ifp = if_alloc(IFT_ETHER);
if (ifp == NULL)
return (ENOMEM);
sbni_outb(sc, CSR0, 0);
set_initial_values(sc, flags);
/* Initialize ifnet structure */
ifp->if_softc = sc;
if_initname(ifp, "sbni", unit);
ifp->if_init = sbni_init;
ifp->if_start = sbni_start;
ifp->if_ioctl = sbni_ioctl;
IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
/* report real baud rate */
csr0 = sbni_inb(sc, CSR0);
ifp->if_baudrate =
(csr0 & 0x01 ? 500000 : 2000000) / (1 << flags.rate);
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
- IFF_NEEDSEPOCH;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
mtx_init(&sc->lock, ifp->if_xname, MTX_NETWORK_LOCK, MTX_DEF);
callout_init_mtx(&sc->wch, &sc->lock, 0);
ether_ifattach(ifp, sc->enaddr);
/* device attach does transition from UNCONFIGURED to IDLE state */
if_printf(ifp, "speed %ju, rxl ", (uintmax_t)ifp->if_baudrate);
if (sc->delta_rxl)
printf("auto\n");
else
printf("%d (fixed)\n", sc->cur_rxl_index);
return (0);
}
void
sbni_detach(struct sbni_softc *sc)
{
SBNI_LOCK(sc);
sbni_stop(sc);
SBNI_UNLOCK(sc);
callout_drain(&sc->wch);
ether_ifdetach(sc->ifp);
if (sc->irq_handle)
bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handle);
mtx_destroy(&sc->lock);
if_free(sc->ifp);
}
void
sbni_release_resources(struct sbni_softc *sc)
{
if (sc->irq_res)
bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
sc->irq_res);
if (sc->io_res && sc->io_off == 0)
bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->io_rid,
sc->io_res);
}
/* -------------------------------------------------------------------------- */
static void
sbni_init(void *xsc)
{
struct sbni_softc *sc;
sc = (struct sbni_softc *)xsc;
SBNI_LOCK(sc);
sbni_init_locked(sc);
SBNI_UNLOCK(sc);
}
static void
sbni_init_locked(struct sbni_softc *sc)
{
struct ifnet *ifp;
ifp = sc->ifp;
/*
* kludge to avoid reinitialization when more than one
* protocol is configured
*/
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
return;
card_start(sc);
callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc);
ifp->if_drv_flags |= IFF_DRV_RUNNING;
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
/* attempt to start output */
sbni_start_locked(ifp);
}
static void
sbni_start(struct ifnet *ifp)
{
struct sbni_softc *sc = ifp->if_softc;
SBNI_LOCK(sc);
sbni_start_locked(ifp);
SBNI_UNLOCK(sc);
}
static void
sbni_start_locked(struct ifnet *ifp)
{
struct sbni_softc *sc = ifp->if_softc;
if (sc->tx_frameno == 0)
prepare_to_send(sc);
}
static void
sbni_stop(struct sbni_softc *sc)
{
sbni_outb(sc, CSR0, 0);
drop_xmit_queue(sc);
if (sc->rx_buf_p) {
m_freem(sc->rx_buf_p);
sc->rx_buf_p = NULL;
}
callout_stop(&sc->wch);
sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
}
/* -------------------------------------------------------------------------- */
/* interrupt handler */
/*
* SBNI12D-10, -11/ISA boards in "common interrupt" mode cannot be treated
* as two independent single-channel devices. Each channel appears as an
* Ethernet interface, but the interrupt handler must be shared. In fact,
* only the first ("master") channel's driver registers the handler; its
* struct softc holds a pointer to the "slave" channel's struct softc and
* services that channel's interrupts as well.
* The softc of each successfully attached ISA SBNI board is linked into a
* list. When the next board's driver initializes, it scans this list; if
* it finds a softc with the same irq and an ioaddr differing by 4, it
* assumes that board is the "master".
*/
void
sbni_intr(void *arg)
{
struct sbni_softc *sc;
int repeat;
sc = (struct sbni_softc *)arg;
do {
repeat = 0;
SBNI_LOCK(sc);
if (sbni_inb(sc, CSR0) & (RC_RDY | TR_RDY)) {
handle_channel(sc);
repeat = 1;
}
SBNI_UNLOCK(sc);
if (sc->slave_sc) {
/* second channel present */
SBNI_LOCK(sc->slave_sc);
if (sbni_inb(sc->slave_sc, CSR0) & (RC_RDY | TR_RDY)) {
handle_channel(sc->slave_sc);
repeat = 1;
}
SBNI_UNLOCK(sc->slave_sc);
}
} while (repeat);
}
static void
handle_channel(struct sbni_softc *sc)
{
int req_ans;
u_char csr0;
sbni_outb(sc, CSR0, (sbni_inb(sc, CSR0) & ~EN_INT) | TR_REQ);
sc->timer_ticks = CHANGE_LEVEL_START_TICKS;
for (;;) {
csr0 = sbni_inb(sc, CSR0);
if ((csr0 & (RC_RDY | TR_RDY)) == 0)
break;
req_ans = !(sc->state & FL_PREV_OK);
if (csr0 & RC_RDY)
req_ans = recv_frame(sc);
/*
* TR_RDY is always 1 here because we own the marker,
* and we set TR_REQ while interrupts were disabled
*/
csr0 = sbni_inb(sc, CSR0);
if ((csr0 & TR_RDY) == 0 || (csr0 & RC_RDY) != 0)
if_printf(sc->ifp, "internal error!\n");
/* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */
if (req_ans || sc->tx_frameno != 0)
send_frame(sc);
else {
/* send the marker without any data */
sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) & ~TR_REQ);
}
}
sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | EN_INT);
}
/*
* Returns 1 if the received frame needs to be acknowledged.
* An empty frame received without errors is not acknowledged.
*/
static int
recv_frame(struct sbni_softc *sc)
{
u_int32_t crc;
u_int framelen, frameno, ack;
u_int is_first, frame_ok;
crc = CRC32_INITIAL;
if (check_fhdr(sc, &framelen, &frameno, &ack, &is_first, &crc)) {
frame_ok = framelen > 4 ?
upload_data(sc, framelen, frameno, is_first, crc) :
skip_tail(sc, framelen, crc);
if (frame_ok)
interpret_ack(sc, ack);
} else {
framelen = 0;
frame_ok = 0;
}
sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) ^ CT_ZER);
if (frame_ok) {
sc->state |= FL_PREV_OK;
if (framelen > 4)
sc->in_stats.all_rx_number++;
} else {
sc->state &= ~FL_PREV_OK;
change_level(sc);
sc->in_stats.all_rx_number++;
sc->in_stats.bad_rx_number++;
}
return (!frame_ok || framelen > 4);
}
static void
send_frame(struct sbni_softc *sc)
{
u_int32_t crc;
u_char csr0;
crc = CRC32_INITIAL;
if (sc->state & FL_NEED_RESEND) {
/* if the frame was sent but not ACK'ed, resend it */
if (sc->trans_errors) {
sc->trans_errors--;
if (sc->framelen != 0)
sc->in_stats.resend_tx_number++;
} else {
/* transmission failed after too many attempts */
drop_xmit_queue(sc);
goto do_send;
}
} else
sc->trans_errors = TR_ERROR_COUNT;
send_frame_header(sc, &crc);
sc->state |= FL_NEED_RESEND;
/*
* FL_NEED_RESEND will be cleared after the ACK; if an empty
* frame was sent, it is cleared in prepare_to_send instead
*/
if (sc->framelen) {
download_data(sc, &crc);
sc->in_stats.all_tx_number++;
sc->state |= FL_WAIT_ACK;
}
sbni_outsb(sc, (u_char *)&crc, sizeof crc);
do_send:
csr0 = sbni_inb(sc, CSR0);
sbni_outb(sc, CSR0, csr0 & ~TR_REQ);
if (sc->tx_frameno) {
/* next frame exists - request to send */
sbni_outb(sc, CSR0, csr0 | TR_REQ);
}
}
static void
download_data(struct sbni_softc *sc, u_int32_t *crc_p)
{
struct mbuf *m;
caddr_t data_p;
u_int data_len, pos, slice;
data_p = NULL; /* initialized to avoid warn */
pos = 0;
for (m = sc->tx_buf_p; m != NULL && pos < sc->pktlen; m = m->m_next) {
if (pos + m->m_len > sc->outpos) {
data_len = m->m_len - (sc->outpos - pos);
data_p = mtod(m, caddr_t) + (sc->outpos - pos);
goto do_copy;
} else
pos += m->m_len;
}
data_len = 0;
do_copy:
pos = 0;
do {
if (data_len) {
slice = min(data_len, sc->framelen - pos);
sbni_outsb(sc, data_p, slice);
*crc_p = calc_crc32(*crc_p, data_p, slice);
pos += slice;
if (data_len -= slice)
data_p += slice;
else {
do {
m = m->m_next;
} while (m != NULL && m->m_len == 0);
if (m) {
data_len = m->m_len;
data_p = mtod(m, caddr_t);
}
}
} else {
/* frame too short - zero padding */
pos = sc->framelen - pos;
while (pos--) {
sbni_outb(sc, DAT, 0);
*crc_p = CRC32(0, *crc_p);
}
return;
}
} while (pos < sc->framelen);
}
static int
upload_data(struct sbni_softc *sc, u_int framelen, u_int frameno,
u_int is_first, u_int32_t crc)
{
int frame_ok;
if (is_first) {
sc->wait_frameno = frameno;
sc->inppos = 0;
}
if (sc->wait_frameno == frameno) {
if (sc->inppos + framelen <= ETHER_MAX_LEN) {
frame_ok = append_frame_to_pkt(sc, framelen, crc);
/*
* if the CRC is right but framelen is incorrect, a
* transmitter error occurred... drop the entire packet
*/
} else if ((frame_ok = skip_tail(sc, framelen, crc)) != 0) {
sc->wait_frameno = 0;
sc->inppos = 0;
if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
/* now skip all frames until is_first != 0 */
}
} else
frame_ok = skip_tail(sc, framelen, crc);
if (is_first && !frame_ok) {
/*
* The frame was corrupted, but we have already stored
* is_first... Drop the entire packet.
*/
sc->wait_frameno = 0;
if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
}
return (frame_ok);
}
static __inline void send_complete(struct sbni_softc *);
static __inline void
send_complete(struct sbni_softc *sc)
{
m_freem(sc->tx_buf_p);
sc->tx_buf_p = NULL;
if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1);
}
static void
interpret_ack(struct sbni_softc *sc, u_int ack)
{
if (ack == FRAME_SENT_OK) {
sc->state &= ~FL_NEED_RESEND;
if (sc->state & FL_WAIT_ACK) {
sc->outpos += sc->framelen;
if (--sc->tx_frameno) {
sc->framelen = min(
sc->maxframe, sc->pktlen - sc->outpos);
} else {
send_complete(sc);
prepare_to_send(sc);
}
}
}
sc->state &= ~FL_WAIT_ACK;
}
/*
* Glue the received frame to the previous fragments of the packet.
* Indicate the packet once the last frame has been accepted.
*/
static int
append_frame_to_pkt(struct sbni_softc *sc, u_int framelen, u_int32_t crc)
{
caddr_t p;
if (sc->inppos + framelen > ETHER_MAX_LEN)
return (0);
if (!sc->rx_buf_p && !get_rx_buf(sc))
return (0);
p = sc->rx_buf_p->m_data + sc->inppos;
sbni_insb(sc, p, framelen);
if (calc_crc32(crc, p, framelen) != CRC32_REMAINDER)
return (0);
sc->inppos += framelen - 4;
if (--sc->wait_frameno == 0) { /* last frame received */
indicate_pkt(sc);
if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1);
}
return (1);
}
/*
* Prepare to start output on adapter. Current priority must be set to splimp
* before this routine is called.
* Transmitter will be actually activated when marker has been accepted.
*/
static void
prepare_to_send(struct sbni_softc *sc)
{
struct mbuf *m;
u_int len;
/* sc->tx_buf_p == NULL here! */
if (sc->tx_buf_p)
printf("sbni: memory leak!\n");
sc->outpos = 0;
sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
for (;;) {
IF_DEQUEUE(&sc->ifp->if_snd, sc->tx_buf_p);
if (!sc->tx_buf_p) {
/* nothing to transmit... */
sc->pktlen = 0;
sc->tx_frameno = 0;
sc->framelen = 0;
sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
return;
}
for (len = 0, m = sc->tx_buf_p; m; m = m->m_next)
len += m->m_len;
if (len != 0)
break;
m_freem(sc->tx_buf_p);
}
if (len < SBNI_MIN_LEN)
len = SBNI_MIN_LEN;
sc->pktlen = len;
sc->tx_frameno = howmany(len, sc->maxframe);
sc->framelen = min(len, sc->maxframe);
sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | TR_REQ);
sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
BPF_MTAP(sc->ifp, sc->tx_buf_p);
}
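/*
 * Worked example: a 4000-byte packet with maxframe = 1500 gives
 * tx_frameno = howmany(4000, 1500) = 3; framelen starts at 1500 and
 * interpret_ack() shrinks the final frame to the remaining 1000 bytes
 * (pktlen - outpos).
 */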
static void
drop_xmit_queue(struct sbni_softc *sc)
{
struct mbuf *m;
if (sc->tx_buf_p) {
m_freem(sc->tx_buf_p);
sc->tx_buf_p = NULL;
if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1);
}
for (;;) {
IF_DEQUEUE(&sc->ifp->if_snd, m);
if (m == NULL)
break;
m_freem(m);
if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1);
}
sc->tx_frameno = 0;
sc->framelen = 0;
sc->outpos = 0;
sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}
static void
send_frame_header(struct sbni_softc *sc, u_int32_t *crc_p)
{
u_int32_t crc;
u_int len_field;
u_char value;
crc = *crc_p;
len_field = sc->framelen + 6; /* CRC + frameno + reserved */
if (sc->state & FL_NEED_RESEND)
len_field |= FRAME_RETRY; /* non-first attempt... */
if (sc->outpos == 0)
len_field |= FRAME_FIRST;
len_field |= (sc->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD;
sbni_outb(sc, DAT, SBNI_SIG);
value = (u_char)len_field;
sbni_outb(sc, DAT, value);
crc = CRC32(value, crc);
value = (u_char)(len_field >> 8);
sbni_outb(sc, DAT, value);
crc = CRC32(value, crc);
sbni_outb(sc, DAT, sc->tx_frameno);
crc = CRC32(sc->tx_frameno, crc);
sbni_outb(sc, DAT, 0);
crc = CRC32(0, crc);
*crc_p = crc;
}
/*
* If the frame tail is not needed (wrong frame number or received twice),
* it is not stored, but the CRC is still calculated.
*/
static int
skip_tail(struct sbni_softc *sc, u_int tail_len, u_int32_t crc)
{
while (tail_len--)
crc = CRC32(sbni_inb(sc, DAT), crc);
return (crc == CRC32_REMAINDER);
}
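/*
 * The comparison relies on the standard CRC residue property: folding a
 * message's trailing CRC bytes into the running register always leaves
 * a fixed constant, CRC32_REMAINDER, no matter what the message was.
 * Sketch of the check (illustrative):
 *
 *	crc = CRC32_INITIAL;
 *	for (i = 0; i < len; i++)	// len includes the 4 CRC bytes
 *		crc = CRC32(buf[i], crc);
 *	ok = (crc == CRC32_REMAINDER);
 */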
static int
check_fhdr(struct sbni_softc *sc, u_int *framelen, u_int *frameno,
u_int *ack, u_int *is_first, u_int32_t *crc_p)
{
u_int32_t crc;
u_char value;
crc = *crc_p;
if (sbni_inb(sc, DAT) != SBNI_SIG)
return (0);
value = sbni_inb(sc, DAT);
*framelen = (u_int)value;
crc = CRC32(value, crc);
value = sbni_inb(sc, DAT);
*framelen |= ((u_int)value) << 8;
crc = CRC32(value, crc);
*ack = *framelen & FRAME_ACK_MASK;
*is_first = (*framelen & FRAME_FIRST) != 0;
if ((*framelen &= FRAME_LEN_MASK) < 6 || *framelen > SBNI_MAX_FRAME - 3)
return (0);
value = sbni_inb(sc, DAT);
*frameno = (u_int)value;
crc = CRC32(value, crc);
crc = CRC32(sbni_inb(sc, DAT), crc); /* reserved byte */
*framelen -= 2;
*crc_p = crc;
return (1);
}
static int
get_rx_buf(struct sbni_softc *sc)
{
struct mbuf *m;
MGETHDR(m, M_NOWAIT, MT_DATA);
if (m == NULL) {
if_printf(sc->ifp, "cannot allocate header mbuf\n");
return (0);
}
/*
* We always put the received packet in a single buffer -
* either with just an mbuf header or in a cluster attached
* to the header. The +2 is to compensate for the alignment
* fixup below.
*/
if (ETHER_MAX_LEN + 2 > MHLEN) {
/* Attach an mbuf cluster */
if (!(MCLGET(m, M_NOWAIT))) {
m_freem(m);
return (0);
}
}
m->m_pkthdr.len = m->m_len = ETHER_MAX_LEN + 2;
/*
* The +2 is to longword align the start of the real packet.
* (sizeof ether_header == 14)
* This is important for NFS.
*/
m_adj(m, 2);
sc->rx_buf_p = m;
return (1);
}
static void
indicate_pkt(struct sbni_softc *sc)
{
struct ifnet *ifp = sc->ifp;
struct mbuf *m;
m = sc->rx_buf_p;
m->m_pkthdr.rcvif = ifp;
m->m_pkthdr.len = m->m_len = sc->inppos;
sc->rx_buf_p = NULL;
SBNI_UNLOCK(sc);
(*ifp->if_input)(ifp, m);
SBNI_LOCK(sc);
}
/* -------------------------------------------------------------------------- */
/*
* Periodically check wire activity and regenerate the marker if the
* link has been inactive for too long.
*/
static void
sbni_timeout(void *xsc)
{
struct sbni_softc *sc;
u_char csr0;
sc = (struct sbni_softc *)xsc;
SBNI_ASSERT_LOCKED(sc);
csr0 = sbni_inb(sc, CSR0);
if (csr0 & RC_CHK) {
if (sc->timer_ticks) {
if (csr0 & (RC_RDY | BU_EMP))
/* receiving not active */
sc->timer_ticks--;
} else {
sc->in_stats.timeout_number++;
if (sc->delta_rxl)
timeout_change_level(sc);
sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES);
csr0 = sbni_inb(sc, CSR0);
}
}
sbni_outb(sc, CSR0, csr0 | RC_CHK);
callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc);
}
/* -------------------------------------------------------------------------- */
static void
card_start(struct sbni_softc *sc)
{
sc->timer_ticks = CHANGE_LEVEL_START_TICKS;
sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
sc->state |= FL_PREV_OK;
sc->inppos = 0;
sc->wait_frameno = 0;
sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES);
sbni_outb(sc, CSR0, EN_INT);
}
/* -------------------------------------------------------------------------- */
static u_char rxl_tab[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f
};
#define SIZE_OF_TIMEOUT_RXL_TAB 4
static u_char timeout_rxl_tab[] = {
0x03, 0x05, 0x08, 0x0b
};
static void
set_initial_values(struct sbni_softc *sc, struct sbni_flags flags)
{
if (flags.fixed_rxl) {
sc->delta_rxl = 0; /* disable receive level autodetection */
sc->cur_rxl_index = flags.rxl;
} else {
sc->delta_rxl = DEF_RXL_DELTA;
sc->cur_rxl_index = DEF_RXL;
}
sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE;
sc->csr1.rxl = rxl_tab[sc->cur_rxl_index];
sc->maxframe = DEFAULT_FRAME_LEN;
/*
* generate Ethernet address (0x00ff01xxxxxx)
*/
*(u_int16_t *) sc->enaddr = htons(0x00ff);
if (flags.mac_addr) {
*(u_int32_t *) (sc->enaddr + 2) =
htonl(flags.mac_addr | 0x01000000);
} else {
*(u_char *) (sc->enaddr + 2) = 0x01;
read_random(sc->enaddr + 3, 3);
}
}
#ifdef SBNI_DUAL_COMPOUND
void
sbni_add(struct sbni_softc *sc)
{
mtx_lock(&headlist_lock);
sc->link = sbni_headlist;
sbni_headlist = sc;
mtx_unlock(&headlist_lock);
}
struct sbni_softc *
connect_to_master(struct sbni_softc *sc)
{
struct sbni_softc *p, *p_prev;
mtx_lock(&headlist_lock);
for (p = sbni_headlist, p_prev = NULL; p; p_prev = p, p = p->link) {
if (rman_get_start(p->io_res) == rman_get_start(sc->io_res) + 4 ||
rman_get_start(p->io_res) == rman_get_start(sc->io_res) - 4) {
p->slave_sc = sc;
if (p_prev)
p_prev->link = p->link;
else
sbni_headlist = p->link;
mtx_unlock(&headlist_lock);
return (p);
}
}
mtx_unlock(&headlist_lock);
return (NULL);
}
#endif /* SBNI_DUAL_COMPOUND */
/* Receive level auto-selection */
static void
change_level(struct sbni_softc *sc)
{
if (sc->delta_rxl == 0) /* do not auto-negotiate RxL */
return;
if (sc->cur_rxl_index == 0)
sc->delta_rxl = 1;
else if (sc->cur_rxl_index == 15)
sc->delta_rxl = -1;
else if (sc->cur_rxl_rcvd < sc->prev_rxl_rcvd)
sc->delta_rxl = -sc->delta_rxl;
sc->csr1.rxl = rxl_tab[sc->cur_rxl_index += sc->delta_rxl];
sbni_inb(sc, CSR0); /* needed for PCI cards */
sbni_outb(sc, CSR1, *(u_char *)&sc->csr1);
sc->prev_rxl_rcvd = sc->cur_rxl_rcvd;
sc->cur_rxl_rcvd = 0;
}
static void
timeout_change_level(struct sbni_softc *sc)
{
sc->cur_rxl_index = timeout_rxl_tab[sc->timeout_rxl];
if (++sc->timeout_rxl >= SIZE_OF_TIMEOUT_RXL_TAB)
sc->timeout_rxl = 0;
sc->csr1.rxl = rxl_tab[sc->cur_rxl_index];
sbni_inb(sc, CSR0);
sbni_outb(sc, CSR1, *(u_char *)&sc->csr1);
sc->prev_rxl_rcvd = sc->cur_rxl_rcvd;
sc->cur_rxl_rcvd = 0;
}
/* -------------------------------------------------------------------------- */
/*
* Process an ioctl request. This code needs some work - it looks
* pretty ugly.
*/
static int
sbni_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
struct sbni_softc *sc;
struct ifreq *ifr;
struct thread *td;
struct sbni_in_stats *in_stats;
struct sbni_flags flags;
int error;
sc = ifp->if_softc;
ifr = (struct ifreq *)data;
td = curthread;
error = 0;
switch (command) {
case SIOCSIFFLAGS:
/*
* If the interface is marked up and stopped, then start it.
* If it is marked down and running, then stop it.
*/
SBNI_LOCK(sc);
if (ifp->if_flags & IFF_UP) {
if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
sbni_init_locked(sc);
} else {
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
sbni_stop(sc);
}
}
SBNI_UNLOCK(sc);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
/*
* Multicast list has changed; set the hardware filter
* accordingly.
*/
error = 0;
/* if (ifr == NULL)
error = EAFNOSUPPORT; */
break;
/*
* SBNI specific ioctl
*/
case SIOCGHWFLAGS: /* get flags */
SBNI_LOCK(sc);
bcopy((caddr_t)IF_LLADDR(sc->ifp)+3, (caddr_t) &flags, 3);
flags.rxl = sc->cur_rxl_index;
flags.rate = sc->csr1.rate;
flags.fixed_rxl = (sc->delta_rxl == 0);
flags.fixed_rate = 1;
SBNI_UNLOCK(sc);
bcopy(&flags, &ifr->ifr_ifru, sizeof(flags));
break;
case SIOCGINSTATS:
in_stats = malloc(sizeof(struct sbni_in_stats), M_DEVBUF,
M_WAITOK);
SBNI_LOCK(sc);
bcopy(&sc->in_stats, in_stats, sizeof(struct sbni_in_stats));
SBNI_UNLOCK(sc);
error = copyout(in_stats, ifr_data_get_ptr(ifr),
sizeof(struct sbni_in_stats));
free(in_stats, M_DEVBUF);
break;
case SIOCSHWFLAGS: /* set flags */
/* root only */
error = priv_check(td, PRIV_DRIVER);
if (error)
break;
bcopy(&ifr->ifr_ifru, &flags, sizeof(flags));
SBNI_LOCK(sc);
if (flags.fixed_rxl) {
sc->delta_rxl = 0;
sc->cur_rxl_index = flags.rxl;
} else {
sc->delta_rxl = DEF_RXL_DELTA;
sc->cur_rxl_index = DEF_RXL;
}
sc->csr1.rxl = rxl_tab[sc->cur_rxl_index];
sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE;
if (flags.mac_addr)
bcopy((caddr_t) &flags,
(caddr_t) IF_LLADDR(sc->ifp)+3, 3);
/* Don't be afraid... */
sbni_outb(sc, CSR1, *(char*)(&sc->csr1) | PR_RES);
SBNI_UNLOCK(sc);
break;
case SIOCRINSTATS:
SBNI_LOCK(sc);
if (!(error = priv_check(td, PRIV_DRIVER))) /* root only */
bzero(&sc->in_stats, sizeof(struct sbni_in_stats));
SBNI_UNLOCK(sc);
break;
default:
error = ether_ioctl(ifp, command, data);
break;
}
return (error);
}
/* -------------------------------------------------------------------------- */
static u_int32_t
calc_crc32(u_int32_t crc, caddr_t p, u_int len)
{
while (len--)
crc = CRC32(*p++, crc);
return (crc);
}
static u_int32_t crc32tab[] __aligned(8) = {
0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37,
0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E,
0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605,
0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C,
0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53,
0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A,
0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661,
0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278,
0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF,
0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6,
0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD,
0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4,
0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B,
0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82,
0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9,
0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0,
0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7,
0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE,
0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795,
0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C,
0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3,
0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA,
0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1,
0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8,
0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F,
0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76,
0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D,
0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344,
0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B,
0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12,
0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739,
0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320,
0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17,
0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E,
0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525,
0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C,
0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73,
0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A,
0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541,
0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158,
0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF,
0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6,
0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED,
0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4,
0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB,
0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2,
0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589,
0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190,
0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87,
0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E,
0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5,
0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC,
0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3,
0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA,
0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1,
0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8,
0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F,
0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856,
0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D,
0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064,
0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B,
0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832,
0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419,
0x660951BA, 0x110E612C, 0x88073096, 0xFF000000
};
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index ceb3ffaaf2b4..b388e43d92a6 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -1,4090 +1,4091 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2011, Bryan Venteicher
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Driver for VirtIO network devices. */
#include
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include