Index: head/sys/dev/altera/atse/if_atse.c
===================================================================
--- head/sys/dev/altera/atse/if_atse.c	(revision 346895)
+++ head/sys/dev/altera/atse/if_atse.c	(revision 346896)
@@ -1,1603 +1,1608 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012, 2013 Bjoern A. Zeeb
  * Copyright (c) 2014 Robert N. M. Watson
  * Copyright (c) 2016-2017 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-11-C-0249)
  * ("MRC2"), as part of the DARPA MRC research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 /*
  * Altera Triple-Speed Ethernet MegaCore, Function User Guide
  * UG-01008-3.0, Software Version: 12.0, June 2012.
  * Available at the time of writing at:
  * http://www.altera.com/literature/ug/ug_ethernet.pdf
  *
  * We are using a Marvell E1111 (Alaska) PHY on the DE4.  See mii/e1000phy.c.
  */
 /*
  * XXX-BZ NOTES:
  * - ifOutBroadcastPkts are only counted if both ether dst and src are all-1s;
  *   seems to be an IP core bug: they count ether broadcasts as multicast.
  *   Is this still the case?
  * - figure out why the TX FIFO fill status and intr did not work as expected.
  * - test 100Mbit/s and 10Mbit/s
  * - blacklist the one special factory programmed ethernet address (for now
  *   hardcoded, later from loader?)
  * - resolve all XXX, left as reminders to shake out details later
  * - Jumbo frame support
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_device_polling.h"
 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 
 #include 
 
 #include 
 #include 
 #include 
 
 #include 
 #include 
 
 #include 
 #include 
 
 #define	RX_QUEUE_SIZE		4096
 #define	TX_QUEUE_SIZE		4096
 #define	NUM_RX_MBUF		512
 #define	BUFRING_SIZE		8192
 
 #include 
 
  /* XXX once we do parallel attach, we will need a global lock for this. */
 #define	ATSE_ETHERNET_OPTION_BITS_UNDEF	0
 #define	ATSE_ETHERNET_OPTION_BITS_READ	1
 static int atse_ethernet_option_bits_flag = ATSE_ETHERNET_OPTION_BITS_UNDEF;
 static uint8_t atse_ethernet_option_bits[ALTERA_ETHERNET_OPTION_BITS_LEN];
 
 /*
  * Softc and critical resource locking.
  */
 #define	ATSE_LOCK(_sc)		mtx_lock(&(_sc)->atse_mtx)
 #define	ATSE_UNLOCK(_sc)	mtx_unlock(&(_sc)->atse_mtx)
 #define	ATSE_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->atse_mtx, MA_OWNED)
 
 #define ATSE_DEBUG
 #undef ATSE_DEBUG
 
 #ifdef ATSE_DEBUG
 #define	DPRINTF(format, ...)	printf(format, __VA_ARGS__)
 #else
 #define	DPRINTF(format, ...)
 #endif
 
 /*
  * Register space access macros.
  */
 static inline void
 csr_write_4(struct atse_softc *sc, uint32_t reg, uint32_t val4,
     const char *f, const int l)
 {
 
 	val4 = htole32(val4);
 	DPRINTF("[%s:%d] CSR W %s 0x%08x (0x%08x) = 0x%08x\n", f, l,
 	    "atse_mem_res", reg, reg * 4, val4);
 	bus_write_4(sc->atse_mem_res, reg * 4, val4);
 }
 
 static inline uint32_t
 csr_read_4(struct atse_softc *sc, uint32_t reg, const char *f, const int l)
 {
 	uint32_t val4;
 
 	val4 = le32toh(bus_read_4(sc->atse_mem_res, reg * 4));
 	DPRINTF("[%s:%d] CSR R %s 0x%08x (0x%08x) = 0x%08x\n", f, l, 
 	    "atse_mem_res", reg, reg * 4, val4);
 
 	return (val4);
 }
 
 /*
  * See page 5-2 that it's all dword offsets and the MS 16 bits need to be zero
  * on write and ignored on read.
  */
 static inline void
 pxx_write_2(struct atse_softc *sc, bus_addr_t bmcr, uint32_t reg, uint16_t val,
     const char *f, const int l, const char *s)
 {
 	uint32_t val4;
 
 	val4 = htole32(val & 0x0000ffff);
 	DPRINTF("[%s:%d] %s W %s 0x%08x (0x%08jx) = 0x%08x\n", f, l, s,
 	    "atse_mem_res", reg, (bmcr + reg) * 4, val4);
 	bus_write_4(sc->atse_mem_res, (bmcr + reg) * 4, val4);
 }
 
 static inline uint16_t
 pxx_read_2(struct atse_softc *sc, bus_addr_t bmcr, uint32_t reg, const char *f,
     const int l, const char *s)
 {
 	uint32_t val4;
 	uint16_t val;
 
 	val4 = bus_read_4(sc->atse_mem_res, (bmcr + reg) * 4);
 	val = le32toh(val4) & 0x0000ffff;
 	DPRINTF("[%s:%d] %s R %s 0x%08x (0x%08jx) = 0x%04x\n", f, l, s,
 	    "atse_mem_res", reg, (bmcr + reg) * 4, val);
 
 	return (val);
 }
 
 #define	CSR_WRITE_4(sc, reg, val)	\
 	csr_write_4((sc), (reg), (val), __func__, __LINE__)
 #define	CSR_READ_4(sc, reg)		\
 	csr_read_4((sc), (reg), __func__, __LINE__)
 #define	PCS_WRITE_2(sc, reg, val)	\
 	pxx_write_2((sc), sc->atse_bmcr0, (reg), (val), __func__, __LINE__, \
 	    "PCS")
 #define	PCS_READ_2(sc, reg)		\
 	pxx_read_2((sc), sc->atse_bmcr0, (reg), __func__, __LINE__, "PCS")
 #define	PHY_WRITE_2(sc, reg, val)	\
 	pxx_write_2((sc), sc->atse_bmcr1, (reg), (val), __func__, __LINE__, \
 	    "PHY")
 #define	PHY_READ_2(sc, reg)		\
 	pxx_read_2((sc), sc->atse_bmcr1, (reg), __func__, __LINE__, "PHY")
 
 static void atse_tick(void *);
 static int atse_detach(device_t);
 
 devclass_t atse_devclass;
 
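 /*
  * Allocate 'n' receive mbuf clusters and enqueue them on the RX xDMA
  * channel so the engine always has buffers to fill.
  */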
 static int
 atse_rx_enqueue(struct atse_softc *sc, uint32_t n)
 {
 	struct mbuf *m;
 	int i;
 
 	for (i = 0; i < n; i++) {
 		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		if (m == NULL) {
 			device_printf(sc->dev,
 			    "%s: Can't alloc rx mbuf\n", __func__);
 			return (-1);
 		}
 
 		m->m_pkthdr.len = m->m_len = m->m_ext.ext_size;
 		xdma_enqueue_mbuf(sc->xchan_rx, &m, 0, 4, 4, XDMA_DEV_TO_MEM);
 	}
 
 	return (0);
 }
 
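 /*
  * TX completion interrupt: dequeue the mbufs the DMA engine has finished
  * with, count any errors, and free the buffers.
  */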
 static int
 atse_xdma_tx_intr(void *arg, xdma_transfer_status_t *status)
 {
 	xdma_transfer_status_t st;
 	struct atse_softc *sc;
 	struct ifnet *ifp;
 	struct mbuf *m;
 	int err;
 
 	sc = arg;
 
 	ATSE_LOCK(sc);
 
 	ifp = sc->atse_ifp;
 
 	for (;;) {
 		err = xdma_dequeue_mbuf(sc->xchan_tx, &m, &st);
 		if (err != 0) {
 			break;
 		}
 
 		if (st.error != 0) {
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		}
 
 		m_freem(m);
 		sc->txcount--;
 	}
 
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	ATSE_UNLOCK(sc);
 
 	return (0);
 }
 
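 /*
  * RX completion interrupt: hand completed mbufs to the network stack and
  * re-fill the RX queue with as many buffers as were consumed.
  */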
 static int
 atse_xdma_rx_intr(void *arg, xdma_transfer_status_t *status)
 {
 	xdma_transfer_status_t st;
 	struct atse_softc *sc;
 	struct ifnet *ifp;
 	struct mbuf *m;
 	int err;
 	uint32_t cnt_processed;
 
 	sc = arg;
 
 	ATSE_LOCK(sc);
 
 	ifp = sc->atse_ifp;
 
 	cnt_processed = 0;
 	for (;;) {
 		err = xdma_dequeue_mbuf(sc->xchan_rx, &m, &st);
 		if (err != 0) {
 			break;
 		}
 		cnt_processed++;
 
 		if (st.error != 0) {
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			m_freem(m);
 			continue;
 		}
 
 		m->m_pkthdr.len = m->m_len = st.transferred;
 		m->m_pkthdr.rcvif = ifp;
 		m_adj(m, ETHER_ALIGN);
 		ATSE_UNLOCK(sc);
 		(*ifp->if_input)(ifp, m);
 		ATSE_LOCK(sc);
 	}
 
 	atse_rx_enqueue(sc, cnt_processed);
 
 	ATSE_UNLOCK(sc);
 
 	return (0);
 }
 
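 /*
  * Drain the buf_ring onto the TX xDMA channel, stopping when the request
  * queue has no more room, and submit the batch to the engine.
  */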
 static int
 atse_transmit_locked(struct ifnet *ifp)
 {
 	struct atse_softc *sc;
 	struct mbuf *m;
 	struct buf_ring *br;
 	int error;
 	int enq;
 
 	sc = ifp->if_softc;
 	br = sc->br;
 
 	enq = 0;
 
 	while ((m = drbr_peek(ifp, br)) != NULL) {
 		error = xdma_enqueue_mbuf(sc->xchan_tx, &m, 0, 4, 4, XDMA_MEM_TO_DEV);
 		if (error != 0) {
 			/* No space in request queue available yet. */
 			drbr_putback(ifp, br, m);
 			break;
 		}
 
 		drbr_advance(ifp, br);
 
 		sc->txcount++;
 		enq++;
 
 		/* If anyone is interested give them a copy. */
 		ETHER_BPF_MTAP(ifp, m);
 	}
 
 	if (enq > 0)
 		xdma_queue_submit(sc->xchan_tx);
 
 	return (0);
 }
 
 static int
 atse_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct atse_softc *sc;
 	struct buf_ring *br;
 	int error;
 
 	sc = ifp->if_softc;
 	br = sc->br;
 
 	ATSE_LOCK(sc);
 
 	mtx_lock(&sc->br_mtx);
 
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) {
 		error = drbr_enqueue(ifp, sc->br, m);
 		mtx_unlock(&sc->br_mtx);
 		ATSE_UNLOCK(sc);
 		return (error);
 	}
 
 	if ((sc->atse_flags & ATSE_FLAGS_LINK) == 0) {
 		error = drbr_enqueue(ifp, sc->br, m);
 		mtx_unlock(&sc->br_mtx);
 		ATSE_UNLOCK(sc);
 		return (error);
 	}
 
 	error = drbr_enqueue(ifp, br, m);
 	if (error) {
 		mtx_unlock(&sc->br_mtx);
 		ATSE_UNLOCK(sc);
 		return (error);
 	}
 	error = atse_transmit_locked(ifp);
 
 	mtx_unlock(&sc->br_mtx);
 	ATSE_UNLOCK(sc);
 
 	return (error);
 }
 
 static void
 atse_qflush(struct ifnet *ifp)
 {
 	struct atse_softc *sc;
 
 	sc = ifp->if_softc;
 
 	printf("%s\n", __func__);
 }
 
 static int
 atse_stop_locked(struct atse_softc *sc)
 {
 	uint32_t mask, val4;
 	struct ifnet *ifp;
 	int i;
 
 	ATSE_LOCK_ASSERT(sc);
 
 	callout_stop(&sc->atse_tick);
 
 	ifp = sc->atse_ifp;
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
 	/* Disable MAC transmit and receive datapath. */
 	mask = BASE_CFG_COMMAND_CONFIG_TX_ENA|BASE_CFG_COMMAND_CONFIG_RX_ENA;
 	val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 	val4 &= ~mask;
 	CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4);
 
 	/* Wait for bits to be cleared; i=100 is excessive. */
 	for (i = 0; i < 100; i++) {
 		val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 		if ((val4 & mask) == 0) {
 			break;
 		}
 		DELAY(10);
 	}
 
 	if ((val4 & mask) != 0) {
 		device_printf(sc->atse_dev, "Disabling MAC TX/RX timed out.\n");
 		/* Punt. */
 	}
 
 	sc->atse_flags &= ~ATSE_FLAGS_LINK;
 
 	return (0);
 }
 
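 /*
  * Multicast hash: bit i of the result is the XOR (parity) of all eight
  * bits of address byte i, giving a 6-bit index into the multicast hash
  * table.
  */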
 static uint8_t
 atse_mchash(struct atse_softc *sc __unused, const uint8_t *addr)
 {
 	uint8_t x, y;
 	int i, j;
 
 	x = 0;
 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
 		y = addr[i] & 0x01;
 		for (j = 1; j < 8; j++)
 			y ^= (addr[i] >> j) & 0x01;
 		x |= (y << i);
 	}
 
 	return (x);
 }
 
 static int
 atse_rxfilter_locked(struct atse_softc *sc)
 {
 	struct ifmultiaddr *ifma;
 	struct ifnet *ifp;
 	uint32_t val4;
 	int i;
 
 	/* XXX-BZ can we find out if we have the MHASH synthesized? */
 	val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 	/* For simplicity always hash full 48 bits of addresses. */
 	if ((val4 & BASE_CFG_COMMAND_CONFIG_MHASH_SEL) != 0)
 		val4 &= ~BASE_CFG_COMMAND_CONFIG_MHASH_SEL;
 
 	ifp = sc->atse_ifp;
 	if (ifp->if_flags & IFF_PROMISC) {
 		val4 |= BASE_CFG_COMMAND_CONFIG_PROMIS_EN;
 	} else {
 		val4 &= ~BASE_CFG_COMMAND_CONFIG_PROMIS_EN;
 	}
 
 	CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4);
 
 	if (ifp->if_flags & IFF_ALLMULTI) {
 		/* Accept all multicast addresses. */
 		for (i = 0; i <= MHASH_LEN; i++)
 			CSR_WRITE_4(sc, MHASH_START + i, 0x1);
 	} else {
 		/*
 		 * Can hold MHASH_LEN entries.
 		 * XXX-BZ bitstring.h would be more general.
 		 */
 		uint64_t h;
 
 		h = 0;
 		/*
 		 * Re-build and re-program hash table.  First build the
 		 * bit-field "yes" or "no" for each slot per address, then
 		 * do all the programming afterwards.
 		 */
 		if_maddr_rlock(ifp);
 		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_addr->sa_family != AF_LINK) {
 				continue;
 			}
 
 			h |= (1 << atse_mchash(sc,
 			    LLADDR((struct sockaddr_dl *)ifma->ifma_addr)));
 		}
 		if_maddr_runlock(ifp);
 		for (i = 0; i <= MHASH_LEN; i++) {
 			CSR_WRITE_4(sc, MHASH_START + i,
 			    (h & (1 << i)) ? 0x01 : 0x00);
 		}
 	}
 
 	return (0);
 }
 
 static int
 atse_ethernet_option_bits_read_fdt(device_t dev)
 {
 	struct resource *res;
 	device_t fdev;
 	int i, rid;
 
 	if (atse_ethernet_option_bits_flag & ATSE_ETHERNET_OPTION_BITS_READ) {
 		return (0);
 	}
 
 	fdev = device_find_child(device_get_parent(dev), "cfi", 0);
 	if (fdev == NULL) {
 		return (ENOENT);
 	}
 
 	rid = 0;
 	res = bus_alloc_resource_any(fdev, SYS_RES_MEMORY, &rid,
 	    RF_ACTIVE | RF_SHAREABLE);
 	if (res == NULL) {
 		return (ENXIO);
 	}
 
 	for (i = 0; i < ALTERA_ETHERNET_OPTION_BITS_LEN; i++) {
 		atse_ethernet_option_bits[i] = bus_read_1(res,
 		    ALTERA_ETHERNET_OPTION_BITS_OFF + i);
 	}
 
 	bus_release_resource(fdev, SYS_RES_MEMORY, rid, res);
 	atse_ethernet_option_bits_flag |= ATSE_ETHERNET_OPTION_BITS_READ;
 
 	return (0);
 }
 
 static int
 atse_ethernet_option_bits_read(device_t dev)
 {
 	int error;
 
 	error = atse_ethernet_option_bits_read_fdt(dev);
 	if (error == 0)
 		return (0);
 
 	device_printf(dev, "Cannot read Ethernet addresses from flash.\n");
 
 	return (error);
 }
 
 static int
 atse_get_eth_address(struct atse_softc *sc)
 {
 	unsigned long hostid;
 	uint32_t val4;
 	int unit;
 
 	/*
 	 * Make sure to only ever do this once.  Otherwise a reset would
 	 * possibly change our ethernet address, which is not good at all.
 	 */
 	if (sc->atse_eth_addr[0] != 0x00 || sc->atse_eth_addr[1] != 0x00 ||
 	    sc->atse_eth_addr[2] != 0x00) {
 		return (0);
 	}
 
 	if ((atse_ethernet_option_bits_flag &
 	    ATSE_ETHERNET_OPTION_BITS_READ) == 0) {
 		goto get_random;
 	}
 
 	val4 = atse_ethernet_option_bits[0] << 24;
 	val4 |= atse_ethernet_option_bits[1] << 16;
 	val4 |= atse_ethernet_option_bits[2] << 8;
 	val4 |= atse_ethernet_option_bits[3];
 	/* They chose "safe". */
 	if (val4 != le32toh(0x00005afe)) {
 		device_printf(sc->atse_dev, "Magic '5afe' is not safe: 0x%08x. "
 		    "Falling back to random numbers for hardware address.\n",
 		     val4);
 		goto get_random;
 	}
 
 	sc->atse_eth_addr[0] = atse_ethernet_option_bits[4];
 	sc->atse_eth_addr[1] = atse_ethernet_option_bits[5];
 	sc->atse_eth_addr[2] = atse_ethernet_option_bits[6];
 	sc->atse_eth_addr[3] = atse_ethernet_option_bits[7];
 	sc->atse_eth_addr[4] = atse_ethernet_option_bits[8];
 	sc->atse_eth_addr[5] = atse_ethernet_option_bits[9];
 
 	/* Handle the factory default Ethernet address: 00:07:ed:ff:ed:15 */
 	if (sc->atse_eth_addr[0] == 0x00 && sc->atse_eth_addr[1] == 0x07 &&
 	    sc->atse_eth_addr[2] == 0xed && sc->atse_eth_addr[3] == 0xff &&
 	    sc->atse_eth_addr[4] == 0xed && sc->atse_eth_addr[5] == 0x15) {
 
 		device_printf(sc->atse_dev, "Factory programmed Ethernet "
 		    "hardware address blacklisted.  Falling back to random "
 		    "address to avoid collisions.\n");
 		device_printf(sc->atse_dev, "Please re-program your flash.\n");
 		goto get_random;
 	}
 
 	if (sc->atse_eth_addr[0] == 0x00 && sc->atse_eth_addr[1] == 0x00 &&
 	    sc->atse_eth_addr[2] == 0x00 && sc->atse_eth_addr[3] == 0x00 &&
 	    sc->atse_eth_addr[4] == 0x00 && sc->atse_eth_addr[5] == 0x00) {
 		device_printf(sc->atse_dev, "All zero's Ethernet hardware "
 		    "address blacklisted.  Falling back to random address.\n");
 		device_printf(sc->atse_dev, "Please re-program your flash.\n");
 		goto get_random;
 	}
 
 	if (ETHER_IS_MULTICAST(sc->atse_eth_addr)) {
 		device_printf(sc->atse_dev, "Multicast Ethernet hardware "
 		    "address blacklisted.  Falling back to random address.\n");
 		device_printf(sc->atse_dev, "Please re-program your flash.\n");
 		goto get_random;
 	}
 
 	/*
 	 * If we find an Altera-prefixed address with a 0x0 ending,
 	 * adjust it by the device unit.  If not, and this is not the
 	 * first Ethernet, go to random.
 	 */
 	unit = device_get_unit(sc->atse_dev);
 	if (unit == 0x00) {
 		return (0);
 	}
 
 	if (unit > 0x0f) {
 		device_printf(sc->atse_dev, "We do not support Ethernet "
 		    "addresses for more than 16 MACs. Falling back to "
 		    "random hadware address.\n");
 		goto get_random;
 	}
 	if ((sc->atse_eth_addr[0] & ~0x2) != 0 ||
 	    sc->atse_eth_addr[1] != 0x07 || sc->atse_eth_addr[2] != 0xed ||
 	    (sc->atse_eth_addr[5] & 0x0f) != 0x0) {
 		device_printf(sc->atse_dev, "Ethernet address not meeting our "
 		    "multi-MAC standards. Falling back to random hadware "
 		    "address.\n");
 		goto get_random;
 	}
 	sc->atse_eth_addr[5] |= (unit & 0x0f);
 
 	return (0);
 
 get_random:
 	/*
 	 * Fall back to random code we also use on bridge(4).
 	 */
 	getcredhostid(curthread->td_ucred, &hostid);
 	if (hostid == 0) {
 		arc4rand(sc->atse_eth_addr, ETHER_ADDR_LEN, 1);
 		sc->atse_eth_addr[0] &= ~1;/* clear multicast bit */
 		sc->atse_eth_addr[0] |= 2; /* set the LAA bit */
 	} else {
 		sc->atse_eth_addr[0] = 0x2;
 		sc->atse_eth_addr[1] = (hostid >> 24)	& 0xff;
 		sc->atse_eth_addr[2] = (hostid >> 16)	& 0xff;
 		sc->atse_eth_addr[3] = (hostid >> 8 )	& 0xff;
 		sc->atse_eth_addr[4] = hostid		& 0xff;
 		sc->atse_eth_addr[5] = sc->atse_unit	& 0xff;
 	}
 
 	return (0);
 }
 
 static int
 atse_set_eth_address(struct atse_softc *sc, int n)
 {
 	uint32_t v0, v1;
 
 	v0 = (sc->atse_eth_addr[3] << 24) | (sc->atse_eth_addr[2] << 16) |
 	    (sc->atse_eth_addr[1] << 8) | sc->atse_eth_addr[0];
 	v1 = (sc->atse_eth_addr[5] << 8) | sc->atse_eth_addr[4];
 
 	if (n & ATSE_ETH_ADDR_DEF) {
 		CSR_WRITE_4(sc, BASE_CFG_MAC_0, v0);
 		CSR_WRITE_4(sc, BASE_CFG_MAC_1, v1);
 	}
 	if (n & ATSE_ETH_ADDR_SUPP1) {
 		CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_0_0, v0);
 		CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_0_1, v1);
 	}
 	if (n & ATSE_ETH_ADDR_SUPP2) {
 		CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_1_0, v0);
 		CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_1_1, v1);
 	}
 	if (n & ATSE_ETH_ADDR_SUPP3) {
 		CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_2_0, v0);
 		CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_2_1, v1);
 	}
 	if (n & ATSE_ETH_ADDR_SUPP4) {
 		CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_3_0, v0);
 		CSR_WRITE_4(sc, SUPPL_ADDR_SMAC_3_1, v1);
 	}
 
 	return (0);
 }
 
 static int
 atse_reset(struct atse_softc *sc)
 {
 	uint32_t val4, mask;
 	uint16_t val;
 	int i;
 
 	/* 1. External PHY Initialization using MDIO. */
 	/*
 	 * We select the right MDIO space in atse_attach() and let MII do
 	 * anything else.
 	 */
 
 	/* 2. PCS Configuration Register Initialization. */
 	/* a. Set auto negotiation link timer to 1.6ms for SGMII. */
 	PCS_WRITE_2(sc, PCS_EXT_LINK_TIMER_0, 0x0D40);
 	PCS_WRITE_2(sc, PCS_EXT_LINK_TIMER_1, 0x0003);
 
 	/* b. Configure SGMII. */
 	val = PCS_EXT_IF_MODE_SGMII_ENA|PCS_EXT_IF_MODE_USE_SGMII_AN;
 	PCS_WRITE_2(sc, PCS_EXT_IF_MODE, val);
 
 	/* c. Enable auto negotiation. */
 	/* Ignore Bits 6,8,13; should be set,set,unset. */
 	val = PCS_READ_2(sc, PCS_CONTROL);
 	val &= ~(PCS_CONTROL_ISOLATE|PCS_CONTROL_POWERDOWN);
 	val &= ~PCS_CONTROL_LOOPBACK;		/* Make this a -link1 option? */
 	val |= PCS_CONTROL_AUTO_NEGOTIATION_ENABLE;
 	PCS_WRITE_2(sc, PCS_CONTROL, val);
 
 	/* d. PCS reset. */
 	val = PCS_READ_2(sc, PCS_CONTROL);
 	val |= PCS_CONTROL_RESET;
 	PCS_WRITE_2(sc, PCS_CONTROL, val);
 
 	/* Wait for reset bit to clear; i=100 is excessive. */
 	for (i = 0; i < 100; i++) {
 		val = PCS_READ_2(sc, PCS_CONTROL);
 		if ((val & PCS_CONTROL_RESET) == 0) {
 			break;
 		}
 		DELAY(10);
 	}
 
 	if ((val & PCS_CONTROL_RESET) != 0) {
 		device_printf(sc->atse_dev, "PCS reset timed out.\n");
 		return (ENXIO);
 	}
 
 	/* 3. MAC Configuration Register Initialization. */
 	/* a. Disable MAC transmit and receive datapath. */
 	mask = BASE_CFG_COMMAND_CONFIG_TX_ENA|BASE_CFG_COMMAND_CONFIG_RX_ENA;
 	val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 	val4 &= ~mask;
 	/* Samples in the manual do have the SW_RESET bit set here, why? */
 	CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4);
 	/* Wait for bits to be cleared; i=100 is excessive. */
 	for (i = 0; i < 100; i++) {
 		val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 		if ((val4 & mask) == 0) {
 			break;
 		}
 		DELAY(10);
 	}
 	if ((val4 & mask) != 0) {
 		device_printf(sc->atse_dev, "Disabling MAC TX/RX timed out.\n");
 		return (ENXIO);
 	}
 	/* b. MAC FIFO configuration. */
 	CSR_WRITE_4(sc, BASE_CFG_TX_SECTION_EMPTY, FIFO_DEPTH_TX - 16);
 	CSR_WRITE_4(sc, BASE_CFG_TX_ALMOST_FULL, 3);
 	CSR_WRITE_4(sc, BASE_CFG_TX_ALMOST_EMPTY, 8);
 	CSR_WRITE_4(sc, BASE_CFG_RX_SECTION_EMPTY, FIFO_DEPTH_RX - 16);
 	CSR_WRITE_4(sc, BASE_CFG_RX_ALMOST_FULL, 8);
 	CSR_WRITE_4(sc, BASE_CFG_RX_ALMOST_EMPTY, 8);
 #if 0
 	CSR_WRITE_4(sc, BASE_CFG_TX_SECTION_FULL, 16);
 	CSR_WRITE_4(sc, BASE_CFG_RX_SECTION_FULL, 16);
 #else
 	/* For store-and-forward mode, set this threshold to 0. */
 	CSR_WRITE_4(sc, BASE_CFG_TX_SECTION_FULL, 0);
 	CSR_WRITE_4(sc, BASE_CFG_RX_SECTION_FULL, 0);
 #endif
 	/* c. MAC address configuration. */
 	/* Also initialize supplementary addresses to our primary one. */
 	/* XXX-BZ FreeBSD really needs to grow an API for using these. */
 	atse_get_eth_address(sc);
 	atse_set_eth_address(sc, ATSE_ETH_ADDR_ALL);
 
 	/* d. MAC function configuration. */
 	CSR_WRITE_4(sc, BASE_CFG_FRM_LENGTH, 1518);	/* Default. */
 	CSR_WRITE_4(sc, BASE_CFG_TX_IPG_LENGTH, 12);
 	CSR_WRITE_4(sc, BASE_CFG_PAUSE_QUANT, 0xFFFF);
 
 	val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 	/*
 	 * If 1000BASE-X/SGMII PCS is initialized, set the ETH_SPEED (bit 3)
 	 * and ENA_10 (bit 25) in command_config register to 0.  If half duplex
 	 * is reported in the PHY/PCS status register, set the HD_ENA (bit 10)
 	 * to 1 in command_config register.
 	 * BZ: We shoot for 1000 instead.
 	 */
 #if 0
 	val4 |= BASE_CFG_COMMAND_CONFIG_ETH_SPEED;
 #else
 	val4 &= ~BASE_CFG_COMMAND_CONFIG_ETH_SPEED;
 #endif
 	val4 &= ~BASE_CFG_COMMAND_CONFIG_ENA_10;
 #if 0
 	/*
 	 * We do not want to set this, otherwise, we could not even send
 	 * random raw ethernet frames for various other research.  By default
 	 * FreeBSD will use the right ether source address.
 	 */
 	val4 |= BASE_CFG_COMMAND_CONFIG_TX_ADDR_INS;
 #endif
 	val4 |= BASE_CFG_COMMAND_CONFIG_PAD_EN;
 	val4 &= ~BASE_CFG_COMMAND_CONFIG_CRC_FWD;
 #if 0
 	val4 |= BASE_CFG_COMMAND_CONFIG_CNTL_FRM_ENA;
 #endif
 #if 1
 	val4 |= BASE_CFG_COMMAND_CONFIG_RX_ERR_DISC;
 #endif
 	val &= ~BASE_CFG_COMMAND_CONFIG_LOOP_ENA;		/* link0? */
 	CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4);
 
 	/*
 	 * Make sure we do not enable 32bit alignment;  FreeBSD cannot
 	 * cope with the additional padding (though we should!?).
 	 * Also make sure we get the CRC appended.
 	 */
 	val4 = CSR_READ_4(sc, TX_CMD_STAT);
 	val4 &= ~(TX_CMD_STAT_OMIT_CRC|TX_CMD_STAT_TX_SHIFT16);
 	CSR_WRITE_4(sc, TX_CMD_STAT, val4);
 
 	val4 = CSR_READ_4(sc, RX_CMD_STAT);
 	val4 &= ~RX_CMD_STAT_RX_SHIFT16;
 	val4 |= RX_CMD_STAT_RX_SHIFT16;
 	CSR_WRITE_4(sc, RX_CMD_STAT, val4);
 
 	/* e. Reset MAC. */
 	val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 	val4 |= BASE_CFG_COMMAND_CONFIG_SW_RESET;
 	CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4);
 	/* Wait for bits to be cleared; i=100 is excessive. */
 	for (i = 0; i < 100; i++) {
 		val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 		if ((val4 & BASE_CFG_COMMAND_CONFIG_SW_RESET) == 0) {
 			break;
 		}
 		DELAY(10);
 	}
 	if ((val4 & BASE_CFG_COMMAND_CONFIG_SW_RESET) != 0) {
 		device_printf(sc->atse_dev, "MAC reset timed out.\n");
 		return (ENXIO);
 	}
 
 	/* f. Enable MAC transmit and receive datapath. */
 	mask = BASE_CFG_COMMAND_CONFIG_TX_ENA|BASE_CFG_COMMAND_CONFIG_RX_ENA;
 	val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 	val4 |= mask;
 	CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4);
 	/* Wait for bits to be cleared; i=100 is excessive. */
 	for (i = 0; i < 100; i++) {
 		val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 		if ((val4 & mask) == mask) {
 			break;
 		}
 		DELAY(10);
 	}
 	if ((val4 & mask) != mask) {
 		device_printf(sc->atse_dev, "Enabling MAC TX/RX timed out.\n");
 		return (ENXIO);
 	}
 
 	return (0);
 }
 
 static void
 atse_init_locked(struct atse_softc *sc)
 {
 	struct ifnet *ifp;
 	struct mii_data *mii;
 	uint8_t *eaddr;
 
 	ATSE_LOCK_ASSERT(sc);
 	ifp = sc->atse_ifp;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		return;
 	}
 
 	/*
 	 * Must update the ether address if it changed.  Given we do not handle
 	 * it in atse_ioctl() but it is in the general framework, just always
 	 * do it here before atse_reset().
 	 */
 	eaddr = IF_LLADDR(sc->atse_ifp);
 	bcopy(eaddr, &sc->atse_eth_addr, ETHER_ADDR_LEN);
 
 	/* Make things grind to a halt, clean up, ... */
 	atse_stop_locked(sc);
 
 	atse_reset(sc);
 
 	/* ... and fire up the engine again. */
 	atse_rxfilter_locked(sc);
 
 	sc->atse_flags &= ATSE_FLAGS_LINK;	/* Preserve. */
 
 	mii = device_get_softc(sc->atse_miibus);
 
 	sc->atse_flags &= ~ATSE_FLAGS_LINK;
 	mii_mediachg(mii);
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	callout_reset(&sc->atse_tick, hz, atse_tick, sc);
 }
 
 static void
 atse_init(void *xsc)
 {
 	struct atse_softc *sc;
 
 	/*
 	 * XXXRW: There is some argument that we should immediately do RX
 	 * processing after enabling interrupts, or one may not fire if there
 	 * are buffered packets.
 	 */
 	sc = (struct atse_softc *)xsc;
 	ATSE_LOCK(sc);
 	atse_init_locked(sc);
 	ATSE_UNLOCK(sc);
 }
 
 static int
 atse_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct atse_softc *sc;
 	struct ifreq *ifr;
 	int error, mask;
 
 	error = 0;
 	sc = ifp->if_softc;
 	ifr = (struct ifreq *)data;
 
 	switch (command) {
 	case SIOCSIFFLAGS:
 		ATSE_LOCK(sc);
 		if (ifp->if_flags & IFF_UP) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
 			    ((ifp->if_flags ^ sc->atse_if_flags) &
 			    (IFF_PROMISC | IFF_ALLMULTI)) != 0)
 				atse_rxfilter_locked(sc);
 			else
 				atse_init_locked(sc);
 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 			atse_stop_locked(sc);
 		sc->atse_if_flags = ifp->if_flags;
 		ATSE_UNLOCK(sc);
 		break;
 	case SIOCSIFCAP:
 		ATSE_LOCK(sc);
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 		ATSE_UNLOCK(sc);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		ATSE_LOCK(sc);
 		atse_rxfilter_locked(sc);
 		ATSE_UNLOCK(sc);
 		break;
 	case SIOCGIFMEDIA:
 	case SIOCSIFMEDIA:
 	{
 		struct mii_data *mii;
 		struct ifreq *ifr;
 
 		mii = device_get_softc(sc->atse_miibus);
 		ifr = (struct ifreq *)data;
 		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
 		break;
 	}
 	default:
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return (error);
 }
 
 static void
 atse_tick(void *xsc)
 {
 	struct atse_softc *sc;
 	struct mii_data *mii;
 	struct ifnet *ifp;
 
 	sc = (struct atse_softc *)xsc;
 	ATSE_LOCK_ASSERT(sc);
 	ifp = sc->atse_ifp;
 
 	mii = device_get_softc(sc->atse_miibus);
 	mii_tick(mii);
 	if ((sc->atse_flags & ATSE_FLAGS_LINK) == 0) {
 		atse_miibus_statchg(sc->atse_dev);
 	}
 
 	callout_reset(&sc->atse_tick, hz, atse_tick, sc);
 }
 
 /*
  * Set media options.
  */
 static int
 atse_ifmedia_upd(struct ifnet *ifp)
 {
 	struct atse_softc *sc;
 	struct mii_data *mii;
 	struct mii_softc *miisc;
 	int error;
 
 	sc = ifp->if_softc;
 
 	ATSE_LOCK(sc);
 	mii = device_get_softc(sc->atse_miibus);
 	LIST_FOREACH(miisc, &mii->mii_phys, mii_list) {
 		PHY_RESET(miisc);
 	}
 	error = mii_mediachg(mii);
 	ATSE_UNLOCK(sc);
 
 	return (error);
 }
 
 /*
  * Report current media status.
  */
 static void
 atse_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct atse_softc *sc;
 	struct mii_data *mii;
 
 	sc = ifp->if_softc;
 
 	ATSE_LOCK(sc);
 	mii = device_get_softc(sc->atse_miibus);
 	mii_pollstat(mii);
 	ifmr->ifm_active = mii->mii_media_active;
 	ifmr->ifm_status = mii->mii_media_status;
 	ATSE_UNLOCK(sc);
 }
 
 static struct atse_mac_stats_regs {
 	const char *name;
 	const char *descr;	/* Mostly copied from Altera datasheet. */
 } atse_mac_stats_regs[] = {
 	[0x1a] =
 	{ "aFramesTransmittedOK",
 	    "The number of frames that are successfully transmitted including "
 	    "the pause frames." },
 	{ "aFramesReceivedOK",
 	    "The number of frames that are successfully received including the "
 	    "pause frames." },
 	{ "aFrameCheckSequenceErrors",
 	    "The number of receive frames with CRC error." },
 	{ "aAlignmentErrors",
 	    "The number of receive frames with alignment error." },
 	{ "aOctetsTransmittedOK",
 	    "The lower 32 bits of the number of data and padding octets that "
 	    "are successfully transmitted." },
 	{ "aOctetsReceivedOK",
 	    "The lower 32 bits of the number of data and padding octets that "
 	    " are successfully received." },
 	{ "aTxPAUSEMACCtrlFrames",
 	    "The number of pause frames transmitted." },
 	{ "aRxPAUSEMACCtrlFrames",
 	    "The number received pause frames received." },
 	{ "ifInErrors",
 	    "The number of errored frames received." },
 	{ "ifOutErrors",
 	    "The number of transmit frames with either a FIFO overflow error, "
 	    "a FIFO underflow error, or a error defined by the user "
 	    "application." },
 	{ "ifInUcastPkts",
 	    "The number of valid unicast frames received." },
 	{ "ifInMulticastPkts",
 	    "The number of valid multicast frames received. The count does "
 	    "not include pause frames." },
 	{ "ifInBroadcastPkts",
 	    "The number of valid broadcast frames received." },
 	{ "ifOutDiscards",
 	    "This statistics counter is not in use.  The MAC function does not "
 	    "discard frames that are written to the FIFO buffer by the user "
 	    "application." },
 	{ "ifOutUcastPkts",
 	    "The number of valid unicast frames transmitted." },
 	{ "ifOutMulticastPkts",
 	    "The number of valid multicast frames transmitted, excluding pause "
 	    "frames." },
 	{ "ifOutBroadcastPkts",
 	    "The number of valid broadcast frames transmitted." },
 	{ "etherStatsDropEvents",
 	    "The number of frames that are dropped due to MAC internal errors "
 	    "when FIFO buffer overflow persists." },
 	{ "etherStatsOctets",
 	    "The lower 32 bits of the total number of octets received. This "
 	    "count includes both good and errored frames." },
 	{ "etherStatsPkts",
 	    "The total number of good and errored frames received." },
 	{ "etherStatsUndersizePkts",
 	    "The number of frames received with length less than 64 bytes. "
 	    "This count does not include errored frames." },
 	{ "etherStatsOversizePkts",
 	    "The number of frames received that are longer than the value "
 	    "configured in the frm_length register. This count does not "
 	    "include errored frames." },
 	{ "etherStatsPkts64Octets",
 	    "The number of 64-byte frames received. This count includes good "
 	    "and errored frames." },
 	{ "etherStatsPkts65to127Octets",
 	    "The number of received good and errored frames between the length "
 	    "of 65 and 127 bytes." },
 	{ "etherStatsPkts128to255Octets",
 	    "The number of received good and errored frames between the length "
 	    "of 128 and 255 bytes." },
 	{ "etherStatsPkts256to511Octets",
 	    "The number of received good and errored frames between the length "
 	    "of 256 and 511 bytes." },
 	{ "etherStatsPkts512to1023Octets",
 	    "The number of received good and errored frames between the length "
 	    "of 512 and 1023 bytes." },
 	{ "etherStatsPkts1024to1518Octets",
 	    "The number of received good and errored frames between the length "
 	    "of 1024 and 1518 bytes." },
 	{ "etherStatsPkts1519toXOctets",
 	    "The number of received good and errored frames between the length "
 	    "of 1519 and the maximum frame length configured in the frm_length "
 	    "register." },
 	{ "etherStatsJabbers",
 	    "Too long frames with CRC error." },
 	{ "etherStatsFragments",
 	    "Too short frames with CRC error." },
 	/* 0x39 unused, 0x3a/b non-stats. */
 	[0x3c] =
 	/* Extended Statistics Counters */
 	{ "msb_aOctetsTransmittedOK",
 	    "Upper 32 bits of the number of data and padding octets that are "
 	    "successfully transmitted." },
 	{ "msb_aOctetsReceivedOK",
 	    "Upper 32 bits of the number of data and padding octets that are "
 	    "successfully received." },
 	{ "msb_etherStatsOctets",
 	    "Upper 32 bits of the total number of octets received. This count "
 	    "includes both good and errored frames." }
 };
 
 static int
 sysctl_atse_mac_stats_proc(SYSCTL_HANDLER_ARGS)
 {
 	struct atse_softc *sc;
 	int error, offset, s;
 
 	sc = arg1;
 	offset = arg2;
 
 	s = CSR_READ_4(sc, offset);
 	error = sysctl_handle_int(oidp, &s, 0, req);
 	if (error || !req->newptr) {
 		return (error);
 	}
 
 	return (0);
 }
 
 static struct atse_rx_err_stats_regs {
 	const char *name;
 	const char *descr;
 } atse_rx_err_stats_regs[] = {
 
 #define	ATSE_RX_ERR_FIFO_THRES_EOP	0 /* FIFO threshold reached, on EOP. */
 #define	ATSE_RX_ERR_ELEN		1 /* Frame/payload length not valid. */
 #define	ATSE_RX_ERR_CRC32		2 /* CRC-32 error. */
 #define	ATSE_RX_ERR_FIFO_THRES_TRUNC	3 /* FIFO thresh., truncated frame. */
 #define	ATSE_RX_ERR_4			4 /* ? */
 #define	ATSE_RX_ERR_5			5 /* / */
 
 	{ "rx_err_fifo_thres_eop",
 	    "FIFO threshold reached, reported on EOP." },
 	{ "rx_err_fifo_elen",
 	    "Frame or payload length not valid." },
 	{ "rx_err_fifo_crc32",
 	    "CRC-32 error." },
 	{ "rx_err_fifo_thres_trunc",
 	    "FIFO threshold reached, truncated frame" },
 	{ "rx_err_4",
 	    "?" },
 	{ "rx_err_5",
 	    "?" },
 };
 
 static int
 sysctl_atse_rx_err_stats_proc(SYSCTL_HANDLER_ARGS)
 {
 	struct atse_softc *sc;
 	int error, offset, s;
 
 	sc = arg1;
 	offset = arg2;
 
 	s = sc->atse_rx_err[offset];
 	error = sysctl_handle_int(oidp, &s, 0, req);
 	if (error || !req->newptr) {
 		return (error);
 	}
 
 	return (0);
 }
 
 static void
 atse_sysctl_stats_attach(device_t dev)
 {
 	struct sysctl_ctx_list *sctx;
 	struct sysctl_oid *soid;
 	struct atse_softc *sc;
 	int i;
 
 	sc = device_get_softc(dev);
 	sctx = device_get_sysctl_ctx(dev);
 	soid = device_get_sysctl_tree(dev);
 
 	/* MAC statistics. */
 	for (i = 0; i < nitems(atse_mac_stats_regs); i++) {
 		if (atse_mac_stats_regs[i].name == NULL ||
 		    atse_mac_stats_regs[i].descr == NULL) {
 			continue;
 		}
 
 		SYSCTL_ADD_PROC(sctx, SYSCTL_CHILDREN(soid), OID_AUTO,
 		    atse_mac_stats_regs[i].name, CTLTYPE_UINT|CTLFLAG_RD,
 		    sc, i, sysctl_atse_mac_stats_proc, "IU",
 		    atse_mac_stats_regs[i].descr);
 	}
 
 	/* rx_err[]. */
 	for (i = 0; i < ATSE_RX_ERR_MAX; i++) {
 		if (atse_rx_err_stats_regs[i].name == NULL ||
 		    atse_rx_err_stats_regs[i].descr == NULL) {
 			continue;
 		}
 
 		SYSCTL_ADD_PROC(sctx, SYSCTL_CHILDREN(soid), OID_AUTO,
 		    atse_rx_err_stats_regs[i].name, CTLTYPE_UINT|CTLFLAG_RD,
 		    sc, i, sysctl_atse_rx_err_stats_proc, "IU",
 		    atse_rx_err_stats_regs[i].descr);
 	}
 }
 
 /*
  * Generic device handling routines.
  */
 int
 atse_attach(device_t dev)
 {
 	struct atse_softc *sc;
 	struct ifnet *ifp;
 	uint32_t caps;
 	int error;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 
 	/* Get xDMA controller */
 	sc->xdma_tx = xdma_ofw_get(sc->dev, "tx");
 	if (sc->xdma_tx == NULL) {
 		device_printf(dev, "Can't find DMA controller.\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * Only the final (EOP) write can be less than the "symbols per beat"
 	 * value, so we have to defragment the mbuf chain.
 	 * Chapter 15. On-Chip FIFO Memory Core.
 	 * Embedded Peripherals IP User Guide.
 	 */
-	caps = XCHAN_CAP_BUSDMA_NOSEG;
+	caps = XCHAN_CAP_NOSEG;
 
 	/* Alloc xDMA virtual channel. */
 	sc->xchan_tx = xdma_channel_alloc(sc->xdma_tx, caps);
 	if (sc->xchan_tx == NULL) {
 		device_printf(dev, "Can't alloc virtual DMA channel.\n");
 		return (ENXIO);
 	}
 
 	/* Setup interrupt handler. */
 	error = xdma_setup_intr(sc->xchan_tx, atse_xdma_tx_intr, sc, &sc->ih_tx);
 	if (error) {
 		device_printf(sc->dev,
 		    "Can't setup xDMA interrupt handler.\n");
 		return (ENXIO);
 	}
 
 	xdma_prep_sg(sc->xchan_tx,
 	    TX_QUEUE_SIZE,	/* xchan requests queue size */
 	    MCLBYTES,	/* maxsegsize */
 	    8,		/* maxnsegs */
 	    16,		/* alignment */
 	    0,		/* boundary */
 	    BUS_SPACE_MAXADDR_32BIT,
 	    BUS_SPACE_MAXADDR);
 
 	/* Get RX xDMA controller */
 	sc->xdma_rx = xdma_ofw_get(sc->dev, "rx");
 	if (sc->xdma_rx == NULL) {
 		device_printf(dev, "Can't find DMA controller.\n");
 		return (ENXIO);
 	}
 
 	/* Alloc xDMA virtual channel. */
 	sc->xchan_rx = xdma_channel_alloc(sc->xdma_rx, caps);
 	if (sc->xchan_rx == NULL) {
 		device_printf(dev, "Can't alloc virtual DMA channel.\n");
 		return (ENXIO);
 	}
 
 	/* Setup interrupt handler. */
 	error = xdma_setup_intr(sc->xchan_rx, atse_xdma_rx_intr, sc, &sc->ih_rx);
 	if (error) {
 		device_printf(sc->dev,
 		    "Can't setup xDMA interrupt handler.\n");
 		return (ENXIO);
 	}
 
 	xdma_prep_sg(sc->xchan_rx,
 	    RX_QUEUE_SIZE,	/* xchan requests queue size */
 	    MCLBYTES,		/* maxsegsize */
 	    1,			/* maxnsegs */
 	    16,			/* alignment */
 	    0,			/* boundary */
 	    BUS_SPACE_MAXADDR_32BIT,
 	    BUS_SPACE_MAXADDR);
 
 	mtx_init(&sc->br_mtx, "buf ring mtx", NULL, MTX_DEF);
 	sc->br = buf_ring_alloc(BUFRING_SIZE, M_DEVBUF,
 	    M_NOWAIT, &sc->br_mtx);
 	if (sc->br == NULL) {
 		return (ENOMEM);
 	}
 
 	atse_ethernet_option_bits_read(dev);
 
 	mtx_init(&sc->atse_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
 	    MTX_DEF);
 
 	callout_init_mtx(&sc->atse_tick, &sc->atse_mtx, 0);
 
 	/*
 	 * We are only doing single-PHY with this driver currently.  The
 	 * defaults would be right so that BASE_CFG_MDIO_ADDR0 points to the
 	 * 1st PHY address (0) apart from the fact that BMCR0 is always
 	 * the PCS mapping, so we always use BMCR1. See Table 5-1 0xA0-0xBF.
 	 */
 #if 0	/* Always PCS. */
 	sc->atse_bmcr0 = MDIO_0_START;
 	CSR_WRITE_4(sc, BASE_CFG_MDIO_ADDR0, 0x00);
 #endif
 	/* Always use matching PHY for atse[0..]. */
 	sc->atse_phy_addr = device_get_unit(dev);
 	sc->atse_bmcr1 = MDIO_1_START;
 	CSR_WRITE_4(sc, BASE_CFG_MDIO_ADDR1, sc->atse_phy_addr);
 
 	/* Reset the adapter. */
 	atse_reset(sc);
 
 	/* Setup interface. */
 	ifp = sc->atse_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		device_printf(dev, "if_alloc() failed\n");
 		error = ENOSPC;
 		goto err;
 	}
 	ifp->if_softc = sc;
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = atse_ioctl;
 	ifp->if_transmit = atse_transmit;
 	ifp->if_qflush = atse_qflush;
 	ifp->if_init = atse_init;
 	IFQ_SET_MAXLEN(&ifp->if_snd, ATSE_TX_LIST_CNT - 1);
 	ifp->if_snd.ifq_drv_maxlen = ATSE_TX_LIST_CNT - 1;
 	IFQ_SET_READY(&ifp->if_snd);
 
 	/* MII setup. */
 	error = mii_attach(dev, &sc->atse_miibus, ifp, atse_ifmedia_upd,
 	    atse_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0);
 	if (error != 0) {
 		device_printf(dev, "attaching PHY failed: %d\n", error);
 		goto err;
 	}
 
 	/* Call media-independent attach routine. */
 	ether_ifattach(ifp, sc->atse_eth_addr);
 
 	/* Tell the upper layer(s) about vlan mtu support. */
 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
 	ifp->if_capabilities |= IFCAP_VLAN_MTU;
 	ifp->if_capenable = ifp->if_capabilities;
 
 err:
 	if (error != 0) {
 		atse_detach(dev);
 	}
 
 	if (error == 0) {
 		atse_sysctl_stats_attach(dev);
 	}
 
 	atse_rx_enqueue(sc, NUM_RX_MBUF);
 	xdma_queue_submit(sc->xchan_rx);
 
 	return (error);
 }
 
 static int
 atse_detach(device_t dev)
 {
 	struct atse_softc *sc;
 	struct ifnet *ifp;
 
 	sc = device_get_softc(dev);
 	KASSERT(mtx_initialized(&sc->atse_mtx), ("%s: mutex not initialized",
 	    device_get_nameunit(dev)));
 	ifp = sc->atse_ifp;
 
 	/* Only cleanup if attach succeeded. */
 	if (device_is_attached(dev)) {
 		ATSE_LOCK(sc);
 		atse_stop_locked(sc);
 		ATSE_UNLOCK(sc);
 		callout_drain(&sc->atse_tick);
 		ether_ifdetach(ifp);
 	}
 	if (sc->atse_miibus != NULL) {
 		device_delete_child(dev, sc->atse_miibus);
 	}
 
 	if (ifp != NULL) {
 		if_free(ifp);
 	}
 
 	mtx_destroy(&sc->atse_mtx);
+
+	xdma_channel_free(sc->xchan_tx);
+	xdma_channel_free(sc->xchan_rx);
+	xdma_put(sc->xdma_tx);
+	xdma_put(sc->xdma_rx);
 
 	return (0);
 }
 
 /* Shared between nexus and fdt implementation. */
 void
 atse_detach_resources(device_t dev)
 {
 	struct atse_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	if (sc->atse_mem_res != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY, sc->atse_mem_rid,
 		    sc->atse_mem_res);
 		sc->atse_mem_res = NULL;
 	}
 }
 
 int
 atse_detach_dev(device_t dev)
 {
 	int error;
 
 	error = atse_detach(dev);
 	if (error) {
 		/* We are basically in undefined state now. */
 		device_printf(dev, "atse_detach() failed: %d\n", error);
 		return (error);
 	}
 
 	atse_detach_resources(dev);
 
 	return (0);
 }
 
 int
 atse_miibus_readreg(device_t dev, int phy, int reg)
 {
 	struct atse_softc *sc;
 	int val;
 
 	sc = device_get_softc(dev);
 
 	/*
 	 * We currently do not support re-mapping of MDIO space on-the-fly
 	 * but de-facto hard-code the phy#.
 	 */
 	if (phy != sc->atse_phy_addr) {
 		return (0);
 	}
 
 	val = PHY_READ_2(sc, reg);
 
 	return (val);
 }
 
 int
 atse_miibus_writereg(device_t dev, int phy, int reg, int data)
 {
 	struct atse_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	/*
 	 * We currently do not support re-mapping of MDIO space on-the-fly
 	 * but de-facto hard-code the phy#.
 	 */
 	if (phy != sc->atse_phy_addr) {
 		return (0);
 	}
 
 	PHY_WRITE_2(sc, reg, data);
 	return (0);
 }
 
 void
 atse_miibus_statchg(device_t dev)
 {
 	struct atse_softc *sc;
 	struct mii_data *mii;
 	struct ifnet *ifp;
 	uint32_t val4;
 
 	sc = device_get_softc(dev);
 	ATSE_LOCK_ASSERT(sc);
 
 	mii = device_get_softc(sc->atse_miibus);
 	ifp = sc->atse_ifp;
 	if (mii == NULL || ifp == NULL ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		return;
 	}
 
 	val4 = CSR_READ_4(sc, BASE_CFG_COMMAND_CONFIG);
 
 	/* Assume no link. */
 	sc->atse_flags &= ~ATSE_FLAGS_LINK;
 
 	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
 	    (IFM_ACTIVE | IFM_AVALID)) {
 
 		switch (IFM_SUBTYPE(mii->mii_media_active)) {
 		case IFM_10_T:
 			val4 |= BASE_CFG_COMMAND_CONFIG_ENA_10;
 			val4 &= ~BASE_CFG_COMMAND_CONFIG_ETH_SPEED;
 			sc->atse_flags |= ATSE_FLAGS_LINK;
 			break;
 		case IFM_100_TX:
 			val4 &= ~BASE_CFG_COMMAND_CONFIG_ENA_10;
 			val4 &= ~BASE_CFG_COMMAND_CONFIG_ETH_SPEED;
 			sc->atse_flags |= ATSE_FLAGS_LINK;
 			break;
 		case IFM_1000_T:
 			val4 &= ~BASE_CFG_COMMAND_CONFIG_ENA_10;
 			val4 |= BASE_CFG_COMMAND_CONFIG_ETH_SPEED;
 			sc->atse_flags |= ATSE_FLAGS_LINK;
 			break;
 		default:
 			break;
 		}
 	}
 
 	if ((sc->atse_flags & ATSE_FLAGS_LINK) == 0) {
 		/* Need to stop the MAC? */
 		return;
 	}
 
 	if (IFM_OPTIONS(mii->mii_media_active & IFM_FDX) != 0) {
 		val4 &= ~BASE_CFG_COMMAND_CONFIG_HD_ENA;
 	} else {
 		val4 |= BASE_CFG_COMMAND_CONFIG_HD_ENA;
 	}
 
 	/* flow control? */
 
 	/* Make sure the MAC is activated. */
 	val4 |= BASE_CFG_COMMAND_CONFIG_TX_ENA;
 	val4 |= BASE_CFG_COMMAND_CONFIG_RX_ENA;
 
 	CSR_WRITE_4(sc, BASE_CFG_COMMAND_CONFIG, val4);
 }
 
 MODULE_DEPEND(atse, ether, 1, 1, 1);
 MODULE_DEPEND(atse, miibus, 1, 1, 1);
Index: head/sys/dev/altera/softdma/softdma.c
===================================================================
--- head/sys/dev/altera/softdma/softdma.c	(revision 346895)
+++ head/sys/dev/altera/softdma/softdma.c	(revision 346896)
@@ -1,864 +1,888 @@
 /*-
  * Copyright (c) 2017-2018 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
  * ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /* This is a driver for the SoftDMA device built using the Altera FIFO component. */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_platform.h"
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 
 #include 
 
 #ifdef FDT
 #include 
 #include 
 #include 
 #endif
 
 #include 
 
 #include 
 #include "xdma_if.h"
 
 #define SOFTDMA_DEBUG
 #undef SOFTDMA_DEBUG
 
 #ifdef SOFTDMA_DEBUG
 #define dprintf(fmt, ...)  printf(fmt, ##__VA_ARGS__)
 #else
 #define dprintf(fmt, ...)
 #endif
 
 #define	AVALON_FIFO_TX_BASIC_OPTS_DEPTH		16
 #define	SOFTDMA_NCHANNELS			1
 #define	CONTROL_GEN_SOP				(1 << 0)
 #define	CONTROL_GEN_EOP				(1 << 1)
 #define	CONTROL_OWN				(1 << 31)
 
 #define	SOFTDMA_RX_EVENTS	\
 	(A_ONCHIP_FIFO_MEM_CORE_INTR_FULL	| \
 	 A_ONCHIP_FIFO_MEM_CORE_INTR_OVERFLOW	| \
 	 A_ONCHIP_FIFO_MEM_CORE_INTR_UNDERFLOW)
 #define	SOFTDMA_TX_EVENTS	\
 	(A_ONCHIP_FIFO_MEM_CORE_INTR_EMPTY	| \
  	A_ONCHIP_FIFO_MEM_CORE_INTR_OVERFLOW	| \
  	A_ONCHIP_FIFO_MEM_CORE_INTR_UNDERFLOW)
 
 struct softdma_channel {
 	struct softdma_softc	*sc;
 	struct mtx		mtx;
 	xdma_channel_t		*xchan;
 	struct proc		*p;
 	int			used;
 	int			index;
 	int			run;
 	uint32_t		idx_tail;
 	uint32_t		idx_head;
 	struct softdma_desc	*descs;
 
 	uint32_t		descs_num;
 	uint32_t		descs_used_count;
 };
 
 struct softdma_desc {
 	uint64_t		src_addr;
 	uint64_t		dst_addr;
 	uint32_t		len;
 	uint32_t		access_width;
 	uint32_t		count;
 	uint16_t		src_incr;
 	uint16_t		dst_incr;
 	uint32_t		direction;
 	struct softdma_desc	*next;
 	uint32_t		transfered;
 	uint32_t		status;
 	uint32_t		reserved;
 	uint32_t		control;
 };
 
 struct softdma_softc {
 	device_t		dev;
 	struct resource		*res[3];
 	bus_space_tag_t		bst;
 	bus_space_handle_t	bsh;
 	bus_space_tag_t		bst_c;
 	bus_space_handle_t	bsh_c;
 	void			*ih;
 	struct softdma_channel	channels[SOFTDMA_NCHANNELS];
 };
 
 static struct resource_spec softdma_spec[] = {
 	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },	/* fifo */
 	{ SYS_RES_MEMORY,	1,	RF_ACTIVE },	/* core */
 	{ SYS_RES_IRQ,		0,	RF_ACTIVE },
 	{ -1, 0 }
 };
 
 static int softdma_probe(device_t dev);
 static int softdma_attach(device_t dev);
 static int softdma_detach(device_t dev);
 
 static inline uint32_t
 softdma_next_desc(struct softdma_channel *chan, uint32_t curidx)
 {
 
 	return ((curidx + 1) % chan->descs_num);
 }
 
 static void
 softdma_mem_write(struct softdma_softc *sc, uint32_t reg, uint32_t val)
 {
 
 	bus_write_4(sc->res[0], reg, htole32(val));
 }
 
 static uint32_t
 softdma_mem_read(struct softdma_softc *sc, uint32_t reg)
 {
 	uint32_t val;
 
 	val = bus_read_4(sc->res[0], reg);
 
 	return (le32toh(val));
 }
 
 static void
 softdma_memc_write(struct softdma_softc *sc, uint32_t reg, uint32_t val)
 {
 
 	bus_write_4(sc->res[1], reg, htole32(val));
 }
 
 static uint32_t
 softdma_memc_read(struct softdma_softc *sc, uint32_t reg)
 {
 	uint32_t val;
 
 	val = bus_read_4(sc->res[1], reg);
 
 	return (le32toh(val));
 }
 
 static uint32_t
 softdma_fill_level(struct softdma_softc *sc)
 {
 	uint32_t val;
 
 	val = softdma_memc_read(sc,
 	    A_ONCHIP_FIFO_MEM_CORE_STATUS_REG_FILL_LEVEL);
 
 	return (val);
 }
 
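+/*
+ * Busy-wait while the TX FIFO is completely full; return its fill level
+ * once there is room for at least one more word.
+ */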
+static uint32_t
+fifo_fill_level_wait(struct softdma_softc *sc)
+{
+	uint32_t val;
+
+	do
+		val = softdma_fill_level(sc);
+	while (val == AVALON_FIFO_TX_BASIC_OPTS_DEPTH);
+
+	return (val);
+}
+
 static void
 softdma_intr(void *arg)
 {
 	struct softdma_channel *chan;
 	struct softdma_softc *sc;
 	int reg;
 	int err;
 
 	sc = arg;
 
 	chan = &sc->channels[0];
 
 	reg = softdma_memc_read(sc, A_ONCHIP_FIFO_MEM_CORE_STATUS_REG_EVENT);
 
 	if (reg & (A_ONCHIP_FIFO_MEM_CORE_EVENT_OVERFLOW | 
 	    A_ONCHIP_FIFO_MEM_CORE_EVENT_UNDERFLOW)) {
 		/* Errors */
 		err = (((reg & A_ONCHIP_FIFO_MEM_CORE_ERROR_MASK) >> \
 		    A_ONCHIP_FIFO_MEM_CORE_ERROR_SHIFT) & 0xff);
 	}
 
 	if (reg != 0) {
 		softdma_memc_write(sc,
 		    A_ONCHIP_FIFO_MEM_CORE_STATUS_REG_EVENT, reg);
 		chan->run = 1;
 		wakeup(chan);
 	}
 }
 
 static int
 softdma_probe(device_t dev)
 {
 
 	if (!ofw_bus_status_okay(dev))
 		return (ENXIO);
 
 	if (!ofw_bus_is_compatible(dev, "altr,softdma"))
 		return (ENXIO);
 
 	device_set_desc(dev, "SoftDMA");
 
 	return (BUS_PROBE_DEFAULT);
 }
 
 static int
 softdma_attach(device_t dev)
 {
 	struct softdma_softc *sc;
 	phandle_t xref, node;
 	int err;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 
 	if (bus_alloc_resources(dev, softdma_spec, sc->res)) {
 		device_printf(dev,
 		    "could not allocate resources for device\n");
 		return (ENXIO);
 	}
 
 	/* FIFO memory interface */
 	sc->bst = rman_get_bustag(sc->res[0]);
 	sc->bsh = rman_get_bushandle(sc->res[0]);
 
 	/* FIFO control memory interface */
 	sc->bst_c = rman_get_bustag(sc->res[1]);
 	sc->bsh_c = rman_get_bushandle(sc->res[1]);
 
 	/* Setup interrupt handler */
 	err = bus_setup_intr(dev, sc->res[2], INTR_TYPE_MISC | INTR_MPSAFE,
 	    NULL, softdma_intr, sc, &sc->ih);
 	if (err) {
 		device_printf(dev, "Unable to alloc interrupt resource.\n");
 		return (ENXIO);
 	}
 
 	node = ofw_bus_get_node(dev);
 	xref = OF_xref_from_node(node);
 	OF_device_register_xref(xref, dev);
 
 	return (0);
 }
 
 static int
 softdma_detach(device_t dev)
 {
 	struct softdma_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	return (0);
 }
 
 static int
 softdma_process_tx(struct softdma_channel *chan, struct softdma_desc *desc)
 {
 	struct softdma_softc *sc;
-	uint32_t src_offs, dst_offs;
+	uint64_t addr;
+	uint64_t buf;
+	uint32_t word;
+	uint32_t missing;
 	uint32_t reg;
-	uint32_t fill_level;
-	uint32_t leftm;
-	uint32_t tmp;
-	uint32_t val;
-	uint32_t c;
+	int got_bits;
+	int len;
 
 	sc = chan->sc;
 
-	fill_level = softdma_fill_level(sc);
-	while (fill_level == AVALON_FIFO_TX_BASIC_OPTS_DEPTH)
-		fill_level = softdma_fill_level(sc);
+	fifo_fill_level_wait(sc);
 
 	/* Set start of packet. */
-	if (desc->control & CONTROL_GEN_SOP) {
-		reg = 0;
-		reg |= A_ONCHIP_FIFO_MEM_CORE_SOP;
-		softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA, reg);
-	}
+	if (desc->control & CONTROL_GEN_SOP)
+		softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA,
+		    A_ONCHIP_FIFO_MEM_CORE_SOP);
 
-	src_offs = dst_offs = 0;
-	c = 0;
-	while ((desc->len - c) >= 4) {
-		val = *(uint32_t *)(desc->src_addr + src_offs);
-		bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, val);
-		if (desc->src_incr)
-			src_offs += 4;
-		if (desc->dst_incr)
-			dst_offs += 4;
-		fill_level += 1;
+	got_bits = 0;
+	buf = 0;
 
-		while (fill_level == AVALON_FIFO_TX_BASIC_OPTS_DEPTH) {
-			fill_level = softdma_fill_level(sc);
-		}
-		c += 4;
+	addr = desc->src_addr;
+	len = desc->len;
+
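+	/*
+	 * The source buffer may not be 32-bit aligned.  Accumulate bytes in a
+	 * 64-bit shift register ("buf"), tracking the number of buffered bits
+	 * in "got_bits", and push complete 32-bit words to the FIFO below.
+	 */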
+	if (addr & 1) {
+		buf = (buf << 8) | *(uint8_t *)addr;
+		got_bits += 8;
+		addr += 1;
+		len -= 1;
 	}
 
-	val = 0;
-	leftm = (desc->len - c);
+	if (len >= 2 && addr & 2) {
+		buf = (buf << 16) | *(uint16_t *)addr;
+		got_bits += 16;
+		addr += 2;
+		len -= 2;
+	}
 
-	switch (leftm) {
-	case 1:
-		val = *(uint8_t *)(desc->src_addr + src_offs);
-		val <<= 24;
-		src_offs += 1;
-		break;
-	case 2:
-	case 3:
-		val = *(uint16_t *)(desc->src_addr + src_offs);
-		val <<= 16;
-		src_offs += 2;
+	while (len >= 4) {
+		buf = (buf << 32) | (uint64_t)*(uint32_t *)addr;
+		addr += 4;
+		len -= 4;
+		word = (uint32_t)((buf >> got_bits) & 0xffffffff);
 
-		if (leftm == 3) {
-			tmp = *(uint8_t *)(desc->src_addr + src_offs);
-			val |= (tmp << 8);
-			src_offs += 1;
-		}
-		break;
-	case 0:
-	default:
-		break;
+		fifo_fill_level_wait(sc);
+		if (len == 0 && got_bits == 0 &&
+		    (desc->control & CONTROL_GEN_EOP) != 0)
+			softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA,
+			    A_ONCHIP_FIFO_MEM_CORE_EOP);
+		bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, word);
 	}
 
-	/* Set end of packet. */
-	reg = 0;
-	if (desc->control & CONTROL_GEN_EOP)
-		reg |= A_ONCHIP_FIFO_MEM_CORE_EOP;
-	reg |= ((4 - leftm) << A_ONCHIP_FIFO_MEM_CORE_EMPTY_SHIFT);
-	softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA, reg);
+	if (len & 2) {
+		buf = (buf << 16) | *(uint16_t *)addr;
+		got_bits += 16;
+		addr += 2;
+		len -= 2;
+	}
 
-	/* Ensure there is a FIFO entry available. */
-	fill_level = softdma_fill_level(sc);
-	while (fill_level == AVALON_FIFO_TX_BASIC_OPTS_DEPTH)
-		fill_level = softdma_fill_level(sc);
+	if (len & 1) {
+		buf = (buf << 8) | *(uint8_t *)addr;
+		got_bits += 8;
+		addr += 1;
+		len -= 1;
+	}
 
-	/* Final write */
-	bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, val);
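+	/* The tail bytes may have completed another full 32-bit word; flush it. */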
+	if (got_bits >= 32) {
+		got_bits -= 32;
+		word = (uint32_t)((buf >> got_bits) & 0xffffffff);
 
-	return (dst_offs);
+		fifo_fill_level_wait(sc);
+		if (len == 0 && got_bits == 0 &&
+		    (desc->control & CONTROL_GEN_EOP) != 0)
+			softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA,
+			    A_ONCHIP_FIFO_MEM_CORE_EOP);
+		bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, word);
+	}
+
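+	/*
+	 * Write any remaining partial word, marking EOP and the number of
+	 * empty (unused) bytes in the final word.
+	 */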
+	if (got_bits) {
+		missing = 32 - got_bits;
+		got_bits /= 8;
+
+		fifo_fill_level_wait(sc);
+		reg = A_ONCHIP_FIFO_MEM_CORE_EOP |
+		    ((4 - got_bits) << A_ONCHIP_FIFO_MEM_CORE_EMPTY_SHIFT);
+		softdma_mem_write(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA, reg);
+		word = (uint32_t)((buf << missing) & 0xffffffff);
+		bus_write_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA, word);
+	}
+
+	return (desc->len);
 }
 
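 /*
  * Drain one received packet from the FIFO into the descriptor's buffer,
  * using the SOP/EOP/empty metadata to delimit it; returns the number of
  * bytes copied, 0 if the FIFO is empty, or -1 on error.
  */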
 static int
 softdma_process_rx(struct softdma_channel *chan, struct softdma_desc *desc)
 {
 	uint32_t src_offs, dst_offs;
 	struct softdma_softc *sc;
 	uint32_t fill_level;
 	uint32_t empty;
 	uint32_t meta;
 	uint32_t data;
 	int sop_rcvd;
 	int timeout;
 	size_t len;
 	int error;
 
 	sc = chan->sc;
 	empty = 0;
 	src_offs = dst_offs = 0;
 	error = 0;
 
 	fill_level = softdma_fill_level(sc);
 	if (fill_level == 0) {
 		/* Nothing to receive. */
 		return (0);
 	}
 
 	len = desc->len;
 
 	sop_rcvd = 0;
 	while (fill_level) {
 		empty = 0;
 		data = bus_read_4(sc->res[0], A_ONCHIP_FIFO_MEM_CORE_DATA);
 		meta = softdma_mem_read(sc, A_ONCHIP_FIFO_MEM_CORE_METADATA);
 
 		if (meta & A_ONCHIP_FIFO_MEM_CORE_ERROR_MASK) {
 			error = 1;
 			break;
 		}
 
 		if ((meta & A_ONCHIP_FIFO_MEM_CORE_CHANNEL_MASK) != 0) {
 			error = 1;
 			break;
 		}
 
 		if (meta & A_ONCHIP_FIFO_MEM_CORE_SOP) {
 			sop_rcvd = 1;
 		}
 
 		if (meta & A_ONCHIP_FIFO_MEM_CORE_EOP) {
 			empty = (meta & A_ONCHIP_FIFO_MEM_CORE_EMPTY_MASK) >>
 			    A_ONCHIP_FIFO_MEM_CORE_EMPTY_SHIFT;
 		}
 
 		if (sop_rcvd == 0) {
 			error = 1;
 			break;
 		}
 
 		if (empty == 0) {
 			*(uint32_t *)(desc->dst_addr + dst_offs) = data;
 			dst_offs += 4;
 		} else if (empty == 1) {
 			*(uint16_t *)(desc->dst_addr + dst_offs) =
 			    ((data >> 16) & 0xffff);
 			dst_offs += 2;
 
 			*(uint8_t *)(desc->dst_addr + dst_offs) =
 			    ((data >> 8) & 0xff);
 			dst_offs += 1;
 		} else {
 			panic("empty %d\n", empty);
 		}
 
 		if (meta & A_ONCHIP_FIFO_MEM_CORE_EOP)
 			break;
 
 		fill_level = softdma_fill_level(sc);
 		timeout = 100;
 		while (fill_level == 0 && timeout--)
 			fill_level = softdma_fill_level(sc);
 		if (timeout == 0) {
 			/* No EOP received. Broken packet. */
 			error = 1;
 			break;
 		}
 	}
 
 	if (error) {
 		return (-1);
 	}
 
 	return (dst_offs);
 }
 
 static uint32_t
 softdma_process_descriptors(struct softdma_channel *chan,
     xdma_transfer_status_t *status)
 {
 	struct xdma_channel *xchan;
 	struct softdma_desc *desc;
 	struct softdma_softc *sc;
 	xdma_transfer_status_t st;
 	int ret;
 
 	sc = chan->sc;
 
 	xchan = chan->xchan;
 
 	desc = &chan->descs[chan->idx_tail];
 
 	while (desc != NULL) {
 
 		if ((desc->control & CONTROL_OWN) == 0) {
 			break;
 		}
 
 		if (desc->direction == XDMA_MEM_TO_DEV) {
 			ret = softdma_process_tx(chan, desc);
 		} else {
 			ret = softdma_process_rx(chan, desc);
 			if (ret == 0) {
 				/* No new data available. */
 				break;
 			}
 		}
 
 		/* Descriptor processed. */
 		desc->control = 0;
 
 		if (ret >= 0) {
 			st.error = 0;
 			st.transferred = ret;
 		} else {
 			st.error = ret;
 			st.transferred = 0;
 		}
 
 		xchan_seg_done(xchan, &st);
 		atomic_subtract_int(&chan->descs_used_count, 1);
 
 		if (ret >= 0) {
 			status->transferred += ret;
 		} else {
 			status->error = 1;
 			break;
 		}
 
 		chan->idx_tail = softdma_next_desc(chan, chan->idx_tail);
 
 		/* Process next descriptor, if any. */
 		desc = desc->next;
 	}
 
 	return (0);
 }
 
 static void
 softdma_worker(void *arg)
 {
 	xdma_transfer_status_t status;
 	struct softdma_channel *chan;
 	struct softdma_softc *sc;
 
 	chan = arg;
 
 	sc = chan->sc;
 
 	while (1) {
 		mtx_lock(&chan->mtx);
 
 		do {
 			mtx_sleep(chan, &chan->mtx, 0, "softdma_wait", hz / 2);
 		} while (chan->run == 0);
 
 		status.error = 0;
 		status.transferred = 0;
 
 		softdma_process_descriptors(chan, &status);
 
 		/* Finish operation */
 		chan->run = 0;
 		xdma_callback(chan->xchan, &status);
 
 		mtx_unlock(&chan->mtx);
 	}
 
 }
 
 static int
 softdma_proc_create(struct softdma_channel *chan)
 {
 	struct softdma_softc *sc;
 
 	sc = chan->sc;
 
 	if (chan->p != NULL) {
 		/* Already created */
 		return (0);
 	}
 
 	mtx_init(&chan->mtx, "SoftDMA", NULL, MTX_DEF);
 
 	if (kproc_create(softdma_worker, (void *)chan, &chan->p, 0, 0,
 	    "softdma_worker") != 0) {
 		device_printf(sc->dev,
 		    "%s: Failed to create worker thread.\n", __func__);
 		return (-1);
 	}
 
 	return (0);
 }
 
 static int
 softdma_channel_alloc(device_t dev, struct xdma_channel *xchan)
 {
 	struct softdma_channel *chan;
 	struct softdma_softc *sc;
 	int i;
 
 	sc = device_get_softc(dev);
 
 	for (i = 0; i < SOFTDMA_NCHANNELS; i++) {
 		chan = &sc->channels[i];
 		if (chan->used == 0) {
 			chan->xchan = xchan;
 			xchan->chan = (void *)chan;
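+			/*
+			 * This engine copies to/from the caller's memory
+			 * directly (no bounce buffers) and cannot split a
+			 * request into multiple segments.
+			 */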
+			xchan->caps |= XCHAN_CAP_NOBUFS;
+			xchan->caps |= XCHAN_CAP_NOSEG;
 			chan->index = i;
 			chan->idx_head = 0;
 			chan->idx_tail = 0;
 			chan->descs_used_count = 0;
 			chan->descs_num = 1024;
 			chan->sc = sc;
 
 			if (softdma_proc_create(chan) != 0) {
 				return (-1);
 			}
 
 			chan->used = 1;
 
 			return (0);
 		}
 	}
 
 	return (-1);
 }
 
 static int
 softdma_channel_free(device_t dev, struct xdma_channel *xchan)
 {
 	struct softdma_channel *chan;
 	struct softdma_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	chan = (struct softdma_channel *)xchan->chan;
 
 	if (chan->descs != NULL) {
 		free(chan->descs, M_DEVBUF);
 	}
 
 	chan->used = 0;
 
 	return (0);
 }
 
 static int
 softdma_desc_alloc(struct xdma_channel *xchan)
 {
 	struct softdma_channel *chan;
 	uint32_t nsegments;
 
 	chan = (struct softdma_channel *)xchan->chan;
 
 	nsegments = chan->descs_num;
 
 	chan->descs = malloc(nsegments * sizeof(struct softdma_desc),
 	    M_DEVBUF, (M_WAITOK | M_ZERO));
 
 	return (0);
 }
 
 static int
 softdma_channel_prep_sg(device_t dev, struct xdma_channel *xchan)
 {
 	struct softdma_channel *chan;
 	struct softdma_desc *desc;
 	struct softdma_softc *sc;
 	int ret;
 	int i;
 
 	sc = device_get_softc(dev);
 
 	chan = (struct softdma_channel *)xchan->chan;
 
 	ret = softdma_desc_alloc(xchan);
 	if (ret != 0) {
 		device_printf(sc->dev,
 		    "%s: Can't allocate descriptors.\n", __func__);
 		return (-1);
 	}
 
 	for (i = 0; i < chan->descs_num; i++) {
 		desc = &chan->descs[i];
 
 		if (i == (chan->descs_num - 1)) {
 			desc->next = &chan->descs[0];
 		} else {
 			desc->next = &chan->descs[i+1];
 		}
 	}
 
 	return (0);
 }
 
 static int
 softdma_channel_capacity(device_t dev, xdma_channel_t *xchan,
     uint32_t *capacity)
 {
 	struct softdma_channel *chan;
 	uint32_t c;
 
 	chan = (struct softdma_channel *)xchan->chan;
 
 	/* At least one descriptor must be left empty. */
 	c = (chan->descs_num - chan->descs_used_count - 1);
 
 	*capacity = c;
 
 	return (0);
 }
 
 static int
 softdma_channel_submit_sg(device_t dev, struct xdma_channel *xchan,
     struct xdma_sglist *sg, uint32_t sg_n)
 {
 	struct softdma_channel *chan;
 	struct softdma_desc *desc;
 	struct softdma_softc *sc;
 	uint32_t enqueued;
 	uint32_t saved_dir;
 	uint32_t tmp;
 	uint32_t len;
 	int i;
 
 	sc = device_get_softc(dev);
 
 	chan = (struct softdma_channel *)xchan->chan;
 
 	enqueued = 0;
 
 	for (i = 0; i < sg_n; i++) {
 		len = (uint32_t)sg[i].len;
 
 		desc = &chan->descs[chan->idx_head];
 		desc->src_addr = sg[i].src_addr;
 		desc->dst_addr = sg[i].dst_addr;
 		if (sg[i].direction == XDMA_MEM_TO_DEV) {
 			desc->src_incr = 1;
 			desc->dst_incr = 0;
 		} else {
 			desc->src_incr = 0;
 			desc->dst_incr = 1;
 		}
 		desc->direction = sg[i].direction;
 		saved_dir = sg[i].direction;
 		desc->len = len;
 		desc->transfered = 0;
 		desc->status = 0;
 		desc->reserved = 0;
 		desc->control = 0;
 
 		if (sg[i].first == 1)
 			desc->control |= CONTROL_GEN_SOP;
 		if (sg[i].last == 1)
 			desc->control |= CONTROL_GEN_EOP;
 
 		tmp = chan->idx_head;
 		chan->idx_head = softdma_next_desc(chan, chan->idx_head);
 		atomic_add_int(&chan->descs_used_count, 1);
 		desc->control |= CONTROL_OWN;
 		enqueued += 1;
 	}
 
 	if (enqueued == 0)
 		return (0);
 
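 	/*
 	 * TX descriptors are handled by the worker thread; for RX just
 	 * enable the FIFO receive events and let the interrupt drive it.
 	 */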
 	if (saved_dir == XDMA_MEM_TO_DEV) {
 		chan->run = 1;
 		wakeup(chan);
 	} else
 		softdma_memc_write(sc,
 		    A_ONCHIP_FIFO_MEM_CORE_STATUS_REG_INT_ENABLE,
 		    SOFTDMA_RX_EVENTS);
 
 	return (0);
 }
 
 static int
 softdma_channel_request(device_t dev, struct xdma_channel *xchan,
     struct xdma_request *req)
 {
 	struct softdma_channel *chan;
 	struct softdma_desc *desc;
 	struct softdma_softc *sc;
 	int ret;
 
 	sc = device_get_softc(dev);
 
 	chan = (struct softdma_channel *)xchan->chan;
 
 	ret = softdma_desc_alloc(xchan);
 	if (ret != 0) {
 		device_printf(sc->dev,
 		    "%s: Can't allocate descriptors.\n", __func__);
 		return (-1);
 	}
 
 	desc = &chan->descs[0];
 
 	desc->src_addr = req->src_addr;
 	desc->dst_addr = req->dst_addr;
 	desc->len = req->block_len;
 	desc->src_incr = 1;
 	desc->dst_incr = 1;
 	desc->next = NULL;
 
 	return (0);
 }
 
 static int
 softdma_channel_control(device_t dev, xdma_channel_t *xchan, int cmd)
 {
 	struct softdma_channel *chan;
 	struct softdma_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	chan = (struct softdma_channel *)xchan->chan;
 
 	switch (cmd) {
 	case XDMA_CMD_BEGIN:
 	case XDMA_CMD_TERMINATE:
 	case XDMA_CMD_PAUSE:
 		/* TODO: implement me */
 		return (-1);
 	}
 
 	return (0);
 }
 
 #ifdef FDT
 static int
 softdma_ofw_md_data(device_t dev, pcell_t *cells,
     int ncells, void **ptr)
 {
 
 	return (0);
 }
 #endif
 
 static device_method_t softdma_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,			softdma_probe),
 	DEVMETHOD(device_attach,		softdma_attach),
 	DEVMETHOD(device_detach,		softdma_detach),
 
 	/* xDMA Interface */
 	DEVMETHOD(xdma_channel_alloc,		softdma_channel_alloc),
 	DEVMETHOD(xdma_channel_free,		softdma_channel_free),
 	DEVMETHOD(xdma_channel_request,		softdma_channel_request),
 	DEVMETHOD(xdma_channel_control,		softdma_channel_control),
 
 	/* xDMA SG Interface */
 	DEVMETHOD(xdma_channel_prep_sg,		softdma_channel_prep_sg),
 	DEVMETHOD(xdma_channel_submit_sg,	softdma_channel_submit_sg),
 	DEVMETHOD(xdma_channel_capacity,	softdma_channel_capacity),
 
 #ifdef FDT
 	DEVMETHOD(xdma_ofw_md_data,		softdma_ofw_md_data),
 #endif
 
 	DEVMETHOD_END
 };
 
 static driver_t softdma_driver = {
 	"softdma",
 	softdma_methods,
 	sizeof(struct softdma_softc),
 };
 
 static devclass_t softdma_devclass;
 
 EARLY_DRIVER_MODULE(softdma, simplebus, softdma_driver, softdma_devclass, 0, 0,
     BUS_PASS_INTERRUPT + BUS_PASS_ORDER_LATE);
Index: head/sys/dev/xdma/xdma.h
===================================================================
--- head/sys/dev/xdma/xdma.h	(revision 346895)
+++ head/sys/dev/xdma/xdma.h	(revision 346896)
@@ -1,264 +1,264 @@
 /*-
  * Copyright (c) 2016-2018 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
  * ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _DEV_XDMA_XDMA_H_
 #define _DEV_XDMA_XDMA_H_
 
 #include 
 
 enum xdma_direction {
 	XDMA_MEM_TO_MEM,
 	XDMA_MEM_TO_DEV,
 	XDMA_DEV_TO_MEM,
 	XDMA_DEV_TO_DEV,
 };
 
 enum xdma_operation_type {
 	XDMA_MEMCPY,
 	XDMA_CYCLIC,
 	XDMA_FIFO,
 	XDMA_SG,
 };
 
 enum xdma_request_type {
 	XR_TYPE_PHYS,
 	XR_TYPE_VIRT,
 	XR_TYPE_MBUF,
 	XR_TYPE_BIO,
 };
 
 enum xdma_command {
 	XDMA_CMD_BEGIN,
 	XDMA_CMD_PAUSE,
 	XDMA_CMD_TERMINATE,
 };
 
 struct xdma_transfer_status {
 	uint32_t	transferred;
 	int		error;
 };
 
 typedef struct xdma_transfer_status xdma_transfer_status_t;
 
 struct xdma_controller {
 	device_t dev;		/* DMA consumer device_t. */
 	device_t dma_dev;	/* A real DMA device_t. */
 	void *data;		/* OFW MD part. */
 
 	/* List of virtual channels allocated. */
 	TAILQ_HEAD(xdma_channel_list, xdma_channel)	channels;
 };
 
 typedef struct xdma_controller xdma_controller_t;
 
 struct xchan_buf {
 	bus_dmamap_t			map;
 	uint32_t			nsegs;
 	uint32_t			nsegs_left;
-	void				*cbuf;
 };
 
 struct xdma_request {
 	struct mbuf			*m;
 	struct bio			*bp;
 	enum xdma_operation_type	operation;
 	enum xdma_request_type		req_type;
 	enum xdma_direction		direction;
 	bus_addr_t			src_addr;
 	bus_addr_t			dst_addr;
 	uint8_t				src_width;
 	uint8_t				dst_width;
 	bus_size_t			block_num;
 	bus_size_t			block_len;
 	xdma_transfer_status_t		status;
 	void				*user;
 	TAILQ_ENTRY(xdma_request)	xr_next;
 	struct xchan_buf		buf;
 };
 
 struct xdma_sglist {
 	bus_addr_t			src_addr;
 	bus_addr_t			dst_addr;
 	size_t				len;
 	uint8_t				src_width;
 	uint8_t				dst_width;
 	enum xdma_direction		direction;
 	bool				first;
 	bool				last;
 };
 
 struct xdma_channel {
 	xdma_controller_t		*xdma;
 
 	uint32_t			flags;
 #define	XCHAN_BUFS_ALLOCATED		(1 << 0)
 #define	XCHAN_SGLIST_ALLOCATED		(1 << 1)
 #define	XCHAN_CONFIGURED		(1 << 2)
 #define	XCHAN_TYPE_CYCLIC		(1 << 3)
 #define	XCHAN_TYPE_MEMCPY		(1 << 4)
 #define	XCHAN_TYPE_FIFO			(1 << 5)
 #define	XCHAN_TYPE_SG			(1 << 6)
 
 	uint32_t			caps;
 #define	XCHAN_CAP_BUSDMA		(1 << 0)
-#define	XCHAN_CAP_BUSDMA_NOSEG		(1 << 1)
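+/*
+ * XCHAN_CAP_NOSEG: the engine cannot handle multi-segment requests, so
+ * mbuf chains are defragmented before submission.
+ * XCHAN_CAP_NOBUFS: the engine needs no driver-allocated bounce buffers.
+ */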
+#define	XCHAN_CAP_NOSEG			(1 << 1)
+#define	XCHAN_CAP_NOBUFS		(1 << 2)
 
 	/* A real hardware driver channel. */
 	void				*chan;
 
 	/* Interrupt handlers. */
 	TAILQ_HEAD(, xdma_intr_handler)	ie_handlers;
 	TAILQ_ENTRY(xdma_channel)	xchan_next;
 
 	struct sx			sx_lock;
 	struct sx			sx_qin_lock;
 	struct sx			sx_qout_lock;
 	struct sx			sx_bank_lock;
 	struct sx			sx_proc_lock;
 
 	/* Request queue. */
 	bus_dma_tag_t			dma_tag_bufs;
 	struct xdma_request		*xr_mem;
 	uint32_t			xr_num;
 
 	/* Bus dma tag options. */
 	bus_size_t			maxsegsize;
 	bus_size_t			maxnsegs;
 	bus_size_t			alignment;
 	bus_addr_t			boundary;
 	bus_addr_t			lowaddr;
 	bus_addr_t			highaddr;
 
 	struct xdma_sglist		*sg;
 
 	TAILQ_HEAD(, xdma_request)	bank;
 	TAILQ_HEAD(, xdma_request)	queue_in;
 	TAILQ_HEAD(, xdma_request)	queue_out;
 	TAILQ_HEAD(, xdma_request)	processing;
 };
 
 typedef struct xdma_channel xdma_channel_t;
 
 struct xdma_intr_handler {
 	int		(*cb)(void *cb_user, xdma_transfer_status_t *status);
 	void		*cb_user;
 	TAILQ_ENTRY(xdma_intr_handler)	ih_next;
 };
 
 static MALLOC_DEFINE(M_XDMA, "xdma", "xDMA framework");
 
 #define	XCHAN_LOCK(xchan)		sx_xlock(&(xchan)->sx_lock)
 #define	XCHAN_UNLOCK(xchan)		sx_xunlock(&(xchan)->sx_lock)
 #define	XCHAN_ASSERT_LOCKED(xchan)	\
     sx_assert(&(xchan)->sx_lock, SX_XLOCKED)
 
 #define	QUEUE_IN_LOCK(xchan)		sx_xlock(&(xchan)->sx_qin_lock)
 #define	QUEUE_IN_UNLOCK(xchan)		sx_xunlock(&(xchan)->sx_qin_lock)
 #define	QUEUE_IN_ASSERT_LOCKED(xchan)	\
     sx_assert(&(xchan)->sx_qin_lock, SX_XLOCKED)
 
 #define	QUEUE_OUT_LOCK(xchan)		sx_xlock(&(xchan)->sx_qout_lock)
 #define	QUEUE_OUT_UNLOCK(xchan)		sx_xunlock(&(xchan)->sx_qout_lock)
 #define	QUEUE_OUT_ASSERT_LOCKED(xchan)	\
     sx_assert(&(xchan)->sx_qout_lock, SX_XLOCKED)
 
 #define	QUEUE_BANK_LOCK(xchan)		sx_xlock(&(xchan)->sx_bank_lock)
 #define	QUEUE_BANK_UNLOCK(xchan)	sx_xunlock(&(xchan)->sx_bank_lock)
 #define	QUEUE_BANK_ASSERT_LOCKED(xchan)	\
     sx_assert(&(xchan)->sx_bank_lock, SX_XLOCKED)
 
 #define	QUEUE_PROC_LOCK(xchan)		sx_xlock(&(xchan)->sx_proc_lock)
 #define	QUEUE_PROC_UNLOCK(xchan)	sx_xunlock(&(xchan)->sx_proc_lock)
 #define	QUEUE_PROC_ASSERT_LOCKED(xchan)	\
     sx_assert(&(xchan)->sx_proc_lock, SX_XLOCKED)
 
 #define	XDMA_SGLIST_MAXLEN	2048
 #define	XDMA_MAX_SEG		128
 
 /* xDMA controller ops */
 xdma_controller_t *xdma_ofw_get(device_t dev, const char *prop);
 int xdma_put(xdma_controller_t *xdma);
 
 /* xDMA channel ops */
 xdma_channel_t * xdma_channel_alloc(xdma_controller_t *, uint32_t caps);
 int xdma_channel_free(xdma_channel_t *);
 int xdma_request(xdma_channel_t *xchan, struct xdma_request *r);
 
 /* SG interface */
 int xdma_prep_sg(xdma_channel_t *, uint32_t,
     bus_size_t, bus_size_t, bus_size_t, bus_addr_t, bus_addr_t, bus_addr_t);
 void xdma_channel_free_sg(xdma_channel_t *xchan);
 int xdma_queue_submit_sg(xdma_channel_t *xchan);
 void xchan_seg_done(xdma_channel_t *xchan, xdma_transfer_status_t *);
 
 /* Queue operations */
 int xdma_dequeue_mbuf(xdma_channel_t *xchan, struct mbuf **m,
     xdma_transfer_status_t *);
 int xdma_enqueue_mbuf(xdma_channel_t *xchan, struct mbuf **m, uintptr_t addr,
     uint8_t, uint8_t, enum xdma_direction dir);
 int xdma_dequeue_bio(xdma_channel_t *xchan, struct bio **bp,
     xdma_transfer_status_t *status);
 int xdma_enqueue_bio(xdma_channel_t *xchan, struct bio **bp, bus_addr_t addr,
     uint8_t, uint8_t, enum xdma_direction dir);
 int xdma_dequeue(xdma_channel_t *xchan, void **user,
     xdma_transfer_status_t *status);
 int xdma_enqueue(xdma_channel_t *xchan, uintptr_t src, uintptr_t dst,
     uint8_t, uint8_t, bus_size_t, enum xdma_direction dir, void *);
 int xdma_queue_submit(xdma_channel_t *xchan);
 
 /* Mbuf operations */
 uint32_t xdma_mbuf_defrag(xdma_channel_t *xchan, struct xdma_request *xr);
 uint32_t xdma_mbuf_chain_count(struct mbuf *m0);
 
 /* Channel Control */
 int xdma_control(xdma_channel_t *xchan, enum xdma_command cmd);
 
 /* Interrupt callback */
 int xdma_setup_intr(xdma_channel_t *xchan, int (*cb)(void *,
     xdma_transfer_status_t *), void *arg, void **);
 int xdma_teardown_intr(xdma_channel_t *xchan, struct xdma_intr_handler *ih);
 int xdma_teardown_all_intr(xdma_channel_t *xchan);
 void xdma_callback(struct xdma_channel *xchan, xdma_transfer_status_t *status);
 
 /* Sglist */
 int xchan_sglist_alloc(xdma_channel_t *xchan);
 void xchan_sglist_free(xdma_channel_t *xchan);
 int xdma_sglist_add(struct xdma_sglist *sg, struct bus_dma_segment *seg,
     uint32_t nsegs, struct xdma_request *xr);
 
 /* Requests bank */
 void xchan_bank_init(xdma_channel_t *xchan);
 int xchan_bank_free(xdma_channel_t *xchan);
 struct xdma_request * xchan_bank_get(xdma_channel_t *xchan);
 int xchan_bank_put(xdma_channel_t *xchan, struct xdma_request *xr);
 
 #endif /* !_DEV_XDMA_XDMA_H_ */
Index: head/sys/dev/xdma/xdma_mbuf.c
===================================================================
--- head/sys/dev/xdma/xdma_mbuf.c	(revision 346895)
+++ head/sys/dev/xdma/xdma_mbuf.c	(revision 346896)
@@ -1,154 +1,150 @@
 /*-
  * Copyright (c) 2017-2018 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
  * ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_platform.h"
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 
 #include 
 
 #ifdef FDT
 #include 
 #include 
 #include 
 #endif
 
 #include <dev/xdma/xdma.h>
 
 int
 xdma_dequeue_mbuf(xdma_channel_t *xchan, struct mbuf **mp,
     xdma_transfer_status_t *status)
 {
 	struct xdma_request *xr;
 	struct xdma_request *xr_tmp;
 
 	QUEUE_OUT_LOCK(xchan);
 	TAILQ_FOREACH_SAFE(xr, &xchan->queue_out, xr_next, xr_tmp) {
 		TAILQ_REMOVE(&xchan->queue_out, xr, xr_next);
 		break;
 	}
 	QUEUE_OUT_UNLOCK(xchan);
 
 	if (xr == NULL)
 		return (-1);
 
 	*mp = xr->m;
 	status->error = xr->status.error;
 	status->transferred = xr->status.transferred;
 
 	xchan_bank_put(xchan, xr);
 
 	return (0);
 }
 
 int
 xdma_enqueue_mbuf(xdma_channel_t *xchan, struct mbuf **mp,
     uintptr_t addr, uint8_t src_width, uint8_t dst_width,
     enum xdma_direction dir)
 {
 	struct xdma_request *xr;
 	xdma_controller_t *xdma;
 
 	xdma = xchan->xdma;
 
 	xr = xchan_bank_get(xchan);
 	if (xr == NULL)
 		return (-1); /* No space is available yet. */
 
 	xr->direction = dir;
 	xr->m = *mp;
 	xr->req_type = XR_TYPE_MBUF;
 	if (dir == XDMA_MEM_TO_DEV) {
 		xr->dst_addr = addr;
 		xr->src_addr = 0;
 	} else {
 		xr->src_addr = addr;
 		xr->dst_addr = 0;
 	}
 	xr->src_width = src_width;
 	xr->dst_width = dst_width;
 
 	QUEUE_IN_LOCK(xchan);
 	TAILQ_INSERT_TAIL(&xchan->queue_in, xr, xr_next);
 	QUEUE_IN_UNLOCK(xchan);
 
 	return (0);
 }
 
 uint32_t
 xdma_mbuf_chain_count(struct mbuf *m0)
 {
 	struct mbuf *m;
 	uint32_t c;
 
 	c = 0;
 
 	for (m = m0; m != NULL; m = m->m_next)
 		c++;
 
 	return (c);
 }
 
 uint32_t
 xdma_mbuf_defrag(xdma_channel_t *xchan, struct xdma_request *xr)
 {
 	xdma_controller_t *xdma;
 	struct mbuf *m;
 	uint32_t c;
 
 	xdma = xchan->xdma;
 
 	c = xdma_mbuf_chain_count(xr->m);
 	if (c == 1)
 		return (c); /* Nothing to do. */
 
-	if (xchan->caps & XCHAN_CAP_BUSDMA) {
-		if ((xchan->caps & XCHAN_CAP_BUSDMA_NOSEG) || \
-		    (c > xchan->maxnsegs)) {
-			if ((m = m_defrag(xr->m, M_NOWAIT)) == NULL) {
-				device_printf(xdma->dma_dev,
-				    "%s: Can't defrag mbuf\n",
-				    __func__);
-				return (c);
-			}
-			xr->m = m;
-			c = 1;
-		}
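+	/*
+	 * The caller (xdma_process()) now decides whether defragmentation
+	 * is needed; here the chain is always collapsed into a single mbuf.
+	 */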
+	if ((m = m_defrag(xr->m, M_NOWAIT)) == NULL) {
+		device_printf(xdma->dma_dev,
+		    "%s: Can't defrag mbuf\n",
+		    __func__);
+		return (c);
 	}
+
+	xr->m = m;
+	c = 1;
 
 	return (c);
 }
Index: head/sys/dev/xdma/xdma_sg.c
===================================================================
--- head/sys/dev/xdma/xdma_sg.c	(revision 346895)
+++ head/sys/dev/xdma/xdma_sg.c	(revision 346896)
@@ -1,594 +1,586 @@
 /*-
  * Copyright (c) 2018 Ruslan Bukin 
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
  * ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_platform.h"
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
 
 #include 
 
 #ifdef FDT
 #include 
 #include 
 #include 
 #endif
 
 #include <dev/xdma/xdma.h>
 
 #include <xdma_if.h>
 
 struct seg_load_request {
 	struct bus_dma_segment *seg;
 	uint32_t nsegs;
 	uint32_t error;
 };
 
 static int
 _xchan_bufs_alloc(xdma_channel_t *xchan)
 {
 	xdma_controller_t *xdma;
 	struct xdma_request *xr;
 	int i;
 
 	xdma = xchan->xdma;
 
 	for (i = 0; i < xchan->xr_num; i++) {
 		xr = &xchan->xr_mem[i];
-		xr->buf.cbuf = contigmalloc(xchan->maxsegsize,
-		    M_XDMA, 0, 0, ~0, PAGE_SIZE, 0);
-		if (xr->buf.cbuf == NULL) {
-			device_printf(xdma->dev,
-			    "%s: Can't allocate contiguous kernel"
-			    " physical memory\n", __func__);
-			return (-1);
-		}
+		/* TODO: bounce buffer */
 	}
 
 	return (0);
 }
 
 static int
 _xchan_bufs_alloc_busdma(xdma_channel_t *xchan)
 {
 	xdma_controller_t *xdma;
 	struct xdma_request *xr;
 	int err;
 	int i;
 
 	xdma = xchan->xdma;
 
 	/* Create bus_dma tag */
 	err = bus_dma_tag_create(
 	    bus_get_dma_tag(xdma->dev),	/* Parent tag. */
 	    xchan->alignment,		/* alignment */
 	    xchan->boundary,		/* boundary */
 	    xchan->lowaddr,		/* lowaddr */
 	    xchan->highaddr,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    xchan->maxsegsize * xchan->maxnsegs, /* maxsize */
 	    xchan->maxnsegs,		/* nsegments */
 	    xchan->maxsegsize,		/* maxsegsize */
 	    0,				/* flags */
 	    NULL, NULL,			/* lockfunc, lockarg */
 	    &xchan->dma_tag_bufs);
 	if (err != 0) {
 		device_printf(xdma->dev,
 		    "%s: Can't create bus_dma tag.\n", __func__);
 		return (-1);
 	}
 
 	for (i = 0; i < xchan->xr_num; i++) {
 		xr = &xchan->xr_mem[i];
 		err = bus_dmamap_create(xchan->dma_tag_bufs, 0,
 		    &xr->buf.map);
 		if (err != 0) {
 			device_printf(xdma->dev,
 			    "%s: Can't create buf DMA map.\n", __func__);
 
 			/* Cleanup. */
 			bus_dma_tag_destroy(xchan->dma_tag_bufs);
 
 			return (-1);
 		}
 	}
 
 	return (0);
 }
 
 static int
 xchan_bufs_alloc(xdma_channel_t *xchan)
 {
 	xdma_controller_t *xdma;
 	int ret;
 
 	xdma = xchan->xdma;
 
 	if (xdma == NULL) {
 		printf("%s: Channel was not allocated properly.\n", __func__);
 		return (-1);
 	}
 
 	if (xchan->caps & XCHAN_CAP_BUSDMA)
 		ret = _xchan_bufs_alloc_busdma(xchan);
 	else
 		ret = _xchan_bufs_alloc(xchan);
 	if (ret != 0) {
 		device_printf(xdma->dev,
 		    "%s: Can't allocate bufs.\n", __func__);
 		return (-1);
 	}
 
 	xchan->flags |= XCHAN_BUFS_ALLOCATED;
 
 	return (0);
 }
 
 static int
 xchan_bufs_free(xdma_channel_t *xchan)
 {
 	struct xdma_request *xr;
 	struct xchan_buf *b;
 	int i;
 
 	if ((xchan->flags & XCHAN_BUFS_ALLOCATED) == 0)
 		return (-1);
 
 	if (xchan->caps & XCHAN_CAP_BUSDMA) {
 		for (i = 0; i < xchan->xr_num; i++) {
 			xr = &xchan->xr_mem[i];
 			b = &xr->buf;
 			bus_dmamap_destroy(xchan->dma_tag_bufs, b->map);
 		}
 		bus_dma_tag_destroy(xchan->dma_tag_bufs);
 	} else {
 		for (i = 0; i < xchan->xr_num; i++) {
 			xr = &xchan->xr_mem[i];
-			contigfree(xr->buf.cbuf, xchan->maxsegsize, M_XDMA);
+			/* TODO: bounce buffer */
 		}
 	}
 
 	xchan->flags &= ~XCHAN_BUFS_ALLOCATED;
 
 	return (0);
 }
 
 void
 xdma_channel_free_sg(xdma_channel_t *xchan)
 {
 
 	xchan_bufs_free(xchan);
 	xchan_sglist_free(xchan);
 	xchan_bank_free(xchan);
 }
 
 /*
  * Prepare xchan for a scatter-gather transfer.
  * xr_num - xdma requests queue size,
  * maxsegsize - maximum allowed scatter-gather list element size in bytes,
  * maxnsegs - maximum number of segments per request,
  * alignment, boundary, lowaddr, highaddr - bus_dma tag constraints.
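  *
  * Illustrative call (hypothetical values, not taken from a real consumer):
  *   xdma_prep_sg(xchan, 512, MCLBYTES, 8, 16, 0,
  *       BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR);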
  */
 int
 xdma_prep_sg(xdma_channel_t *xchan, uint32_t xr_num,
     bus_size_t maxsegsize, bus_size_t maxnsegs,
     bus_size_t alignment, bus_addr_t boundary,
     bus_addr_t lowaddr, bus_addr_t highaddr)
 {
 	xdma_controller_t *xdma;
 	int ret;
 
 	xdma = xchan->xdma;
 
 	KASSERT(xdma != NULL, ("xdma is NULL"));
 
 	if (xchan->flags & XCHAN_CONFIGURED) {
 		device_printf(xdma->dev,
 		    "%s: Channel is already configured.\n", __func__);
 		return (-1);
 	}
 
 	xchan->xr_num = xr_num;
 	xchan->maxsegsize = maxsegsize;
 	xchan->maxnsegs = maxnsegs;
 	xchan->alignment = alignment;
 	xchan->boundary = boundary;
 	xchan->lowaddr = lowaddr;
 	xchan->highaddr = highaddr;
 
 	if (xchan->maxnsegs > XDMA_MAX_SEG) {
 		device_printf(xdma->dev, "%s: maxnsegs is too big\n",
 		    __func__);
 		return (-1);
 	}
 
 	xchan_bank_init(xchan);
 
 	/* Allocate sglist. */
 	ret = xchan_sglist_alloc(xchan);
 	if (ret != 0) {
 		device_printf(xdma->dev,
 		    "%s: Can't allocate sglist.\n", __func__);
 		return (-1);
 	}
 
-	/* Allocate bufs. */
-	ret = xchan_bufs_alloc(xchan);
-	if (ret != 0) {
-		device_printf(xdma->dev,
-		    "%s: Can't allocate bufs.\n", __func__);
+	/* Allocate buffers if required. */
+	if ((xchan->caps & XCHAN_CAP_NOBUFS) == 0) {
+		ret = xchan_bufs_alloc(xchan);
+		if (ret != 0) {
+			device_printf(xdma->dev,
+			    "%s: Can't allocate bufs.\n", __func__);
 
-		/* Cleanup */
-		xchan_sglist_free(xchan);
-		xchan_bank_free(xchan);
+			/* Cleanup */
+			xchan_sglist_free(xchan);
+			xchan_bank_free(xchan);
 
-		return (-1);
+			return (-1);
+		}
 	}
 
 	xchan->flags |= (XCHAN_CONFIGURED | XCHAN_TYPE_SG);
 
 	XCHAN_LOCK(xchan);
 	ret = XDMA_CHANNEL_PREP_SG(xdma->dma_dev, xchan);
 	if (ret != 0) {
 		device_printf(xdma->dev,
 		    "%s: Can't prepare SG transfer.\n", __func__);
 		XCHAN_UNLOCK(xchan);
 
 		return (-1);
 	}
 	XCHAN_UNLOCK(xchan);
 
 	return (0);
 }
 
 void
 xchan_seg_done(xdma_channel_t *xchan,
     struct xdma_transfer_status *st)
 {
 	struct xdma_request *xr;
 	xdma_controller_t *xdma;
 	struct xchan_buf *b;
 
 	xdma = xchan->xdma;
 
 	xr = TAILQ_FIRST(&xchan->processing);
 	if (xr == NULL)
 		panic("request not found\n");
 
 	b = &xr->buf;
 
 	atomic_subtract_int(&b->nsegs_left, 1);
 
 	if (b->nsegs_left == 0) {
 		if (xchan->caps & XCHAN_CAP_BUSDMA) {
 			if (xr->direction == XDMA_MEM_TO_DEV)
 				bus_dmamap_sync(xchan->dma_tag_bufs, b->map, 
 				    BUS_DMASYNC_POSTWRITE);
 			else
 				bus_dmamap_sync(xchan->dma_tag_bufs, b->map, 
 				    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(xchan->dma_tag_bufs, b->map);
 		}
 		xr->status.error = st->error;
 		xr->status.transferred = st->transferred;
 
 		QUEUE_PROC_LOCK(xchan);
 		TAILQ_REMOVE(&xchan->processing, xr, xr_next);
 		QUEUE_PROC_UNLOCK(xchan);
 
 		QUEUE_OUT_LOCK(xchan);
 		TAILQ_INSERT_TAIL(&xchan->queue_out, xr, xr_next);
 		QUEUE_OUT_UNLOCK(xchan);
 	}
 }
 
 static void
 xdma_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct seg_load_request *slr;
 	struct bus_dma_segment *seg;
 	int i;
 
 	slr = arg;
 	seg = slr->seg;
 
 	if (error != 0) {
 		slr->error = error;
 		return;
 	}
 
 	slr->nsegs = nsegs;
 
 	for (i = 0; i < nsegs; i++) {
 		seg[i].ds_addr = segs[i].ds_addr;
 		seg[i].ds_len = segs[i].ds_len;
 	}
 }
 
 static int
 _xdma_load_data_busdma(xdma_channel_t *xchan, struct xdma_request *xr,
     struct bus_dma_segment *seg)
 {
 	xdma_controller_t *xdma;
 	struct seg_load_request slr;
 	uint32_t nsegs;
 	void *addr;
 	int error;
 
 	xdma = xchan->xdma;
 
 	error = 0;
 	nsegs = 0;
 
 	switch (xr->req_type) {
 	case XR_TYPE_MBUF:
 		error = bus_dmamap_load_mbuf_sg(xchan->dma_tag_bufs,
 		    xr->buf.map, xr->m, seg, &nsegs, BUS_DMA_NOWAIT);
 		break;
 	case XR_TYPE_BIO:
 		slr.nsegs = 0;
 		slr.error = 0;
 		slr.seg = seg;
 		error = bus_dmamap_load_bio(xchan->dma_tag_bufs,
 		    xr->buf.map, xr->bp, xdma_dmamap_cb, &slr, BUS_DMA_NOWAIT);
 		if (slr.error != 0) {
 			device_printf(xdma->dma_dev,
 			    "%s: bus_dmamap_load failed, err %d\n",
 			    __func__, slr.error);
 			return (0);
 		}
 		nsegs = slr.nsegs;
 		break;
 	case XR_TYPE_VIRT:
 		switch (xr->direction) {
 		case XDMA_MEM_TO_DEV:
 			addr = (void *)xr->src_addr;
 			break;
 		case XDMA_DEV_TO_MEM:
 			addr = (void *)xr->dst_addr;
 			break;
 		default:
 			device_printf(xdma->dma_dev,
 			    "%s: Direction is not supported\n", __func__);
 			return (0);
 		}
 		slr.nsegs = 0;
 		slr.error = 0;
 		slr.seg = seg;
 		error = bus_dmamap_load(xchan->dma_tag_bufs, xr->buf.map,
 		    addr, (xr->block_len * xr->block_num),
 		    xdma_dmamap_cb, &slr, BUS_DMA_NOWAIT);
 		if (slr.error != 0) {
 			device_printf(xdma->dma_dev,
 			    "%s: bus_dmamap_load failed, err %d\n",
 			    __func__, slr.error);
 			return (0);
 		}
 		nsegs = slr.nsegs;
 		break;
 	default:
 		break;
 	}
 
 	if (error != 0) {
 		if (error == ENOMEM) {
 			/*
 			 * Out of memory. Try again later.
 			 * TODO: count errors.
 			 */
 		} else
 			device_printf(xdma->dma_dev,
 			    "%s: bus_dmamap_load failed with err %d\n",
 			    __func__, error);
 		return (0);
 	}
 
 	if (xr->direction == XDMA_MEM_TO_DEV)
 		bus_dmamap_sync(xchan->dma_tag_bufs, xr->buf.map,
 		    BUS_DMASYNC_PREWRITE);
 	else
 		bus_dmamap_sync(xchan->dma_tag_bufs, xr->buf.map,
 		    BUS_DMASYNC_PREREAD);
 
 	return (nsegs);
 }
 
 static int
 _xdma_load_data(xdma_channel_t *xchan, struct xdma_request *xr,
     struct bus_dma_segment *seg)
 {
 	xdma_controller_t *xdma;
 	struct mbuf *m;
 	uint32_t nsegs;
 
 	xdma = xchan->xdma;
 
 	m = xr->m;
 
 	nsegs = 1;
 
 	switch (xr->req_type) {
 	case XR_TYPE_MBUF:
-		if (xr->direction == XDMA_MEM_TO_DEV) {
-			m_copydata(m, 0, m->m_pkthdr.len, xr->buf.cbuf);
-			seg[0].ds_addr = (bus_addr_t)xr->buf.cbuf;
-			seg[0].ds_len = m->m_pkthdr.len;
-		} else {
-			seg[0].ds_addr = mtod(m, bus_addr_t);
-			seg[0].ds_len = m->m_pkthdr.len;
-		}
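+		/*
+		 * Hand the mbuf data to the engine as a single segment; the
+		 * chain was collapsed beforehand when the channel cannot
+		 * handle multiple segments (see xdma_process()).
+		 */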
+		seg[0].ds_addr = mtod(m, bus_addr_t);
+		seg[0].ds_len = m->m_pkthdr.len;
 		break;
 	case XR_TYPE_BIO:
 	case XR_TYPE_VIRT:
 	default:
 		panic("implement me\n");
 	}
 
 	return (nsegs);
 }
 
 static int
 xdma_load_data(xdma_channel_t *xchan,
     struct xdma_request *xr, struct bus_dma_segment *seg)
 {
 	xdma_controller_t *xdma;
 	int error;
 	int nsegs;
 
 	xdma = xchan->xdma;
 
 	error = 0;
 	nsegs = 0;
 
 	if (xchan->caps & XCHAN_CAP_BUSDMA)
 		nsegs = _xdma_load_data_busdma(xchan, xr, seg);
 	else
 		nsegs = _xdma_load_data(xchan, xr, seg);
 	if (nsegs == 0)
 		return (0); /* Try again later. */
 
 	xr->buf.nsegs = nsegs;
 	xr->buf.nsegs_left = nsegs;
 
 	return (nsegs);
 }
 
 static int
 xdma_process(xdma_channel_t *xchan,
     struct xdma_sglist *sg)
 {
 	struct bus_dma_segment seg[XDMA_MAX_SEG];
 	struct xdma_request *xr;
 	struct xdma_request *xr_tmp;
 	xdma_controller_t *xdma;
 	uint32_t capacity;
 	uint32_t n;
 	uint32_t c;
 	int nsegs;
 	int ret;
 
 	XCHAN_ASSERT_LOCKED(xchan);
 
 	xdma = xchan->xdma;
 
 	n = 0;
 
 	ret = XDMA_CHANNEL_CAPACITY(xdma->dma_dev, xchan, &capacity);
 	if (ret != 0) {
 		device_printf(xdma->dev,
 		    "%s: Can't get DMA controller capacity.\n", __func__);
 		return (-1);
 	}
 
 	TAILQ_FOREACH_SAFE(xr, &xchan->queue_in, xr_next, xr_tmp) {
 		switch (xr->req_type) {
 		case XR_TYPE_MBUF:
-			c = xdma_mbuf_defrag(xchan, xr);
+			/* Count the chain before deciding to defragment. */
+			c = xdma_mbuf_chain_count(xr->m);
+			if ((xchan->caps & XCHAN_CAP_NOSEG) ||
+			    (c > xchan->maxnsegs))
+				c = xdma_mbuf_defrag(xchan, xr);
 			break;
 		case XR_TYPE_BIO:
 		case XR_TYPE_VIRT:
 		default:
 			c = 1;
 		}
 
 		if (capacity <= (c + n)) {
 			/*
 			 * No space yet available for the entire
 			 * request in the DMA engine.
 			 */
 			break;
 		}
 
 		if ((c + n + xchan->maxnsegs) >= XDMA_SGLIST_MAXLEN) {
 			/* Sglist is full. */
 			break;
 		}
 
 		nsegs = xdma_load_data(xchan, xr, seg);
 		if (nsegs == 0)
 			break;
 
 		xdma_sglist_add(&sg[n], seg, nsegs, xr);
 		n += nsegs;
 
 		QUEUE_IN_LOCK(xchan);
 		TAILQ_REMOVE(&xchan->queue_in, xr, xr_next);
 		QUEUE_IN_UNLOCK(xchan);
 
 		QUEUE_PROC_LOCK(xchan);
 		TAILQ_INSERT_TAIL(&xchan->processing, xr, xr_next);
 		QUEUE_PROC_UNLOCK(xchan);
 	}
 
 	return (n);
 }
 
 int
 xdma_queue_submit_sg(xdma_channel_t *xchan)
 {
 	struct xdma_sglist *sg;
 	xdma_controller_t *xdma;
 	uint32_t sg_n;
 	int ret;
 
 	xdma = xchan->xdma;
 	KASSERT(xdma != NULL, ("xdma is NULL"));
 
 	XCHAN_ASSERT_LOCKED(xchan);
 
 	sg = xchan->sg;
 
-	if ((xchan->flags & XCHAN_BUFS_ALLOCATED) == 0) {
+	if ((xchan->caps & XCHAN_CAP_NOBUFS) == 0 &&
+	    (xchan->flags & XCHAN_BUFS_ALLOCATED) == 0) {
 		device_printf(xdma->dev,
 		    "%s: Can't submit a transfer: no bufs\n",
 		    __func__);
 		return (-1);
 	}
 
 	sg_n = xdma_process(xchan, sg);
 	if (sg_n == 0)
 		return (0); /* Nothing to submit */
 
 	/* Now submit sglist to DMA engine driver. */
 	ret = XDMA_CHANNEL_SUBMIT_SG(xdma->dma_dev, xchan, sg, sg_n);
 	if (ret != 0) {
 		device_printf(xdma->dev,
 		    "%s: Can't submit an sglist.\n", __func__);
 		return (-1);
 	}
 
 	return (0);
 }