Index: sys/dev/xdma/xdma.h
===================================================================
--- sys/dev/xdma/xdma.h
+++ sys/dev/xdma/xdma.h
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016 Ruslan Bukin <br@bsdpad.com>
+ * Copyright (c) 2016-2017 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
@@ -30,8 +30,8 @@
  * $FreeBSD$
  */
 
-#ifndef _DEV_EXTRES_XDMA_H_
-#define _DEV_EXTRES_XDMA_H_
+#ifndef _DEV_XDMA_H_
+#define _DEV_XDMA_H_
 
 enum xdma_direction {
 	XDMA_MEM_TO_MEM,
@@ -53,6 +53,13 @@
 	XDMA_CMD_TERMINATE_ALL,
 };
 
+struct xdma_transfer_status {
+	uint32_t	transferred;
+	int		error;
+};
+
+typedef struct xdma_transfer_status xdma_transfer_status_t;
+
 struct xdma_controller {
 	device_t dev;		/* DMA consumer device_t. */
 	device_t dma_dev;	/* A real DMA device_t. */
@@ -64,6 +71,22 @@
 
 typedef struct xdma_controller xdma_controller_t;
 
+/* SG type of transfer. */
+struct xdma_request {
+	struct mbuf			*m;
+	enum xdma_direction		direction;
+	bus_addr_t			src_addr;	/* Physical address. */
+	bus_addr_t			dst_addr;	/* Physical address. */
+	bus_size_t			len;
+	xdma_transfer_status_t		status;
+	bool				done;
+};
+
+/*
+ * Cyclic/memcpy type of transfer.
+ * Legacy configuration struct.
+ * TODO: replace with xdma_request.
+ */
 struct xdma_channel_config {
 	enum xdma_direction	direction;
 	uintptr_t		src_addr;	/* Physical address. */
@@ -77,54 +100,109 @@
 typedef struct xdma_channel_config xdma_config_t;
 
 struct xdma_descriptor {
-	bus_addr_t	ds_addr;
-	bus_size_t	ds_len;
+	bus_addr_t			ds_addr;
+	bus_size_t			ds_len;
+	bus_dmamap_t			dma_map;
+	void				*desc;
 };
 
 typedef struct xdma_descriptor xdma_descriptor_t;
 
+struct xdma_sglist {
+	bool				first;
+	bool				last;
+	vm_paddr_t			paddr;
+	size_t				len;
+	enum xdma_direction		direction;
+};
+
+struct xchan_buf {
+	bus_dmamap_t			map;
+	struct xdma_request		*xr;
+	uint32_t			nsegs;
+	uint32_t			nsegs_left;
+};
+
+typedef struct xchan_buf xdma_buf_t;
+
 struct xdma_channel {
 	xdma_controller_t		*xdma;
 	xdma_config_t			conf;
 
-	uint8_t				flags;
+	uint16_t			flags;
 #define	XCHAN_DESC_ALLOCATED		(1 << 0)
-#define	XCHAN_CONFIGURED		(1 << 1)
-#define	XCHAN_TYPE_CYCLIC		(1 << 2)
-#define	XCHAN_TYPE_MEMCPY		(1 << 3)
+#define	XCHAN_BUFS_ALLOCATED		(1 << 1)
+#define	XCHAN_SGLIST_ALLOCATED		(1 << 2)
+#define	XCHAN_CONFIGURED		(1 << 3)
+#define	XCHAN_TYPE_CYCLIC		(1 << 4)
+#define	XCHAN_TYPE_MEMCPY		(1 << 5)
+#define	XCHAN_TYPE_FIFO			(1 << 6)
+#define	XCHAN_TYPE_SG			(1 << 7)
 
 	/* A real hardware driver channel. */
 	void				*chan;
 
 	/* Interrupt handlers. */
 	TAILQ_HEAD(, xdma_intr_handler)	ie_handlers;
+	TAILQ_ENTRY(xdma_channel)	xchan_next;
+
+	struct mtx			mtx_lock;
+
+	/* Request queue. */
+	struct xdma_request		*xr;
+	uint32_t			xr_num;
+	uint32_t			xr_count;
+	uint32_t			xr_head;
+	uint32_t			xr_processed;
+	uint32_t			xr_tail;
+
+	/* Bus dma bufs. */
+	xdma_buf_t			*bufs;
+	uint32_t			bufs_num;
+	bus_dma_tag_t			dma_tag_bufs;
+	uint32_t			buf_head;
+	uint32_t			buf_tail;
 
 	/* Descriptors. */
+	xdma_descriptor_t		*descs;
+	uint32_t			descs_num;
+	uint32_t			descs_used_count;
 	bus_dma_tag_t			dma_tag;
-	bus_dmamap_t			dma_map;
-	void				*descs;
-	xdma_descriptor_t		*descs_phys;
+	uint32_t			map_descr;
 	uint8_t				map_err;
-
-	struct mtx			mtx_lock;
-
-	TAILQ_ENTRY(xdma_channel)	xchan_next;
+	struct xdma_sglist		*sg;
 };
 
 typedef struct xdma_channel xdma_channel_t;
 
-/* xDMA controller alloc/free */
+/* xDMA controller ops */
 xdma_controller_t *xdma_ofw_get(device_t dev, const char *prop);
 int xdma_put(xdma_controller_t *xdma);
 
+/* xDMA channel ops */
 xdma_channel_t * xdma_channel_alloc(xdma_controller_t *);
 int xdma_channel_free(xdma_channel_t *);
 
 int xdma_prep_cyclic(xdma_channel_t *, enum xdma_direction,
     uintptr_t, uintptr_t, int, int, int, int);
 int xdma_prep_memcpy(xdma_channel_t *, uintptr_t, uintptr_t, size_t len);
-int xdma_desc_alloc(xdma_channel_t *, uint32_t, uint32_t);
-int xdma_desc_free(xdma_channel_t *xchan);
+int xdma_prep_sg(xdma_channel_t *xchan, uint32_t, uint32_t);
+
+int xchan_desc_alloc(xdma_channel_t *, uint32_t, uint32_t);
+int xchan_desc_free(xdma_channel_t *xchan);
+int xchan_desc_done(xdma_channel_t *xchan, uint32_t idx,
+    xdma_transfer_status_t *);
+int xchan_desc_sync_pre(xdma_channel_t *xchan, uint32_t);
+int xchan_desc_sync_post(xdma_channel_t *xchan, uint32_t);
+int xchan_bufs_free(xdma_channel_t *xchan);
+
+uint32_t xchan_next_buf(xdma_channel_t *xchan, uint32_t curidx);
+uint32_t xchan_next_desc(xdma_channel_t *xchan, uint32_t curidx);
+uint32_t xchan_next_req(xdma_channel_t *xchan, uint32_t curidx);
+
+/* xchan queue operations */
+int xdma_dequeue_mbuf(xdma_channel_t *xchan, struct mbuf **m,
+    xdma_transfer_status_t *);
+int xdma_enqueue_mbuf(xdma_channel_t *xchan, struct mbuf **m,
+    uintptr_t addr, enum xdma_direction dir);
+int xdma_queue_submit(xdma_channel_t *xchan);
 
 /* Channel Control */
 int xdma_begin(xdma_channel_t *xchan);
@@ -132,17 +210,17 @@
 int xdma_terminate(xdma_channel_t *xchan);
 
 /* Interrupt callback */
-int xdma_setup_intr(xdma_channel_t *xchan, int (*cb)(void *), void *arg, void **);
+int xdma_setup_intr(xdma_channel_t *xchan,
+    int (*cb)(void *, xdma_transfer_status_t *), void *arg, void **);
 int xdma_teardown_intr(xdma_channel_t *xchan, struct xdma_intr_handler *ih);
 int xdma_teardown_all_intr(xdma_channel_t *xchan);
-int xdma_callback(struct xdma_channel *xchan);
+int xdma_callback(struct xdma_channel *xchan, xdma_transfer_status_t *status);
 void xdma_assert_locked(void);
 
 struct xdma_intr_handler {
-	int				(*cb)(void *);
+	int				(*cb)(void *cb_user, xdma_transfer_status_t *status);
 	void				*cb_user;
 	struct mtx			ih_lock;
 	TAILQ_ENTRY(xdma_intr_handler)	ih_next;
 };
 
-#endif /* !_DEV_EXTRES_XDMA_H_ */
+#endif /* !_DEV_XDMA_H_ */
Index: sys/dev/xdma/xdma.c
===================================================================
--- sys/dev/xdma/xdma.c
+++ sys/dev/xdma/xdma.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016 Ruslan Bukin <br@bsdpad.com>
+ * Copyright (c) 2016-2017 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
@@ -40,6 +40,7 @@
 #include <sys/queue.h>
 #include <sys/kobj.h>
 #include <sys/malloc.h>
+#include <sys/mbuf.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
@@ -62,10 +63,18 @@
 MALLOC_DEFINE(M_XDMA, "xdma", "xDMA framework");
 
 /*
+ * Maximum number of segments per mbuf chain supported;
+ * longer chains are merged using m_defrag().
+ * TODO: make this configurable. Not all DMA engines support
+ * more than one segment.
+ */
+#define	MAX_NSEGS	1
+
+/*
  * Multiple xDMA controllers may work with a single DMA device,
  * so we have a global lock for physical channel management.
  */
 static struct mtx xdma_mtx;
+
 #define	XDMA_LOCK()		mtx_lock(&xdma_mtx)
 #define	XDMA_UNLOCK()		mtx_unlock(&xdma_mtx)
 #define	XDMA_ASSERT_LOCKED()	mtx_assert(&xdma_mtx, MA_OWNED)
@@ -77,6 +86,9 @@
 #define	XCHAN_UNLOCK(xchan)		mtx_unlock(&(xchan)->mtx_lock)
 #define	XCHAN_ASSERT_LOCKED(xchan)	mtx_assert(&(xchan)->mtx_lock, MA_OWNED)
 
+static int xchan_sglist_init(xdma_channel_t *xchan);
+static int xchan_sglist_free(xdma_channel_t *xchan);
+
 /*
  * Allocate virtual xDMA channel.
  */
@@ -139,7 +151,13 @@
 	xdma_teardown_all_intr(xchan);
 
 	/* Deallocate descriptors, if any. */
-	xdma_desc_free(xchan);
+	xchan_desc_free(xchan);
+	xchan_bufs_free(xchan);
+	xchan_sglist_free(xchan);
+
+	if (xchan->flags & XCHAN_TYPE_SG) {
+		free(xchan->xr, M_XDMA);
+	}
 
 	mtx_destroy(&xchan->mtx_lock);
 
@@ -153,8 +171,9 @@
 }
 
 int
-xdma_setup_intr(xdma_channel_t *xchan, int (*cb)(void *), void *arg,
-    void **ihandler)
+xdma_setup_intr(xdma_channel_t *xchan,
+    int (*cb)(void *, xdma_transfer_status_t *),
+    void *arg, void **ihandler)
 {
 	struct xdma_intr_handler *ih;
 	xdma_controller_t *xdma;
@@ -235,46 +254,46 @@
 static void
 xdma_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
 {
+	xdma_controller_t *xdma;
 	xdma_channel_t *xchan;
-	int i;
 
 	xchan = (xdma_channel_t *)arg;
 	KASSERT(xchan != NULL, ("xchan is NULL"));
 
+	xdma = xchan->xdma;
+	KASSERT(xdma != NULL, ("xdma is NULL"));
+
 	if (err) {
+		device_printf(xdma->dma_dev,
+		    "%s: Can't load DMA map.\n", __func__);
 		xchan->map_err = 1;
 		return;
 	}
 
-	for (i = 0; i < nseg; i++) {
-		xchan->descs_phys[i].ds_addr = segs[i].ds_addr;
-		xchan->descs_phys[i].ds_len = segs[i].ds_len;
-	}
+	xchan->descs[xchan->map_descr].ds_addr = segs[0].ds_addr;
+	xchan->descs[xchan->map_descr].ds_len = segs[0].ds_len;
 }
 
 static int
 xdma_desc_alloc_bus_dma(xdma_channel_t *xchan, uint32_t desc_size,
     uint32_t align)
 {
+	xdma_descriptor_t *desc;
 	xdma_controller_t *xdma;
-	bus_size_t all_desc_sz;
-	xdma_config_t *conf;
 	int nsegments;
 	int err;
+	int i;
 
 	xdma = xchan->xdma;
-	conf = &xchan->conf;
 
-	nsegments = conf->block_num;
-	all_desc_sz = (nsegments * desc_size);
+	nsegments = xchan->descs_num;
 
 	err = bus_dma_tag_create(
 	    bus_get_dma_tag(xdma->dev),
-	    align, desc_size,		/* alignment, boundary */
+	    align, 0,			/* alignment, boundary */
 	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
-	    all_desc_sz, nsegments,	/* maxsize, nsegments*/
+	    desc_size, 1,		/* maxsize, nsegments*/
 	    desc_size, 0,		/* maxsegsize, flags */
 	    NULL, NULL,			/* lockfunc, lockarg */
 	    &xchan->dma_tag);
@@ -284,32 +303,94 @@
 		return (-1);
 	}
 
-	err = bus_dmamem_alloc(xchan->dma_tag, (void **)&xchan->descs,
-	    BUS_DMA_WAITOK | BUS_DMA_COHERENT, &xchan->dma_map);
-	if (err) {
+	/* Descriptors. */
+	xchan->descs = malloc(nsegments * sizeof(xdma_descriptor_t),
+	    M_XDMA, (M_WAITOK | M_ZERO));
+	if (xchan->descs == NULL) {
 		device_printf(xdma->dev,
-		    "%s: Can't allocate memory for descriptors.\n", __func__);
+		    "%s: Can't allocate memory.\n", __func__);
 		return (-1);
 	}
 
-	xchan->descs_phys = malloc(nsegments * sizeof(xdma_descriptor_t), M_XDMA,
-	    (M_WAITOK | M_ZERO));
+	/* Allocate bus_dma memory for each descriptor. */
+	for (i = 0; i < nsegments; i++) {
+		desc = &xchan->descs[i];
+		err = bus_dmamem_alloc(xchan->dma_tag, (void **)&desc->desc,
+		    BUS_DMA_WAITOK | BUS_DMA_ZERO, &desc->dma_map);
+		if (err) {
+			device_printf(xdma->dev,
+			    "%s: Can't allocate memory for descriptors.\n", __func__);
+			return (-1);
+		}
+
+		xchan->map_err = 0;
+		xchan->map_descr = i;
+		err = bus_dmamap_load(xchan->dma_tag, desc->dma_map, desc->desc,
+		    desc_size, xdma_dmamap_cb, xchan, BUS_DMA_WAITOK);
+		if (err) {
+			device_printf(xdma->dev,
+			    "%s: Can't load DMA map.\n", __func__);
+			return (-1);
+		}
+
+		if (xchan->map_err != 0) {
+			device_printf(xdma->dev,
+			    "%s: Can't load DMA map.\n", __func__);
+			return (-1);
+		}
+	}
+
+	return (0);
+}
 
-	xchan->map_err = 0;
-	err = bus_dmamap_load(xchan->dma_tag, xchan->dma_map, xchan->descs,
-	    all_desc_sz, xdma_dmamap_cb, xchan, BUS_DMA_WAITOK);
-	if (err) {
+static int
+xdma_bufs_alloc_bus_dma(xdma_channel_t *xchan, uint32_t align)
+{
+	xdma_controller_t *xdma;
+	int nsegments;
+	int err;
+	int i;
+
+	xdma = xchan->xdma;
+
+	nsegments = xchan->bufs_num;
+
+	xchan->bufs = malloc(nsegments * sizeof(struct xchan_buf),
+	    M_XDMA, (M_WAITOK | M_ZERO));
+	if (xchan->bufs == NULL) {
 		device_printf(xdma->dev,
-		    "%s: Can't load DMA map.\n", __func__);
+		    "%s: Can't allocate memory.\n", __func__);
 		return (-1);
 	}
 
-	if (xchan->map_err != 0) {
+	/* Allocate bus_dma memory for mbufs. */
+	err = bus_dma_tag_create(
+	    bus_get_dma_tag(xdma->dev),	/* Parent tag. */
+	    align, 0,			/* alignment, boundary */
+	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
+	    BUS_SPACE_MAXADDR,		/* highaddr */
+	    NULL, NULL,			/* filter, filterarg */
+	    MCLBYTES, MAX_NSEGS, 	/* maxsize, nsegments */
+	    MCLBYTES,			/* maxsegsize */
+	    0,				/* flags */
+	    NULL, NULL,			/* lockfunc, lockarg */
+	    &xchan->dma_tag_bufs);
+	if (err != 0) {
 		device_printf(xdma->dev,
-		    "%s: Can't load DMA map.\n", __func__);
+		    "%s: Can't create bus_dma tag.\n", __func__);
 		return (-1);
 	}
 
+	for (i = 0; i < nsegments; i++) {
+		err = bus_dmamap_create(xchan->dma_tag_bufs, BUS_DMA_COHERENT,
+		    &xchan->bufs[i].map);
+		if (err != 0) {
+			device_printf(xdma->dev,
+			    "%s: Can't create buf DMA map.\n", __func__);
+			return (-1);
+		}
+	}
+
 	return (0);
 }
 
@@ -317,10 +398,9 @@
  * This function is called by the DMA controller driver.
  */
 int
-xdma_desc_alloc(xdma_channel_t *xchan, uint32_t desc_size, uint32_t align)
+xchan_desc_alloc(xdma_channel_t *xchan, uint32_t desc_size, uint32_t align)
 {
 	xdma_controller_t *xdma;
-	xdma_config_t *conf;
 	int ret;
 
 	XCHAN_ASSERT_LOCKED(xchan);
@@ -344,11 +424,7 @@
 		return (-1);
 	}
 
-	conf = &xchan->conf;
-
-	XCHAN_UNLOCK(xchan);
 	ret = xdma_desc_alloc_bus_dma(xchan, desc_size, align);
-	XCHAN_LOCK(xchan);
 	if (ret != 0) {
 		device_printf(xdma->dev,
 		    "%s: Can't allocate memory for descriptors.\n",
@@ -358,25 +434,38 @@
 
 	xchan->flags |= XCHAN_DESC_ALLOCATED;
 
-	/* We are going to write to descriptors. */
-	bus_dmamap_sync(xchan->dma_tag, xchan->dma_map, BUS_DMASYNC_PREWRITE);
+	ret = xdma_bufs_alloc_bus_dma(xchan, align);
+	if (ret != 0) {
+		device_printf(xdma->dev,
+		    "%s: Can't allocate memory for mbufs.\n",
+		    __func__);
+		return (-1);
+	}
+
+	xchan->flags |= XCHAN_BUFS_ALLOCATED;
 
 	return (0);
 }
 
 int
-xdma_desc_free(xdma_channel_t *xchan)
+xchan_desc_free(xdma_channel_t *xchan)
 {
+	xdma_descriptor_t *desc;
+	int i;
 
 	if ((xchan->flags & XCHAN_DESC_ALLOCATED) == 0) {
 		/* No descriptors allocated. */
 		return (-1);
 	}
 
-	bus_dmamap_unload(xchan->dma_tag, xchan->dma_map);
-	bus_dmamem_free(xchan->dma_tag, xchan->descs, xchan->dma_map);
+	for (i = 0; i < xchan->descs_num; i++) {
+		desc = &xchan->descs[i];
+		bus_dmamap_unload(xchan->dma_tag, desc->dma_map);
+		bus_dmamem_free(xchan->dma_tag, desc->desc, desc->dma_map);
+	}
+
 	bus_dma_tag_destroy(xchan->dma_tag);
-	free(xchan->descs_phys, M_XDMA);
+	free(xchan->descs, M_XDMA);
 
 	xchan->flags &= ~(XCHAN_DESC_ALLOCATED);
 
@@ -384,6 +473,30 @@
 }
 
 int
+xchan_bufs_free(xdma_channel_t *xchan)
+{
+	xdma_buf_t *b;
+	int i;
+
+	if ((xchan->flags & XCHAN_BUFS_ALLOCATED) == 0) {
+		/* No bufs allocated. */
+		return (-1);
+	}
+
+	for (i = 0; i < xchan->bufs_num; i++) {
+		b = &xchan->bufs[i];
+		bus_dmamap_destroy(xchan->dma_tag_bufs, b->map);
+	}
+
+	bus_dma_tag_destroy(xchan->dma_tag_bufs);
+	free(xchan->bufs, M_XDMA);
+
+	xchan->flags &= ~(XCHAN_BUFS_ALLOCATED);
+
+	return (0);
+}
+
+int
 xdma_prep_memcpy(xdma_channel_t *xchan, uintptr_t src_addr,
     uintptr_t dst_addr, size_t len)
 {
@@ -401,12 +514,14 @@
 	conf->block_len = len;
 	conf->block_num = 1;
 
+	xchan->descs_num = conf->block_num;
+
 	xchan->flags |= (XCHAN_CONFIGURED | XCHAN_TYPE_MEMCPY);
 
 	XCHAN_LOCK(xchan);
 
 	/* Deallocate old descriptors, if any. */
-	xdma_desc_free(xchan);
+	xchan_desc_free(xchan);
 
 	ret = XDMA_CHANNEL_PREP_MEMCPY(xdma->dma_dev, xchan);
 	if (ret != 0) {
@@ -417,10 +532,348 @@
 		return (-1);
 	}
 
+	XCHAN_UNLOCK(xchan);
+
+	return (0);
+}
+
+int
+xdma_prep_sg(xdma_channel_t *xchan, uint32_t ndesc, uint32_t xr_num)
+{
+	xdma_controller_t *xdma;
+	int ret;
+
+	xdma = xchan->xdma;
+
+	KASSERT(xdma != NULL, ("xdma is NULL"));
+
+	if (xchan->flags & XCHAN_CONFIGURED) {
+		device_printf(xdma->dev,
+		    "%s: Channel is already configured.\n", __func__);
+		return (-1);
+	}
+
+	xchan->descs_num = ndesc;
+	xchan->bufs_num = ndesc;
+	xchan->xr_num = xr_num;
+
+	/* Allocate sglist. */
+	ret = xchan_sglist_init(xchan);
+	if (ret != 0) {
+		device_printf(xdma->dev,
+		    "%s: Can't allocate sglist.\n", __func__);
+		return (-1);
+	}
+
+	/* Allocate request queue. */
+	xchan->xr = malloc(sizeof(struct xdma_request) * xr_num,
+	    M_XDMA, M_WAITOK | M_ZERO);
+	if (xchan->xr == NULL) {
+		device_printf(xdma->dev,
+		    "%s: Can't allocate request queue.\n", __func__);
+		return (-1);
+	}
+
+	xchan->flags |= (XCHAN_CONFIGURED | XCHAN_TYPE_SG);
+
+	XCHAN_LOCK(xchan);
+
+	/* Deallocate old descriptors, if any. */
+	xchan_desc_free(xchan);
+	xchan_bufs_free(xchan);
+
+	ret = XDMA_CHANNEL_PREP_SG(xdma->dma_dev, xchan);
+	if (ret != 0) {
+		device_printf(xdma->dev,
+		    "%s: Can't prepare SG transfer.\n", __func__);
+		XCHAN_UNLOCK(xchan);
+
+		return (-1);
+	}
+
+	XCHAN_UNLOCK(xchan);
+
+	return (0);
+}
+
+inline uint32_t
+xchan_next_req(xdma_channel_t *xchan, uint32_t curidx)
+{
+
+	return ((curidx + 1) % xchan->xr_num);
+}
+
+inline uint32_t
+xchan_next_buf(xdma_channel_t *xchan, uint32_t curidx)
+{
+
+	return ((curidx + 1) % xchan->bufs_num);
+}
+
+inline uint32_t
+xchan_next_desc(xdma_channel_t *xchan, uint32_t curidx)
+{
+
+	return ((curidx + 1) % xchan->descs_num);
+}
+
+int
+xdma_dequeue_mbuf(xdma_channel_t *xchan, struct mbuf **mp,
+    xdma_transfer_status_t *status)
+{
+	struct xdma_request *xr;
+
+	if (xchan->xr_tail == xchan->xr_processed) {
+		return (-1);
+	}
+
+	xr = &xchan->xr[xchan->xr_tail];
+	if (xr->done == 0) {
+		return (-1);
+	}
+
+	*mp = xr->m;
+	status->error = xr->status.error;
+	status->transferred = xr->status.transferred;
+	xchan->xr_tail = xchan_next_req(xchan, xchan->xr_tail);
+	atomic_subtract_int(&xchan->xr_count, 1);
+
+	return (0);
+}
+
+int
+xdma_enqueue_mbuf(xdma_channel_t *xchan, struct mbuf **mp,
+    uintptr_t addr, enum xdma_direction dir)
+{
+	struct xdma_request *xr;
+	xdma_controller_t *xdma;
+
+	xdma = xchan->xdma;
+
+	if (xchan->xr_count >= (xchan->xr_num - 1)) {
+		/* No space is available yet. */
+		return (-1);
+	}
+
+	xr = &xchan->xr[xchan->xr_head];
+	xr->direction = dir;
+	xr->m = *mp;
+	if (dir == XDMA_MEM_TO_DEV) {
+		xr->dst_addr = addr;
+	} else {
+		xr->src_addr = addr;
+	}
+	xr->done = 0;
+	xchan->xr_head = xchan_next_req(xchan, xchan->xr_head);
+	atomic_add_int(&xchan->xr_count, 1);
+
+	return (0);
+}
+
+int
+xchan_desc_sync_post(xdma_channel_t *xchan, uint32_t i)
+{
+
 	if (xchan->flags & XCHAN_DESC_ALLOCATED) {
-		/* Driver created xDMA descriptors. */
-		bus_dmamap_sync(xchan->dma_tag, xchan->dma_map,
-		    BUS_DMASYNC_POSTWRITE);
+		bus_dmamap_sync(xchan->dma_tag, xchan->descs[i].dma_map,
+		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+	}
+
+	return (0);
+}
+
+int
+xchan_desc_sync_pre(xdma_channel_t *xchan, uint32_t i)
+{
+
+	if (xchan->flags & XCHAN_DESC_ALLOCATED) {
+		bus_dmamap_sync(xchan->dma_tag, xchan->descs[i].dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+	}
+
+	return (0);
+}
+
+static int
+xchan_sglist_init(xdma_channel_t *xchan)
+{
+	uint32_t sz;
+
+	if (xchan->flags & XCHAN_SGLIST_ALLOCATED) {
+		return (-1);
+	}
+
+	sz = (sizeof(struct xdma_sglist) * xchan->descs_num);
+
+	xchan->sg = malloc(sz, M_XDMA, M_WAITOK | M_ZERO);
+	if (xchan->sg == NULL) {
+		return (-1);
+	}
+
+	xchan->flags |= XCHAN_SGLIST_ALLOCATED;
+
+	return (0);
+}
+
+static int
+xchan_sglist_free(xdma_channel_t *xchan)
+{
+
+	if (xchan->flags & XCHAN_SGLIST_ALLOCATED) {
+		free(xchan->sg, M_XDMA);
+	}
+
+	xchan->flags &= ~(XCHAN_SGLIST_ALLOCATED);
+
+	return (0);
+}
+
+static int
+xdma_sglist_add(struct xdma_sglist *sg, struct bus_dma_segment *seg,
+    uint32_t nsegs, enum xdma_direction dir)
+{
+	int i;
+
+	for (i = 0; i < nsegs; i++) {
+		sg[i].paddr = seg[i].ds_addr;
+		sg[i].len = seg[i].ds_len;
+		sg[i].direction = dir;
+		sg[i].first = 0;
+		sg[i].last = 0;
+		if (i == 0) {
+			sg[i].first = 1;
+		}
+		if (i == (nsegs - 1)) {
+			sg[i].last = 1;
+		}
+	}
+
+	return (0);
+}
+
+static int
+xdma_sglist_prepare(xdma_channel_t *xchan,
+    struct xdma_sglist *sg)
+{
+	struct bus_dma_segment seg[MAX_NSEGS];
+	struct xdma_request *xr;
+	xdma_controller_t *xdma;
+	struct mbuf *m;
+	int error, nsegs;
+	uint32_t c;
+	uint32_t n;
+	int i;
+
+	xdma = xchan->xdma;
+
+	n = 0;
+
+	for (;;) {
+		if (xchan->xr_processed == xchan->xr_head) {
+			/* All the requests processed. */
+			break;
+		}
+		xr = &xchan->xr[xchan->xr_processed];
+		c = 0;
+		for (m = xr->m; m != NULL; m = m->m_next) {
+			c++;
+		}
+
+		if (c > MAX_NSEGS) {
+			if ((m = m_defrag(xr->m, M_NOWAIT)) == NULL) {
+				device_printf(xdma->dma_dev,
+				    "%s: Can't defrag mbuf\n", __func__);
+				break;
+			}
+			xr->m = m;
+			c = 1;
+		}
+		m = xr->m;
+
+		/* At least one descriptor must be left empty. */
+		if (xchan->descs_used_count >= (xchan->descs_num - c)) {
+			/*
+			 * No space yet available for entire
+			 * mbuf chain in the descriptor ring.
+			 */
+			break;
+		}
+
+		i = xchan->buf_head;
+
+		error = bus_dmamap_load_mbuf_sg(xchan->dma_tag_bufs,
+		    xchan->bufs[i].map, m, seg, &nsegs, 0);
+		if (error != 0) {
+			if (error == ENOMEM) {
+				/*
+				 * Out of memory. Try again later.
+				 * TODO: count errors.
+				 */
+			} else {
+				device_printf(xdma->dma_dev,
+				    "%s: bus_dmamap_load_mbuf_sg failed with err %d\n",
+				    __func__, error);
+			}
+			break;
+		}
+
+	KASSERT(nsegs <= MAX_NSEGS,
+	    ("%s: %d segments returned!", __func__, nsegs));
+
+		if (xr->direction == XDMA_MEM_TO_DEV) {
+			bus_dmamap_sync(xchan->dma_tag_bufs, xchan->bufs[i].map,
+			    BUS_DMASYNC_PREWRITE);
+		} else {
+			bus_dmamap_sync(xchan->dma_tag_bufs, xchan->bufs[i].map,
+			    BUS_DMASYNC_PREREAD);
+		}
+
+		xchan->bufs[i].xr = xr;
+		xchan->bufs[i].nsegs = nsegs;
+		xchan->bufs[i].nsegs_left = nsegs;
+
+		xdma_sglist_add(&sg[n], seg, nsegs, xr->direction);
+		n += nsegs;
+
+		xchan->buf_head = xchan_next_buf(xchan, xchan->buf_head);
+		atomic_add_int(&xchan->descs_used_count, nsegs);
+
+		xchan->xr_processed = xchan_next_req(xchan, xchan->xr_processed);
+	}
+
+	return (n);
+}
+
+int
+xdma_queue_submit(xdma_channel_t *xchan)
+{
+	struct xdma_sglist *sg;
+	xdma_controller_t *xdma;
+	uint32_t sg_n;
+	int ret;
+
+	xdma = xchan->xdma;
+	KASSERT(xdma != NULL, ("xdma is NULL"));
+
+	sg = xchan->sg;
+
+	sg_n = xdma_sglist_prepare(xchan, sg);
+	if (sg_n == 0) {
+		/* Nothing to submit */
+		return (0);
+	}
+
+	/* Now submit xdma_sglist to DMA engine driver. */
+
+	XCHAN_LOCK(xchan);
+
+	ret = XDMA_CHANNEL_SUBMIT_SG(xdma->dma_dev, xchan, sg, sg_n);
+	if (ret != 0) {
+		device_printf(xdma->dev,
+		    "%s: Can't submit SG transfer.\n", __func__);
+
+		XCHAN_UNLOCK(xchan);
+
+		return (-1);
 	}
 
 	XCHAN_UNLOCK(xchan);
@@ -449,12 +902,15 @@
 	conf->src_width = src_width;
 	conf->dst_width = dst_width;
 
+	xchan->descs_num = conf->block_num;
+
 	xchan->flags |= (XCHAN_CONFIGURED | XCHAN_TYPE_CYCLIC);
 
 	XCHAN_LOCK(xchan);
 
 	/* Deallocate old descriptors, if any. */
-	xdma_desc_free(xchan);
+	xchan_desc_free(xchan);
+	xchan_bufs_free(xchan);
 
 	ret = XDMA_CHANNEL_PREP_CYCLIC(xdma->dma_dev, xchan);
 	if (ret != 0) {
@@ -465,12 +921,6 @@
 		return (-1);
 	}
 
-	if (xchan->flags & XCHAN_DESC_ALLOCATED) {
-		/* Driver has created xDMA descriptors. */
-		bus_dmamap_sync(xchan->dma_tag, xchan->dma_map,
-		    BUS_DMASYNC_POSTWRITE);
-	}
-
 	XCHAN_UNLOCK(xchan);
 
 	return (0);
@@ -484,6 +934,11 @@
 
 	xdma = xchan->xdma;
 
+	if (xchan->flags & XCHAN_TYPE_SG) {
+		/* Not valid for SG channels. */
+		return (0);
+	}
+
 	ret = XDMA_CHANNEL_CONTROL(xdma->dma_dev, xchan, XDMA_CMD_BEGIN);
 	if (ret != 0) {
 		device_printf(xdma->dev,
@@ -531,17 +986,60 @@
 }
 
 int
-xdma_callback(xdma_channel_t *xchan)
+xchan_desc_done(xdma_channel_t *xchan, uint32_t idx,
+    struct xdma_transfer_status *status)
+{
+	struct xdma_request *xr;
+	xdma_controller_t *xdma;
+	xdma_buf_t *b;
+
+	xdma = xchan->xdma;
+
+	b = &xchan->bufs[xchan->buf_tail];
+	xr = b->xr;
+
+	atomic_subtract_int(&b->nsegs_left, 1);
+
+	if (b->nsegs_left == 0) {
+		if (xr->direction == XDMA_MEM_TO_DEV) {
+			bus_dmamap_sync(xchan->dma_tag_bufs, b->map, 
+			    BUS_DMASYNC_POSTWRITE);
+		} else {
+			bus_dmamap_sync(xchan->dma_tag_bufs, b->map, 
+			    BUS_DMASYNC_POSTREAD);
+		}
+
+		bus_dmamap_unload(xchan->dma_tag_bufs, b->map);
+		xr->status.error = status->error;
+		xr->status.transferred = status->transferred;
+		xr->done = 1;
+
+		xchan->buf_tail = xchan_next_buf(xchan, xchan->buf_tail);
+	}
+
+	atomic_subtract_int(&xchan->descs_used_count, 1);
+
+	return (0);
+}
+
+int
+xdma_callback(xdma_channel_t *xchan, xdma_transfer_status_t *status)
 {
 	struct xdma_intr_handler *ih_tmp;
 	struct xdma_intr_handler *ih;
 
 	TAILQ_FOREACH_SAFE(ih, &xchan->ie_handlers, ih_next, ih_tmp) {
 		if (ih->cb != NULL) {
-			ih->cb(ih->cb_user);
+			ih->cb(ih->cb_user, status);
 		}
 	}
 
+	if (xchan->flags & XCHAN_TYPE_SG) {
+		/* Submit any requests that are still queued. */
+		xdma_queue_submit(xchan);
+		return (0);
+	}
+
 	return (0);
 }
 
@@ -561,7 +1059,8 @@
 {
 	uint32_t ret;
 
-	ret = XDMA_OFW_MD_DATA(xdma->dma_dev, cells, ncells, (void **)&xdma->data);
+	ret = XDMA_OFW_MD_DATA(xdma->dma_dev,
+	    cells, ncells, (void **)&xdma->data);
 
 	return (ret);
 }
Index: sys/dev/xdma/xdma_fdt_test.c
===================================================================
--- sys/dev/xdma/xdma_fdt_test.c
+++ sys/dev/xdma/xdma_fdt_test.c
@@ -297,7 +297,12 @@
 
 		mtx_lock(&sc->mtx);
 
-		xdmatest_test(sc);
+		if (xdmatest_test(sc) != 0) {
+			mtx_unlock(&sc->mtx);
+			device_printf(sc->dev,
+			    "%s: Test failed.\n", __func__);
+			break;
+		}
 
 		timeout = 100;
 
Index: sys/dev/xdma/xdma_if.m
===================================================================
--- sys/dev/xdma/xdma_if.m
+++ sys/dev/xdma/xdma_if.m
@@ -59,6 +59,24 @@
 };
 
 #
+# Prepare xDMA channel for a scatter-gather transfer.
+#
+METHOD int channel_prep_sg {
+	device_t		dev;
+	struct xdma_channel	*xchan;
+};
+
+#
+# Submit scatter-gather list to DMA.
+#
+METHOD int channel_submit_sg {
+	device_t			dev;
+	struct xdma_channel		*xchan;
+	struct xdma_sglist		*sg;
+	uint32_t			sg_n;
+};
+
+#
 # Notify the driver that we have machine-dependent data.
 #
 METHOD int ofw_md_data {