Index: head/sys/mips/include/cache.h =================================================================== --- head/sys/mips/include/cache.h (revision 308844) +++ head/sys/mips/include/cache.h (revision 308845) @@ -1,222 +1,224 @@ /* $NetBSD: cache.h,v 1.6 2003/02/17 11:35:01 simonb Exp $ */ /* * Copyright 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Jason R. Thorpe for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_CACHE_H_ #define _MACHINE_CACHE_H_ /* * Cache operations. * * We define the following primitives: * * --- Instruction cache synchronization (mandatory): * * icache_sync_all Synchronize I-cache * * icache_sync_range Synchronize I-cache range * * icache_sync_range_index (index ops) * * --- Primary data cache (mandatory): * * pdcache_wbinv_all Write-back Invalidate primary D-cache * * pdcache_wbinv_range Write-back Invalidate primary D-cache range * * pdcache_wbinv_range_index (index ops) * * pdcache_inv_range Invalidate primary D-cache range * * pdcache_wb_range Write-back primary D-cache range * * --- Secondary data cache (optional): * * sdcache_wbinv_all Write-back Invalidate secondary D-cache * * sdcache_wbinv_range Write-back Invalidate secondary D-cache range * * sdcache_wbinv_range_index (index ops) * * sdcache_inv_range Invalidate secondary D-cache range * * sdcache_wb_range Write-back secondary D-cache range * * There are some rules that must be followed: * * I-cache Synch (all or range): * The goal is to synchronize the instruction stream, * so you may need to write-back dirty data cache * blocks first. If a range is requested, and you * can't synchronize just a range, you have to hit * the whole thing. * * D-cache Write-back Invalidate range: * If you can't WB-Inv a range, you must WB-Inv the * entire D-cache. * * D-cache Invalidate: * If you can't Inv the D-cache without doing a * Write-back, YOU MUST PANIC. This is to catch * errors in calling code. 
Callers must be aware * of this scenario, and must handle it appropriately * (consider the bus_dma(9) operations). * * D-cache Write-back: * If you can't Write-back without doing an invalidate, * that's fine. Then treat this as a WB-Inv. Skipping * the invalidate is merely an optimization. * * All operations: * Valid virtual addresses must be passed to the * cache operation. * * Finally, these primitives are grouped together in reasonable * ways. For all operations described here, first the primary * cache is frobbed, then the secondary cache frobbed, if the * operation for the secondary cache exists. * * mips_icache_sync_all Synchronize I-cache * * mips_icache_sync_range Synchronize I-cache range * * mips_icache_sync_range_index (index ops) * * mips_dcache_wbinv_all Write-back Invalidate D-cache * * mips_dcache_wbinv_range Write-back Invalidate D-cache range * * mips_dcache_wbinv_range_index (index ops) * * mips_dcache_inv_range Invalidate D-cache range * * mips_dcache_wb_range Write-back D-cache range */ struct mips_cache_ops { void (*mco_icache_sync_all)(void); void (*mco_icache_sync_range)(vm_offset_t, vm_size_t); void (*mco_icache_sync_range_index)(vm_offset_t, vm_size_t); void (*mco_pdcache_wbinv_all)(void); void (*mco_pdcache_wbinv_range)(vm_offset_t, vm_size_t); void (*mco_pdcache_wbinv_range_index)(vm_offset_t, vm_size_t); void (*mco_pdcache_inv_range)(vm_offset_t, vm_size_t); void (*mco_pdcache_wb_range)(vm_offset_t, vm_size_t); /* These are called only by the (mipsNN) icache functions. */ void (*mco_intern_pdcache_wbinv_all)(void); void (*mco_intern_pdcache_wbinv_range_index)(vm_offset_t, vm_size_t); void (*mco_intern_pdcache_wb_range)(vm_offset_t, vm_size_t); void (*mco_sdcache_wbinv_all)(void); void (*mco_sdcache_wbinv_range)(vm_offset_t, vm_size_t); void (*mco_sdcache_wbinv_range_index)(vm_offset_t, vm_size_t); void (*mco_sdcache_inv_range)(vm_offset_t, vm_size_t); void (*mco_sdcache_wb_range)(vm_offset_t, vm_size_t); /* These are called only by the (mipsNN) icache functions. 
*/ void (*mco_intern_sdcache_wbinv_all)(void); void (*mco_intern_sdcache_wbinv_range_index)(vm_offset_t, vm_size_t); void (*mco_intern_sdcache_wb_range)(vm_offset_t, vm_size_t); }; extern struct mips_cache_ops mips_cache_ops; /* PRIMARY CACHE VARIABLES */ extern int mips_picache_linesize; extern int mips_pdcache_linesize; +extern int mips_sdcache_linesize; +extern int mips_dcache_max_linesize; #define __mco_noargs(prefix, x) \ do { \ (*mips_cache_ops.mco_ ## prefix ## p ## x )(); \ if (*mips_cache_ops.mco_ ## prefix ## s ## x ) \ (*mips_cache_ops.mco_ ## prefix ## s ## x )(); \ } while (/*CONSTCOND*/0) #define __mco_2args(prefix, x, a, b) \ do { \ (*mips_cache_ops.mco_ ## prefix ## p ## x )((a), (b)); \ if (*mips_cache_ops.mco_ ## prefix ## s ## x ) \ (*mips_cache_ops.mco_ ## prefix ## s ## x )((a), (b)); \ } while (/*CONSTCOND*/0) #define mips_icache_sync_all() \ (*mips_cache_ops.mco_icache_sync_all)() #define mips_icache_sync_range(v, s) \ (*mips_cache_ops.mco_icache_sync_range)((v), (s)) #define mips_icache_sync_range_index(v, s) \ (*mips_cache_ops.mco_icache_sync_range_index)((v), (s)) #define mips_dcache_wbinv_all() \ __mco_noargs(, dcache_wbinv_all) #define mips_dcache_wbinv_range(v, s) \ __mco_2args(, dcache_wbinv_range, (v), (s)) #define mips_dcache_wbinv_range_index(v, s) \ __mco_2args(, dcache_wbinv_range_index, (v), (s)) #define mips_dcache_inv_range(v, s) \ __mco_2args(, dcache_inv_range, (v), (s)) #define mips_dcache_wb_range(v, s) \ __mco_2args(, dcache_wb_range, (v), (s)) /* * Private D-cache functions only called from (currently only the * mipsNN) I-cache functions. */ #define mips_intern_dcache_wbinv_all() \ __mco_noargs(intern_, dcache_wbinv_all) #define mips_intern_dcache_wbinv_range_index(v, s) \ __mco_2args(intern_, dcache_wbinv_range_index, (v), (s)) #define mips_intern_dcache_wb_range(v, s) \ __mco_2args(intern_, dcache_wb_range, (v), (s)) /* forward declaration */ struct mips_cpuinfo; void mips_config_cache(struct mips_cpuinfo *); #include #endif /* _MACHINE_CACHE_H_ */ Index: head/sys/mips/mips/busdma_machdep.c =================================================================== --- head/sys/mips/mips/busdma_machdep.c (revision 308844) +++ head/sys/mips/mips/busdma_machdep.c (revision 308845) @@ -1,1493 +1,1493 @@ /*- * Copyright (c) 2006 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred */ #include __FBSDID("$FreeBSD$"); /* * MIPS bus dma support routines */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BPAGES 64 #define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ vm_offset_t vaddr_nocache; /* kva of bounce buffer uncached */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ bus_addr_t dataaddr; /* client physical address */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; struct sync_list { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ bus_size_t datacount; /* client data count */ }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); #define DMAMAP_UNCACHEABLE 0x08 #define DMAMAP_CACHE_ALIGNED 0x10 struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; int flags; void *origbuffer; void *allocbuffer; TAILQ_ENTRY(bus_dmamap) freelist; STAILQ_ENTRY(bus_dmamap) links; bus_dmamap_callback_t *callback; void *callback_arg; int sync_count; struct sync_list *slist; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void 
free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); /* Default tag, as most drivers provide no parent tag. */ bus_dma_tag_t mips_root_dma_tag; static uma_zone_t dmamap_zone; /* Cache of struct bus_dmamap items */ static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata"); MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages"); /* * This is the ctor function passed to uma_zcreate() for the pool of dma maps. * It'll need platform-specific changes if this code is copied. */ static int dmamap_ctor(void *mem, int size, void *arg, int flags) { bus_dmamap_t map; bus_dma_tag_t dmat; map = (bus_dmamap_t)mem; dmat = (bus_dma_tag_t)arg; dmat->map_count++; map->dmat = dmat; map->flags = 0; map->slist = NULL; map->allocbuffer = NULL; map->sync_count = 0; STAILQ_INIT(&map->bpages); return (0); } /* * This is the dtor function passed to uma_zcreate() for the pool of dma maps. * It may need platform-specific changes if this code is copied . */ static void dmamap_dtor(void *mem, int size, void *arg) { bus_dmamap_t map; map = (bus_dmamap_t)mem; map->dmat->map_count--; } static void busdma_init(void *dummy) { /* Create a cache of maps for bus_dmamap_create(). */ dmamap_zone = uma_zcreate("dma maps", sizeof(struct bus_dmamap), dmamap_ctor, dmamap_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); /* Create a cache of buffers in standard (cacheable) memory. */ standard_allocator = busdma_bufalloc_create("buffer", - mips_pdcache_linesize, /* minimum_alignment */ + mips_dcache_max_linesize, /* minimum_alignment */ NULL, /* uma_alloc func */ NULL, /* uma_free func */ 0); /* uma_zcreate_flags */ /* * Create a cache of buffers in uncacheable memory, to implement the * BUS_DMA_COHERENT flag. */ coherent_allocator = busdma_bufalloc_create("coherent", - mips_pdcache_linesize, /* minimum_alignment */ + mips_dcache_max_linesize, /* minimum_alignment */ busdma_bufalloc_alloc_uncacheable, busdma_bufalloc_free_uncacheable, 0); /* uma_zcreate_flags */ } SYSINIT(busdma, SI_SUB_KMEM, SI_ORDER_FOURTH, busdma_init, NULL); /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ static int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) || ((paddr & (dmat->alignment - 1)) != 0)) && (dmat->filter == NULL || (*dmat->filter)(dmat->filterarg, paddr) != 0)) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } /* * Check to see if the specified page is in an allowed DMA range. */ static __inline int _bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) || (lowaddr < phys_avail[i] && highaddr > phys_avail[i])) return (1); } return (0); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. 
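 *
 * Purely as an illustration (not part of this change; the softc fields
 * and size limits below are made-up placeholders): a driver that has
 * its own mutex would typically hand it to busdma through the
 * lockfunc/lockfuncarg arguments of bus_dma_tag_create(), roughly:
 *
 *	mtx_init(&sc->sc_mtx, "mydev", NULL, MTX_DEF);
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev),
 *	    1, 0,			/* alignment, boundary */
 *	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
 *	    BUS_SPACE_MAXADDR,		/* highaddr */
 *	    NULL, NULL,			/* filter, filterarg */
 *	    MAXBSIZE, 1, MAXBSIZE,	/* maxsize, nsegments, maxsegsz */
 *	    0,				/* flags */
 *	    busdma_lock_mutex, &sc->sc_mtx,
 *	    &sc->sc_dmat);
 *
 * busdma then calls busdma_lock_mutex(&sc->sc_mtx, BUS_DMA_LOCK) and
 * BUS_DMA_UNLOCK around any deferred callback.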
*/ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { #ifdef INVARIANTS panic("driver error: busdma dflt_lock called"); #else printf("DRIVER_ERROR: busdma dflt_lock called\n"); #endif } static __inline bus_dmamap_t _busdma_alloc_dmamap(bus_dma_tag_t dmat) { struct sync_list *slist; bus_dmamap_t map; slist = malloc(sizeof(*slist) * dmat->nsegments, M_BUSDMA, M_NOWAIT); if (slist == NULL) return (NULL); map = uma_zalloc_arg(dmamap_zone, dmat, M_NOWAIT); if (map != NULL) map->slist = slist; else free(slist, M_BUSDMA); return (map); } static __inline void _busdma_free_dmamap(bus_dmamap_t map) { free(map->slist, M_BUSDMA); uma_zfree(dmamap_zone, map); } /* * Allocate a device specific dma_tag. */ #define SEG_NB 1024 int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Return a NULL tag on failure */ *dmat = NULL; if (!parent) parent = mips_root_dma_tag; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; if (cpuinfo.cache_coherent_dma) newtag->flags |= BUS_DMA_COHERENT; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } newtag->segments = NULL; /* * Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if ((newtag->filter != NULL) || ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) || newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & 
BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_BUSDMA); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } else newtag->bounce_zone = NULL; if (error != 0) free(newtag, M_BUSDMA); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { #ifdef KTR bus_dma_tag_t dmat_copy = dmat; #endif if (dmat != NULL) { if (dmat->map_count != 0) return (EBUSY); while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_BUSDMA); free(dmat, M_BUSDMA); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } CTR2(KTR_BUSDMA, "%s tag %p", __func__, dmat_copy); return (0); } #include /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { bus_dmamap_t newmap; int error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA, M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } newmap = _busdma_alloc_dmamap(dmat); if (newmap == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } *mapp = newmap; /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) { _busdma_free_dmamap(newmap); *mapp = NULL; return (error); } } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ maxpages = MAX_BPAGES; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } else { error = 0; } } bz->map_count++; } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, error); return (0); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. 
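 *
 * For illustration only (sc_dmat and sc_map are hypothetical softc
 * fields, not names from this file), the usual pairing is:
 *
 *	if (bus_dmamap_create(sc->sc_dmat, 0, &sc->sc_map) != 0)
 *		return (ENOMEM);
 *	... bus_dmamap_load / bus_dmamap_sync / bus_dmamap_unload ...
 *	bus_dmamap_destroy(sc->sc_dmat, sc->sc_map);
 *
 * The map must be unloaded and must hold no bounce pages before it is
 * destroyed; otherwise the EBUSY check below fires.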
*/ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; _busdma_free_dmamap(map); CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddrp, int flags, bus_dmamap_t *mapp) { bus_dmamap_t newmap = NULL; busdma_bufalloc_t ba; struct busdma_bufzone *bufzone; vm_memattr_t memattr; void *vaddr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } } newmap = _busdma_alloc_dmamap(dmat); if (newmap == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } /* * If all the memory is coherent with DMA then we don't need to * do anything special for a coherent mapping request. */ if (dmat->flags & BUS_DMA_COHERENT) flags &= ~BUS_DMA_COHERENT; if (flags & BUS_DMA_COHERENT) { memattr = VM_MEMATTR_UNCACHEABLE; ba = coherent_allocator; newmap->flags |= DMAMAP_UNCACHEABLE; } else { memattr = VM_MEMATTR_DEFAULT; ba = standard_allocator; } /* All buffers we allocate are cache-aligned. */ newmap->flags |= DMAMAP_CACHE_ALIGNED; if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; /* * Try to find a bufzone in the allocator that holds a cache of buffers * of the right size for this request. If the buffer is too big to be * held in the allocator cache, this returns NULL. */ bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); /* * Allocate the buffer from the uma(9) allocator if... * - It's small enough to be in the allocator (bufzone not NULL). * - The alignment constraint isn't larger than the allocation size * (the allocator aligns buffers to their size boundaries). * - There's no need to handle lowaddr/highaddr exclusion zones. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed nsegments. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. */ if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) { vaddr = uma_zalloc(bufzone->umazone, mflags); } else if (dmat->nsegments >= btoc(dmat->maxsize) && dmat->alignment <= PAGE_SIZE && dmat->boundary == 0) { vaddr = (void *)kmem_alloc_attr(kernel_arena, dmat->maxsize, mflags, 0, dmat->lowaddr, memattr); } else { vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->maxsize, mflags, 0, dmat->lowaddr, dmat->alignment, dmat->boundary, memattr); } if (vaddr == NULL) { _busdma_free_dmamap(newmap); newmap = NULL; } else { newmap->sync_count = 0; } *vaddrp = vaddr; *mapp = newmap; return (vaddr == NULL ? ENOMEM : 0); } /* * Free a piece of memory and it's allocated dmamap, that was allocated * via bus_dmamem_alloc. 
Make the same choice for free/contigfree. */ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { struct busdma_bufzone *bufzone; busdma_bufalloc_t ba; if (map->flags & DMAMAP_UNCACHEABLE) ba = coherent_allocator; else ba = standard_allocator; free(map->slist, M_BUSDMA); uma_zfree(dmamap_zone, map); bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) uma_zfree(bufzone->umazone, vaddr); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if ((map->pagesneeded == 0)) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { sgsize = MIN(sgsize, PAGE_SIZE); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if ((map->pagesneeded == 0)) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { bus_size_t sg_len; KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); paddr = pmap_kextract(vaddr); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && run_filter(dmat, paddr) != 0) { sg_len = roundup2(sg_len, dmat->alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * the previous segment if possible. 
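 *
 * A purely illustrative example: if the previous segment is
 * { ds_addr = 0x81000, ds_len = 0x1000 } and the new chunk starts at
 * curaddr 0x82000, the chunk is physically contiguous with that
 * segment and is folded into it, provided the combined length still
 * fits in maxsegsz and both ends stay within the same boundary
 * window; otherwise a new segment is started.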
*/ seg = *segp; if (seg >= 0 && curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) { segs[seg].ds_len += sgsize; } else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_size_t sgsize; int error; if (segs == NULL) segs = dmat->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { _bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. * first indicates if this is the first invocation of this function. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; struct sync_list *sl; vm_offset_t vaddr = (vm_offset_t)buf; int error = 0; if (segs == NULL) segs = dmat->segments; if ((flags & BUS_DMA_LOAD_MBUF) != 0) map->flags |= DMAMAP_CACHE_ALIGNED; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, " "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); while (buflen > 0) { /* * Get the physical address for this segment. * * XXX Don't support checking for coherent mappings * XXX in user address space. */ KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); curaddr = pmap_kextract(vaddr); /* * Compute the segment size, and adjust counts. 
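 *
 * Illustrative example (4 KB pages): a chunk whose physical address
 * is 0x80001234 yields sgsize = PAGE_SIZE - 0x234 = 0xdcc, i.e. the
 * run up to the next page boundary, which is then clipped to
 * maxsegsz and to the bytes remaining in the buffer.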
*/ sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK); if (sgsize > dmat->maxsegsz) sgsize = dmat->maxsegsz; if (buflen < sgsize) sgsize = buflen; if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { curaddr = add_bounce_page(dmat, map, vaddr, curaddr, sgsize); } else { sl = &map->slist[map->sync_count - 1]; if (map->sync_count == 0 || vaddr != sl->vaddr + sl->datacount) { if (++map->sync_count > dmat->nsegments) goto cleanup; sl++; sl->vaddr = vaddr; sl->datacount = sgsize; sl->busaddr = curaddr; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ if (buflen != 0) { _bus_dmamap_unload(dmat, map); error = EFBIG; /* XXX better return value here? */ } return (error); } void __bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { KASSERT(dmat != NULL, ("dmatag is NULL")); KASSERT(map != NULL, ("dmamap is NULL")); map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. */ void _bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } map->sync_count = 0; return; } static void bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op, int aligned) { - char tmp_cl[mips_pdcache_linesize], tmp_clend[mips_pdcache_linesize]; + char tmp_cl[mips_dcache_max_linesize], tmp_clend[mips_dcache_max_linesize]; vm_offset_t buf_cl, buf_clend; vm_size_t size_cl, size_clend; - int cache_linesize_mask = mips_pdcache_linesize - 1; + int cache_linesize_mask = mips_dcache_max_linesize - 1; /* * dcache invalidation operates on cache line aligned addresses * and could modify areas of memory that share the same cache line * at the beginning and the end of the buffer. In order to * prevent data loss we save these chunks in a temporary buffer * before invalidation and restore them after it. * * If the aligned flag is set the buffer is either an mbuf or came from * our allocator caches. In both cases they are always sized and * aligned to cacheline boundaries, so we can skip preserving nearby * data if a transfer appears to overlap cachelines. An mbuf in * particular will usually appear to be overlapped because of offsetting * within the buffer to align the L3 headers, but we know that the bytes * preceding that offset are part of the same mbuf memory and are not * unrelated adjacent data (and a rule of mbuf handling is that the cpu * is not allowed to touch the mbuf while dma is in progress, including * header fields). 
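 *
 * A concrete (illustrative) example with a 32-byte maximum line size:
 * for an unaligned buffer at 0x80001010 of length 0x28, buf_cl is
 * 0x80001000 and size_cl is 0x10 (the 16 bytes in front of the buffer
 * that share its first cache line), while buf_clend is 0x80001038 and
 * size_clend is 8 (the 8 bytes behind the buffer that share its last
 * line); those two chunks are saved before the invalidate and copied
 * back afterwards.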
*/ if (aligned) { size_cl = 0; size_clend = 0; } else { buf_cl = buf & ~cache_linesize_mask; size_cl = buf & cache_linesize_mask; buf_clend = buf + len; - size_clend = (mips_pdcache_linesize - + size_clend = (mips_dcache_max_linesize - (buf_clend & cache_linesize_mask)) & cache_linesize_mask; } switch (op) { case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: case BUS_DMASYNC_POSTREAD: /* * Save buffers that might be modified by invalidation */ if (size_cl) memcpy (tmp_cl, (void*)buf_cl, size_cl); if (size_clend) memcpy (tmp_clend, (void*)buf_clend, size_clend); mips_dcache_inv_range(buf, len); /* * Restore them */ if (size_cl) memcpy ((void*)buf_cl, tmp_cl, size_cl); if (size_clend) memcpy ((void*)buf_clend, tmp_clend, size_clend); /* * Copies above have brought corresponding memory * cache lines back into dirty state. Write them back * out and invalidate affected cache lines again if * necessary. */ if (size_cl) mips_dcache_wbinv_range(buf_cl, size_cl); if (size_clend && (size_cl == 0 || - buf_clend - buf_cl > mips_pdcache_linesize)) + buf_clend - buf_cl > mips_dcache_max_linesize)) mips_dcache_wbinv_range(buf_clend, size_clend); break; case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE: mips_dcache_wbinv_range(buf, len); break; case BUS_DMASYNC_PREREAD: /* * Save buffers that might be modified by invalidation */ if (size_cl) memcpy (tmp_cl, (void *)buf_cl, size_cl); if (size_clend) memcpy (tmp_clend, (void *)buf_clend, size_clend); mips_dcache_inv_range(buf, len); /* * Restore them */ if (size_cl) memcpy ((void *)buf_cl, tmp_cl, size_cl); if (size_clend) memcpy ((void *)buf_clend, tmp_clend, size_clend); /* * Copies above have brought corresponding memory * cache lines back into dirty state. Write them back * out and invalidate affected cache lines again if * necessary. */ if (size_cl) mips_dcache_wbinv_range(buf_cl, size_cl); if (size_clend && (size_cl == 0 || - buf_clend - buf_cl > mips_pdcache_linesize)) + buf_clend - buf_cl > mips_dcache_max_linesize)) mips_dcache_wbinv_range(buf_clend, size_clend); break; case BUS_DMASYNC_PREWRITE: mips_dcache_wb_range(buf, len); break; } } static void _bus_dmamap_sync_bp(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; STAILQ_FOREACH(bpage, &map->bpages, links) { if (op & BUS_DMASYNC_PREWRITE) { if (bpage->datavaddr != 0) bcopy((void *)bpage->datavaddr, (void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->datacount); else physcopyout(bpage->dataaddr, (void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->datacount); if (bpage->vaddr_nocache == 0) { mips_dcache_wb_range(bpage->vaddr, bpage->datacount); } dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { if (bpage->vaddr_nocache == 0) { mips_dcache_inv_range(bpage->vaddr, bpage->datacount); } if (bpage->datavaddr != 0) bcopy((void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), (void *)bpage->datavaddr, bpage->datacount); else physcopyin((void *)(bpage->vaddr_nocache != 0 ? 
bpage->vaddr_nocache : bpage->vaddr), bpage->dataaddr, bpage->datacount); dmat->bounce_zone->total_bounced++; } } } void _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct sync_list *sl, *end; int aligned; if (op == BUS_DMASYNC_POSTWRITE) return; if (STAILQ_FIRST(&map->bpages)) _bus_dmamap_sync_bp(dmat, map, op); if ((dmat->flags & BUS_DMA_COHERENT) || (map->flags & DMAMAP_UNCACHEABLE)) { if (op & BUS_DMASYNC_PREWRITE) mips_sync(); return; } aligned = (map->flags & DMAMAP_CACHE_ALIGNED) ? 1 : 0; CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags); if (map->sync_count) { end = &map->slist[map->sync_count]; for (sl = &map->slist[0]; sl != end; sl++) bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op, aligned); } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? 
*/ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_BUSDMA); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); bpage->vaddr_nocache = (vm_offset_t)pmap_mapdev(bpage->busaddr, PAGE_SIZE); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. 
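 * For example (illustrative values): with 4 KB pages and a client
 * address ending in 0x8c4, the same 0x8c4 offset is OR'ed into the
 * bounce page's vaddr and busaddr, so a consumer that set
 * BUS_DMA_KEEP_PG_OFFSET sees its data at the same offset within the
 * page as in the original buffer.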
*/ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->dataaddr = addr; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } Index: head/sys/mips/mips/cache_mipsNN.c =================================================================== --- head/sys/mips/mips/cache_mipsNN.c (revision 308844) +++ head/sys/mips/mips/cache_mipsNN.c (revision 308845) @@ -1,1375 +1,1382 @@ /* $NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $ */ /* * Copyright 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #define round_line16(x) (((x) + 15) & ~15) #define trunc_line16(x) ((x) & ~15) #define round_line32(x) (((x) + 31) & ~31) #define trunc_line32(x) ((x) & ~31) #define round_line64(x) (((x) + 63) & ~63) #define trunc_line64(x) ((x) & ~63) #define round_line128(x) (((x) + 127) & ~127) #define trunc_line128(x) ((x) & ~127) #if defined(CPU_NLM) static __inline void xlp_sync(void) { __asm __volatile ( ".set push \n" ".set noreorder \n" ".set mips64 \n" "dla $8, 1f \n" "/* jr.hb $8 */ \n" ".word 0x1000408 \n" "nop \n" "1: nop \n" ".set pop \n" : : : "$8"); } #endif #if defined(SB1250_PASS1) #define SYNC __asm volatile("sync; sync") #elif defined(CPU_NLM) #define SYNC xlp_sync() #else #define SYNC __asm volatile("sync") #endif #if defined(CPU_CNMIPS) #define SYNCI mips_sync_icache(); #elif defined(CPU_NLM) #define SYNCI xlp_sync() #else #define SYNCI #endif /* * Exported variables for consumers like bus_dma code */ int mips_picache_linesize; int mips_pdcache_linesize; +int mips_sdcache_linesize; +int mips_dcache_max_linesize; static int picache_size; static int picache_stride; static int picache_loopcount; static int picache_way_mask; static int pdcache_size; static int pdcache_stride; static int pdcache_loopcount; static int pdcache_way_mask; static int sdcache_size; static int sdcache_stride; static int sdcache_loopcount; static int sdcache_way_mask; void mipsNN_cache_init(struct mips_cpuinfo * cpuinfo) { int flush_multiple_lines_per_way; flush_multiple_lines_per_way = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize * cpuinfo->l1.ic_linesize > PAGE_SIZE; if (cpuinfo->icache_virtual) { /* * With a virtual Icache we don't need to flush * multiples of the page size with index ops; we just * need to flush one pages' worth. 
*/ flush_multiple_lines_per_way = 0; } if (flush_multiple_lines_per_way) { picache_stride = PAGE_SIZE; picache_loopcount = (cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize / PAGE_SIZE) * cpuinfo->l1.ic_nways; } else { picache_stride = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize; picache_loopcount = cpuinfo->l1.ic_nways; } if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) { pdcache_stride = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize; pdcache_loopcount = cpuinfo->l1.dc_nways; } else { pdcache_stride = PAGE_SIZE; pdcache_loopcount = (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize / PAGE_SIZE) * cpuinfo->l1.dc_nways; } mips_picache_linesize = cpuinfo->l1.ic_linesize; mips_pdcache_linesize = cpuinfo->l1.dc_linesize; picache_size = cpuinfo->l1.ic_size; picache_way_mask = cpuinfo->l1.ic_nways - 1; pdcache_size = cpuinfo->l1.dc_size; pdcache_way_mask = cpuinfo->l1.dc_nways - 1; sdcache_stride = cpuinfo->l2.dc_nsets * cpuinfo->l2.dc_linesize; sdcache_loopcount = cpuinfo->l2.dc_nways; sdcache_size = cpuinfo->l2.dc_size; sdcache_way_mask = cpuinfo->l2.dc_nways - 1; + mips_sdcache_linesize = cpuinfo->l2.dc_linesize; + mips_dcache_max_linesize = MAX(mips_pdcache_linesize, + mips_sdcache_linesize); + #define CACHE_DEBUG #ifdef CACHE_DEBUG printf("Cache info:\n"); if (cpuinfo->icache_virtual) printf(" icache is virtual\n"); printf(" picache_stride = %d\n", picache_stride); printf(" picache_loopcount = %d\n", picache_loopcount); printf(" pdcache_stride = %d\n", pdcache_stride); printf(" pdcache_loopcount = %d\n", pdcache_loopcount); + printf(" max line size = %d\n", mips_dcache_max_linesize); #endif } void mipsNN_icache_sync_all_16(void) { vm_offset_t va, eva; va = MIPS_PHYS_TO_KSEG0(0); eva = va + picache_size; /* * Since we're hitting the whole thing, we don't have to * worry about the N different "ways". */ mips_intern_dcache_wbinv_all(); while (va < eva) { cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += (32 * 16); } SYNC; } void mipsNN_icache_sync_all_32(void) { vm_offset_t va, eva; va = MIPS_PHYS_TO_KSEG0(0); eva = va + picache_size; /* * Since we're hitting the whole thing, we don't have to * worry about the N different "ways". */ mips_intern_dcache_wbinv_all(); while (va < eva) { cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += (32 * 32); } SYNC; } void mipsNN_icache_sync_all_64(void) { vm_offset_t va, eva; va = MIPS_PHYS_TO_KSEG0(0); eva = va + picache_size; /* * Since we're hitting the whole thing, we don't have to * worry about the N different "ways". 
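 *
 * For instance (illustrative figures only): with a 32 KB I-cache and
 * 64-byte lines, the loop below walks 32768 bytes of KSEG0 in
 * 32 * 64 = 2048-byte steps, i.e. 16 calls to
 * cache_r4k_op_32lines_64(), which together index every line of the
 * cache no matter which way it lives in.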
*/ mips_intern_dcache_wbinv_all(); while (va < eva) { cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += (32 * 64); } SYNC; } void mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line16(va + size); va = trunc_line16(va); mips_intern_dcache_wb_range(va, (eva - va)); while ((eva - va) >= (32 * 16)) { cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); va += (32 * 16); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); va += 16; } SYNC; } void mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line32(va + size); va = trunc_line32(va); mips_intern_dcache_wb_range(va, (eva - va)); while ((eva - va) >= (32 * 32)) { cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); va += (32 * 32); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); va += 32; } SYNC; } void mipsNN_icache_sync_range_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line64(va + size); va = trunc_line64(va); mips_intern_dcache_wb_range(va, (eva - va)); while ((eva - va) >= (32 * 64)) { cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); va += (32 * 64); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); va += 64; } SYNC; } void mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size) { vm_offset_t eva, tmpva; int i, stride, loopcount; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. */ va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask); eva = round_line16(va + size); va = trunc_line16(va); /* * GCC generates better code in the loops if we reference local * copies of these global variables. */ stride = picache_stride; loopcount = picache_loopcount; mips_intern_dcache_wbinv_range_index(va, (eva - va)); while ((eva - va) >= (8 * 16)) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_r4k_op_8lines_16(tmpva, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += 8 * 16; } while (va < eva) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_op_r4k_line(tmpva, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += 16; } } void mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva, tmpva; int i, stride, loopcount; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. */ va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask); eva = round_line32(va + size); va = trunc_line32(va); /* * GCC generates better code in the loops if we reference local * copies of these global variables. */ stride = picache_stride; loopcount = picache_loopcount; mips_intern_dcache_wbinv_range_index(va, (eva - va)); while ((eva - va) >= (8 * 32)) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_r4k_op_8lines_32(tmpva, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += 8 * 32; } while (va < eva) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_op_r4k_line(tmpva, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += 32; } } void mipsNN_icache_sync_range_index_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva, tmpva; int i, stride, loopcount; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. 
So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. */ va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask); eva = round_line64(va + size); va = trunc_line64(va); /* * GCC generates better code in the loops if we reference local * copies of these global variables. */ stride = picache_stride; loopcount = picache_loopcount; mips_intern_dcache_wbinv_range_index(va, (eva - va)); while ((eva - va) >= (8 * 64)) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_r4k_op_8lines_64(tmpva, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += 8 * 64; } while (va < eva) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_op_r4k_line(tmpva, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += 64; } } void mipsNN_pdcache_wbinv_all_16(void) { vm_offset_t va, eva; va = MIPS_PHYS_TO_KSEG0(0); eva = va + pdcache_size; /* * Since we're hitting the whole thing, we don't have to * worry about the N different "ways". */ while (va < eva) { cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 16); } SYNC; } void mipsNN_pdcache_wbinv_all_32(void) { vm_offset_t va, eva; va = MIPS_PHYS_TO_KSEG0(0); eva = va + pdcache_size; /* * Since we're hitting the whole thing, we don't have to * worry about the N different "ways". */ while (va < eva) { cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 32); } SYNC; } void mipsNN_pdcache_wbinv_all_64(void) { vm_offset_t va, eva; va = MIPS_PHYS_TO_KSEG0(0); eva = va + pdcache_size; /* * Since we're hitting the whole thing, we don't have to * worry about the N different "ways". */ while (va < eva) { cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 64); } SYNC; } void mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line16(va + size); va = trunc_line16(va); while ((eva - va) >= (32 * 16)) { cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); va += (32 * 16); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); va += 16; } SYNC; } void mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line32(va + size); va = trunc_line32(va); while ((eva - va) >= (32 * 32)) { cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); va += (32 * 32); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); va += 32; } SYNC; } void mipsNN_pdcache_wbinv_range_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line64(va + size); va = trunc_line64(va); while ((eva - va) >= (32 * 64)) { cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); va += (32 * 64); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); va += 64; } SYNC; } void mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size) { vm_offset_t eva, tmpva; int i, stride, loopcount; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. */ va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask); eva = round_line16(va + size); va = trunc_line16(va); /* * GCC generates better code in the loops if we reference local * copies of these global variables. 
*/ stride = pdcache_stride; loopcount = pdcache_loopcount; while ((eva - va) >= (8 * 16)) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_r4k_op_8lines_16(tmpva, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += 8 * 16; } while (va < eva) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_op_r4k_line(tmpva, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += 16; } } void mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva, tmpva; int i, stride, loopcount; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. */ va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask); eva = round_line32(va + size); va = trunc_line32(va); /* * GCC generates better code in the loops if we reference local * copies of these global variables. */ stride = pdcache_stride; loopcount = pdcache_loopcount; while ((eva - va) >= (8 * 32)) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_r4k_op_8lines_32(tmpva, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += 8 * 32; } while (va < eva) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_op_r4k_line(tmpva, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += 32; } } void mipsNN_pdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva, tmpva; int i, stride, loopcount; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. */ va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask); eva = round_line64(va + size); va = trunc_line64(va); /* * GCC generates better code in the loops if we reference local * copies of these global variables. 
*/ stride = pdcache_stride; loopcount = pdcache_loopcount; while ((eva - va) >= (8 * 64)) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_r4k_op_8lines_64(tmpva, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += 8 * 64; } while (va < eva) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_op_r4k_line(tmpva, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += 64; } } void mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line16(va + size); va = trunc_line16(va); while ((eva - va) >= (32 * 16)) { cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); va += (32 * 16); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); va += 16; } SYNC; } void mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line32(va + size); va = trunc_line32(va); while ((eva - va) >= (32 * 32)) { cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); va += (32 * 32); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); va += 32; } SYNC; } void mipsNN_pdcache_inv_range_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line64(va + size); va = trunc_line64(va); while ((eva - va) >= (32 * 64)) { cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); va += (32 * 64); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); va += 64; } SYNC; } void mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line16(va + size); va = trunc_line16(va); while ((eva - va) >= (32 * 16)) { cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); va += (32 * 16); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); va += 16; } SYNC; } void mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line32(va + size); va = trunc_line32(va); while ((eva - va) >= (32 * 32)) { cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); va += (32 * 32); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); va += 32; } SYNC; } void mipsNN_pdcache_wb_range_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line64(va + size); va = trunc_line64(va); while ((eva - va) >= (32 * 64)) { cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); va += (32 * 64); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); va += 64; } SYNC; } #ifdef CPU_CNMIPS void mipsNN_icache_sync_all_128(void) { SYNCI } void mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size) { SYNC; } void mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size) { } void mipsNN_pdcache_wbinv_all_128(void) { } void mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size) { SYNC; } void mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size) { } void mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size) { } void mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size) { SYNC; } #else void mipsNN_icache_sync_all_128(void) { vm_offset_t va, eva; va = MIPS_PHYS_TO_KSEG0(0); eva = va + picache_size; /* * Since we're hitting the whole thing, we don't have to * worry about the N different "ways". 
*/ mips_intern_dcache_wbinv_all(); while (va < eva) { cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += (32 * 128); } SYNC; } void mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line128(va + size); va = trunc_line128(va); mips_intern_dcache_wb_range(va, (eva - va)); while ((eva - va) >= (32 * 128)) { cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); va += (32 * 128); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); va += 128; } SYNC; } void mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva, tmpva; int i, stride, loopcount; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. */ va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask); eva = round_line128(va + size); va = trunc_line128(va); /* * GCC generates better code in the loops if we reference local * copies of these global variables. */ stride = picache_stride; loopcount = picache_loopcount; mips_intern_dcache_wbinv_range_index(va, (eva - va)); while ((eva - va) >= (32 * 128)) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_r4k_op_32lines_128(tmpva, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += 32 * 128; } while (va < eva) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_op_r4k_line(tmpva, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); va += 128; } } void mipsNN_pdcache_wbinv_all_128(void) { vm_offset_t va, eva; va = MIPS_PHYS_TO_KSEG0(0); eva = va + pdcache_size; /* * Since we're hitting the whole thing, we don't have to * worry about the N different "ways". */ while (va < eva) { cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 128); } SYNC; } void mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line128(va + size); va = trunc_line128(va); while ((eva - va) >= (32 * 128)) { cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); va += (32 * 128); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); va += 128; } SYNC; } void mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva, tmpva; int i, stride, loopcount; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. */ va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask); eva = round_line128(va + size); va = trunc_line128(va); /* * GCC generates better code in the loops if we reference local * copies of these global variables. 
*/ stride = pdcache_stride; loopcount = pdcache_loopcount; while ((eva - va) >= (32 * 128)) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_r4k_op_32lines_128(tmpva, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += 32 * 128; } while (va < eva) { tmpva = va; for (i = 0; i < loopcount; i++, tmpva += stride) cache_op_r4k_line(tmpva, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); va += 128; } } void mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line128(va + size); va = trunc_line128(va); while ((eva - va) >= (32 * 128)) { cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); va += (32 * 128); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); va += 128; } SYNC; } void mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva; eva = round_line128(va + size); va = trunc_line128(va); while ((eva - va) >= (32 * 128)) { cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); va += (32 * 128); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); va += 128; } SYNC; } #endif void mipsNN_sdcache_wbinv_all_32(void) { vm_offset_t va = MIPS_PHYS_TO_KSEG0(0); vm_offset_t eva = va + sdcache_size; while (va < eva) { cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 32); } } void mipsNN_sdcache_wbinv_all_64(void) { vm_offset_t va = MIPS_PHYS_TO_KSEG0(0); vm_offset_t eva = va + sdcache_size; while (va < eva) { cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 64); } } void mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva = round_line32(va + size); va = trunc_line32(va); while ((eva - va) >= (32 * 32)) { cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV); va += (32 * 32); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV); va += 32; } } void mipsNN_sdcache_wbinv_range_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva = round_line64(va + size); va = trunc_line64(va); while ((eva - va) >= (32 * 64)) { cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV); va += (32 * 64); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV); va += 64; } } void mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. */ va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1)); eva = round_line32(va + size); va = trunc_line32(va); while ((eva - va) >= (32 * 32)) { cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 32); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV); va += 32; } } void mipsNN_sdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. 
*/ va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1)); eva = round_line64(va + size); va = trunc_line64(va); while ((eva - va) >= (32 * 64)) { cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 64); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV); va += 64; } } void mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva = round_line32(va + size); va = trunc_line32(va); while ((eva - va) >= (32 * 32)) { cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV); va += (32 * 32); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV); va += 32; } } void mipsNN_sdcache_inv_range_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva = round_line64(va + size); va = trunc_line64(va); while ((eva - va) >= (32 * 64)) { cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV); va += (32 * 64); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV); va += 64; } } void mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size) { vm_offset_t eva = round_line32(va + size); va = trunc_line32(va); while ((eva - va) >= (32 * 32)) { cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB); va += (32 * 32); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB); va += 32; } } void mipsNN_sdcache_wb_range_64(vm_offset_t va, vm_size_t size) { vm_offset_t eva = round_line64(va + size); va = trunc_line64(va); while ((eva - va) >= (32 * 64)) { cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB); va += (32 * 64); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB); va += 64; } } void mipsNN_sdcache_wbinv_all_128(void) { vm_offset_t va = MIPS_PHYS_TO_KSEG0(0); vm_offset_t eva = va + sdcache_size; while (va < eva) { cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 128); } } void mipsNN_sdcache_wbinv_range_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva = round_line128(va + size); va = trunc_line128(va); while ((eva - va) >= (32 * 128)) { cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV); va += (32 * 128); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV); va += 128; } } void mipsNN_sdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva; /* * Since we're doing Index ops, we expect to not be able * to access the address we've been given. So, get the * bits that determine the cache index, and make a KSEG0 * address out of them. 
*/ va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1)); eva = round_line128(va + size); va = trunc_line128(va); while ((eva - va) >= (32 * 128)) { cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV); va += (32 * 128); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV); va += 128; } } void mipsNN_sdcache_inv_range_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva = round_line128(va + size); va = trunc_line128(va); while ((eva - va) >= (32 * 128)) { cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV); va += (32 * 128); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV); va += 128; } } void mipsNN_sdcache_wb_range_128(vm_offset_t va, vm_size_t size) { vm_offset_t eva = round_line128(va + size); va = trunc_line128(va); while ((eva - va) >= (32 * 128)) { cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB); va += (32 * 128); } while (va < eva) { cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB); va += 128; } }
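The hit-type range primitives above all share one control-flow pattern: truncate the start address down to a cache-line boundary, round the end address up, push 32 lines per iteration through the cache_r4k_op_32lines_* helpers, and mop up the remainder one line at a time before the final SYNC. The fragment below is a minimal, host-buildable sketch of that pattern for an assumed 32-byte line; touch_line() and CHUNK are stand-in names for the real CACHE instruction macros, so only the alignment and batching arithmetic is being demonstrated, not actual cache maintenance.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define LINE		32			/* assumed D-cache line size */
#define trunc_line(a)	((a) & ~((uintptr_t)LINE - 1))
#define round_line(a)	(((a) + LINE - 1) & ~((uintptr_t)LINE - 1))
#define CHUNK		(32 * LINE)		/* 32 lines per unrolled step */

/* Stand-in for one CACHE Hit_* instruction on the line holding 'va'. */
static unsigned
touch_line(uintptr_t va)
{
	(void)va;
	return (1);
}

/* Model of the *_range_32() loop structure: returns lines touched. */
static unsigned
model_range_op(uintptr_t va, size_t size)
{
	uintptr_t eva;
	unsigned n = 0;

	eva = round_line(va + size);
	va = trunc_line(va);
	while ((eva - va) >= CHUNK) {		/* batched step: 32 lines */
		for (int i = 0; i < 32; i++)
			n += touch_line(va + i * LINE);
		va += CHUNK;
	}
	while (va < eva) {			/* per-line tail */
		n += touch_line(va);
		va += LINE;
	}
	return (n);
}

int
main(void)
{
	/* 100 bytes starting 5 bytes into a line span 4 cache lines. */
	printf("%u lines\n", model_range_op(0x80000005, 100));
	return (0);
}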
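The *_range_index_* primitives cannot assume the caller's virtual address is mapped, so they keep only the bits that select a cache index, rebuild a KSEG0 address from them, and then replay every index once per way; in the functions above the per-way step is picache_stride/pdcache_stride and the way count is picache_loopcount/pdcache_loopcount. The sketch below models that double loop with hypothetical way_size/nways values and a simplified per-line outer loop (the real code batches 8 or 32 lines per step); it exists only to show which (way, index) pairs get visited, not to issue real cache operations.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define LINE		32
#define KSEG0_BASE	0x80000000u	/* unmapped, cached window (sketch) */

/* Assumed cache geometry for the sketch: 4-way, 4 KB per way. */
static const uintptr_t way_size = 4096;	/* plays the role of *_stride */
static const int nways = 4;		/* plays the role of *_loopcount */

/* Stand-in for an Index_Writeback_Inv/Index_Invalidate CACHE op. */
static void
index_op(uintptr_t va)
{
	printf("index op at %#lx\n", (unsigned long)va);
}

/* Model of a *_range_index_*() walk over [va, va + size). */
static void
model_index_op(uintptr_t va, size_t size)
{
	uintptr_t eva, tmpva;

	/* Keep only the index bits and rebase into KSEG0. */
	va = KSEG0_BASE + (va & (way_size - 1));
	eva = (va + size + LINE - 1) & ~((uintptr_t)LINE - 1);
	va &= ~((uintptr_t)LINE - 1);

	while (va < eva) {
		/* Hit the same index in every way. */
		for (tmpva = va; tmpva < va + (uintptr_t)nways * way_size;
		    tmpva += way_size)
			index_op(tmpva);
		va += LINE;
	}
}

int
main(void)
{
	model_index_op(0x00402f40, 64);	/* arbitrary physical-style address */
	return (0);
}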