Index: head/sys/amd64/amd64/sys_machdep.c
===================================================================
--- head/sys/amd64/amd64/sys_machdep.c	(revision 338317)
+++ head/sys/amd64/amd64/sys_machdep.c	(revision 338318)
@@ -1,757 +1,757 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2003 Peter Wemm.
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysproto.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>		/* for kernel_map */
 #include <vm/vm_extern.h>
 
 #include <machine/frame.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #include <machine/sysarch.h>
 #include <machine/tss.h>
 #include <machine/vmparam.h>
 
 #include <security/audit/audit.h>
 
 static void user_ldt_deref(struct proc_ldt *pldt);
 static void user_ldt_derefl(struct proc_ldt *pldt);
 
 #define	MAX_LD		8192
 
 int max_ldt_segment = 512;
 SYSCTL_INT(_machdep, OID_AUTO, max_ldt_segment, CTLFLAG_RDTUN,
     &max_ldt_segment, 0,
     "Maximum number of allowed LDT segments in the single address space");
 
 static void
 max_ldt_segment_init(void *arg __unused)
 {
 
 	if (max_ldt_segment <= 0)
 		max_ldt_segment = 1;
 	if (max_ldt_segment > MAX_LD)
 		max_ldt_segment = MAX_LD;
 }
 SYSINIT(maxldt, SI_SUB_VM_CONF, SI_ORDER_ANY, max_ldt_segment_init, NULL);
 
 #ifndef _SYS_SYSPROTO_H_
 struct sysarch_args {
 	int op;
 	char *parms;
 };
 #endif
 
 int
 sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space)
 {
 	struct i386_ldt_args *largs, la;
 	struct user_segment_descriptor *lp;
 	int error = 0;
 
 	/*
 	 * XXXKIB check that the BSM generation code knows to encode
 	 * the op argument.
 	 */
 	AUDIT_ARG_CMD(uap->op);
 	if (uap_space == UIO_USERSPACE) {
 		error = copyin(uap->parms, &la, sizeof(struct i386_ldt_args));
 		if (error != 0)
 			return (error);
 		largs = &la;
 	} else
 		largs = (struct i386_ldt_args *)uap->parms;
 
 	switch (uap->op) {
 	case I386_GET_LDT:
 		error = amd64_get_ldt(td, largs);
 		break;
 	case I386_SET_LDT:
 		if (largs->descs != NULL && largs->num > max_ldt_segment)
 			return (EINVAL);
 		set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 		if (largs->descs != NULL) {
 			lp = malloc(largs->num * sizeof(struct
 			    user_segment_descriptor), M_TEMP, M_WAITOK);
 			error = copyin(largs->descs, lp, largs->num *
 			    sizeof(struct user_segment_descriptor));
 			if (error == 0)
 				error = amd64_set_ldt(td, largs, lp);
 			free(lp, M_TEMP);
 		} else {
 			error = amd64_set_ldt(td, largs, NULL);
 		}
 		break;
 	}
 	return (error);
 }
 
 void
 update_gdt_gsbase(struct thread *td, uint32_t base)
 {
 	struct user_segment_descriptor *sd;
 
 	if (td != curthread)
 		return;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	critical_enter();
 	sd = PCPU_GET(gs32p);
 	sd->sd_lobase = base & 0xffffff;
 	sd->sd_hibase = (base >> 24) & 0xff;
 	critical_exit();
 }
 
 void
 update_gdt_fsbase(struct thread *td, uint32_t base)
 {
 	struct user_segment_descriptor *sd;
 
 	if (td != curthread)
 		return;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	critical_enter();
 	sd = PCPU_GET(fs32p);
 	sd->sd_lobase = base & 0xffffff;
 	sd->sd_hibase = (base >> 24) & 0xff;
 	critical_exit();
 }
 
 int
 sysarch(struct thread *td, struct sysarch_args *uap)
 {
 	int error = 0;
 	struct pcb *pcb = curthread->td_pcb;
 	uint32_t i386base;
 	uint64_t a64base;
 	struct i386_ioperm_args iargs;
 	struct i386_get_xfpustate i386xfpu;
 	struct amd64_get_xfpustate a64xfpu;
 
 #ifdef CAPABILITY_MODE
 	/*
 	 * When adding new operations, add a new case statement here to
 	 * explicitly indicate whether or not the operation is safe to
 	 * perform in capability mode.
 	 */
 	if (IN_CAPABILITY_MODE(td)) {
 		switch (uap->op) {
 		case I386_GET_LDT:
 		case I386_SET_LDT:
 		case I386_GET_IOPERM:
 		case I386_GET_FSBASE:
 		case I386_SET_FSBASE:
 		case I386_GET_GSBASE:
 		case I386_SET_GSBASE:
 		case I386_GET_XFPUSTATE:
 		case AMD64_GET_FSBASE:
 		case AMD64_SET_FSBASE:
 		case AMD64_GET_GSBASE:
 		case AMD64_SET_GSBASE:
 		case AMD64_GET_XFPUSTATE:
 			break;
 
 		case I386_SET_IOPERM:
 		default:
 #ifdef KTRACE
 			if (KTRPOINT(td, KTR_CAPFAIL))
 				ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
 #endif
 			return (ECAPMODE);
 		}
 	}
 #endif
 
 	if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
 		return (sysarch_ldt(td, uap, UIO_USERSPACE));
 	/*
 	 * XXXKIB check that the BSM generation code knows to encode
 	 * the op argument.
 	 */
 	AUDIT_ARG_CMD(uap->op);
 	switch (uap->op) {
 	case I386_GET_IOPERM:
 	case I386_SET_IOPERM:
 		if ((error = copyin(uap->parms, &iargs,
 		    sizeof(struct i386_ioperm_args))) != 0)
 			return (error);
 		break;
 	case I386_GET_XFPUSTATE:
 		if ((error = copyin(uap->parms, &i386xfpu,
 		    sizeof(struct i386_get_xfpustate))) != 0)
 			return (error);
 		a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr;
 		a64xfpu.len = i386xfpu.len;
 		break;
 	case AMD64_GET_XFPUSTATE:
 		if ((error = copyin(uap->parms, &a64xfpu,
 		    sizeof(struct amd64_get_xfpustate))) != 0)
 			return (error);
 		break;
 	default:
 		break;
 	}
 
 	switch (uap->op) {
 	case I386_GET_IOPERM:
 		error = amd64_get_ioperm(td, &iargs);
 		if (error == 0)
 			error = copyout(&iargs, uap->parms,
 			    sizeof(struct i386_ioperm_args));
 		break;
 	case I386_SET_IOPERM:
 		error = amd64_set_ioperm(td, &iargs);
 		break;
 	case I386_GET_FSBASE:
 		update_pcb_bases(pcb);
 		i386base = pcb->pcb_fsbase;
 		error = copyout(&i386base, uap->parms, sizeof(i386base));
 		break;
 	case I386_SET_FSBASE:
 		error = copyin(uap->parms, &i386base, sizeof(i386base));
 		if (!error) {
 			set_pcb_flags(pcb, PCB_FULL_IRET);
 			pcb->pcb_fsbase = i386base;
 			td->td_frame->tf_fs = _ufssel;
 			update_gdt_fsbase(td, i386base);
 		}
 		break;
 	case I386_GET_GSBASE:
 		update_pcb_bases(pcb);
 		i386base = pcb->pcb_gsbase;
 		error = copyout(&i386base, uap->parms, sizeof(i386base));
 		break;
 	case I386_SET_GSBASE:
 		error = copyin(uap->parms, &i386base, sizeof(i386base));
 		if (!error) {
 			set_pcb_flags(pcb, PCB_FULL_IRET);
 			pcb->pcb_gsbase = i386base;
 			td->td_frame->tf_gs = _ugssel;
 			update_gdt_gsbase(td, i386base);
 		}
 		break;
 	case AMD64_GET_FSBASE:
 		update_pcb_bases(pcb);
 		error = copyout(&pcb->pcb_fsbase, uap->parms,
 		    sizeof(pcb->pcb_fsbase));
 		break;
 		
 	case AMD64_SET_FSBASE:
 		error = copyin(uap->parms, &a64base, sizeof(a64base));
 		if (!error) {
 			if (a64base < VM_MAXUSER_ADDRESS) {
 				set_pcb_flags(pcb, PCB_FULL_IRET);
 				pcb->pcb_fsbase = a64base;
 				td->td_frame->tf_fs = _ufssel;
 			} else
 				error = EINVAL;
 		}
 		break;
 
 	case AMD64_GET_GSBASE:
 		update_pcb_bases(pcb);
 		error = copyout(&pcb->pcb_gsbase, uap->parms,
 		    sizeof(pcb->pcb_gsbase));
 		break;
 
 	case AMD64_SET_GSBASE:
 		error = copyin(uap->parms, &a64base, sizeof(a64base));
 		if (!error) {
 			if (a64base < VM_MAXUSER_ADDRESS) {
 				set_pcb_flags(pcb, PCB_FULL_IRET);
 				pcb->pcb_gsbase = a64base;
 				td->td_frame->tf_gs = _ugssel;
 			} else
 				error = EINVAL;
 		}
 		break;
 
 	case I386_GET_XFPUSTATE:
 	case AMD64_GET_XFPUSTATE:
 		if (a64xfpu.len > cpu_max_ext_state_size -
 		    sizeof(struct savefpu))
 			return (EINVAL);
 		fpugetregs(td);
 		error = copyout((char *)(get_pcb_user_save_td(td) + 1),
 		    a64xfpu.addr, a64xfpu.len);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return (error);
 }
 
 int
 amd64_set_ioperm(struct thread *td, struct i386_ioperm_args *uap)
 {
 	char *iomap;
 	struct amd64tss *tssp;
 	struct system_segment_descriptor *tss_sd;
 	struct pcb *pcb;
 	u_int i;
 	int error;
 
 	if ((error = priv_check(td, PRIV_IO)) != 0)
 		return (error);
 	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
 		return (error);
 	if (uap->start > uap->start + uap->length ||
 	    uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
 		return (EINVAL);
 
 	/*
 	 * XXX
 	 * While this is restricted to root, we should probably figure out
 	 * whether any other driver is using this i/o address, so as not to
 	 * cause confusion.  This probably requires a global 'usage registry'.
 	 */
 	pcb = td->td_pcb;
 	if (pcb->pcb_tssp == NULL) {
 		tssp = (struct amd64tss *)kmem_malloc(ctob(IOPAGES + 1),
 		    M_WAITOK);
 		pmap_pti_add_kva((vm_offset_t)tssp, (vm_offset_t)tssp +
 		    ctob(IOPAGES + 1), false);
 		iomap = (char *)&tssp[1];
 		memset(iomap, 0xff, IOPERM_BITMAP_SIZE);
 		critical_enter();
 		/* Takes care of tss_rsp0. */
 		memcpy(tssp, &common_tss[PCPU_GET(cpuid)],
 		    sizeof(struct amd64tss));
 		tssp->tss_iobase = sizeof(*tssp);
 		pcb->pcb_tssp = tssp;
 		tss_sd = PCPU_GET(tss);
 		tss_sd->sd_lobase = (u_long)tssp & 0xffffff;
 		tss_sd->sd_hibase = ((u_long)tssp >> 24) & 0xfffffffffful;
 		tss_sd->sd_type = SDT_SYSTSS;
 		ltr(GSEL(GPROC0_SEL, SEL_KPL));
 		PCPU_SET(tssp, tssp);
 		critical_exit();
 	} else
 		iomap = (char *)&pcb->pcb_tssp[1];
 	for (i = uap->start; i < uap->start + uap->length; i++) {
 		if (uap->enable)
 			iomap[i >> 3] &= ~(1 << (i & 7));
 		else
 			iomap[i >> 3] |= (1 << (i & 7));
 	}
 	return (error);
 }
 
 int
 amd64_get_ioperm(struct thread *td, struct i386_ioperm_args *uap)
 {
 	int i, state;
 	char *iomap;
 
 	if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
 		return (EINVAL);
 	if (td->td_pcb->pcb_tssp == NULL) {
 		uap->length = 0;
 		goto done;
 	}
 
 	iomap = (char *)&td->td_pcb->pcb_tssp[1];
 
 	i = uap->start;
 	state = (iomap[i >> 3] >> (i & 7)) & 1;
 	uap->enable = !state;
 	uap->length = 1;
 
 	for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
 		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
 			break;
 		uap->length++;
 	}
 
 done:
 	return (0);
 }
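The two loops above encode the TSS I/O permission bitmap convention: a clear bit grants the user access to that port, a set bit denies it. A minimal sketch of that bit manipulation follows; the demo_* helper names are hypothetical and the code assumes the headers already included by sys_machdep.c.

/* Sketch only; demo_* names are hypothetical, headers as in sys_machdep.c. */
static void
demo_ioperm_set(char *iomap, u_int port, bool enable)
{
	if (enable)
		iomap[port >> 3] &= ~(1 << (port & 7));	/* allow access */
	else
		iomap[port >> 3] |= (1 << (port & 7));	/* deny access */
}

static bool
demo_ioperm_allowed(const char *iomap, u_int port)
{
	/* A clear bit means the port is accessible from user mode. */
	return (((iomap[port >> 3] >> (port & 7)) & 1) == 0);
}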
 
 /*
  * Update the GDT entry pointing to the LDT to point to the LDT of the
  * current process.
  */
 static void
 set_user_ldt(struct mdproc *mdp)
 {
 
 	*PCPU_GET(ldt) = mdp->md_ldt_sd;
 	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
 }
 
 static void
 set_user_ldt_rv(struct vmspace *vmsp)
 {
 	struct thread *td;
 
 	td = curthread;
 	if (vmsp != td->td_proc->p_vmspace)
 		return;
 
 	set_user_ldt(&td->td_proc->p_md);
 }
 
 struct proc_ldt *
 user_ldt_alloc(struct proc *p, int force)
 {
 	struct proc_ldt *pldt, *new_ldt;
 	struct mdproc *mdp;
 	struct soft_segment_descriptor sldt;
 	vm_offset_t sva;
 	vm_size_t sz;
 
 	mtx_assert(&dt_lock, MA_OWNED);
 	mdp = &p->p_md;
 	if (!force && mdp->md_ldt != NULL)
 		return (mdp->md_ldt);
 	mtx_unlock(&dt_lock);
 	new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);
 	sz = max_ldt_segment * sizeof(struct user_segment_descriptor);
 	sva = kmem_malloc(sz, M_WAITOK | M_ZERO);
 	new_ldt->ldt_base = (caddr_t)sva;
 	pmap_pti_add_kva(sva, sva + sz, false);
 	new_ldt->ldt_refcnt = 1;
 	sldt.ssd_base = sva;
 	sldt.ssd_limit = sz - 1;
 	sldt.ssd_type = SDT_SYSLDT;
 	sldt.ssd_dpl = SEL_KPL;
 	sldt.ssd_p = 1;
 	sldt.ssd_long = 0;
 	sldt.ssd_def32 = 0;
 	sldt.ssd_gran = 0;
 	mtx_lock(&dt_lock);
 	pldt = mdp->md_ldt;
 	if (pldt != NULL && !force) {
 		pmap_pti_remove_kva(sva, sva + sz);
-		kmem_free(kernel_arena, sva, sz);
+		kmem_free(sva, sz);
 		free(new_ldt, M_SUBPROC);
 		return (pldt);
 	}
 
 	if (pldt != NULL) {
 		bcopy(pldt->ldt_base, new_ldt->ldt_base, max_ldt_segment *
 		    sizeof(struct user_segment_descriptor));
 		user_ldt_derefl(pldt);
 	}
 	critical_enter();
 	ssdtosyssd(&sldt, &p->p_md.md_ldt_sd);
 	atomic_thread_fence_rel();
 	mdp->md_ldt = new_ldt;
 	critical_exit();
 	smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, NULL,
 	    p->p_vmspace);
 
 	return (mdp->md_ldt);
 }
 
 void
 user_ldt_free(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	struct mdproc *mdp = &p->p_md;
 	struct proc_ldt *pldt;
 
 	mtx_lock(&dt_lock);
 	if ((pldt = mdp->md_ldt) == NULL) {
 		mtx_unlock(&dt_lock);
 		return;
 	}
 
 	critical_enter();
 	mdp->md_ldt = NULL;
 	atomic_thread_fence_rel();
 	bzero(&mdp->md_ldt_sd, sizeof(mdp->md_ldt_sd));
 	if (td == curthread)
 		lldt(GSEL(GNULL_SEL, SEL_KPL));
 	critical_exit();
 	user_ldt_deref(pldt);
 }
 
 static void
 user_ldt_derefl(struct proc_ldt *pldt)
 {
 	vm_offset_t sva;
 	vm_size_t sz;
 
 	if (--pldt->ldt_refcnt == 0) {
 		sva = (vm_offset_t)pldt->ldt_base;
 		sz = max_ldt_segment * sizeof(struct user_segment_descriptor);
 		pmap_pti_remove_kva(sva, sva + sz);
-		kmem_free(kernel_arena, sva, sz);
+		kmem_free(sva, sz);
 		free(pldt, M_SUBPROC);
 	}
 }
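The two hunks in this file replace kmem_free(kernel_arena, sva, sz) with kmem_free(sva, sz): the arena argument is dropped and the kernel arena is implied. A minimal sketch of the resulting allocate/free pairing for the LDT backing store follows; the demo_* helper names are hypothetical and the code assumes the headers already included by sys_machdep.c.

/* Sketch only; demo_* names are hypothetical, headers as in sys_machdep.c. */
static vm_offset_t
demo_ldt_kva_alloc(vm_size_t sz)
{
	/* kmem_malloc() already takes no arena argument at this revision. */
	return (kmem_malloc(sz, M_WAITOK | M_ZERO));
}

static void
demo_ldt_kva_free(vm_offset_t sva, vm_size_t sz)
{
	/* Old interface: kmem_free(kernel_arena, sva, sz). */
	kmem_free(sva, sz);
}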
 
 static void
 user_ldt_deref(struct proc_ldt *pldt)
 {
 
 	mtx_assert(&dt_lock, MA_OWNED);
 	user_ldt_derefl(pldt);
 	mtx_unlock(&dt_lock);
 }
 
 /*
  * Note for the authors of compat layers (linux, etc): copyout() in
  * the function below is not a problem since it presents data in
  * arch-specific format (i.e. i386-specific in this case), not in
  * the OS-specific one.
  */
 int
 amd64_get_ldt(struct thread *td, struct i386_ldt_args *uap)
 {
 	struct proc_ldt *pldt;
 	struct user_segment_descriptor *lp;
 	uint64_t *data;
 	u_int i, num;
 	int error;
 
 #ifdef	DEBUG
 	printf("amd64_get_ldt: start=%u num=%u descs=%p\n",
 	    uap->start, uap->num, (void *)uap->descs);
 #endif
 
 	pldt = td->td_proc->p_md.md_ldt;
 	if (pldt == NULL || uap->start >= max_ldt_segment || uap->num == 0) {
 		td->td_retval[0] = 0;
 		return (0);
 	}
 	num = min(uap->num, max_ldt_segment - uap->start);
 	lp = &((struct user_segment_descriptor *)(pldt->ldt_base))[uap->start];
 	data = malloc(num * sizeof(struct user_segment_descriptor), M_TEMP,
 	    M_WAITOK);
 	mtx_lock(&dt_lock);
 	for (i = 0; i < num; i++)
 		data[i] = ((volatile uint64_t *)lp)[i];
 	mtx_unlock(&dt_lock);
 	error = copyout(data, uap->descs, num *
 	    sizeof(struct user_segment_descriptor));
 	free(data, M_TEMP);
 	if (error == 0)
 		td->td_retval[0] = num;
 	return (error);
 }
 
 int
 amd64_set_ldt(struct thread *td, struct i386_ldt_args *uap,
     struct user_segment_descriptor *descs)
 {
 	struct mdproc *mdp;
 	struct proc_ldt *pldt;
 	struct user_segment_descriptor *dp;
 	struct proc *p;
 	u_int largest_ld, i;
 	int error;
 
 #ifdef	DEBUG
 	printf("amd64_set_ldt: start=%u num=%u descs=%p\n",
 	    uap->start, uap->num, (void *)uap->descs);
 #endif
 	mdp = &td->td_proc->p_md;
 	error = 0;
 
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	p = td->td_proc;
 	if (descs == NULL) {
 		/* Free descriptors */
 		if (uap->start == 0 && uap->num == 0)
 			uap->num = max_ldt_segment;
 		if (uap->num == 0)
 			return (EINVAL);
 		if ((pldt = mdp->md_ldt) == NULL ||
 		    uap->start >= max_ldt_segment)
 			return (0);
 		largest_ld = uap->start + uap->num;
 		if (largest_ld > max_ldt_segment)
 			largest_ld = max_ldt_segment;
 		if (largest_ld < uap->start)
 			return (EINVAL);
 		mtx_lock(&dt_lock);
 		for (i = uap->start; i < largest_ld; i++)
 			((volatile uint64_t *)(pldt->ldt_base))[i] = 0;
 		mtx_unlock(&dt_lock);
 		return (0);
 	}
 
 	if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
 		/* verify range of descriptors to modify */
 		largest_ld = uap->start + uap->num;
 		if (uap->start >= max_ldt_segment ||
 		    largest_ld > max_ldt_segment ||
 		    largest_ld < uap->start)
 			return (EINVAL);
 	}
 
 	/* Check descriptors for access violations */
 	for (i = 0; i < uap->num; i++) {
 		dp = &descs[i];
 
 		switch (dp->sd_type) {
 		case SDT_SYSNULL:	/* system null */
 			dp->sd_p = 0;
 			break;
 		case SDT_SYS286TSS:
 		case SDT_SYSLDT:
 		case SDT_SYS286BSY:
 		case SDT_SYS286CGT:
 		case SDT_SYSTASKGT:
 		case SDT_SYS286IGT:
 		case SDT_SYS286TGT:
 		case SDT_SYSNULL2:
 		case SDT_SYSTSS:
 		case SDT_SYSNULL3:
 		case SDT_SYSBSY:
 		case SDT_SYSCGT:
 		case SDT_SYSNULL4:
 		case SDT_SYSIGT:
 		case SDT_SYSTGT:
 			return (EACCES);
 
 		/* memory segment types */
 		case SDT_MEMEC:   /* memory execute only conforming */
 		case SDT_MEMEAC:  /* memory execute only accessed conforming */
 		case SDT_MEMERC:  /* memory execute read conforming */
 		case SDT_MEMERAC: /* memory execute read accessed conforming */
 			 /* Must be "present" if executable and conforming. */
 			if (dp->sd_p == 0)
 				return (EACCES);
 			break;
 		case SDT_MEMRO:   /* memory read only */
 		case SDT_MEMROA:  /* memory read only accessed */
 		case SDT_MEMRW:   /* memory read write */
 		case SDT_MEMRWA:  /* memory read write accessed */
 		case SDT_MEMROD:  /* memory read only expand dwn limit */
 		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
 		case SDT_MEMRWD:  /* memory read write expand dwn limit */
 		case SDT_MEMRWDA: /* memory read write expand dwn lim accessed */
 		case SDT_MEME:    /* memory execute only */
 		case SDT_MEMEA:   /* memory execute only accessed */
 		case SDT_MEMER:   /* memory execute read */
 		case SDT_MEMERA:  /* memory execute read accessed */
 			break;
 		default:
 			return(EINVAL);
 		}
 
 		/* Only user (ring-3) descriptors may be present. */
 		if ((dp->sd_p != 0) && (dp->sd_dpl != SEL_UPL))
 			return (EACCES);
 	}
 
 	if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
 		/* Allocate a free slot */
 		mtx_lock(&dt_lock);
 		pldt = user_ldt_alloc(p, 0);
 		if (pldt == NULL) {
 			mtx_unlock(&dt_lock);
 			return (ENOMEM);
 		}
 
 		/*
 		 * Start scanning a bit up to leave room for NVidia and
 		 * Wine, which still use the "Blat" method of allocation.
 		 */
 		i = 16;
 		dp = &((struct user_segment_descriptor *)(pldt->ldt_base))[i];
 		for (; i < max_ldt_segment; ++i, ++dp) {
 			if (dp->sd_type == SDT_SYSNULL)
 				break;
 		}
 		if (i >= max_ldt_segment) {
 			mtx_unlock(&dt_lock);
 			return (ENOSPC);
 		}
 		uap->start = i;
 		error = amd64_set_ldt_data(td, i, 1, descs);
 		mtx_unlock(&dt_lock);
 	} else {
 		largest_ld = uap->start + uap->num;
 		if (largest_ld > max_ldt_segment)
 			return (EINVAL);
 		mtx_lock(&dt_lock);
 		if (user_ldt_alloc(p, 0) != NULL) {
 			error = amd64_set_ldt_data(td, uap->start, uap->num,
 			    descs);
 		}
 		mtx_unlock(&dt_lock);
 	}
 	if (error == 0)
 		td->td_retval[0] = uap->start;
 	return (error);
 }
 
 int
 amd64_set_ldt_data(struct thread *td, int start, int num,
     struct user_segment_descriptor *descs)
 {
 	struct mdproc *mdp;
 	struct proc_ldt *pldt;
 	volatile uint64_t *dst, *src;
 	int i;
 
 	mtx_assert(&dt_lock, MA_OWNED);
 
 	mdp = &td->td_proc->p_md;
 	pldt = mdp->md_ldt;
 	dst = (volatile uint64_t *)(pldt->ldt_base);
 	src = (volatile uint64_t *)descs;
 	for (i = 0; i < num; i++)
 		dst[start + i] = src[i];
 	return (0);
 }
Index: head/sys/amd64/amd64/vm_machdep.c
===================================================================
--- head/sys/amd64/amd64/vm_machdep.c	(revision 338317)
+++ head/sys/amd64/amd64/vm_machdep.c	(revision 338318)
@@ -1,591 +1,590 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 1982, 1986 The Regents of the University of California.
  * Copyright (c) 1989, 1990 William Jolitz
  * Copyright (c) 1994 John Dyson
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department, and William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_isa.h"
 #include "opt_cpu.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/pioctl.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 #include <machine/specialreg.h>
 #include <machine/tss.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_param.h>
 
 _Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread),
     "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread.");
 _Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb),
     "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb.");
 _Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
     "OFFSETOF_MONINORBUF does not correspond with offset of pc_monitorbuf.");
 
 struct savefpu *
 get_pcb_user_save_td(struct thread *td)
 {
 	vm_offset_t p;
 
 	p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
 	    roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
 	KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area"));
 	return ((struct savefpu *)p);
 }
 
 struct savefpu *
 get_pcb_user_save_pcb(struct pcb *pcb)
 {
 	vm_offset_t p;
 
 	p = (vm_offset_t)(pcb + 1);
 	return ((struct savefpu *)p);
 }
 
 struct pcb *
 get_pcb_td(struct thread *td)
 {
 	vm_offset_t p;
 
 	p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
 	    roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) -
 	    sizeof(struct pcb);
 	return ((struct pcb *)p);
 }
 
 void *
 alloc_fpusave(int flags)
 {
 	void *res;
 	struct savefpu_ymm *sf;
 
 	res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
 	if (use_xsave) {
 		sf = (struct savefpu_ymm *)res;
 		bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));
 		sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;
 	}
 	return (res);
 }
 
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child is
  * ready to run and return to user mode.
  */
 void
 cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 {
 	struct proc *p1;
 	struct pcb *pcb2;
 	struct mdproc *mdp1, *mdp2;
 	struct proc_ldt *pldt;
 
 	p1 = td1->td_proc;
 	if ((flags & RFPROC) == 0) {
 		if ((flags & RFMEM) == 0) {
 			/* unshare user LDT */
 			mdp1 = &p1->p_md;
 			mtx_lock(&dt_lock);
 			if ((pldt = mdp1->md_ldt) != NULL &&
 			    pldt->ldt_refcnt > 1 &&
 			    user_ldt_alloc(p1, 1) == NULL)
 				panic("could not copy LDT");
 			mtx_unlock(&dt_lock);
 		}
 		return;
 	}
 
 	/* Ensure that td1's pcb is up to date. */
 	fpuexit(td1);
 	update_pcb_bases(td1->td_pcb);
 
 	/* Point the pcb to the top of the stack */
 	pcb2 = get_pcb_td(td2);
 	td2->td_pcb = pcb2;
 
 	/* Copy td1's pcb */
 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
 
 	/* Properly initialize pcb_save */
 	pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
 	bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2),
 	    cpu_max_ext_state_size);
 
 	/* Point mdproc and then copy over td1's contents */
 	mdp2 = &p2->p_md;
 	bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
 
 	/*
 	 * Create a new fresh stack for the new process.
 	 * Copy the trap frame for the return to user mode as if from a
 	 * syscall.  This copies most of the user mode register values.
 	 */
 	td2->td_frame = (struct trapframe *)td2->td_pcb - 1;
 	bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
 
 	td2->td_frame->tf_rax = 0;		/* Child returns zero */
 	td2->td_frame->tf_rflags &= ~PSL_C;	/* success */
 	td2->td_frame->tf_rdx = 1;
 
 	/*
 	 * If the parent process has the trap bit set (i.e. a debugger had
 	 * single stepped the process to the system call), we need to clear
 	 * the trap flag from the new frame unless the debugger had set PF_FORK
 	 * on the parent.  Otherwise, the child will receive a (likely
 	 * unexpected) SIGTRAP when it executes the first instruction after
 	 * returning to userland.
 	 */
 	if ((p1->p_pfsflags & PF_FORK) == 0)
 		td2->td_frame->tf_rflags &= ~PSL_T;
 
 	/*
 	 * Set registers for trampoline to user mode.  Leave space for the
 	 * return address on stack.  These are the kernel mode register values.
 	 */
 	pcb2->pcb_r12 = (register_t)fork_return;	/* fork_trampoline argument */
 	pcb2->pcb_rbp = 0;
 	pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
 	pcb2->pcb_rbx = (register_t)td2;		/* fork_trampoline argument */
 	pcb2->pcb_rip = (register_t)fork_trampoline;
 	/*-
 	 * pcb2->pcb_dr*:	cloned above.
 	 * pcb2->pcb_savefpu:	cloned above.
 	 * pcb2->pcb_flags:	cloned above.
 	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
 	 * pcb2->pcb_[fg]sbase:	cloned above
 	 */
 
 	/* Setup to release spin count in fork_exit(). */
 	td2->td_md.md_spinlock_count = 1;
 	td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
 	td2->td_md.md_invl_gen.gen = 0;
 
 	/* As on i386, do not copy the I/O permission bitmap. */
 	pcb2->pcb_tssp = NULL;
 
 	/* New segment registers. */
 	set_pcb_flags_raw(pcb2, PCB_FULL_IRET);
 
 	/* Copy the LDT, if necessary. */
 	mdp1 = &td1->td_proc->p_md;
 	mdp2 = &p2->p_md;
 	if (mdp1->md_ldt == NULL) {
 		mdp2->md_ldt = NULL;
 		return;
 	}
 	mtx_lock(&dt_lock);
 	if (mdp1->md_ldt != NULL) {
 		if (flags & RFMEM) {
 			mdp1->md_ldt->ldt_refcnt++;
 			mdp2->md_ldt = mdp1->md_ldt;
 			bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd, sizeof(struct
 			    system_segment_descriptor));
 		} else {
 			mdp2->md_ldt = NULL;
 			mdp2->md_ldt = user_ldt_alloc(p2, 0);
 			if (mdp2->md_ldt == NULL)
 				panic("could not copy LDT");
 			amd64_set_ldt_data(td2, 0, max_ldt_segment,
 			    (struct user_segment_descriptor *)
 			    mdp1->md_ldt->ldt_base);
 		}
 	} else
 		mdp2->md_ldt = NULL;
 	mtx_unlock(&dt_lock);
 
 	/*
 	 * Now, cpu_switch() can schedule the new process.
 	 * pcb_rsp is loaded pointing to the cpu_switch() stack frame
 	 * containing the return address when exiting cpu_switch.
 	 * This will normally be to fork_trampoline(), which will have
 	 * %rbx loaded with the new thread pointer.  fork_trampoline()
 	 * will set up a stack to call fork_return(p, frame); to complete
 	 * the return to user-mode.
 	 */
 }
 
 /*
  * Intercept the return address from a freshly forked process that has NOT
  * been scheduled yet.
  *
  * This is needed to make kernel threads stay in kernel mode.
  */
 void
 cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg)
 {
 	/*
 	 * Note that the trap frame follows the args, so the function
 	 * is really called like this:  func(arg, frame);
 	 */
 	td->td_pcb->pcb_r12 = (long) func;	/* function */
 	td->td_pcb->pcb_rbx = (long) arg;	/* first arg */
 }
 
 void
 cpu_exit(struct thread *td)
 {
 
 	/*
 	 * If this process has a custom LDT, release it.
 	 */
 	if (td->td_proc->p_md.md_ldt != NULL)
 		user_ldt_free(td);
 }
 
 void
 cpu_thread_exit(struct thread *td)
 {
 	struct pcb *pcb;
 
 	critical_enter();
 	if (td == PCPU_GET(fpcurthread))
 		fpudrop();
 	critical_exit();
 
 	pcb = td->td_pcb;
 
 	/* Disable any hardware breakpoints. */
 	if (pcb->pcb_flags & PCB_DBREGS) {
 		reset_dbregs();
 		clear_pcb_flags(pcb, PCB_DBREGS);
 	}
 }
 
 void
 cpu_thread_clean(struct thread *td)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 
 	/*
 	 * Clean TSS/iomap
 	 */
 	if (pcb->pcb_tssp != NULL) {
 		pmap_pti_remove_kva((vm_offset_t)pcb->pcb_tssp,
 		    (vm_offset_t)pcb->pcb_tssp + ctob(IOPAGES + 1));
-		kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_tssp,
-		    ctob(IOPAGES + 1));
+		kmem_free((vm_offset_t)pcb->pcb_tssp, ctob(IOPAGES + 1));
 		pcb->pcb_tssp = NULL;
 	}
 }
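cpu_thread_clean() above undoes what amd64_set_ioperm() set up in sys_machdep.c: the TSS plus I/O bitmap pages are removed from the PTI page tables before the KVA is returned with the now arena-less kmem_free(). A minimal sketch of that paired lifecycle follows; the demo_* names are hypothetical and the code assumes the headers already included by vm_machdep.c.

/* Sketch only; demo_* names are hypothetical, headers as in vm_machdep.c. */
static struct amd64tss *
demo_tss_alloc(void)
{
	struct amd64tss *tssp;

	tssp = (struct amd64tss *)kmem_malloc(ctob(IOPAGES + 1), M_WAITOK);
	/* Mirror the new KVA into the PTI (trampoline) page tables. */
	pmap_pti_add_kva((vm_offset_t)tssp,
	    (vm_offset_t)tssp + ctob(IOPAGES + 1), false);
	return (tssp);
}

static void
demo_tss_free(struct amd64tss *tssp)
{
	/* Unmap from the PTI page tables before freeing the KVA. */
	pmap_pti_remove_kva((vm_offset_t)tssp,
	    (vm_offset_t)tssp + ctob(IOPAGES + 1));
	kmem_free((vm_offset_t)tssp, ctob(IOPAGES + 1));
}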
 
 void
 cpu_thread_swapin(struct thread *td)
 {
 }
 
 void
 cpu_thread_swapout(struct thread *td)
 {
 }
 
 void
 cpu_thread_alloc(struct thread *td)
 {
 	struct pcb *pcb;
 	struct xstate_hdr *xhdr;
 
 	td->td_pcb = pcb = get_pcb_td(td);
 	td->td_frame = (struct trapframe *)pcb - 1;
 	pcb->pcb_save = get_pcb_user_save_pcb(pcb);
 	if (use_xsave) {
 		xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
 		bzero(xhdr, sizeof(*xhdr));
 		xhdr->xstate_bv = xsave_mask;
 	}
 }
 
 void
 cpu_thread_free(struct thread *td)
 {
 
 	cpu_thread_clean(td);
 }
 
 void
 cpu_set_syscall_retval(struct thread *td, int error)
 {
 
 	switch (error) {
 	case 0:
 		td->td_frame->tf_rax = td->td_retval[0];
 		td->td_frame->tf_rdx = td->td_retval[1];
 		td->td_frame->tf_rflags &= ~PSL_C;
 		break;
 
 	case ERESTART:
 		/*
 		 * Reconstruct pc: we know that 'syscall' is 2 bytes,
 		 * lcall $X,y is 7 bytes, int 0x80 is 2 bytes.
 		 * We saved this in tf_err.
 		 * %r10 (which was holding the value of %rcx) is restored
 		 * for the next iteration.
 		 * %r10 restore is only required for freebsd/amd64 processes,
 		 * but is harmless for any ia32 ABI.
 		 *
 		 * Require full context restore to get the arguments
 		 * in the registers reloaded at return to usermode.
 		 */
 		td->td_frame->tf_rip -= td->td_frame->tf_err;
 		td->td_frame->tf_r10 = td->td_frame->tf_rcx;
 		set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 		break;
 
 	case EJUSTRETURN:
 		break;
 
 	default:
 		td->td_frame->tf_rax = SV_ABI_ERRNO(td->td_proc, error);
 		td->td_frame->tf_rflags |= PSL_C;
 		break;
 	}
 }
 
 /*
  * Initialize machine state, mostly pcb and trap frame for a new
  * thread, about to return to userspace.  Put enough state in the new
  * thread's PCB to get it to go back to the fork_return(), which
  * finalizes the thread state and handles peculiarities of the first
  * return to userspace for the new thread.
  */
 void
 cpu_copy_thread(struct thread *td, struct thread *td0)
 {
 	struct pcb *pcb2;
 
 	/* Point the pcb to the top of the stack. */
 	pcb2 = td->td_pcb;
 
 	/*
 	 * Copy the upcall pcb.  This loads kernel regs.
 	 * Those not loaded individually below get their default
 	 * values here.
 	 */
 	update_pcb_bases(td0->td_pcb);
 	bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
 	clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE |
 	    PCB_KERNFPU);
 	pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
 	bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
 	    cpu_max_ext_state_size);
 	set_pcb_flags_raw(pcb2, PCB_FULL_IRET);
 
 	/*
 	 * Create a new fresh stack for the new thread.
 	 */
 	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
 
 	/*
 	 * If the current thread has the trap bit set (i.e. a debugger had
 	 * single stepped the process to the system call), we need to clear
 	 * the trap flag from the new frame. Otherwise, the new thread will
 	 * receive a (likely unexpected) SIGTRAP when it executes the first
 	 * instruction after returning to userland.
 	 */
 	td->td_frame->tf_rflags &= ~PSL_T;
 
 	/*
 	 * Set registers for trampoline to user mode.  Leave space for the
 	 * return address on stack.  These are the kernel mode register values.
 	 */
 	pcb2->pcb_r12 = (register_t)fork_return;	    /* trampoline arg */
 	pcb2->pcb_rbp = 0;
 	pcb2->pcb_rsp = (register_t)td->td_frame - sizeof(void *);	/* trampoline arg */
 	pcb2->pcb_rbx = (register_t)td;			    /* trampoline arg */
 	pcb2->pcb_rip = (register_t)fork_trampoline;
 	/*
 	 * If we didn't copy the pcb, we'd need to set the following registers:
 	 * pcb2->pcb_dr*:	cloned above.
 	 * pcb2->pcb_savefpu:	cloned above.
 	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
 	 * pcb2->pcb_[fg]sbase: cloned above
 	 */
 
 	/* Setup to release spin count in fork_exit(). */
 	td->td_md.md_spinlock_count = 1;
 	td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
 }
 
 /*
  * Set up the machine state for performing an upcall that starts
  * the entry function with the given argument.
  */
 void
 cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg,
     stack_t *stack)
 {
 
 	/* 
 	 * Do any extra cleaning that needs to be done.
 	 * The thread may have optional components
 	 * that are not present in a fresh thread.
 	 * This may be a recycled thread so make it look
 	 * as though it's newly allocated.
 	 */
 	cpu_thread_clean(td);
 
 #ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 		/*
 		 * Set the trap frame to point at the beginning of the entry
 		 * function.
 		 */
 		td->td_frame->tf_rbp = 0;
 		td->td_frame->tf_rsp =
 		   (((uintptr_t)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4;
 		td->td_frame->tf_rip = (uintptr_t)entry;
 
 		/* Return address sentinel value to stop stack unwinding. */
 		suword32((void *)td->td_frame->tf_rsp, 0);
 
 		/* Pass the argument to the entry point. */
 		suword32((void *)(td->td_frame->tf_rsp + sizeof(int32_t)),
 		    (uint32_t)(uintptr_t)arg);
 
 		return;
 	}
 #endif
 
 	/*
 	 * Set the trap frame to point at the beginning of the uts
 	 * function.
 	 */
 	td->td_frame->tf_rbp = 0;
 	td->td_frame->tf_rsp =
 	    ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f;
 	td->td_frame->tf_rsp -= 8;
 	td->td_frame->tf_rip = (register_t)entry;
 	td->td_frame->tf_ds = _udatasel;
 	td->td_frame->tf_es = _udatasel;
 	td->td_frame->tf_fs = _ufssel;
 	td->td_frame->tf_gs = _ugssel;
 	td->td_frame->tf_flags = TF_HASSEGS;
 
 	/* Return address sentinel value to stop stack unwinding. */
 	suword((void *)td->td_frame->tf_rsp, 0);
 
 	/* Pass the argument to the entry point. */
 	td->td_frame->tf_rdi = (register_t)arg;
 }
 
 int
 cpu_set_user_tls(struct thread *td, void *tls_base)
 {
 	struct pcb *pcb;
 
 	if ((u_int64_t)tls_base >= VM_MAXUSER_ADDRESS)
 		return (EINVAL);
 
 	pcb = td->td_pcb;
 	set_pcb_flags(pcb, PCB_FULL_IRET);
 #ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 		pcb->pcb_gsbase = (register_t)tls_base;
 		return (0);
 	}
 #endif
 	pcb->pcb_fsbase = (register_t)tls_base;
 	return (0);
 }
 
 /*
  * Software interrupt handler for queued VM system processing.
  */
 void
 swi_vm(void *dummy)
 {
 	if (busdma_swi_pending != 0)
 		busdma_swi();
 }
 
 /*
  * Tell whether this address is in some physical memory region.
  * Currently used by the kernel coredump code in order to avoid
  * dumping the ``ISA memory hole'' which could cause indefinite hangs,
  * or other unpredictable behaviour.
  */
 
 int
 is_physical_memory(vm_paddr_t addr)
 {
 
 #ifdef DEV_ISA
 	/* The ISA ``memory hole''. */
 	if (addr >= 0xa0000 && addr < 0x100000)
 		return 0;
 #endif
 
 	/*
 	 * stuff other tests for known memory-mapped devices (PCI?)
 	 * here
 	 */
 
 	return 1;
 }
Index: head/sys/arm/allwinner/a10_fb.c
===================================================================
--- head/sys/arm/allwinner/a10_fb.c	(revision 338317)
+++ head/sys/arm/allwinner/a10_fb.c	(revision 338318)
@@ -1,662 +1,662 @@
 /*-
  * Copyright (c) 2016 Jared McNeill <jmcneill@invisible.ca>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * Allwinner A10/A20 Framebuffer
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/rman.h>
 #include <sys/condvar.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/fbio.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/pmap.h>
 
 #include <machine/bus.h>
 
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
 
 #include <dev/videomode/videomode.h>
 #include <dev/videomode/edidvar.h>
 
 #include <dev/extres/clk/clk.h>
 #include <dev/extres/hwreset/hwreset.h>
 
 #include "fb_if.h"
 #include "hdmi_if.h"
 
 #define	FB_DEFAULT_W	800
 #define	FB_DEFAULT_H	600
 #define	FB_DEFAULT_REF	60
 #define	FB_BPP		32
 #define	FB_ALIGN	0x1000
 
 #define	HDMI_ENABLE_DELAY	20000
 #define	DEBE_FREQ		300000000
 
 #define	DOT_CLOCK_TO_HZ(c)	((c) * 1000)
 
 /* Display backend */
 #define	DEBE_REG_START		0x800
 #define	DEBE_REG_END		0x1000
 #define	DEBE_REG_WIDTH		4
 #define	DEBE_MODCTL		0x800
 #define	MODCTL_ITLMOD_EN	(1 << 28)
 #define	MODCTL_OUT_SEL_MASK	(0x7 << 20)
 #define	MODCTL_OUT_SEL(sel)	((sel) << 20)
 #define	OUT_SEL_LCD		0
 #define	MODCTL_LAY0_EN		(1 << 8)
 #define	MODCTL_START_CTL	(1 << 1)
 #define	MODCTL_EN		(1 << 0)
 #define	DEBE_DISSIZE		0x808
 #define	DIS_HEIGHT(h)		(((h) - 1) << 16)
 #define	DIS_WIDTH(w)		(((w) - 1) << 0)
 #define	DEBE_LAYSIZE0		0x810
 #define	LAY_HEIGHT(h)		(((h) - 1) << 16)
 #define	LAY_WIDTH(w)		(((w) - 1) << 0)
 #define	DEBE_LAYCOOR0		0x820
 #define	LAY_XCOOR(x)		((x) << 16)
 #define	LAY_YCOOR(y)		((y) << 0)
 #define	DEBE_LAYLINEWIDTH0	0x840
 #define	DEBE_LAYFB_L32ADD0	0x850
 #define	LAYFB_L32ADD(pa)	((pa) << 3)
 #define	DEBE_LAYFB_H4ADD	0x860
 #define	LAY0FB_H4ADD(pa)	((pa) >> 29)
 #define	DEBE_REGBUFFCTL		0x870
 #define	REGBUFFCTL_LOAD		(1 << 0)
 #define	DEBE_ATTCTL1		0x8a0
 #define	ATTCTL1_FBFMT(fmt)	((fmt) << 8)
 #define	FBFMT_XRGB8888		9
 #define	ATTCTL1_FBPS(ps)	((ps) << 0)
 #define	FBPS_32BPP_ARGB		0
 
 /* Timing controller */
 #define	TCON_GCTL		0x000
 #define	GCTL_TCON_EN		(1 << 31)
 #define	GCTL_IO_MAP_SEL_TCON1	(1 << 0)
 #define	TCON_GINT1		0x008
 #define	GINT1_TCON1_LINENO(n)	(((n) + 2) << 0)
 #define	TCON0_DCLK		0x044
 #define	DCLK_EN			0xf0000000
 #define	TCON1_CTL		0x090
 #define	TCON1_EN		(1 << 31)
 #define	INTERLACE_EN		(1 << 20)
 #define	TCON1_SRC_SEL(src)	((src) << 0)
 #define	TCON1_SRC_CH1		0
 #define	TCON1_SRC_CH2		1
 #define	TCON1_SRC_BLUE		2
 #define	TCON1_START_DELAY(sd)	((sd) << 4)
 #define	TCON1_BASIC0		0x094
 #define	TCON1_BASIC1		0x098
 #define	TCON1_BASIC2		0x09c
 #define	TCON1_BASIC3		0x0a0
 #define	TCON1_BASIC4		0x0a4
 #define	TCON1_BASIC5		0x0a8
 #define	BASIC_X(x)		(((x) - 1) << 16)
 #define	BASIC_Y(y)		(((y) - 1) << 0)
 #define	BASIC3_HT(ht)		(((ht) - 1) << 16)
 #define	BASIC3_HBP(hbp)		(((hbp) - 1) << 0)
 #define	BASIC4_VT(vt)		((vt) << 16)
 #define	BASIC4_VBP(vbp)		(((vbp) - 1) << 0)
 #define	BASIC5_HSPW(hspw)	(((hspw) - 1) << 16)
 #define	BASIC5_VSPW(vspw)	(((vspw) - 1) << 0)
 #define	TCON1_IO_POL		0x0f0
 #define	IO_POL_IO2_INV		(1 << 26)
 #define	IO_POL_PHSYNC		(1 << 25)
 #define	IO_POL_PVSYNC		(1 << 24)
 #define	TCON1_IO_TRI		0x0f4
 #define	IO0_OUTPUT_TRI_EN	(1 << 24)
 #define	IO1_OUTPUT_TRI_EN	(1 << 25)
 #define	IO_TRI_MASK		0xffffffff
 #define	START_DELAY(vbl)	(MIN(32, (vbl)) - 2)
 #define	VBLANK_LEN(vt, vd, i)	((((vt) << (i)) >> 1) - (vd) - 2)
 #define	VTOTAL(vt)		((vt) * 2)
 #define	DIVIDE(x, y)		(((x) + ((y) / 2)) / (y))
 
 struct a10fb_softc {
 	device_t		dev;
 	device_t		fbdev;
 	struct resource		*res[2];
 
 	/* Framebuffer */
 	struct fb_info		info;
 	size_t			fbsize;
 	bus_addr_t		paddr;
 	vm_offset_t		vaddr;
 
 	/* HDMI */
 	eventhandler_tag	hdmi_evh;
 };
 
 static struct resource_spec a10fb_spec[] = {
 	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },	/* DEBE */
 	{ SYS_RES_MEMORY,	1,	RF_ACTIVE },	/* TCON */
 	{ -1, 0 }
 };
 
 #define	DEBE_READ(sc, reg)		bus_read_4((sc)->res[0], (reg))
 #define	DEBE_WRITE(sc, reg, val)	bus_write_4((sc)->res[0], (reg), (val))
 
 #define	TCON_READ(sc, reg)		bus_read_4((sc)->res[1], (reg))
 #define	TCON_WRITE(sc, reg, val)	bus_write_4((sc)->res[1], (reg), (val))
 
 static int
 a10fb_allocfb(struct a10fb_softc *sc)
 {
 	sc->vaddr = kmem_alloc_contig(sc->fbsize, M_NOWAIT | M_ZERO, 0, ~0,
 	    FB_ALIGN, 0, VM_MEMATTR_WRITE_COMBINING);
 	if (sc->vaddr == 0) {
 		device_printf(sc->dev, "failed to allocate FB memory\n");
 		return (ENOMEM);
 	}
 	sc->paddr = pmap_kextract(sc->vaddr);
 
 	return (0);
 }
 
 static void
 a10fb_freefb(struct a10fb_softc *sc)
 {
-	kmem_free(kernel_arena, sc->vaddr, sc->fbsize);
+	kmem_free(sc->vaddr, sc->fbsize);
 }
 
 static int
 a10fb_setup_debe(struct a10fb_softc *sc, const struct videomode *mode)
 {
 	int width, height, interlace, reg;
 	clk_t clk_ahb, clk_dram, clk_debe;
 	hwreset_t rst;
 	uint32_t val;
 	int error;
 
 	interlace = !!(mode->flags & VID_INTERLACE);
 	width = mode->hdisplay;
 	height = mode->vdisplay << interlace;
 
 	/* Leave reset */
 	error = hwreset_get_by_ofw_name(sc->dev, 0, "de_be", &rst);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot find reset 'de_be'\n");
 		return (error);
 	}
 	error = hwreset_deassert(rst);
 	if (error != 0) {
 		device_printf(sc->dev, "couldn't de-assert reset 'de_be'\n");
 		return (error);
 	}
 	/* Gating AHB clock for BE */
 	error = clk_get_by_ofw_name(sc->dev, 0, "ahb_de_be", &clk_ahb);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot find clk 'ahb_de_be'\n");
 		return (error);
 	}
 	error = clk_enable(clk_ahb);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot enable clk 'ahb_de_be'\n");
 		return (error);
 	}
 	/* Enable DRAM clock to BE */
 	error = clk_get_by_ofw_name(sc->dev, 0, "dram_de_be", &clk_dram);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot find clk 'dram_de_be'\n");
 		return (error);
 	}
 	error = clk_enable(clk_dram);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot enable clk 'dram_de_be'\n");
 		return (error);
 	}
 	/* Set BE clock to 300MHz and enable */
 	error = clk_get_by_ofw_name(sc->dev, 0, "de_be", &clk_debe);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot find clk 'de_be'\n");
 		return (error);
 	}
 	error = clk_set_freq(clk_debe, DEBE_FREQ, CLK_SET_ROUND_DOWN);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot set 'de_be' frequency\n");
 		return (error);
 	}
 	error = clk_enable(clk_debe);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot enable clk 'de_be'\n");
 		return (error);
 	}
 
 	/* Initialize all registers to 0 */
 	for (reg = DEBE_REG_START; reg < DEBE_REG_END; reg += DEBE_REG_WIDTH)
 		DEBE_WRITE(sc, reg, 0);
 
 	/* Enable display backend */
 	DEBE_WRITE(sc, DEBE_MODCTL, MODCTL_EN);
 
 	/* Set display size */
 	DEBE_WRITE(sc, DEBE_DISSIZE, DIS_HEIGHT(height) | DIS_WIDTH(width));
 
 	/* Set layer 0 size, position, and stride */
 	DEBE_WRITE(sc, DEBE_LAYSIZE0, LAY_HEIGHT(height) | LAY_WIDTH(width));
 	DEBE_WRITE(sc, DEBE_LAYCOOR0, LAY_XCOOR(0) | LAY_YCOOR(0));
 	DEBE_WRITE(sc, DEBE_LAYLINEWIDTH0, width * FB_BPP);
 
 	/* Point layer 0 to FB memory */
 	DEBE_WRITE(sc, DEBE_LAYFB_L32ADD0, LAYFB_L32ADD(sc->paddr));
 	DEBE_WRITE(sc, DEBE_LAYFB_H4ADD, LAY0FB_H4ADD(sc->paddr));
 
 	/* Set backend format and pixel sequence */
 	DEBE_WRITE(sc, DEBE_ATTCTL1, ATTCTL1_FBFMT(FBFMT_XRGB8888) |
 	    ATTCTL1_FBPS(FBPS_32BPP_ARGB));
 
 	/* Enable layer 0, output to LCD, setup interlace */
 	val = DEBE_READ(sc, DEBE_MODCTL);
 	val |= MODCTL_LAY0_EN;
 	val &= ~MODCTL_OUT_SEL_MASK;
 	val |= MODCTL_OUT_SEL(OUT_SEL_LCD);
 	if (interlace)
 		val |= MODCTL_ITLMOD_EN;
 	else
 		val &= ~MODCTL_ITLMOD_EN;
 	DEBE_WRITE(sc, DEBE_MODCTL, val);
 
 	/* Commit settings */
 	DEBE_WRITE(sc, DEBE_REGBUFFCTL, REGBUFFCTL_LOAD);
 
 	/* Start DEBE */
 	val = DEBE_READ(sc, DEBE_MODCTL);
 	val |= MODCTL_START_CTL;
 	DEBE_WRITE(sc, DEBE_MODCTL, val);
 
 	return (0);
 }
 
 static int
 a10fb_setup_pll(struct a10fb_softc *sc, uint64_t freq)
 {
 	clk_t clk_sclk1, clk_sclk2;
 	int error;
 
 	error = clk_get_by_ofw_name(sc->dev, 0, "lcd_ch1_sclk1", &clk_sclk1);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot find clk 'lcd_ch1_sclk1'\n");
 		return (error);
 	}
 	error = clk_get_by_ofw_name(sc->dev, 0, "lcd_ch1_sclk2", &clk_sclk2);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot find clk 'lcd_ch1_sclk2'\n");
 		return (error);
 	}
 
 	error = clk_set_freq(clk_sclk2, freq, 0);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot set lcd ch1 frequency\n");
 		return (error);
 	}
 	error = clk_enable(clk_sclk2);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot enable lcd ch1 sclk2\n");
 		return (error);
 	}
 	error = clk_enable(clk_sclk1);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot enable lcd ch1 sclk1\n");
 		return (error);
 	}
 
 	return (0);
 }
 
 static int
 a10fb_setup_tcon(struct a10fb_softc *sc, const struct videomode *mode)
 {
 	u_int interlace, hspw, hbp, vspw, vbp, vbl, width, height, start_delay;
 	u_int vtotal, framerate, clk;
 	clk_t clk_ahb;
 	hwreset_t rst;
 	uint32_t val;
 	int error;
 
 	interlace = !!(mode->flags & VID_INTERLACE);
 	width = mode->hdisplay;
 	height = mode->vdisplay;
 	hspw = mode->hsync_end - mode->hsync_start;
 	hbp = mode->htotal - mode->hsync_start;
 	vspw = mode->vsync_end - mode->vsync_start;
 	vbp = mode->vtotal - mode->vsync_start;
 	vbl = VBLANK_LEN(mode->vtotal, mode->vdisplay, interlace);
 	start_delay = START_DELAY(vbl);
 
 	/* Leave reset */
 	error = hwreset_get_by_ofw_name(sc->dev, 0, "lcd", &rst);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot find reset 'lcd'\n");
 		return (error);
 	}
 	error = hwreset_deassert(rst);
 	if (error != 0) {
 		device_printf(sc->dev, "couldn't de-assert reset 'lcd'\n");
 		return (error);
 	}
 	/* Gating AHB clock for LCD */
 	error = clk_get_by_ofw_name(sc->dev, 0, "ahb_lcd", &clk_ahb);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot find clk 'ahb_lcd'\n");
 		return (error);
 	}
 	error = clk_enable(clk_ahb);
 	if (error != 0) {
 		device_printf(sc->dev, "cannot enable clk 'ahb_lcd'\n");
 		return (error);
 	}
 
 	/* Disable TCON and TCON1 */
 	TCON_WRITE(sc, TCON_GCTL, 0);
 	TCON_WRITE(sc, TCON1_CTL, 0);
 
 	/* Enable clocks */
 	TCON_WRITE(sc, TCON0_DCLK, DCLK_EN);
 
 	/* Disable IO and data output ports */
 	TCON_WRITE(sc, TCON1_IO_TRI, IO_TRI_MASK);
 
 	/* Disable TCON and select TCON1 */
 	TCON_WRITE(sc, TCON_GCTL, GCTL_IO_MAP_SEL_TCON1);
 
 	/* Source width and height */
 	TCON_WRITE(sc, TCON1_BASIC0, BASIC_X(width) | BASIC_Y(height));
 	/* Scaler width and height */
 	TCON_WRITE(sc, TCON1_BASIC1, BASIC_X(width) | BASIC_Y(height));
 	/* Output width and height */
 	TCON_WRITE(sc, TCON1_BASIC2, BASIC_X(width) | BASIC_Y(height));
 	/* Horizontal total and back porch */
 	TCON_WRITE(sc, TCON1_BASIC3, BASIC3_HT(mode->htotal) | BASIC3_HBP(hbp));
 	/* Vertical total and back porch */
 	vtotal = VTOTAL(mode->vtotal);
 	if (interlace) {
 		framerate = DIVIDE(DIVIDE(DOT_CLOCK_TO_HZ(mode->dot_clock),
 		    mode->htotal), mode->vtotal);
 		clk = mode->htotal * (VTOTAL(mode->vtotal) + 1) * framerate;
 		if ((clk / 2) == DOT_CLOCK_TO_HZ(mode->dot_clock))
 			vtotal += 1;
 	}
 	TCON_WRITE(sc, TCON1_BASIC4, BASIC4_VT(vtotal) | BASIC4_VBP(vbp));
 	/* Horizontal and vertical sync */
 	TCON_WRITE(sc, TCON1_BASIC5, BASIC5_HSPW(hspw) | BASIC5_VSPW(vspw));
 	/* Polarity */
 	val = IO_POL_IO2_INV;
 	if (mode->flags & VID_PHSYNC)
 		val |= IO_POL_PHSYNC;
 	if (mode->flags & VID_PVSYNC)
 		val |= IO_POL_PVSYNC;
 	TCON_WRITE(sc, TCON1_IO_POL, val);
 
 	/* Set scan line for TCON1 line trigger */
 	TCON_WRITE(sc, TCON_GINT1, GINT1_TCON1_LINENO(start_delay));
 
 	/* Enable TCON1 */
 	val = TCON1_EN;
 	if (interlace)
 		val |= INTERLACE_EN;
 	val |= TCON1_START_DELAY(start_delay);
 	val |= TCON1_SRC_SEL(TCON1_SRC_CH1);
 	TCON_WRITE(sc, TCON1_CTL, val);
 
 	/* Setup PLL */
 	return (a10fb_setup_pll(sc, DOT_CLOCK_TO_HZ(mode->dot_clock)));
 }
 
 static void
 a10fb_enable_tcon(struct a10fb_softc *sc, int onoff)
 {
 	uint32_t val;
 
 	/* Enable TCON */
 	val = TCON_READ(sc, TCON_GCTL);
 	if (onoff)
 		val |= GCTL_TCON_EN;
 	else
 		val &= ~GCTL_TCON_EN;
 	TCON_WRITE(sc, TCON_GCTL, val);
 
 	/* Enable TCON1 IO0/IO1 outputs */
 	val = TCON_READ(sc, TCON1_IO_TRI);
 	if (onoff)
 		val &= ~(IO0_OUTPUT_TRI_EN | IO1_OUTPUT_TRI_EN);
 	else
 		val |= (IO0_OUTPUT_TRI_EN | IO1_OUTPUT_TRI_EN);
 	TCON_WRITE(sc, TCON1_IO_TRI, val);
 }
 
 static int
 a10fb_configure(struct a10fb_softc *sc, const struct videomode *mode)
 {
 	size_t fbsize;
 	int error;
 
 	fbsize = round_page(mode->hdisplay * mode->vdisplay * (FB_BPP / NBBY));
 
 	/* Detach the old FB device */
 	if (sc->fbdev != NULL) {
 		device_delete_child(sc->dev, sc->fbdev);
 		sc->fbdev = NULL;
 	}
 
 	/* If the FB size has changed, free the old FB memory */
 	if (sc->fbsize > 0 && sc->fbsize != fbsize) {
 		a10fb_freefb(sc);
 		sc->vaddr = 0;
 	}
 
 	/* Allocate the FB if necessary */
 	sc->fbsize = fbsize;
 	if (sc->vaddr == 0) {
 		error = a10fb_allocfb(sc);
 		if (error != 0) {
 			device_printf(sc->dev, "failed to allocate FB memory\n");
 			return (ENXIO);
 		}
 	}
 
 	/* Setup display backend */
 	error = a10fb_setup_debe(sc, mode);
 	if (error != 0)
 		return (error);
 
 	/* Setup display timing controller */
 	error = a10fb_setup_tcon(sc, mode);
 	if (error != 0)
 		return (error);
 
 	/* Attach framebuffer device */
 	sc->info.fb_name = device_get_nameunit(sc->dev);
 	sc->info.fb_vbase = (intptr_t)sc->vaddr;
 	sc->info.fb_pbase = sc->paddr;
 	sc->info.fb_size = sc->fbsize;
 	sc->info.fb_bpp = sc->info.fb_depth = FB_BPP;
 	sc->info.fb_stride = mode->hdisplay * (FB_BPP / NBBY);
 	sc->info.fb_width = mode->hdisplay;
 	sc->info.fb_height = mode->vdisplay;
 
 	sc->fbdev = device_add_child(sc->dev, "fbd", device_get_unit(sc->dev));
 	if (sc->fbdev == NULL) {
 		device_printf(sc->dev, "failed to add fbd child\n");
 		return (ENOENT);
 	}
 
 	error = device_probe_and_attach(sc->fbdev);
 	if (error != 0) {
 		device_printf(sc->dev, "failed to attach fbd device\n");
 		return (error);
 	}
 
 	return (0);
 }
 
 static void
 a10fb_hdmi_event(void *arg, device_t hdmi_dev)
 {
 	const struct videomode *mode;
 	struct videomode hdmi_mode;
 	struct a10fb_softc *sc;
 	struct edid_info ei;
 	uint8_t *edid;
 	uint32_t edid_len;
 	int error;
 
 	sc = arg;
 	edid = NULL;
 	edid_len = 0;
 	mode = NULL;
 
 	error = HDMI_GET_EDID(hdmi_dev, &edid, &edid_len);
 	if (error != 0) {
 		device_printf(sc->dev, "failed to get EDID: %d\n", error);
 	} else {
 		error = edid_parse(edid, &ei);
 		if (error != 0) {
 			device_printf(sc->dev, "failed to parse EDID: %d\n",
 			    error);
 		} else {
 			if (bootverbose)
 				edid_print(&ei);
 			mode = ei.edid_preferred_mode;
 		}
 	}
 
 	/* If the preferred mode could not be determined, use the default */
 	if (mode == NULL)
 		mode = pick_mode_by_ref(FB_DEFAULT_W, FB_DEFAULT_H,
 		    FB_DEFAULT_REF);
 
 	if (mode == NULL) {
 		device_printf(sc->dev, "failed to find usable video mode\n");
 		return;
 	}
 
 	if (bootverbose)
 		device_printf(sc->dev, "using %dx%d\n",
 		    mode->hdisplay, mode->vdisplay);
 
 	/* Disable HDMI */
 	HDMI_ENABLE(hdmi_dev, 0);
 
 	/* Disable timing controller */
 	a10fb_enable_tcon(sc, 0);
 
 	/* Configure DEBE and TCON */
 	error = a10fb_configure(sc, mode);
 	if (error != 0) {
 		device_printf(sc->dev, "failed to configure FB: %d\n", error);
 		return;
 	}
 
 	hdmi_mode = *mode;
 	hdmi_mode.hskew = mode->hsync_end - mode->hsync_start;
 	hdmi_mode.flags |= VID_HSKEW;
 	HDMI_SET_VIDEOMODE(hdmi_dev, &hdmi_mode);
 
 	/* Enable timing controller */
 	a10fb_enable_tcon(sc, 1);
 
 	DELAY(HDMI_ENABLE_DELAY);
 
 	/* Enable HDMI */
 	HDMI_ENABLE(hdmi_dev, 1);
 }
 
 static int
 a10fb_probe(device_t dev)
 {
 	if (!ofw_bus_status_okay(dev))
 		return (ENXIO);
 
 	if (!ofw_bus_is_compatible(dev, "allwinner,sun7i-a20-fb"))
 		return (ENXIO);
 
 	device_set_desc(dev, "Allwinner Framebuffer");
 	return (BUS_PROBE_DEFAULT);
 }
 
 static int
 a10fb_attach(device_t dev)
 {
 	struct a10fb_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	sc->dev = dev;
 
 	if (bus_alloc_resources(dev, a10fb_spec, sc->res)) {
 		device_printf(dev, "cannot allocate resources for device\n");
 		return (ENXIO);
 	}
 
 	sc->hdmi_evh = EVENTHANDLER_REGISTER(hdmi_event,
 	    a10fb_hdmi_event, sc, 0);
 
 	return (0);
 }
 
 static struct fb_info *
 a10fb_fb_getinfo(device_t dev)
 {
 	struct a10fb_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	return (&sc->info);
 }
 
 static device_method_t a10fb_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		a10fb_probe),
 	DEVMETHOD(device_attach,	a10fb_attach),
 
 	/* FB interface */
 	DEVMETHOD(fb_getinfo,		a10fb_fb_getinfo),
 
 	DEVMETHOD_END
 };
 
 static driver_t a10fb_driver = {
 	"fb",
 	a10fb_methods,
 	sizeof(struct a10fb_softc),
 };
 
 static devclass_t a10fb_devclass;
 
 DRIVER_MODULE(fb, simplebus, a10fb_driver, a10fb_devclass, 0, 0);
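a10fb_allocfb()/a10fb_freefb() above pair kmem_alloc_contig() with the arena-less kmem_free(): the framebuffer needs physically contiguous, FB_ALIGN-aligned, write-combining memory, and the physical base handed to the display backend comes from pmap_kextract(). A minimal sketch of that pattern follows; the demo_fb_* names are hypothetical and the code assumes the headers already included by a10_fb.c.

/* Sketch only; demo_fb_* names are hypothetical, headers as in a10_fb.c. */
static vm_offset_t
demo_fb_alloc(size_t fbsize, bus_addr_t *paddr)
{
	vm_offset_t va;

	/* Contiguous, FB_ALIGN-aligned, write-combining framebuffer memory. */
	va = kmem_alloc_contig(fbsize, M_NOWAIT | M_ZERO, 0, ~0,
	    FB_ALIGN, 0, VM_MEMATTR_WRITE_COMBINING);
	if (va != 0)
		*paddr = pmap_kextract(va);	/* physical base for the DEBE */
	return (va);
}

static void
demo_fb_free(vm_offset_t va, size_t fbsize)
{
	/* Old interface: kmem_free(kernel_arena, va, fbsize). */
	kmem_free(va, fbsize);
}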
Index: head/sys/arm/arm/busdma_machdep-v4.c
===================================================================
--- head/sys/arm/arm/busdma_machdep-v4.c	(revision 338317)
+++ head/sys/arm/arm/busdma_machdep-v4.c	(revision 338318)
@@ -1,1617 +1,1617 @@
 /*-
  * Copyright (c) 2012 Ian Lepore
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 2002 Peter Grehan
  * Copyright (c) 1997, 1998 Justin T. Gibbs.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *   From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * ARM bus dma support routines.
  *
  * XXX Things to investigate / fix some day...
  *  - What is the earliest that this API can be called?  Could there be any
  *    fallout from changing the SYSINIT() order from SI_SUB_VM to SI_SUB_KMEM?
  *  - The manpage mentions the BUS_DMA_NOWAIT flag only in the context of the
  *    bus_dmamap_load() function.  This code has historically honored it (and
  *    still does) in bus_dmamem_alloc().  If we got rid of that we could lose some
  *    error checking because some resource management calls would become WAITOK
  *    and thus "cannot fail."
  *  - The decisions made by _bus_dma_can_bounce() should be made once, at tag
  *    creation time, and the result stored in the tag.
  *  - It should be possible to take some shortcuts when mapping a buffer we know
  *    came from the uma(9) allocators based on what we know about such buffers
  *    (aligned, contiguous, etc).
  *  - The allocation of bounce pages could probably be cleaned up, then we could
  *    retire arm_remap_nocache().
  */
 
 #define _ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/busdma_bufalloc.h>
 #include <sys/counter.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/memdesc.h>
 #include <sys/proc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 
 #define	MAX_BPAGES		64
 #define	MAX_DMA_SEGMENTS	4096
 #define	BUS_DMA_COULD_BOUNCE	BUS_DMA_BUS3
 #define	BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4
 
 struct bounce_zone;
 
 struct bus_dma_tag {
 	bus_dma_tag_t		parent;
 	bus_size_t		alignment;
 	bus_addr_t		boundary;
 	bus_addr_t		lowaddr;
 	bus_addr_t		highaddr;
 	bus_dma_filter_t	*filter;
 	void			*filterarg;
 	bus_size_t		maxsize;
 	u_int			nsegments;
 	bus_size_t		maxsegsz;
 	int			flags;
 	int			ref_count;
 	int			map_count;
 	bus_dma_lock_t		*lockfunc;
 	void			*lockfuncarg;
 	struct bounce_zone	*bounce_zone;
 	/*
 	 * DMA range for this tag.  If the page doesn't fall within
 	 * one of these ranges, an error is returned.  The caller
 	 * may then decide what to do with the transfer.  If the
 	 * range pointer is NULL, it is ignored.
 	 */
 	struct arm32_dma_range	*ranges;
 	int			_nranges;
 };
 
 struct bounce_page {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	bus_addr_t	busaddr;	/* Physical address */
 	vm_offset_t	datavaddr;	/* kva of client data */
 	vm_page_t	datapage;	/* physical page of client data */
 	vm_offset_t	dataoffs;	/* page offset of client data */
 	bus_size_t	datacount;	/* client data count */
 	STAILQ_ENTRY(bounce_page) links;
 };
 
 struct sync_list {
 	vm_offset_t	vaddr;		/* kva of client data */
 	vm_page_t	pages;		/* starting page of client data */
 	vm_offset_t	dataoffs;	/* page offset of client data */
 	bus_size_t	datacount;	/* client data count */
 };
 
 int busdma_swi_pending;
 
 struct bounce_zone {
 	STAILQ_ENTRY(bounce_zone) links;
 	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
 	int		total_bpages;
 	int		free_bpages;
 	int		reserved_bpages;
 	int		active_bpages;
 	int		total_bounced;
 	int		total_deferred;
 	int		map_count;
 	bus_size_t	alignment;
 	bus_addr_t	lowaddr;
 	char		zoneid[8];
 	char		lowaddrid[20];
 	struct sysctl_ctx_list sysctl_tree;
 	struct sysctl_oid *sysctl_tree_top;
 };
 
 static struct mtx bounce_lock;
 static int total_bpages;
 static int busdma_zonecount;
 static uint32_t tags_total;
 static uint32_t maps_total;
 static uint32_t maps_dmamem;
 static uint32_t maps_coherent;
 static counter_u64_t maploads_total;
 static counter_u64_t maploads_bounced;
 static counter_u64_t maploads_coherent;
 static counter_u64_t maploads_dmamem;
 static counter_u64_t maploads_mbuf;
 static counter_u64_t maploads_physmem;
 
 static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
 
 SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0,
    "Number of active tags");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0,
    "Number of active maps");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0,
    "Number of active maps for bus_dmamem_alloc buffers");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0,
    "Number of active maps with BUS_DMA_COHERENT flag set");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD,
     &maploads_total, "Number of load operations performed");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD,
     &maploads_bounced, "Number of load operations that used bounce buffers");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD,
     &maploads_dmamem, "Number of load operations on BUS_DMA_COHERENT memory");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD,
     &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD,
     &maploads_mbuf, "Number of load operations for mbufs");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD,
     &maploads_physmem, "Number of load operations on physical buffers");
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
    "Total bounce pages");
 
 struct bus_dmamap {
 	struct bp_list		bpages;
 	int			pagesneeded;
 	int			pagesreserved;
 	bus_dma_tag_t		dmat;
 	struct memdesc		mem;
 	bus_dmamap_callback_t	*callback;
 	void			*callback_arg;
 	int			flags;
 #define	DMAMAP_COHERENT		(1 << 0)
 #define	DMAMAP_DMAMEM_ALLOC	(1 << 1)
 #define	DMAMAP_MBUF		(1 << 2)
 #define	DMAMAP_CACHE_ALIGNED	(1 << 3)
 	STAILQ_ENTRY(bus_dmamap) links;
 	bus_dma_segment_t	*segments;
 	int			sync_count;
 	struct sync_list	slist[];
 };
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
 static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
 static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int commit);
 static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_offset_t vaddr, bus_addr_t addr, bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 static void bus_dmamap_sync_sl(struct sync_list *sl, bus_dmasync_op_t op,
     int bufaligned);
 
 /*
  * ----------------------------------------------------------------------------
  * Begin block of code useful to transplant to other implementations.
  */
 
 static busdma_bufalloc_t coherent_allocator;	/* Cache of coherent buffers */
 static busdma_bufalloc_t standard_allocator;	/* Cache of standard buffers */
 
 MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");
 MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages");
 
 static void
 busdma_init(void *dummy)
 {
 
 	maploads_total    = counter_u64_alloc(M_WAITOK);
 	maploads_bounced  = counter_u64_alloc(M_WAITOK);
 	maploads_coherent = counter_u64_alloc(M_WAITOK);
 	maploads_dmamem   = counter_u64_alloc(M_WAITOK);
 	maploads_mbuf     = counter_u64_alloc(M_WAITOK);
 	maploads_physmem  = counter_u64_alloc(M_WAITOK);
 
 	/* Create a cache of buffers in standard (cacheable) memory. */
 	standard_allocator = busdma_bufalloc_create("buffer",
 	    arm_dcache_align,	/* minimum_alignment */
 	    NULL,		/* uma_alloc func */
 	    NULL,		/* uma_free func */
 	    0);			/* uma_zcreate_flags */
 
 	/*
 	 * Create a cache of buffers in uncacheable memory, to implement the
 	 * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag.
 	 */
 	coherent_allocator = busdma_bufalloc_create("coherent",
 	    arm_dcache_align,	/* minimum_alignment */
 	    busdma_bufalloc_alloc_uncacheable,
 	    busdma_bufalloc_free_uncacheable,
 	    0);			/* uma_zcreate_flags */
 }
 
 /*
  * This init historically used SI_SUB_VM, but now the init code requires
  * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get
  * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by
  * using SI_SUB_KMEM+1.
  */
 SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL);
 
 /*
  * End block of code useful to transplant to other implementations.
  * ----------------------------------------------------------------------------
  */
 
 /*
  * Return true if a match is made.
  *
  * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'.
  *
  * If paddr is within the bounds of the dma tag then call the filter callback
  * to check for a match, if there is no filter callback then assume a match.
  */
 static int
 run_filter(bus_dma_tag_t dmat, bus_addr_t paddr)
 {
 	int retval;
 
 	retval = 0;
 
 	do {
 		if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr)
 		 || ((paddr & (dmat->alignment - 1)) != 0))
 		 && (dmat->filter == NULL
 		  || (*dmat->filter)(dmat->filterarg, paddr) != 0))
 			retval = 1;
 
 		dmat = dmat->parent;
 	} while (retval == 0 && dmat != NULL);
 	return (retval);
 }
 
 /*
  * This routine checks the exclusion zone constraints from a tag against the
  * physical RAM available on the machine.  If a tag specifies an exclusion zone
  * but there's no RAM in that zone, then we avoid allocating resources to bounce
  * a request, and we can use any memory allocator (as opposed to needing
  * kmem_alloc_contig() just because it can allocate pages in an address range).
  *
  * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the
  * same value on 32-bit architectures) as their lowaddr constraint, and we can't
  * possibly have RAM at an address higher than the highest address we can
  * express, so we take a fast out.
  */
 static __inline int
 _bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr)
 {
 	int i;
 
 	if (lowaddr >= BUS_SPACE_MAXADDR)
 		return (0);
 
 	for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) {
 		if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1])
 		    || (lowaddr < phys_avail[i] &&
 		    highaddr > phys_avail[i]))
 			return (1);
 	}
 	return (0);
 }
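 
 /*
  * Worked example (editorial, not in the original source): assume phys_avail[]
  * describes a single RAM region from 0x20000000 to 0x60000000.  A tag with
  * lowaddr == BUS_SPACE_MAXADDR returns 0 immediately (nothing to exclude),
  * while a tag with lowaddr == 0x3fffffff falls inside that region, so this
  * returns 1 and bouncing resources may be needed for pages above lowaddr.
  */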
 
 static __inline struct arm32_dma_range *
 _bus_dma_inrange(struct arm32_dma_range *ranges, int nranges,
     bus_addr_t curaddr)
 {
 	struct arm32_dma_range *dr;
 	int i;
 
 	for (i = 0, dr = ranges; i < nranges; i++, dr++) {
 		if (curaddr >= dr->dr_sysbase &&
 		    round_page(curaddr) <= (dr->dr_sysbase + dr->dr_len))
 			return (dr);
 	}
 
 	return (NULL);
 }
 
 /*
  * Convenience function for manipulating driver locks from busdma (during
  * busdma_swi, for example).  Drivers that don't provide their own locks
  * should specify &Giant to dmat->lockfuncarg.  Drivers that use their own
  * non-mutex locking scheme don't have to use this at all.
  */
 void
 busdma_lock_mutex(void *arg, bus_dma_lock_op_t op)
 {
 	struct mtx *dmtx;
 
 	dmtx = (struct mtx *)arg;
 	switch (op) {
 	case BUS_DMA_LOCK:
 		mtx_lock(dmtx);
 		break;
 	case BUS_DMA_UNLOCK:
 		mtx_unlock(dmtx);
 		break;
 	default:
 		panic("Unknown operation 0x%x for busdma_lock_mutex!", op);
 	}
 }
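 
 /*
  * Illustrative sketch (editorial addition, not part of this file): a driver
  * that wants its deferred-load callbacks serialized by its own mutex passes
  * busdma_lock_mutex and that mutex to bus_dma_tag_create().  The foo_*
  * names, the softc layout, and the BUSDMA_EXAMPLES guard are hypothetical.
  */
 #ifdef BUSDMA_EXAMPLES
 struct foo_softc {
 	struct mtx	foo_mtx;	/* serializes deferred callbacks */
 	bus_dma_tag_t	foo_dtag;
 };
 
 static int
 foo_create_dma_tag(device_t dev, struct foo_softc *sc)
 {
 
 	mtx_init(&sc->foo_mtx, "foo dma", NULL, MTX_DEF);
 	return (bus_dma_tag_create(bus_get_dma_tag(dev),
 	    1, 0,			/* alignment, boundary */
 	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    PAGE_SIZE, 1, PAGE_SIZE,	/* maxsize, nsegments, maxsegsz */
 	    0,				/* flags */
 	    busdma_lock_mutex, &sc->foo_mtx,
 	    &sc->foo_dtag));
 }
 #endif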
 
 /*
  * dflt_lock should never get called.  It gets put into the dma tag when
  * lockfunc == NULL, which is only valid if the maps that are associated
  * with the tag are meant to never be deferred.
  * XXX Should have a way to identify which driver is responsible here.
  */
 static void
 dflt_lock(void *arg, bus_dma_lock_op_t op)
 {
 #ifdef INVARIANTS
 	panic("driver error: busdma dflt_lock called");
 #else
 	printf("DRIVER_ERROR: busdma dflt_lock called\n");
 #endif
 }
 
 /*
  * Allocate a device specific dma_tag.
  */
 int
 bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	bus_dma_tag_t newtag;
 	int error = 0;
 	/* Return a NULL tag on failure */
 	*dmat = NULL;
 
 	newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_NOWAIT);
 	if (newtag == NULL) {
 		CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 		    __func__, newtag, 0, error);
 		return (ENOMEM);
 	}
 
 	newtag->parent = parent;
 	newtag->alignment = alignment ? alignment : 1;
 	newtag->boundary = boundary;
 	newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1);
 	newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1);
 	newtag->filter = filter;
 	newtag->filterarg = filterarg;
 	newtag->maxsize = maxsize;
 	newtag->nsegments = nsegments;
 	newtag->maxsegsz = maxsegsz;
 	newtag->flags = flags;
 	newtag->ref_count = 1; /* Count ourself */
 	newtag->map_count = 0;
 	newtag->ranges = bus_dma_get_range();
 	newtag->_nranges = bus_dma_get_range_nb();
 	if (lockfunc != NULL) {
 		newtag->lockfunc = lockfunc;
 		newtag->lockfuncarg = lockfuncarg;
 	} else {
 		newtag->lockfunc = dflt_lock;
 		newtag->lockfuncarg = NULL;
 	}
 
 	/* Take into account any restrictions imposed by our parent tag */
 	if (parent != NULL) {
 		newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr);
 		newtag->highaddr = MAX(parent->highaddr, newtag->highaddr);
 		if (newtag->boundary == 0)
 			newtag->boundary = parent->boundary;
 		else if (parent->boundary != 0)
 			newtag->boundary = MIN(parent->boundary,
 					       newtag->boundary);
 		if ((newtag->filter != NULL) ||
 		    ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0))
 			newtag->flags |= BUS_DMA_COULD_BOUNCE;
 		if (newtag->filter == NULL) {
 			/*
 			 * Short circuit looking at our parent directly
 			 * since we have encapsulated all of its information
 			 */
 			newtag->filter = parent->filter;
 			newtag->filterarg = parent->filterarg;
 			newtag->parent = parent->parent;
 		}
 		if (newtag->parent != NULL)
 			atomic_add_int(&parent->ref_count, 1);
 	}
 	if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr)
 	 || newtag->alignment > 1)
 		newtag->flags |= BUS_DMA_COULD_BOUNCE;
 
 	if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 	    (flags & BUS_DMA_ALLOCNOW) != 0) {
 		struct bounce_zone *bz;
 
 		/* Must bounce */
 
 		if ((error = alloc_bounce_zone(newtag)) != 0) {
 			free(newtag, M_BUSDMA);
 			return (error);
 		}
 		bz = newtag->bounce_zone;
 
 		if (ptoa(bz->total_bpages) < maxsize) {
 			int pages;
 
 			pages = atop(maxsize) - bz->total_bpages;
 
 			/* Add pages to our bounce pool */
 			if (alloc_bounce_pages(newtag, pages) < pages)
 				error = ENOMEM;
 		}
 		/* Performed initial allocation */
 		newtag->flags |= BUS_DMA_MIN_ALLOC_COMP;
 	} else
 		newtag->bounce_zone = NULL;
 
 	if (error != 0) {
 		free(newtag, M_BUSDMA);
 	} else {
 		atomic_add_32(&tags_total, 1);
 		*dmat = newtag;
 	}
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->flags : 0), error);
 	return (error);
 }
 
 int
 bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
 {
 
 	return (0);
 }
 
 int
 bus_dma_tag_destroy(bus_dma_tag_t dmat)
 {
 	bus_dma_tag_t dmat_copy;
 	int error;
 
 	error = 0;
 	dmat_copy = dmat;
 
 	if (dmat != NULL) {
 
 		if (dmat->map_count != 0) {
 			error = EBUSY;
 			goto out;
 		}
 
 		while (dmat != NULL) {
 			bus_dma_tag_t parent;
 
 			parent = dmat->parent;
 			atomic_subtract_int(&dmat->ref_count, 1);
 			if (dmat->ref_count == 0) {
 				atomic_subtract_32(&tags_total, 1);
 				free(dmat, M_BUSDMA);
 				/*
 				 * Last reference count, so
 				 * release our reference
 				 * count on our parent.
 				 */
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 out:
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
 	return (error);
 }
 
 static int
 allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	int error;
 
 	/*
 	 * Bouncing might be required if the driver asks for an active
 	 * exclusion region, a data alignment that is stricter than 1, and/or
 	 * an active address boundary.
 	 */
 	if (dmat->flags & BUS_DMA_COULD_BOUNCE) {
 
 		/* Must bounce */
 		struct bounce_zone *bz;
 		int maxpages;
 
 		if (dmat->bounce_zone == NULL) {
 			if ((error = alloc_bounce_zone(dmat)) != 0) {
 				return (error);
 			}
 		}
 		bz = dmat->bounce_zone;
 
 		/* Initialize the new map */
 		STAILQ_INIT(&(map->bpages));
 
 		/*
 		 * Attempt to add pages to our pool on a per-instance
 		 * basis up to a sane limit.
 		 */
 		maxpages = MAX_BPAGES;
 		if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0
 		 || (bz->map_count > 0 && bz->total_bpages < maxpages)) {
 			int pages;
 
 			pages = MAX(atop(dmat->maxsize), 1);
 			pages = MIN(maxpages - bz->total_bpages, pages);
 			pages = MAX(pages, 1);
 			if (alloc_bounce_pages(dmat, pages) < pages)
 				return (ENOMEM);
 
 			if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0)
 				dmat->flags |= BUS_DMA_MIN_ALLOC_COMP;
 		}
 		bz->map_count++;
 	}
 	return (0);
 }
 
 static bus_dmamap_t
 allocate_map(bus_dma_tag_t dmat, int mflags)
 {
 	int mapsize, segsize;
 	bus_dmamap_t map;
 
 	/*
 	 * Allocate the map.  The map structure ends with an embedded
 	 * variable-sized array of sync_list structures.  Following that
 	 * we allocate enough extra space to hold the array of bus_dma_segments.
 	 */
 	KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS,
 	   ("cannot allocate %u dma segments (max is %u)",
 	    dmat->nsegments, MAX_DMA_SEGMENTS));
 	segsize = sizeof(struct bus_dma_segment) * dmat->nsegments;
 	mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments;
 	map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO);
 	if (map == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
 		return (NULL);
 	}
 	map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize);
 	return (map);
 }
 
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 int
 bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	bus_dmamap_t map;
 	int error = 0;
 
 	*mapp = map = allocate_map(dmat, M_NOWAIT);
 	if (map == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
 		return (ENOMEM);
 	}
 
 	/*
 	 * Bouncing might be required if the driver asks for an exclusion
 	 * region, a data alignment that is stricter than 1, or DMA that begins
 	 * or ends with a partial cacheline.  Whether bouncing will actually
 	 * happen can't be known until mapping time, but we need to pre-allocate
 	 * resources now because we might not be allowed to at mapping time.
 	 */
 	error = allocate_bz_and_pages(dmat, map);
 	if (error != 0) {
 		free(map, M_BUSDMA);
 		*mapp = NULL;
 		return (error);
 	}
 	if (map->flags & DMAMAP_COHERENT)
 		atomic_add_32(&maps_coherent, 1);
 	atomic_add_32(&maps_total, 1);
 	dmat->map_count++;
 
 	return (0);
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 int
 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 
 	if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 		    __func__, dmat, EBUSY);
 		return (EBUSY);
 	}
 	if (dmat->bounce_zone)
 		dmat->bounce_zone->map_count--;
 	if (map->flags & DMAMAP_COHERENT)
 		atomic_subtract_32(&maps_coherent, 1);
 	atomic_subtract_32(&maps_total, 1);
 	free(map, M_BUSDMA);
 	dmat->map_count--;
 	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
 	return (0);
 }
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into bus device
  * space based on the constraints listed in the dma tag.  Returns a pointer to
  * the allocated memory, and a pointer to an associated bus_dmamap.
  */
 int
 bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	busdma_bufalloc_t ba;
 	struct busdma_bufzone *bufzone;
 	bus_dmamap_t map;
 	vm_memattr_t memattr;
 	int mflags;
 
 	if (flags & BUS_DMA_NOWAIT)
 		mflags = M_NOWAIT;
 	else
 		mflags = M_WAITOK;
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
 
 	*mapp = map = allocate_map(dmat, mflags);
 	if (map == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->flags, ENOMEM);
 		return (ENOMEM);
 	}
 	map->flags = DMAMAP_DMAMEM_ALLOC;
 
 	/* Choose a busdma buffer allocator based on memory type flags. */
 	if (flags & BUS_DMA_COHERENT) {
 		memattr = VM_MEMATTR_UNCACHEABLE;
 		ba = coherent_allocator;
 		map->flags |= DMAMAP_COHERENT;
 	} else {
 		memattr = VM_MEMATTR_DEFAULT;
 		ba = standard_allocator;
 	}
 
 	/*
 	 * Try to find a bufzone in the allocator that holds a cache of buffers
 	 * of the right size for this request.  If the buffer is too big to be
 	 * held in the allocator cache, this returns NULL.
 	 */
 	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
 
 	/*
 	 * Allocate the buffer from the uma(9) allocator if...
 	 *  - It's small enough to be in the allocator (bufzone not NULL).
 	 *  - The alignment constraint isn't larger than the allocation size
 	 *    (the allocator aligns buffers to their size boundaries).
 	 *  - There's no need to handle lowaddr/highaddr exclusion zones.
 	 * else allocate non-contiguous pages if...
 	 *  - The page count that could get allocated doesn't exceed nsegments.
 	 *  - The alignment constraint isn't larger than a page boundary.
 	 *  - There are no boundary-crossing constraints.
 	 * else allocate a block of contiguous pages because one or more of the
 	 * constraints is something that only the contig allocator can fulfill.
 	 */
 	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
 	    !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) {
 		*vaddr = uma_zalloc(bufzone->umazone, mflags);
 	} else if (dmat->nsegments >=
 	    howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) &&
 	    dmat->alignment <= PAGE_SIZE &&
 	    (dmat->boundary % PAGE_SIZE) == 0) {
 		*vaddr = (void *)kmem_alloc_attr(dmat->maxsize, mflags, 0,
 		    dmat->lowaddr, memattr);
 	} else {
 		*vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0,
 		    dmat->lowaddr, dmat->alignment, dmat->boundary, memattr);
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->flags, ENOMEM);
 		free(map, M_BUSDMA);
 		*mapp = NULL;
 		return (ENOMEM);
 	}
 	if (map->flags & DMAMAP_COHERENT)
 		atomic_add_32(&maps_coherent, 1);
 	atomic_add_32(&maps_dmamem, 1);
 	atomic_add_32(&maps_total, 1);
 	dmat->map_count++;
 
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->flags, 0);
 	return (0);
 }
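 
 /*
  * Illustrative sketch (editorial addition, not part of this file): the usual
  * consumer pattern is to allocate DMA-able memory with bus_dmamem_alloc(),
  * then load it once so the callback reports its bus address.  The foo_*
  * names and the BUSDMA_EXAMPLES guard are hypothetical; the tag is assumed
  * to have nsegments == 1 and maxsize >= ringsz.
  */
 #ifdef BUSDMA_EXAMPLES
 static void
 foo_ring_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 	bus_addr_t *busaddrp = arg;
 
 	if (error == 0 && nseg == 1)
 		*busaddrp = segs[0].ds_addr;
 }
 
 static int
 foo_alloc_ring(bus_dma_tag_t dtag, bus_size_t ringsz, void **ringp,
     bus_dmamap_t *mapp, bus_addr_t *busaddrp)
 {
 	int error;
 
 	error = bus_dmamem_alloc(dtag, ringp,
 	    BUS_DMA_COHERENT | BUS_DMA_ZERO | BUS_DMA_WAITOK, mapp);
 	if (error != 0)
 		return (error);
 	error = bus_dmamap_load(dtag, *mapp, *ringp, ringsz, foo_ring_cb,
 	    busaddrp, BUS_DMA_NOWAIT);
 	if (error != 0)
 		bus_dmamem_free(dtag, *ringp, *mapp);
 	return (error);
 }
 #endif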
 
 /*
  * Free a piece of memory that was allocated via bus_dmamem_alloc, along with
  * its associated map.
  */
 void
 bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 	struct busdma_bufzone *bufzone;
 	busdma_bufalloc_t ba;
 
 	if (map->flags & DMAMAP_COHERENT)
 		ba = coherent_allocator;
 	else
 		ba = standard_allocator;
 
 	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
 
 	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
 	    !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr))
 		uma_zfree(bufzone->umazone, vaddr);
 	else
-		kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize);
+		kmem_free((vm_offset_t)vaddr, dmat->maxsize);
 
 	dmat->map_count--;
 	if (map->flags & DMAMAP_COHERENT)
 		atomic_subtract_32(&maps_coherent, 1);
 	atomic_subtract_32(&maps_total, 1);
 	atomic_subtract_32(&maps_dmamem, 1);
 	free(map, M_BUSDMA);
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
 }
 
 static void
 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags)
 {
 	bus_addr_t curaddr;
 	bus_size_t sgsize;
 
 	if (map->pagesneeded == 0) {
 		CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d",
 		    dmat->lowaddr, dmat->boundary, dmat->alignment);
 		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d",
 		    map, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		curaddr = buf;
 		while (buflen != 0) {
 			sgsize = MIN(buflen, dmat->maxsegsz);
 			if (run_filter(dmat, curaddr) != 0) {
 				sgsize = MIN(sgsize,
 				    PAGE_SIZE - (curaddr & PAGE_MASK));
 				map->pagesneeded++;
 			}
 			curaddr += sgsize;
 			buflen -= sgsize;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
     void *buf, bus_size_t buflen, int flags)
 {
 	vm_offset_t vaddr;
 	vm_offset_t vendaddr;
 	bus_addr_t paddr;
 
 	if (map->pagesneeded == 0) {
 		CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d",
 		    dmat->lowaddr, dmat->boundary, dmat->alignment);
 		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d",
 		    map, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		vaddr = trunc_page((vm_offset_t)buf);
 		vendaddr = (vm_offset_t)buf + buflen;
 
 		while (vaddr < vendaddr) {
 			if (__predict_true(pmap == kernel_pmap))
 				paddr = pmap_kextract(vaddr);
 			else
 				paddr = pmap_extract(pmap, vaddr);
 			if (run_filter(dmat, paddr) != 0)
 				map->pagesneeded++;
 			vaddr += PAGE_SIZE;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static int
 _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
 {
 
 	/* Reserve Necessary Bounce Pages */
 	mtx_lock(&bounce_lock);
 	if (flags & BUS_DMA_NOWAIT) {
 		if (reserve_bounce_pages(dmat, map, 0) != 0) {
 			mtx_unlock(&bounce_lock);
 			return (ENOMEM);
 		}
 	} else {
 		if (reserve_bounce_pages(dmat, map, 1) != 0) {
 			/* Queue us for resources */
 			STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links);
 			mtx_unlock(&bounce_lock);
 			return (EINPROGRESS);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 
 	return (0);
 }
 
 /*
  * Add a single contiguous physical range to the segment list.
  */
 static int
 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
     bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t baddr, bmask;
 	int seg;
 
 	/*
 	 * Make sure we don't cross any boundaries.
 	 */
 	bmask = ~(dmat->boundary - 1);
 	if (dmat->boundary > 0) {
 		baddr = (curaddr + dmat->boundary) & bmask;
 		if (sgsize > (baddr - curaddr))
 			sgsize = (baddr - curaddr);
 	}
 	if (dmat->ranges) {
 		struct arm32_dma_range *dr;
 
 		dr = _bus_dma_inrange(dmat->ranges, dmat->_nranges,
 		    curaddr);
 		if (dr == NULL)
 			return (0);
 		/*
 		 * In a valid DMA range.  Translate the physical
 		 * memory address to an address in the DMA window.
 		 */
 		curaddr = (curaddr - dr->dr_sysbase) + dr->dr_busbase;
 
 	}
 
 	seg = *segp;
 	/*
 	 * Insert chunk into a segment, coalescing with
 	 * the previous segment if possible.
 	 */
 	if (seg >= 0 &&
 	    curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
 	    (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
 	    (dmat->boundary == 0 ||
 	    (segs[seg].ds_addr & bmask) == (curaddr & bmask))) {
 		segs[seg].ds_len += sgsize;
 	} else {
 		if (++seg >= dmat->nsegments)
 			return (0);
 		segs[seg].ds_addr = curaddr;
 		segs[seg].ds_len = sgsize;
 	}
 	*segp = seg;
 	return (sgsize);
 }
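 
 /*
  * Worked example (editorial, not in the original source): with
  * dmat->boundary == 0x10000 and curaddr == 0x1fc00, bmask is 0xffff0000 and
  * baddr == (0x1fc00 + 0x10000) & bmask == 0x20000, so a 0x1000-byte chunk is
  * clipped to 0x400 bytes to stop at the 64KB boundary; the remainder starts
  * a new segment at 0x20000.  A chunk is coalesced into the previous segment
  * only when it begins exactly where that segment ends, the combined length
  * stays within maxsegsz, and both stay inside the same boundary window.
  */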
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 int
 _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t curaddr;
 	bus_addr_t sl_end = 0;
 	bus_size_t sgsize;
 	struct sync_list *sl;
 	int error;
 
 	if (segs == NULL)
 		segs = map->segments;
 
 	counter_u64_add(maploads_total, 1);
 	counter_u64_add(maploads_physmem, 1);
 
 	if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			counter_u64_add(maploads_bounced, 1);
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	sl = map->slist + map->sync_count - 1;
 
 	while (buflen > 0) {
 		curaddr = buf;
 		sgsize = MIN(buflen, dmat->maxsegsz);
 		if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 && run_filter(dmat, curaddr)) {
 			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
 			curaddr = add_bounce_page(dmat, map, 0, curaddr,
 			    sgsize);
 		} else {
 			if (map->sync_count > 0)
 				sl_end = VM_PAGE_TO_PHYS(sl->pages) +
 				    sl->dataoffs + sl->datacount;
 
 			if (map->sync_count == 0 || curaddr != sl_end) {
 				if (++map->sync_count > dmat->nsegments)
 					break;
 				sl++;
 				sl->vaddr = 0;
 				sl->datacount = sgsize;
 				sl->pages = PHYS_TO_VM_PAGE(curaddr);
 				sl->dataoffs = curaddr & PAGE_MASK;
 			} else
 				sl->datacount += sgsize;
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		buf += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	if (buflen != 0) {
 		bus_dmamap_unload(dmat, map);
 		return (EFBIG); /* XXX better return value here? */
 	}
 	return (0);
 }
 
 int
 _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp)
 {
 
 	return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags,
 	    segs, segp));
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 int
 _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	bus_size_t sgsize;
 	bus_addr_t curaddr;
 	bus_addr_t sl_pend = 0;
 	struct sync_list *sl;
 	vm_offset_t kvaddr;
 	vm_offset_t vaddr = (vm_offset_t)buf;
 	vm_offset_t sl_vend = 0;
 	int error = 0;
 
 	counter_u64_add(maploads_total, 1);
 	if (map->flags & DMAMAP_COHERENT)
 		counter_u64_add(maploads_coherent, 1);
 	if (map->flags & DMAMAP_DMAMEM_ALLOC)
 		counter_u64_add(maploads_dmamem, 1);
 
 	if (segs == NULL)
 		segs = map->segments;
 	if (flags & BUS_DMA_LOAD_MBUF) {
 		counter_u64_add(maploads_mbuf, 1);
 		map->flags |= DMAMAP_CACHE_ALIGNED;
 	}
 
 	if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			counter_u64_add(maploads_bounced, 1);
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 	CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, "
 	    "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment);
 
 	sl = map->slist + map->sync_count - 1;
 
 	while (buflen > 0) {
 		/*
 		 * Get the physical address for this segment.
 		 */
 		if (__predict_true(pmap == kernel_pmap)) {
 			curaddr = pmap_kextract(vaddr);
 			kvaddr = vaddr;
 		} else {
 			curaddr = pmap_extract(pmap, vaddr);
 			map->flags &= ~DMAMAP_COHERENT;
 			kvaddr = 0;
 		}
 
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 		if (sgsize > dmat->maxsegsz)
 			sgsize = dmat->maxsegsz;
 		if (buflen < sgsize)
 			sgsize = buflen;
 
 		if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 && run_filter(dmat, curaddr)) {
 			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
 			    sgsize);
 		} else {
 			if (map->sync_count > 0) {
 				sl_pend = VM_PAGE_TO_PHYS(sl->pages) +
 				    sl->dataoffs + sl->datacount;
 				sl_vend = sl->vaddr + sl->datacount;
 			}
 
 			if (map->sync_count == 0 ||
 			    (kvaddr != 0 && kvaddr != sl_vend) ||
 			    (kvaddr == 0 && curaddr != sl_pend)) {
 
 				if (++map->sync_count > dmat->nsegments)
 					goto cleanup;
 				sl++;
 				sl->vaddr = kvaddr;
 				sl->datacount = sgsize;
 				sl->pages = PHYS_TO_VM_PAGE(curaddr);
 				sl->dataoffs = curaddr & PAGE_MASK;
 			} else
 				sl->datacount += sgsize;
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		vaddr += sgsize;
 		buflen -= sgsize;
 	}
 
 cleanup:
 	/*
 	 * Did we fit?
 	 */
 	if (buflen != 0) {
 		bus_dmamap_unload(dmat, map);
 		return (EFBIG); /* XXX better return value here? */
 	}
 	return (0);
 }
 
 void
 _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem,
     bus_dmamap_callback_t *callback, void *callback_arg)
 {
 
 	KASSERT(dmat != NULL, ("dmatag is NULL"));
 	KASSERT(map != NULL, ("dmamap is NULL"));
 	map->mem = *mem;
 	map->callback = callback;
 	map->callback_arg = callback_arg;
 }
 
 bus_dma_segment_t *
 _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	if (segs == NULL)
 		segs = map->segments;
 	return (segs);
 }
 
 /*
  * Release the mapping held by map.
  */
 void
 bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bounce_page *bpage;
 	struct bounce_zone *bz;
 
 	if ((bz = dmat->bounce_zone) != NULL) {
 		while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 			STAILQ_REMOVE_HEAD(&map->bpages, links);
 			free_bounce_page(dmat, bpage);
 		}
 
 		bz = dmat->bounce_zone;
 		bz->free_bpages += map->pagesreserved;
 		bz->reserved_bpages -= map->pagesreserved;
 		map->pagesreserved = 0;
 		map->pagesneeded = 0;
 	}
 	map->sync_count = 0;
 	map->flags &= ~DMAMAP_MBUF;
 }
 
 static void
 bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op,
     int bufaligned)
 {
 	char _tmp_cl[arm_dcache_align], _tmp_clend[arm_dcache_align];
 	register_t s;
 	int partial;
 
 	if ((op & BUS_DMASYNC_PREWRITE) && !(op & BUS_DMASYNC_PREREAD)) {
 		cpu_dcache_wb_range(buf, len);
 		cpu_l2cache_wb_range(buf, len);
 	}
 
 	/*
 	 * If the caller promises the buffer is properly aligned to a cache line
 	 * (even if the call parms make it look like it isn't) we can avoid
 	 * attempting to preserve the non-DMA part of the cache line in the
 	 * POSTREAD case, but we MUST still do a writeback in the PREREAD case.
 	 *
 	 * This covers the case of mbufs, where we know how they're aligned and
 	 * know the CPU doesn't touch the header in front of the DMA data area
 	 * during the IO, but it may have touched it right before invoking the
 	 * sync, so a PREREAD writeback is required.
 	 *
 	 * It also handles buffers we created in bus_dmamem_alloc(), which are
 	 * always aligned and padded to cache line size even if the IO length
 	 * isn't a multiple of cache line size.  In this case the PREREAD
 	 * writeback probably isn't required, but it's harmless.
 	 */
 	partial = (((vm_offset_t)buf) | len) & arm_dcache_align_mask;
 
 	if (op & BUS_DMASYNC_PREREAD) {
 		if (!(op & BUS_DMASYNC_PREWRITE) && !partial) {
 			cpu_dcache_inv_range(buf, len);
 			cpu_l2cache_inv_range(buf, len);
 		} else {
 			cpu_dcache_wbinv_range(buf, len);
 			cpu_l2cache_wbinv_range(buf, len);
 		}
 	}
 	if (op & BUS_DMASYNC_POSTREAD) {
 		if (partial && !bufaligned) {
 			s = intr_disable();
 			if (buf & arm_dcache_align_mask)
 				memcpy(_tmp_cl, (void *)(buf &
 				    ~arm_dcache_align_mask),
 				    buf & arm_dcache_align_mask);
 			if ((buf + len) & arm_dcache_align_mask)
 				memcpy(_tmp_clend,
 				    (void *)(buf + len),
 				    arm_dcache_align -
 				    ((buf + len) & arm_dcache_align_mask));
 		}
 		cpu_dcache_inv_range(buf, len);
 		cpu_l2cache_inv_range(buf, len);
 		if (partial && !bufaligned) {
 			if (buf & arm_dcache_align_mask)
 				memcpy((void *)(buf &
 				    ~arm_dcache_align_mask), _tmp_cl,
 				    buf & arm_dcache_align_mask);
 			if ((buf + len) & arm_dcache_align_mask)
 				memcpy((void *)(buf + len),
 				    _tmp_clend, arm_dcache_align -
 				    ((buf + len) & arm_dcache_align_mask));
 			intr_restore(s);
 		}
 	}
 }
 
 static void
 bus_dmamap_sync_sl(struct sync_list *sl, bus_dmasync_op_t op,
     int bufaligned)
 {
 	vm_offset_t tempvaddr;
 	vm_page_t curpage;
 	size_t npages;
 
 	if (sl->vaddr != 0) {
 		bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op, bufaligned);
 		return;
 	}
 
 	tempvaddr = 0;
 	npages = atop(round_page(sl->dataoffs + sl->datacount));
 
 	for (curpage = sl->pages; curpage != sl->pages + npages; ++curpage) {
 		/*
 		 * If the page is mapped to some other VA that hasn't
 		 * been supplied to busdma, then pmap_quick_enter_page()
 		 * will find all duplicate mappings and mark them
 		 * uncacheable.
 		 * That will also do any necessary wb/inv.  Otherwise,
 		 * if the page is truly unmapped, then we don't actually
 		 * need to do cache maintenance.
 		 * XXX: May overwrite DMA'ed data in the POSTREAD
 		 * case where the CPU has written to a cacheline not
 		 * completely covered by the DMA region.
 		 */
 		KASSERT(VM_PAGE_TO_PHYS(curpage) == VM_PAGE_TO_PHYS(sl->pages) +
 		    ptoa(curpage - sl->pages),
 		    ("unexpected vm_page_t phys: 0x%08x != 0x%08x",
 		    VM_PAGE_TO_PHYS(curpage), VM_PAGE_TO_PHYS(sl->pages) +
 		    ptoa(curpage - sl->pages)));
 		tempvaddr = pmap_quick_enter_page(curpage);
 		pmap_quick_remove_page(tempvaddr);
 	}
 }
 
 static void
 _bus_dmamap_sync_bp(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 {
 	struct bounce_page *bpage;
 	vm_offset_t datavaddr, tempvaddr;
 
 	if ((op & (BUS_DMASYNC_PREWRITE | BUS_DMASYNC_POSTREAD)) == 0)
 		return;
 
 	STAILQ_FOREACH(bpage, &map->bpages, links) {
 		tempvaddr = 0;
 		datavaddr = bpage->datavaddr;
 		if (op & BUS_DMASYNC_PREWRITE) {
 			if (datavaddr == 0) {
 				tempvaddr =
 				    pmap_quick_enter_page(bpage->datapage);
 				datavaddr = tempvaddr | bpage->dataoffs;
 			}
 			bcopy((void *)datavaddr,
 			    (void *)bpage->vaddr, bpage->datacount);
 			if (tempvaddr != 0)
 				pmap_quick_remove_page(tempvaddr);
 			cpu_dcache_wb_range(bpage->vaddr, bpage->datacount);
 			cpu_l2cache_wb_range(bpage->vaddr, bpage->datacount);
 			dmat->bounce_zone->total_bounced++;
 		}
 		if (op & BUS_DMASYNC_POSTREAD) {
 			cpu_dcache_inv_range(bpage->vaddr, bpage->datacount);
 			cpu_l2cache_inv_range(bpage->vaddr, bpage->datacount);
 			if (datavaddr == 0) {
 				tempvaddr =
 				    pmap_quick_enter_page(bpage->datapage);
 				datavaddr = tempvaddr | bpage->dataoffs;
 			}
 			bcopy((void *)bpage->vaddr,
 			    (void *)datavaddr, bpage->datacount);
 			if (tempvaddr != 0)
 				pmap_quick_remove_page(tempvaddr);
 			dmat->bounce_zone->total_bounced++;
 		}
 	}
 }
 
 void
 bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 {
 	struct sync_list *sl, *end;
 	int bufaligned;
 
 	if (op == BUS_DMASYNC_POSTWRITE)
 		return;
 	if (map->flags & DMAMAP_COHERENT)
 		goto drain;
 	if (STAILQ_FIRST(&map->bpages))
 		_bus_dmamap_sync_bp(dmat, map, op);
 	CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags);
 	bufaligned = (map->flags & DMAMAP_CACHE_ALIGNED);
 	if (map->sync_count) {
 		end = &map->slist[map->sync_count];
 		for (sl = &map->slist[0]; sl != end; sl++)
 			bus_dmamap_sync_sl(sl, op, bufaligned);
 	}
 
 drain:
 
 	cpu_drain_writebuf();
 }
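 
 /*
  * Illustrative sketch (editorial addition, not part of this file): the sync
  * calls bracket the hardware's access to a loaded buffer.  PRE ops go after
  * the CPU's last write and before the device starts; POST ops go after the
  * device finishes and before the CPU reads.  The foo_* names and the
  * BUSDMA_EXAMPLES guard are hypothetical; the map is assumed to come from an
  * earlier bus_dmamap_create().
  */
 #ifdef BUSDMA_EXAMPLES
 static int
 foo_start_io(bus_dma_tag_t dtag, bus_dmamap_t map, void *buf, bus_size_t len,
     bus_dmamap_callback_t *cb, void *cbarg)
 {
 	int error;
 
 	error = bus_dmamap_load(dtag, map, buf, len, cb, cbarg,
 	    BUS_DMA_NOWAIT);
 	if (error != 0)
 		return (error);
 	/* Push CPU-written data out of the caches before the device looks. */
 	bus_dmamap_sync(dtag, map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	/* ... start the (hypothetical) device here ... */
 	return (0);
 }
 
 static void
 foo_finish_io(bus_dma_tag_t dtag, bus_dmamap_t map)
 {
 
 	/* Invalidate stale cache lines so the CPU sees the DMA'ed data. */
 	bus_dmamap_sync(dtag, map, BUS_DMASYNC_POSTREAD |
 	    BUS_DMASYNC_POSTWRITE);
 	bus_dmamap_unload(dtag, map);
 }
 #endif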
 
 static void
 init_bounce_pages(void *dummy __unused)
 {
 
 	total_bpages = 0;
 	STAILQ_INIT(&bounce_zone_list);
 	STAILQ_INIT(&bounce_map_waitinglist);
 	STAILQ_INIT(&bounce_map_callbacklist);
 	mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
 }
 SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
 
 	return (&bz->sysctl_tree);
 }
 
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
 
 	return (bz->sysctl_tree_top);
 }
 
 static int
 alloc_bounce_zone(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *bz;
 
 	/* Check to see if we already have a suitable zone */
 	STAILQ_FOREACH(bz, &bounce_zone_list, links) {
 		if ((dmat->alignment <= bz->alignment) &&
 		    (dmat->lowaddr >= bz->lowaddr)) {
 			dmat->bounce_zone = bz;
 			return (0);
 		}
 	}
 
 	if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA,
 	    M_NOWAIT | M_ZERO)) == NULL)
 		return (ENOMEM);
 
 	STAILQ_INIT(&bz->bounce_page_list);
 	bz->free_bpages = 0;
 	bz->reserved_bpages = 0;
 	bz->active_bpages = 0;
 	bz->lowaddr = dmat->lowaddr;
 	bz->alignment = MAX(dmat->alignment, PAGE_SIZE);
 	bz->map_count = 0;
 	snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount);
 	busdma_zonecount++;
 	snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr);
 	STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links);
 	dmat->bounce_zone = bz;
 
 	sysctl_ctx_init(&bz->sysctl_tree);
 	bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree,
 	    SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid,
 	    CTLFLAG_RD, 0, "");
 	if (bz->sysctl_tree_top == NULL) {
 		sysctl_ctx_free(&bz->sysctl_tree);
 		return (0);	/* XXX error code? */
 	}
 
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0,
 	    "Total bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0,
 	    "Free bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0,
 	    "Reserved bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0,
 	    "Active bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0,
 	    "Total bounce requests (pages bounced)");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0,
 	    "Total bounce requests that were deferred");
 	SYSCTL_ADD_STRING(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, "");
 	SYSCTL_ADD_ULONG(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "alignment", CTLFLAG_RD, &bz->alignment, "");
 
 	return (0);
 }
 
 static int
 alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
 {
 	struct bounce_zone *bz;
 	int count;
 
 	bz = dmat->bounce_zone;
 	count = 0;
 	while (numpages > 0) {
 		struct bounce_page *bpage;
 
 		bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA,
 		    M_NOWAIT | M_ZERO);
 
 		if (bpage == NULL)
 			break;
 		bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE,
 		    M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0);
 		if (bpage->vaddr == 0) {
 			free(bpage, M_BUSDMA);
 			break;
 		}
 		bpage->busaddr = pmap_kextract(bpage->vaddr);
 		mtx_lock(&bounce_lock);
 		STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links);
 		total_bpages++;
 		bz->total_bpages++;
 		bz->free_bpages++;
 		mtx_unlock(&bounce_lock);
 		count++;
 		numpages--;
 	}
 	return (count);
 }
 
 static int
 reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
 {
 	struct bounce_zone *bz;
 	int pages;
 
 	mtx_assert(&bounce_lock, MA_OWNED);
 	bz = dmat->bounce_zone;
 	pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved);
 	if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages))
 		return (map->pagesneeded - (map->pagesreserved + pages));
 	bz->free_bpages -= pages;
 	bz->reserved_bpages += pages;
 	map->pagesreserved += pages;
 	pages = map->pagesneeded - map->pagesreserved;
 
 	return (pages);
 }
 
 static bus_addr_t
 add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
     bus_addr_t addr, bus_size_t size)
 {
 	struct bounce_zone *bz;
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
 	KASSERT(map != NULL, ("add_bounce_page: bad map %p", map));
 
 	bz = dmat->bounce_zone;
 	if (map->pagesneeded == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesneeded--;
 
 	if (map->pagesreserved == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesreserved--;
 
 	mtx_lock(&bounce_lock);
 	bpage = STAILQ_FIRST(&bz->bounce_page_list);
 	if (bpage == NULL)
 		panic("add_bounce_page: free page list is empty");
 
 	STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
 	bz->reserved_bpages--;
 	bz->active_bpages++;
 	mtx_unlock(&bounce_lock);
 
 	if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/* Page offset needs to be preserved. */
 		bpage->vaddr |= addr & PAGE_MASK;
 		bpage->busaddr |= addr & PAGE_MASK;
 	}
 	bpage->datavaddr = vaddr;
 	bpage->datapage = PHYS_TO_VM_PAGE(addr);
 	bpage->dataoffs = addr & PAGE_MASK;
 	bpage->datacount = size;
 	STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
 	return (bpage->busaddr);
 }
 
 static void
 free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
 {
 	struct bus_dmamap *map;
 	struct bounce_zone *bz;
 
 	bz = dmat->bounce_zone;
 	bpage->datavaddr = 0;
 	bpage->datacount = 0;
 	if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * Reset the bounce page to start at offset 0.  Other uses
 		 * of this bounce page may need to store a full page of
 		 * data and/or assume it starts on a page boundary.
 		 */
 		bpage->vaddr &= ~PAGE_MASK;
 		bpage->busaddr &= ~PAGE_MASK;
 	}
 
 	mtx_lock(&bounce_lock);
 	STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links);
 	bz->free_bpages++;
 	bz->active_bpages--;
 	if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
 		if (reserve_bounce_pages(map->dmat, map, 1) == 0) {
 			STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
 			STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
 			    map, links);
 			busdma_swi_pending = 1;
 			bz->total_deferred++;
 			swi_sched(vm_ih, 0);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 void
 busdma_swi(void)
 {
 	bus_dma_tag_t dmat;
 	struct bus_dmamap *map;
 
 	mtx_lock(&bounce_lock);
 	while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
 		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
 		mtx_unlock(&bounce_lock);
 		dmat = map->dmat;
 		dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_LOCK);
 		bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback,
 		    map->callback_arg, BUS_DMA_WAITOK);
 		dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_UNLOCK);
 		mtx_lock(&bounce_lock);
 	}
 	mtx_unlock(&bounce_lock);
 }
Index: head/sys/arm/arm/busdma_machdep-v6.c
===================================================================
--- head/sys/arm/arm/busdma_machdep-v6.c	(revision 338317)
+++ head/sys/arm/arm/busdma_machdep-v6.c	(revision 338318)
@@ -1,1719 +1,1719 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012-2015 Ian Lepore
  * Copyright (c) 2010 Mark Tinguely
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 2002 Peter Grehan
  * Copyright (c) 1997, 1998 Justin T. Gibbs.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *  From i386/busdma_machdep.c 191438 2009-04-23 20:24:19Z jhb
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/busdma_bufalloc.h>
 #include <sys/counter.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/memdesc.h>
 #include <sys/proc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 
 #define	BUSDMA_DCACHE_ALIGN	cpuinfo.dcache_line_size
 #define	BUSDMA_DCACHE_MASK	cpuinfo.dcache_line_mask
 
 #define	MAX_BPAGES		64
 #define	MAX_DMA_SEGMENTS	4096
 #define	BUS_DMA_EXCL_BOUNCE	BUS_DMA_BUS2
 #define	BUS_DMA_ALIGN_BOUNCE	BUS_DMA_BUS3
 #define	BUS_DMA_COULD_BOUNCE	(BUS_DMA_EXCL_BOUNCE | BUS_DMA_ALIGN_BOUNCE)
 #define	BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4
 
 struct bounce_zone;
 
 struct bus_dma_tag {
 	bus_dma_tag_t		parent;
 	bus_size_t		alignment;
 	bus_addr_t		boundary;
 	bus_addr_t		lowaddr;
 	bus_addr_t		highaddr;
 	bus_dma_filter_t	*filter;
 	void			*filterarg;
 	bus_size_t		maxsize;
 	u_int			nsegments;
 	bus_size_t		maxsegsz;
 	int			flags;
 	int			ref_count;
 	int			map_count;
 	bus_dma_lock_t		*lockfunc;
 	void			*lockfuncarg;
 	struct bounce_zone	*bounce_zone;
 };
 
 struct bounce_page {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	bus_addr_t	busaddr;	/* Physical address */
 	vm_offset_t	datavaddr;	/* kva of client data */
 	vm_page_t	datapage;	/* physical page of client data */
 	vm_offset_t	dataoffs;	/* page offset of client data */
 	bus_size_t	datacount;	/* client data count */
 	STAILQ_ENTRY(bounce_page) links;
 };
 
 struct sync_list {
 	vm_offset_t	vaddr;		/* kva of client data */
 	bus_addr_t	paddr;		/* physical address */
 	vm_page_t	pages;		/* starting page of client data */
 	bus_size_t	datacount;	/* client data count */
 };
 
 int busdma_swi_pending;
 
 struct bounce_zone {
 	STAILQ_ENTRY(bounce_zone) links;
 	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
 	int		total_bpages;
 	int		free_bpages;
 	int		reserved_bpages;
 	int		active_bpages;
 	int		total_bounced;
 	int		total_deferred;
 	int		map_count;
 	bus_size_t	alignment;
 	bus_addr_t	lowaddr;
 	char		zoneid[8];
 	char		lowaddrid[20];
 	struct sysctl_ctx_list sysctl_tree;
 	struct sysctl_oid *sysctl_tree_top;
 };
 
 static struct mtx bounce_lock;
 static int total_bpages;
 static int busdma_zonecount;
 static uint32_t tags_total;
 static uint32_t maps_total;
 static uint32_t maps_dmamem;
 static uint32_t maps_coherent;
 static counter_u64_t maploads_total;
 static counter_u64_t maploads_bounced;
 static counter_u64_t maploads_coherent;
 static counter_u64_t maploads_dmamem;
 static counter_u64_t maploads_mbuf;
 static counter_u64_t maploads_physmem;
 
 static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
 
 SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0,
    "Number of active tags");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0,
    "Number of active maps");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0,
    "Number of active maps for bus_dmamem_alloc buffers");
 SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0,
    "Number of active maps with BUS_DMA_COHERENT flag set");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD,
     &maploads_total, "Number of load operations performed");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD,
     &maploads_bounced, "Number of load operations that used bounce buffers");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD,
     &maploads_dmamem, "Number of load operations on BUS_DMA_COHERENT memory");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD,
     &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD,
     &maploads_mbuf, "Number of load operations for mbufs");
 SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD,
     &maploads_physmem, "Number of load operations on physical buffers");
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
    "Total bounce pages");
 
 struct bus_dmamap {
 	struct bp_list		bpages;
 	int			pagesneeded;
 	int			pagesreserved;
 	bus_dma_tag_t		dmat;
 	struct memdesc		mem;
 	bus_dmamap_callback_t	*callback;
 	void			*callback_arg;
 	int			flags;
 #define	DMAMAP_COHERENT		(1 << 0)
 #define	DMAMAP_DMAMEM_ALLOC	(1 << 1)
 #define	DMAMAP_MBUF		(1 << 2)
 	STAILQ_ENTRY(bus_dmamap) links;
 	bus_dma_segment_t	*segments;
 	int			sync_count;
 	struct sync_list	slist[];
 };
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
 static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
 static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int commit);
 static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_offset_t vaddr, bus_addr_t addr, bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap,
     bus_dmamap_t map, void *buf, bus_size_t buflen, int flags);
 static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags);
 static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int flags);
 static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size);
 static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op);
 
 static busdma_bufalloc_t coherent_allocator;	/* Cache of coherent buffers */
 static busdma_bufalloc_t standard_allocator;	/* Cache of standard buffers */
 
 MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");
 MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages");
 
 static void
 busdma_init(void *dummy)
 {
 	int uma_flags;
 
 	maploads_total    = counter_u64_alloc(M_WAITOK);
 	maploads_bounced  = counter_u64_alloc(M_WAITOK);
 	maploads_coherent = counter_u64_alloc(M_WAITOK);
 	maploads_dmamem   = counter_u64_alloc(M_WAITOK);
 	maploads_mbuf     = counter_u64_alloc(M_WAITOK);
 	maploads_physmem  = counter_u64_alloc(M_WAITOK);
 
 	uma_flags = 0;
 
 	/* Create a cache of buffers in standard (cacheable) memory. */
 	standard_allocator = busdma_bufalloc_create("buffer",
 	    BUSDMA_DCACHE_ALIGN,/* minimum_alignment */
 	    NULL,		/* uma_alloc func */
 	    NULL,		/* uma_free func */
 	    uma_flags);		/* uma_zcreate_flags */
 
 #ifdef INVARIANTS
 	/*
 	 * Force the UMA zone to allocate service structures like slabs
 	 * using its own allocator.  The uma_debug code performs atomic
 	 * ops on uma_slab_t fields, and the safety of those operations
 	 * is not guaranteed for write-back caches.
 	 */
 	uma_flags = UMA_ZONE_OFFPAGE;
 #endif
 	/*
 	 * Create a cache of buffers in uncacheable memory, to implement the
 	 * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag.
 	 */
 	coherent_allocator = busdma_bufalloc_create("coherent",
 	    BUSDMA_DCACHE_ALIGN,/* minimum_alignment */
 	    busdma_bufalloc_alloc_uncacheable,
 	    busdma_bufalloc_free_uncacheable,
 	    uma_flags);	/* uma_zcreate_flags */
 }
 
 /*
  * This init historically used SI_SUB_VM, but now the init code requires
  * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get
  * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by
  * using SI_SUB_KMEM+1.
  */
 SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL);
 
 /*
  * This routine checks the exclusion zone constraints from a tag against the
  * physical RAM available on the machine.  If a tag specifies an exclusion zone
  * but there's no RAM in that zone, then we avoid allocating resources to bounce
  * a request, and we can use any memory allocator (as opposed to needing
  * kmem_alloc_contig() just because it can allocate pages in an address range).
  *
  * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the
  * same value on 32-bit architectures) as their lowaddr constraint, and we can't
  * possibly have RAM at an address higher than the highest address we can
  * express, so we take a fast out.
  */
 static int
 exclusion_bounce_check(vm_offset_t lowaddr, vm_offset_t highaddr)
 {
 	int i;
 
 	if (lowaddr >= BUS_SPACE_MAXADDR)
 		return (0);
 
 	for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) {
 		if ((lowaddr >= phys_avail[i] && lowaddr < phys_avail[i + 1]) ||
 		    (lowaddr < phys_avail[i] && highaddr >= phys_avail[i]))
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Return true if the tag has an exclusion zone that could lead to bouncing.
  */
 static __inline int
 exclusion_bounce(bus_dma_tag_t dmat)
 {
 
 	return (dmat->flags & BUS_DMA_EXCL_BOUNCE);
 }
 
 /*
  * Return true if the given address does not fall on the alignment boundary.
  */
 static __inline int
 alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr)
 {
 
 	return (addr & (dmat->alignment - 1));
 }
 
 /*
  * Return true if the DMA should bounce because the start or end does not fall
  * on a cacheline boundary (which would require a partial cacheline flush).
  * COHERENT memory doesn't trigger cacheline flushes.  Memory allocated by
  * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a
  * strict rule that such memory cannot be accessed by the CPU while DMA is in
  * progress (or by multiple DMA engines at once), so that it's always safe to do
  * full cacheline flushes even if that affects memory outside the range of a
  * given DMA operation that doesn't involve the full allocated buffer.  If we're
  * mapping an mbuf, that follows the same rules as a buffer we allocated.
  */
 static __inline int
 cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size)
 {
 
 	if (map->flags & (DMAMAP_DMAMEM_ALLOC | DMAMAP_COHERENT | DMAMAP_MBUF))
 		return (0);
 	return ((addr | size) & BUSDMA_DCACHE_MASK);
 }
 
 /*
  * Return true if we might need to bounce the DMA described by addr and size.
  *
  * This is used to quick-check whether we need to do the more expensive work of
  * checking the DMA page-by-page looking for alignment and exclusion bounces.
  *
  * Note that the addr argument might be either virtual or physical.  It doesn't
  * matter because we only look at the low-order bits, which are the same in both
  * address spaces.
  */
 static __inline int
 might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr,
     bus_size_t size)
 {
 
 	return ((dmat->flags & BUS_DMA_EXCL_BOUNCE) ||
 	    alignment_bounce(dmat, addr) ||
 	    cacheline_bounce(map, addr, size));
 }
 
 /*
  * Return true if we must bounce the DMA described by paddr and size.
  *
  * Bouncing can be triggered by DMA that doesn't begin and end on cacheline
  * boundaries, or doesn't begin on an alignment boundary, or falls within the
  * exclusion zone of any tag in the ancestry chain.
  *
  * For exclusions, walk the chain of tags comparing paddr to the exclusion zone
  * within each tag.  If the tag has a filter function, use it to decide whether
  * the DMA needs to bounce, otherwise any DMA within the zone bounces.
  */
 static int
 must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
     bus_size_t size)
 {
 
 	if (cacheline_bounce(map, paddr, size))
 		return (1);
 
 	/*
 	 *  The tag already contains ancestors' alignment restrictions so this
 	 *  check doesn't need to be inside the loop.
 	 */
 	if (alignment_bounce(dmat, paddr))
 		return (1);
 
 	/*
 	 * Even though each tag has an exclusion zone that is a superset of its
 	 * own and all its ancestors' exclusions, the exclusion zone of each tag
 	 * up the chain must be checked within the loop, because the busdma
 	 * rules say the filter function is called only when the address lies
 	 * within the low-highaddr range of the tag that filterfunc belongs to.
 	 */
 	while (dmat != NULL && exclusion_bounce(dmat)) {
 		if ((paddr >= dmat->lowaddr && paddr <= dmat->highaddr) &&
 		    (dmat->filter == NULL ||
 		    dmat->filter(dmat->filterarg, paddr) != 0))
 			return (1);
 		dmat = dmat->parent;
 	}
 
 	return (0);
 }
 
 /*
  * Convenience function for manipulating driver locks from busdma (during
  * busdma_swi, for example).  Drivers that don't provide their own locks
  * should pass &Giant as dmat->lockfuncarg.  Drivers that use their own
  * non-mutex locking scheme don't have to use this at all.
  */
 void
 busdma_lock_mutex(void *arg, bus_dma_lock_op_t op)
 {
 	struct mtx *dmtx;
 
 	dmtx = (struct mtx *)arg;
 	switch (op) {
 	case BUS_DMA_LOCK:
 		mtx_lock(dmtx);
 		break;
 	case BUS_DMA_UNLOCK:
 		mtx_unlock(dmtx);
 		break;
 	default:
 		panic("Unknown operation 0x%x for busdma_lock_mutex!", op);
 	}
 }
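
A minimal sketch of the convention described above, assuming the usual newbus
and busdma environment (<sys/param.h>, <sys/bus.h>, <sys/mutex.h>,
<machine/bus.h>): a hypothetical driver passes busdma_lock_mutex together with
its own mutex (or &Giant) as the lockfunc/lockfuncarg arguments of
bus_dma_tag_create() (shown below in this file), so that deferred load
callbacks issued from busdma_swi() run with that lock held.  The function name
my_attach_dma and the single-segment MCLBYTES constraints are illustrative
only, not part of this change.

	/* Hypothetical attach-path helper; names and constraints are examples. */
	static int
	my_attach_dma(device_t dev, struct mtx *lockp, bus_dma_tag_t *dtagp)
	{

		/*
		 * Pass busdma_lock_mutex and the driver mutex (or &Giant) as
		 * lockfunc/lockfuncarg so deferred callbacks run with it held.
		 */
		return (bus_dma_tag_create(
		    bus_get_dma_tag(dev),	/* parent */
		    1, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    MCLBYTES, 1, MCLBYTES,	/* maxsize, nsegments, maxsegsz */
		    0,				/* flags */
		    busdma_lock_mutex, lockp,	/* lockfunc, lockfuncarg */
		    dtagp));
	}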
 
 /*
  * dflt_lock should never get called.  It gets put into the dma tag when
  * lockfunc == NULL, which is only valid if the maps that are associated
  * with the tag are meant to never be deferred.
  * XXX Should have a way to identify which driver is responsible here.
  */
 static void
 dflt_lock(void *arg, bus_dma_lock_op_t op)
 {
 
 	panic("driver error: busdma dflt_lock called");
 }
 
 /*
  * Allocate a device specific dma_tag.
  */
 int
 bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	bus_dma_tag_t newtag;
 	int error = 0;
 
 	/* Basic sanity checking. */
 	KASSERT(boundary == 0 || powerof2(boundary),
 	    ("dma tag boundary %lu, must be a power of 2", boundary));
 	KASSERT(boundary == 0 || boundary >= maxsegsz,
 	    ("dma tag boundary %lu is < maxsegsz %lu\n", boundary, maxsegsz));
 	KASSERT(alignment != 0 && powerof2(alignment),
 	    ("dma tag alignment %lu, must be non-zero power of 2", alignment));
 	KASSERT(maxsegsz != 0, ("dma tag maxsegsz must not be zero"));
 
 	/* Return a NULL tag on failure */
 	*dmat = NULL;
 
 	newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA,
 	    M_ZERO | M_NOWAIT);
 	if (newtag == NULL) {
 		CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 		    __func__, newtag, 0, error);
 		return (ENOMEM);
 	}
 
 	newtag->parent = parent;
 	newtag->alignment = alignment;
 	newtag->boundary = boundary;
 	newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1);
 	newtag->highaddr = trunc_page((vm_paddr_t)highaddr) +
 	    (PAGE_SIZE - 1);
 	newtag->filter = filter;
 	newtag->filterarg = filterarg;
 	newtag->maxsize = maxsize;
 	newtag->nsegments = nsegments;
 	newtag->maxsegsz = maxsegsz;
 	newtag->flags = flags;
 	newtag->ref_count = 1; /* Count ourself */
 	newtag->map_count = 0;
 	if (lockfunc != NULL) {
 		newtag->lockfunc = lockfunc;
 		newtag->lockfuncarg = lockfuncarg;
 	} else {
 		newtag->lockfunc = dflt_lock;
 		newtag->lockfuncarg = NULL;
 	}
 
 	/* Take into account any restrictions imposed by our parent tag */
 	if (parent != NULL) {
 		newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr);
 		newtag->highaddr = MAX(parent->highaddr, newtag->highaddr);
 		newtag->alignment = MAX(parent->alignment, newtag->alignment);
 		newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE;
 		newtag->flags |= parent->flags & BUS_DMA_COHERENT;
 		if (newtag->boundary == 0)
 			newtag->boundary = parent->boundary;
 		else if (parent->boundary != 0)
 			newtag->boundary = MIN(parent->boundary,
 					       newtag->boundary);
 		if (newtag->filter == NULL) {
 			/*
 			 * Short circuit to looking at our parent directly
 			 * since we have encapsulated all of its information
 			 */
 			newtag->filter = parent->filter;
 			newtag->filterarg = parent->filterarg;
 			newtag->parent = parent->parent;
 		}
 		if (newtag->parent != NULL)
 			atomic_add_int(&parent->ref_count, 1);
 	}
 
 	if (exclusion_bounce_check(newtag->lowaddr, newtag->highaddr))
 		newtag->flags |= BUS_DMA_EXCL_BOUNCE;
 	if (alignment_bounce(newtag, 1))
 		newtag->flags |= BUS_DMA_ALIGN_BOUNCE;
 
 	/*
 	 * Any request can auto-bounce due to cacheline alignment, in addition
 	 * to any alignment or boundary specifications in the tag, so if the
 	 * ALLOCNOW flag is set, there's always work to do.
 	 */
 	if ((flags & BUS_DMA_ALLOCNOW) != 0) {
 		struct bounce_zone *bz;
 		/*
 		 * Round size up to a full page, and add one more page because
 		 * there can always be one more boundary crossing than the
 		 * number of pages in a transfer.
 		 */
 		maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE;
 
 		if ((error = alloc_bounce_zone(newtag)) != 0) {
 			free(newtag, M_BUSDMA);
 			return (error);
 		}
 		bz = newtag->bounce_zone;
 
 		if (ptoa(bz->total_bpages) < maxsize) {
 			int pages;
 
 			pages = atop(maxsize) - bz->total_bpages;
 
 			/* Add pages to our bounce pool */
 			if (alloc_bounce_pages(newtag, pages) < pages)
 				error = ENOMEM;
 		}
 		/* Performed initial allocation */
 		newtag->flags |= BUS_DMA_MIN_ALLOC_COMP;
 	} else
 		newtag->bounce_zone = NULL;
 
 	if (error != 0) {
 		free(newtag, M_BUSDMA);
 	} else {
 		atomic_add_32(&tags_total, 1);
 		*dmat = newtag;
 	}
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->flags : 0), error);
 	return (error);
 }
 
 int
 bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
 {
 
 	return (0);
 }
 
 int
 bus_dma_tag_destroy(bus_dma_tag_t dmat)
 {
 	bus_dma_tag_t dmat_copy;
 	int error;
 
 	error = 0;
 	dmat_copy = dmat;
 
 	if (dmat != NULL) {
 
 		if (dmat->map_count != 0) {
 			error = EBUSY;
 			goto out;
 		}
 
 		while (dmat != NULL) {
 			bus_dma_tag_t parent;
 
 			parent = dmat->parent;
 			atomic_subtract_int(&dmat->ref_count, 1);
 			if (dmat->ref_count == 0) {
 				atomic_subtract_32(&tags_total, 1);
 				free(dmat, M_BUSDMA);
 				/*
 				 * Last reference count, so
 				 * release our reference
 				 * count on our parent.
 				 */
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 out:
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
 	return (error);
 }
 
 static int
 allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp)
 {
 	struct bounce_zone *bz;
 	int maxpages;
 	int error;
 
 	if (dmat->bounce_zone == NULL)
 		if ((error = alloc_bounce_zone(dmat)) != 0)
 			return (error);
 	bz = dmat->bounce_zone;
 	/* Initialize the new map */
 	STAILQ_INIT(&(mapp->bpages));
 
 	/*
 	 * Attempt to add pages to our pool on a per-instance basis up to a sane
 	 * limit.  Even if the tag isn't flagged as COULD_BOUNCE due to
 	 * alignment and boundary constraints, it could still auto-bounce due to
 	 * cacheline alignment, which requires at most two bounce pages.
 	 */
 	if (dmat->flags & BUS_DMA_COULD_BOUNCE)
 		maxpages = MAX_BPAGES;
 	else
 		maxpages = 2 * bz->map_count;
 	if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 ||
 	    (bz->map_count > 0 && bz->total_bpages < maxpages)) {
 		int pages;
 
 		pages = atop(roundup2(dmat->maxsize, PAGE_SIZE)) + 1;
 		pages = MIN(maxpages - bz->total_bpages, pages);
 		pages = MAX(pages, 2);
 		if (alloc_bounce_pages(dmat, pages) < pages)
 			return (ENOMEM);
 
 		if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0)
 			dmat->flags |= BUS_DMA_MIN_ALLOC_COMP;
 	}
 	bz->map_count++;
 	return (0);
 }
 
 static bus_dmamap_t
 allocate_map(bus_dma_tag_t dmat, int mflags)
 {
 	int mapsize, segsize;
 	bus_dmamap_t map;
 
 	/*
 	 * Allocate the map.  The map structure ends with an embedded
 	 * variable-sized array of sync_list structures.  Following that
 	 * we allocate enough extra space to hold the array of bus_dma_segments.
 	 */
 	KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS,
 	   ("cannot allocate %u dma segments (max is %u)",
 	    dmat->nsegments, MAX_DMA_SEGMENTS));
 	segsize = sizeof(struct bus_dma_segment) * dmat->nsegments;
 	mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments;
 	map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO);
 	if (map == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
 		return (NULL);
 	}
 	map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize);
 	return (map);
 }
 
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 int
 bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	bus_dmamap_t map;
 	int error = 0;
 
 	*mapp = map = allocate_map(dmat, M_NOWAIT);
 	if (map == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
 		return (ENOMEM);
 	}
 
 	/*
 	 * Bouncing might be required if the driver asks for an exclusion
 	 * region, a data alignment that is stricter than 1, or DMA that begins
 	 * or ends with a partial cacheline.  Whether bouncing will actually
 	 * happen can't be known until mapping time, but we need to pre-allocate
 	 * resources now because we might not be allowed to at mapping time.
 	 */
 	error = allocate_bz_and_pages(dmat, map);
 	if (error != 0) {
 		free(map, M_BUSDMA);
 		*mapp = NULL;
 		return (error);
 	}
 	if (map->flags & DMAMAP_COHERENT)
 		atomic_add_32(&maps_coherent, 1);
 	atomic_add_32(&maps_total, 1);
 	dmat->map_count++;
 
 	return (0);
 }
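
To illustrate why bounce resources are pre-allocated here, a hedged sketch of
the usual create-then-load pattern follows; the my_* names are hypothetical and
not part of this change.  With BUS_DMA_WAITOK the load may instead return
EINPROGRESS and complete later through the busdma_swi() path near the end of
this file, with the tag's lockfunc held.

	/* Hypothetical callback; records the single segment's bus address. */
	static void
	my_ring_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
	{
		bus_addr_t *busaddrp = arg;

		if (error == 0 && nseg == 1)
			*busaddrp = segs[0].ds_addr;
	}

	static int
	my_create_and_load(bus_dma_tag_t dtag, void *buf, bus_size_t buflen,
	    bus_dmamap_t *mapp, bus_addr_t *busaddrp)
	{
		int error;

		error = bus_dmamap_create(dtag, 0, mapp);
		if (error != 0)
			return (error);
		/*
		 * Bounce pages were reserved at map-creation time above, so
		 * this load can proceed even when sleeping is not allowed.
		 */
		error = bus_dmamap_load(dtag, *mapp, buf, buflen, my_ring_cb,
		    busaddrp, BUS_DMA_NOWAIT);
		return (error);
	}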
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 int
 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 
 	if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 		    __func__, dmat, EBUSY);
 		return (EBUSY);
 	}
 	if (dmat->bounce_zone)
 		dmat->bounce_zone->map_count--;
 	if (map->flags & DMAMAP_COHERENT)
 		atomic_subtract_32(&maps_coherent, 1);
 	atomic_subtract_32(&maps_total, 1);
 	free(map, M_BUSDMA);
 	dmat->map_count--;
 	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
 	return (0);
 }
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into bus device
  * space based on the constraints listed in the dma tag.  Returns a pointer to
  * the allocated memory, and a pointer to an associated bus_dmamap.
  */
 int
 bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	busdma_bufalloc_t ba;
 	struct busdma_bufzone *bufzone;
 	bus_dmamap_t map;
 	vm_memattr_t memattr;
 	int mflags;
 
 	if (flags & BUS_DMA_NOWAIT)
 		mflags = M_NOWAIT;
 	else
 		mflags = M_WAITOK;
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
 
 	*mapp = map = allocate_map(dmat, mflags);
 	if (map == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->flags, ENOMEM);
 		return (ENOMEM);
 	}
 	map->flags = DMAMAP_DMAMEM_ALLOC;
 
 	/* For coherent memory, set the map flag that disables sync ops. */
 	if (flags & BUS_DMA_COHERENT)
 		map->flags |= DMAMAP_COHERENT;
 
 	/*
 	 * Choose a busdma buffer allocator based on memory type flags.
 	 * If the tag's COHERENT flag is set, that means normal memory
 	 * is already coherent, so use the normal allocator.
 	 */
 	if ((flags & BUS_DMA_COHERENT) &&
 	    ((dmat->flags & BUS_DMA_COHERENT) == 0)) {
 		memattr = VM_MEMATTR_UNCACHEABLE;
 		ba = coherent_allocator;
 	} else {
 		memattr = VM_MEMATTR_DEFAULT;
 		ba = standard_allocator;
 	}
 
 	/*
 	 * Try to find a bufzone in the allocator that holds a cache of buffers
 	 * of the right size for this request.  If the buffer is too big to be
 	 * held in the allocator cache, this returns NULL.
 	 */
 	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
 
 	/*
 	 * Allocate the buffer from the uma(9) allocator if...
 	 *  - It's small enough to be in the allocator (bufzone not NULL).
 	 *  - The alignment constraint isn't larger than the allocation size
 	 *    (the allocator aligns buffers to their size boundaries).
 	 *  - There's no need to handle lowaddr/highaddr exclusion zones.
 	 * else allocate non-contiguous pages if...
 	 *  - The number of pages that could be allocated doesn't exceed
 	 *    nsegments, even when the maximum segment size is less than
 	 *    PAGE_SIZE.
 	 *  - The alignment constraint isn't larger than a page boundary.
 	 *  - There are no boundary-crossing constraints.
 	 * else allocate a block of contiguous pages because one or more of the
 	 * constraints is something that only the contig allocator can fulfill.
 	 */
 	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
 	    !exclusion_bounce(dmat)) {
 		*vaddr = uma_zalloc(bufzone->umazone, mflags);
 	} else if (dmat->nsegments >=
 	    howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) &&
 	    dmat->alignment <= PAGE_SIZE &&
 	    (dmat->boundary % PAGE_SIZE) == 0) {
 		*vaddr = (void *)kmem_alloc_attr(dmat->maxsize, mflags, 0,
 		    dmat->lowaddr, memattr);
 	} else {
 		*vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0,
 		    dmat->lowaddr, dmat->alignment, dmat->boundary, memattr);
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->flags, ENOMEM);
 		free(map, M_BUSDMA);
 		*mapp = NULL;
 		return (ENOMEM);
 	}
 	if (map->flags & DMAMAP_COHERENT)
 		atomic_add_32(&maps_coherent, 1);
 	atomic_add_32(&maps_dmamem, 1);
 	atomic_add_32(&maps_total, 1);
 	dmat->map_count++;
 
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->flags, 0);
 	return (0);
 }
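
A hedged sketch of the bus_dmamem_alloc()/bus_dmamem_free() lifecycle this
function implements, assuming a tag created as shown earlier; the my_* helper
names, the COHERENT/ZERO flag choice, and the elided load step are illustrative
only.

	static int
	my_alloc_ring(bus_dma_tag_t dtag, void **ringp, bus_dmamap_t *mapp)
	{
		int error;

		error = bus_dmamem_alloc(dtag, ringp,
		    BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, mapp);
		if (error != 0)
			return (error);
		/* ... bus_dmamap_load() the ring and program the device ... */
		return (0);
	}

	static void
	my_free_ring(bus_dma_tag_t dtag, void *ring, bus_dmamap_t map)
	{

		/* Unload first, then free both the memory and its map. */
		bus_dmamap_unload(dtag, map);
		bus_dmamem_free(dtag, ring, map);
	}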
 
 /*
  * Free a piece of memory that was allocated via bus_dmamem_alloc, along with
  * its associated map.
  */
 void
 bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 	struct busdma_bufzone *bufzone;
 	busdma_bufalloc_t ba;
 
 	if ((map->flags & DMAMAP_COHERENT) &&
 	    ((dmat->flags & BUS_DMA_COHERENT) == 0))
 		ba = coherent_allocator;
 	else
 		ba = standard_allocator;
 
 	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
 
 	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
 	    !exclusion_bounce(dmat))
 		uma_zfree(bufzone->umazone, vaddr);
 	else
-		kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize);
+		kmem_free((vm_offset_t)vaddr, dmat->maxsize);
 
 	dmat->map_count--;
 	if (map->flags & DMAMAP_COHERENT)
 		atomic_subtract_32(&maps_coherent, 1);
 	atomic_subtract_32(&maps_total, 1);
 	atomic_subtract_32(&maps_dmamem, 1);
 	free(map, M_BUSDMA);
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
 }
 
 static void
 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags)
 {
 	bus_addr_t curaddr;
 	bus_size_t sgsize;
 
 	if (map->pagesneeded == 0) {
 		CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
 		    " map= %p, pagesneeded= %d",
 		    dmat->lowaddr, dmat->boundary, dmat->alignment,
 		    map, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		curaddr = buf;
 		while (buflen != 0) {
 			sgsize = MIN(buflen, dmat->maxsegsz);
 			if (must_bounce(dmat, map, curaddr, sgsize) != 0) {
 				sgsize = MIN(sgsize,
 				    PAGE_SIZE - (curaddr & PAGE_MASK));
 				map->pagesneeded++;
 			}
 			curaddr += sgsize;
 			buflen -= sgsize;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map,
     void *buf, bus_size_t buflen, int flags)
 {
 	vm_offset_t vaddr;
 	vm_offset_t vendaddr;
 	bus_addr_t paddr;
 
 	if (map->pagesneeded == 0) {
 		CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d"
 		    " map= %p, pagesneeded= %d",
 		    dmat->lowaddr, dmat->boundary, dmat->alignment,
 		    map, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		vaddr = (vm_offset_t)buf;
 		vendaddr = (vm_offset_t)buf + buflen;
 
 		while (vaddr < vendaddr) {
 			if (__predict_true(pmap == kernel_pmap))
 				paddr = pmap_kextract(vaddr);
 			else
 				paddr = pmap_extract(pmap, vaddr);
 			if (must_bounce(dmat, map, paddr,
 			    min(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr &
 			    PAGE_MASK)))) != 0) {
 				map->pagesneeded++;
 			}
 			vaddr += (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK));
 
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded);
 	}
 }
 
 static int
 _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
 {
 
 	/* Reserve Necessary Bounce Pages */
 	mtx_lock(&bounce_lock);
 	if (flags & BUS_DMA_NOWAIT) {
 		if (reserve_bounce_pages(dmat, map, 0) != 0) {
 			map->pagesneeded = 0;
 			mtx_unlock(&bounce_lock);
 			return (ENOMEM);
 		}
 	} else {
 		if (reserve_bounce_pages(dmat, map, 1) != 0) {
 			/* Queue us for resources */
 			STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links);
 			mtx_unlock(&bounce_lock);
 			return (EINPROGRESS);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 
 	return (0);
 }
 
 /*
  * Add a single contiguous physical range to the segment list.
  */
 static int
 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
     bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t baddr, bmask;
 	int seg;
 
 	/*
 	 * Make sure we don't cross any boundaries.
 	 */
 	bmask = ~(dmat->boundary - 1);
 	if (dmat->boundary > 0) {
 		baddr = (curaddr + dmat->boundary) & bmask;
 		if (sgsize > (baddr - curaddr))
 			sgsize = (baddr - curaddr);
 	}
 
 	/*
 	 * Insert chunk into a segment, coalescing with
 	 * previous segment if possible.
 	 */
 	seg = *segp;
 	if (seg == -1) {
 		seg = 0;
 		segs[seg].ds_addr = curaddr;
 		segs[seg].ds_len = sgsize;
 	} else {
 		if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
 		    (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
 		    (dmat->boundary == 0 ||
 		    (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
 			segs[seg].ds_len += sgsize;
 		else {
 			if (++seg >= dmat->nsegments)
 				return (0);
 			segs[seg].ds_addr = curaddr;
 			segs[seg].ds_len = sgsize;
 		}
 	}
 	*segp = seg;
 	return (sgsize);
 }
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 int
 _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t curaddr;
 	bus_addr_t sl_end = 0;
 	bus_size_t sgsize;
 	struct sync_list *sl;
 	int error;
 
 	if (segs == NULL)
 		segs = map->segments;
 
 	counter_u64_add(maploads_total, 1);
 	counter_u64_add(maploads_physmem, 1);
 
 	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
 		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			counter_u64_add(maploads_bounced, 1);
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	sl = map->slist + map->sync_count - 1;
 
 	while (buflen > 0) {
 		curaddr = buf;
 		sgsize = MIN(buflen, dmat->maxsegsz);
 		if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
 		    sgsize)) {
 			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
 			curaddr = add_bounce_page(dmat, map, 0, curaddr,
 			    sgsize);
 		} else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
 			if (map->sync_count > 0)
 				sl_end = sl->paddr + sl->datacount;
 
 			if (map->sync_count == 0 || curaddr != sl_end) {
 				if (++map->sync_count > dmat->nsegments)
 					break;
 				sl++;
 				sl->vaddr = 0;
 				sl->paddr = curaddr;
 				sl->datacount = sgsize;
 				sl->pages = PHYS_TO_VM_PAGE(curaddr);
 				KASSERT(sl->pages != NULL,
 				    ("%s: page at PA:0x%08lx is not in "
 				    "vm_page_array", __func__, curaddr));
 			} else
 				sl->datacount += sgsize;
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		buf += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	if (buflen != 0) {
 		bus_dmamap_unload(dmat, map);
 		return (EFBIG); /* XXX better return value here? */
 	}
 	return (0);
 }
 
 int
 _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp)
 {
 
 	return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags,
 	    segs, segp));
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 int
 _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	bus_size_t sgsize;
 	bus_addr_t curaddr;
 	bus_addr_t sl_pend = 0;
 	vm_offset_t kvaddr, vaddr, sl_vend = 0;
 	struct sync_list *sl;
 	int error;
 
 	counter_u64_add(maploads_total, 1);
 	if (map->flags & DMAMAP_COHERENT)
 		counter_u64_add(maploads_coherent, 1);
 	if (map->flags & DMAMAP_DMAMEM_ALLOC)
 		counter_u64_add(maploads_dmamem, 1);
 
 	if (segs == NULL)
 		segs = map->segments;
 
 	if (flags & BUS_DMA_LOAD_MBUF) {
 		counter_u64_add(maploads_mbuf, 1);
 		map->flags |= DMAMAP_MBUF;
 	}
 
 	if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) {
 		_bus_dmamap_count_pages(dmat, pmap, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			counter_u64_add(maploads_bounced, 1);
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	sl = map->slist + map->sync_count - 1;
 	vaddr = (vm_offset_t)buf;
 
 	while (buflen > 0) {
 		/*
 		 * Get the physical address for this segment.
 		 */
 		if (__predict_true(pmap == kernel_pmap)) {
 			curaddr = pmap_kextract(vaddr);
 			kvaddr = vaddr;
 		} else {
 			curaddr = pmap_extract(pmap, vaddr);
 			kvaddr = 0;
 		}
 
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 		if (sgsize > dmat->maxsegsz)
 			sgsize = dmat->maxsegsz;
 		if (buflen < sgsize)
 			sgsize = buflen;
 
 		if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr,
 		    sgsize)) {
 			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
 			    sgsize);
 		} else if ((dmat->flags & BUS_DMA_COHERENT) == 0) {
 			if (map->sync_count > 0) {
 				sl_pend = sl->paddr + sl->datacount;
 				sl_vend = sl->vaddr + sl->datacount;
 			}
 
 			if (map->sync_count == 0 ||
 			    (kvaddr != 0 && kvaddr != sl_vend) ||
 			    (curaddr != sl_pend)) {
 
 				if (++map->sync_count > dmat->nsegments)
 					goto cleanup;
 				sl++;
 				sl->vaddr = kvaddr;
 				sl->paddr = curaddr;
 				if (kvaddr != 0) {
 					sl->pages = NULL;
 				} else {
 					sl->pages = PHYS_TO_VM_PAGE(curaddr);
 					KASSERT(sl->pages != NULL,
 					    ("%s: page at PA:0x%08lx is not "
 					    "in vm_page_array", __func__,
 					    curaddr));
 				}
 				sl->datacount = sgsize;
 			} else
 				sl->datacount += sgsize;
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		vaddr += sgsize;
 		buflen -= sgsize;
 	}
 
 cleanup:
 	/*
 	 * Did we fit?
 	 */
 	if (buflen != 0) {
 		bus_dmamap_unload(dmat, map);
 		return (EFBIG); /* XXX better return value here? */
 	}
 	return (0);
 }
 
 void
 _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem,
     bus_dmamap_callback_t *callback, void *callback_arg)
 {
 
 	map->mem = *mem;
 	map->dmat = dmat;
 	map->callback = callback;
 	map->callback_arg = callback_arg;
 }
 
 bus_dma_segment_t *
 _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	if (segs == NULL)
 		segs = map->segments;
 	return (segs);
 }
 
 /*
  * Release the mapping held by map.
  */
 void
 bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bounce_page *bpage;
 	struct bounce_zone *bz;
 
 	if ((bz = dmat->bounce_zone) != NULL) {
 		while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 			STAILQ_REMOVE_HEAD(&map->bpages, links);
 			free_bounce_page(dmat, bpage);
 		}
 
 		bz = dmat->bounce_zone;
 		bz->free_bpages += map->pagesreserved;
 		bz->reserved_bpages -= map->pagesreserved;
 		map->pagesreserved = 0;
 		map->pagesneeded = 0;
 	}
 	map->sync_count = 0;
 	map->flags &= ~DMAMAP_MBUF;
 }
 
 static void
 dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
 {
 	/*
 	 * Write back any partial cachelines immediately before and
 	 * after the DMA region.  We don't need to round the address
 	 * down to the nearest cacheline or specify the exact size,
 	 * as dcache_wb_poc() will do the rounding for us and works
 	 * at cacheline granularity.
 	 */
 	if (va & BUSDMA_DCACHE_MASK)
 		dcache_wb_poc(va, pa, 1);
 	if ((va + size) & BUSDMA_DCACHE_MASK)
 		dcache_wb_poc(va + size, pa + size, 1);
 
 	dcache_inv_poc_dma(va, pa, size);
 }
 
 static void
 dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
 {
 	uint32_t len, offset;
 	vm_page_t m;
 	vm_paddr_t pa;
 	vm_offset_t va, tempva;
 	bus_size_t size;
 
 	offset = sl->paddr & PAGE_MASK;
 	m = sl->pages;
 	size = sl->datacount;
 	pa = sl->paddr;
 
 	for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
 		tempva = 0;
 		if (sl->vaddr == 0) {
 			len = min(PAGE_SIZE - offset, size);
 			tempva = pmap_quick_enter_page(m);
 			va = tempva | offset;
 			KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
 			    ("unexpected vm_page_t phys: 0x%08x != 0x%08x",
 			    VM_PAGE_TO_PHYS(m) | offset, pa));
 		} else {
 			len = sl->datacount;
 			va = sl->vaddr;
 		}
 
 		switch (op) {
 		case BUS_DMASYNC_PREWRITE:
 		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
 			dcache_wb_poc(va, pa, len);
 			break;
 		case BUS_DMASYNC_PREREAD:
 			/*
 			 * An mbuf may start in the middle of a cacheline. There
 			 * will be no cpu writes to the beginning of that line
 			 * (which contains the mbuf header) while dma is in
 			 * progress.  Handle that case by doing a writeback of
 			 * just the first cacheline before invalidating the
 			 * overall buffer.  Any mbuf in a chain may have this
 			 * misalignment.  Buffers which are not mbufs bounce if
 			 * they are not aligned to a cacheline.
 			 */
 			dma_preread_safe(va, pa, len);
 			break;
 		case BUS_DMASYNC_POSTREAD:
 		case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
 			dcache_inv_poc(va, pa, len);
 			break;
 		default:
 			panic("unsupported combination of sync operations: "
                               "0x%08x\n", op);
 		}
 
 		if (tempva != 0)
 			pmap_quick_remove_page(tempva);
 	}
 }
 
 void
 bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 {
 	struct bounce_page *bpage;
 	struct sync_list *sl, *end;
 	vm_offset_t datavaddr, tempvaddr;
 
 	if (op == BUS_DMASYNC_POSTWRITE)
 		return;
 
 	/*
 	 * If the buffer was from user space, it is possible that this is not
 	 * the same vm map, especially on a POST operation.  It's not clear that
 	 * dma on userland buffers can work at all right now.  To be safe, until
 	 * we're able to test direct userland dma, panic on a map mismatch.
 	 */
 	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 		    "performing bounce", __func__, dmat, dmat->flags, op);
 
 		/*
 		 * For PREWRITE do a writeback.  Clean the caches from the
 		 * innermost to the outermost levels.
 		 */
 		if (op & BUS_DMASYNC_PREWRITE) {
 			while (bpage != NULL) {
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr | bpage->dataoffs;
 				}
 				bcopy((void *)datavaddr, (void *)bpage->vaddr,
 				    bpage->datacount);
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 					dcache_wb_poc(bpage->vaddr,
 					    bpage->busaddr, bpage->datacount);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			dmat->bounce_zone->total_bounced++;
 		}
 
 		/*
 		 * Do an invalidate for PREREAD unless a writeback was already
 		 * done above due to PREWRITE also being set.  The reason for a
 		 * PREREAD invalidate is to prevent dirty lines currently in the
 		 * cache from being evicted during the DMA.  If a writeback was
 		 * done due to PREWRITE also being set there will be no dirty
 		 * lines and the POSTREAD invalidate handles the rest. The
 		 * invalidate is done from the innermost to outermost level. If
 		 * L2 were done first, a dirty cacheline could be automatically
 		 * evicted from L1 before we invalidated it, re-dirtying the L2.
 		 */
 		if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) {
 			bpage = STAILQ_FIRST(&map->bpages);
 			while (bpage != NULL) {
 				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 					dcache_inv_poc_dma(bpage->vaddr,
 					    bpage->busaddr, bpage->datacount);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 		}
 
 		/*
 		 * Re-invalidate the caches on a POSTREAD, even though they were
 		 * already invalidated at PREREAD time.  Aggressive prefetching
 		 * due to accesses to other data near the dma buffer could have
 		 * brought buffer data into the caches which is now stale.  The
 		 * caches are invalidated from the outermost to innermost; the
 		 * prefetches could be happening right now, and if L1 were
 		 * invalidated first, stale L2 data could be prefetched into L1.
 		 */
 		if (op & BUS_DMASYNC_POSTREAD) {
 			while (bpage != NULL) {
 				if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 					dcache_inv_poc(bpage->vaddr,
 					    bpage->busaddr, bpage->datacount);
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr | bpage->dataoffs;
 				}
 				bcopy((void *)bpage->vaddr, (void *)datavaddr,
 				    bpage->datacount);
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			dmat->bounce_zone->total_bounced++;
 		}
 	}
 
 	/*
 	 * For COHERENT memory no cache maintenance is necessary, but ensure all
 	 * writes have reached memory for the PREWRITE case.  No action is
 	 * needed for a PREREAD without PREWRITE also set, because that would
 	 * imply that the cpu had written to the COHERENT buffer and expected
 	 * the dma device to see that change, and by definition a PREWRITE sync
 	 * is required to make that happen.
 	 */
 	if (map->flags & DMAMAP_COHERENT) {
 		if (op & BUS_DMASYNC_PREWRITE) {
 			dsb();
 			if ((dmat->flags & BUS_DMA_COHERENT) == 0)
 				cpu_l2cache_drain_writebuf();
 		}
 		return;
 	}
 
 	/*
 	 * Cache maintenance for normal (non-COHERENT non-bounce) buffers.  All
 	 * the comments about the sequences for flushing cache levels in the
 	 * bounce buffer code above apply here as well.  In particular, the fact
 	 * that the sequence is inner-to-outer for PREREAD invalidation and
 	 * outer-to-inner for POSTREAD invalidation is not a mistake.
 	 */
 	if (map->sync_count != 0) {
 		sl = &map->slist[0];
 		end = &map->slist[map->sync_count];
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 		    "performing sync", __func__, dmat, dmat->flags, op);
 
 		for ( ; sl != end; ++sl)
 			dma_dcache_sync(sl, op);
 	}
 }
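
A hedged sketch of the sync-op ordering this function expects from a driver
for a device-to-memory (read) transfer; the my_* helpers and the single map
are illustrative, not taken from this change.

	static void
	my_rx_start(bus_dma_tag_t dtag, bus_dmamap_t map)
	{

		/* Before handing the buffer to the device for a read. */
		bus_dmamap_sync(dtag, map, BUS_DMASYNC_PREREAD);
		/* ... start the device-to-memory transfer ... */
	}

	static void
	my_rx_complete(bus_dma_tag_t dtag, bus_dmamap_t map)
	{

		/* After the completion interrupt, before the CPU reads the data. */
		bus_dmamap_sync(dtag, map, BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(dtag, map);
	}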
 
 static void
 init_bounce_pages(void *dummy __unused)
 {
 
 	total_bpages = 0;
 	STAILQ_INIT(&bounce_zone_list);
 	STAILQ_INIT(&bounce_map_waitinglist);
 	STAILQ_INIT(&bounce_map_callbacklist);
 	mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
 }
 SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
 
 	return (&bz->sysctl_tree);
 }
 
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
 
 	return (bz->sysctl_tree_top);
 }
 
 static int
 alloc_bounce_zone(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *bz;
 
 	/* Check to see if we already have a suitable zone */
 	STAILQ_FOREACH(bz, &bounce_zone_list, links) {
 		if ((dmat->alignment <= bz->alignment) &&
 		    (dmat->lowaddr >= bz->lowaddr)) {
 			dmat->bounce_zone = bz;
 			return (0);
 		}
 	}
 
 	if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA,
 	    M_NOWAIT | M_ZERO)) == NULL)
 		return (ENOMEM);
 
 	STAILQ_INIT(&bz->bounce_page_list);
 	bz->free_bpages = 0;
 	bz->reserved_bpages = 0;
 	bz->active_bpages = 0;
 	bz->lowaddr = dmat->lowaddr;
 	bz->alignment = MAX(dmat->alignment, PAGE_SIZE);
 	bz->map_count = 0;
 	snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount);
 	busdma_zonecount++;
 	snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr);
 	STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links);
 	dmat->bounce_zone = bz;
 
 	sysctl_ctx_init(&bz->sysctl_tree);
 	bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree,
 	    SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid,
 	    CTLFLAG_RD, 0, "");
 	if (bz->sysctl_tree_top == NULL) {
 		sysctl_ctx_free(&bz->sysctl_tree);
 		return (0);	/* XXX error code? */
 	}
 
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0,
 	    "Total bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0,
 	    "Free bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0,
 	    "Reserved bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0,
 	    "Active bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0,
 	    "Total bounce requests (pages bounced)");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0,
 	    "Total bounce requests that were deferred");
 	SYSCTL_ADD_STRING(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, "");
 	SYSCTL_ADD_ULONG(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "alignment", CTLFLAG_RD, &bz->alignment, "");
 
 	return (0);
 }
 
 static int
 alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
 {
 	struct bounce_zone *bz;
 	int count;
 
 	bz = dmat->bounce_zone;
 	count = 0;
 	while (numpages > 0) {
 		struct bounce_page *bpage;
 
 		bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA,
 		    M_NOWAIT | M_ZERO);
 
 		if (bpage == NULL)
 			break;
 		bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE,
 		    M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0);
 		if (bpage->vaddr == 0) {
 			free(bpage, M_BUSDMA);
 			break;
 		}
 		bpage->busaddr = pmap_kextract(bpage->vaddr);
 		mtx_lock(&bounce_lock);
 		STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links);
 		total_bpages++;
 		bz->total_bpages++;
 		bz->free_bpages++;
 		mtx_unlock(&bounce_lock);
 		count++;
 		numpages--;
 	}
 	return (count);
 }
 
 static int
 reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
 {
 	struct bounce_zone *bz;
 	int pages;
 
 	mtx_assert(&bounce_lock, MA_OWNED);
 	bz = dmat->bounce_zone;
 	pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved);
 	if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages))
 		return (map->pagesneeded - (map->pagesreserved + pages));
 	bz->free_bpages -= pages;
 	bz->reserved_bpages += pages;
 	map->pagesreserved += pages;
 	pages = map->pagesneeded - map->pagesreserved;
 
 	return (pages);
 }
 
 static bus_addr_t
 add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
     bus_addr_t addr, bus_size_t size)
 {
 	struct bounce_zone *bz;
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
 	KASSERT(map != NULL, ("add_bounce_page: bad map %p", map));
 
 	bz = dmat->bounce_zone;
 	if (map->pagesneeded == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesneeded--;
 
 	if (map->pagesreserved == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesreserved--;
 
 	mtx_lock(&bounce_lock);
 	bpage = STAILQ_FIRST(&bz->bounce_page_list);
 	if (bpage == NULL)
 		panic("add_bounce_page: free page list is empty");
 
 	STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
 	bz->reserved_bpages--;
 	bz->active_bpages++;
 	mtx_unlock(&bounce_lock);
 
 	if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/* Page offset needs to be preserved. */
 		bpage->vaddr |= addr & PAGE_MASK;
 		bpage->busaddr |= addr & PAGE_MASK;
 	}
 	bpage->datavaddr = vaddr;
 	bpage->datapage = PHYS_TO_VM_PAGE(addr);
 	bpage->dataoffs = addr & PAGE_MASK;
 	bpage->datacount = size;
 	STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
 	return (bpage->busaddr);
 }
 
 static void
 free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
 {
 	struct bus_dmamap *map;
 	struct bounce_zone *bz;
 
 	bz = dmat->bounce_zone;
 	bpage->datavaddr = 0;
 	bpage->datacount = 0;
 	if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * Reset the bounce page to start at offset 0.  Other uses
 		 * of this bounce page may need to store a full page of
 		 * data and/or assume it starts on a page boundary.
 		 */
 		bpage->vaddr &= ~PAGE_MASK;
 		bpage->busaddr &= ~PAGE_MASK;
 	}
 
 	mtx_lock(&bounce_lock);
 	STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links);
 	bz->free_bpages++;
 	bz->active_bpages--;
 	if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
 		if (reserve_bounce_pages(map->dmat, map, 1) == 0) {
 			STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
 			STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
 			    map, links);
 			busdma_swi_pending = 1;
 			bz->total_deferred++;
 			swi_sched(vm_ih, 0);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 void
 busdma_swi(void)
 {
 	bus_dma_tag_t dmat;
 	struct bus_dmamap *map;
 
 	mtx_lock(&bounce_lock);
 	while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
 		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
 		mtx_unlock(&bounce_lock);
 		dmat = map->dmat;
 		dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_LOCK);
 		bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback,
 		    map->callback_arg, BUS_DMA_WAITOK);
 		dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_UNLOCK);
 		mtx_lock(&bounce_lock);
 	}
 	mtx_unlock(&bounce_lock);
 }
Index: head/sys/arm/arm/pmap-v6.c
===================================================================
--- head/sys/arm/arm/pmap-v6.c	(revision 338317)
+++ head/sys/arm/arm/pmap-v6.c	(revision 338318)
@@ -1,6983 +1,6982 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1991 Regents of the University of California.
  * Copyright (c) 1994 John S. Dyson
  * Copyright (c) 1994 David Greenman
  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
  * Copyright (c) 2014-2016 Svatopluk Kraus <skra@FreeBSD.org>
  * Copyright (c) 2014-2016 Michal Meloun <mmel@FreeBSD.org>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  */
 /*-
  * Copyright (c) 2003 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jake Burkholder,
  * Safeport Network Services, and Network Associates Laboratories, the
  * Security Research Division of Network Associates, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
  * CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Manages physical address maps.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include "opt_vm.h"
 #include "opt_pmap.h"
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/malloc.h>
 #include <sys/vmmeter.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/sf_buf.h>
 #include <sys/smp.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <machine/physmem.h>
 
 #include <vm/vm.h>
 #include <vm/uma.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_reserv.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #include <machine/md_var.h>
 #include <machine/pmap_var.h>
 #include <machine/cpu.h>
 #include <machine/pcb.h>
 #include <machine/sf_buf.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #ifndef PMAP_SHPGPERPROC
 #define PMAP_SHPGPERPROC 200
 #endif
 
 #ifndef DIAGNOSTIC
 #define PMAP_INLINE	__inline
 #else
 #define PMAP_INLINE
 #endif
 
 #ifdef PMAP_DEBUG
 static void pmap_zero_page_check(vm_page_t m);
 void pmap_debug(int level);
 int pmap_pid_dump(int pid);
 
 #define PDEBUG(_lev_,_stat_) \
 	if (pmap_debug_level >= (_lev_)) \
 		((_stat_))
 #define dprintf printf
 int pmap_debug_level = 1;
 #else   /* PMAP_DEBUG */
 #define PDEBUG(_lev_,_stat_) /* Nothing */
 #define dprintf(x, arg...)
 #endif  /* PMAP_DEBUG */
 
 /*
  *  Level 2 page table map definitions ('max' is excluded).
  */
 
 #define PT2V_MIN_ADDRESS	((vm_offset_t)PT2MAP)
 #define PT2V_MAX_ADDRESS	((vm_offset_t)PT2MAP + PT2MAP_SIZE)
 
 #define UPT2V_MIN_ADDRESS	((vm_offset_t)PT2MAP)
 #define UPT2V_MAX_ADDRESS \
     ((vm_offset_t)(PT2MAP + (KERNBASE >> PT2MAP_SHIFT)))
 
 /*
  *  Promotion to a 1MB (PTE1) page mapping requires that the corresponding
  *  4KB (PTE2) page mappings have identical settings for the following fields:
  */
 #define PTE2_PROMOTE	(PTE2_V | PTE2_A | PTE2_NM | PTE2_S | PTE2_NG |	\
 			 PTE2_NX | PTE2_RO | PTE2_U | PTE2_W |		\
 			 PTE2_ATTR_MASK)
 
 #define PTE1_PROMOTE	(PTE1_V | PTE1_A | PTE1_NM | PTE1_S | PTE1_NG |	\
 			 PTE1_NX | PTE1_RO | PTE1_U | PTE1_W |		\
 			 PTE1_ATTR_MASK)
 
 #define ATTR_TO_L1(l2_attr)	((((l2_attr) & L2_TEX0) ? L1_S_TEX0 : 0) | \
 				 (((l2_attr) & L2_C)    ? L1_S_C    : 0) | \
 				 (((l2_attr) & L2_B)    ? L1_S_B    : 0) | \
 				 (((l2_attr) & PTE2_A)  ? PTE1_A    : 0) | \
 				 (((l2_attr) & PTE2_NM) ? PTE1_NM   : 0) | \
 				 (((l2_attr) & PTE2_S)  ? PTE1_S    : 0) | \
 				 (((l2_attr) & PTE2_NG) ? PTE1_NG   : 0) | \
 				 (((l2_attr) & PTE2_NX) ? PTE1_NX   : 0) | \
 				 (((l2_attr) & PTE2_RO) ? PTE1_RO   : 0) | \
 				 (((l2_attr) & PTE2_U)  ? PTE1_U    : 0) | \
 				 (((l2_attr) & PTE2_W)  ? PTE1_W    : 0))
 
 #define ATTR_TO_L2(l1_attr)	((((l1_attr) & L1_S_TEX0) ? L2_TEX0 : 0) | \
 				 (((l1_attr) & L1_S_C)    ? L2_C    : 0) | \
 				 (((l1_attr) & L1_S_B)    ? L2_B    : 0) | \
 				 (((l1_attr) & PTE1_A)    ? PTE2_A  : 0) | \
 				 (((l1_attr) & PTE1_NM)   ? PTE2_NM : 0) | \
 				 (((l1_attr) & PTE1_S)    ? PTE2_S  : 0) | \
 				 (((l1_attr) & PTE1_NG)   ? PTE2_NG : 0) | \
 				 (((l1_attr) & PTE1_NX)   ? PTE2_NX : 0) | \
 				 (((l1_attr) & PTE1_RO)   ? PTE2_RO : 0) | \
 				 (((l1_attr) & PTE1_U)    ? PTE2_U  : 0) | \
 				 (((l1_attr) & PTE1_W)    ? PTE2_W  : 0))
 
 /*
  *  PTE2 descriptors creation macros.
  */
 #define PTE2_ATTR_DEFAULT	vm_memattr_to_pte2(VM_MEMATTR_DEFAULT)
 #define PTE2_ATTR_PT		vm_memattr_to_pte2(pt_memattr)
 
 #define PTE2_KPT(pa)	PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_PT)
 #define PTE2_KPT_NG(pa)	PTE2_KERN_NG(pa, PTE2_AP_KRW, PTE2_ATTR_PT)
 
 #define PTE2_KRW(pa)	PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT)
 #define PTE2_KRO(pa)	PTE2_KERN(pa, PTE2_AP_KR, PTE2_ATTR_DEFAULT)
 
 #define PV_STATS
 #ifdef PV_STATS
 #define PV_STAT(x)	do { x ; } while (0)
 #else
 #define PV_STAT(x)	do { } while (0)
 #endif
 
 /*
  *  The boot_pt1 is used temporarily as the L1 page table in the very early
  *  boot stage.  Because it is statically allocated, many things can be
  *  initialized without any memory allocation, which brings two main
  *  advantages:
  *  (1) other cores can be started very simply,
  *  (2) various boot loaders can be supported, as their arguments can be
  *      processed in virtual address space and moved to a safe location
  *      before the first allocation happens.
  *  The only disadvantage is that boot_pt1 is used only in the very early
  *  boot stage.  However, the table is uninitialized and so lives in bss,
  *  so it does not increase the kernel image size.
  *
  *  QQQ: In the future, maybe, boot_pt1 can be used for soft reset and
  *       CPU suspend/resume game.
  */
 extern pt1_entry_t boot_pt1[];
 
 vm_paddr_t base_pt1;
 pt1_entry_t *kern_pt1;
 pt2_entry_t *kern_pt2tab;
 pt2_entry_t *PT2MAP;
 
 static uint32_t ttb_flags;
 static vm_memattr_t pt_memattr;
 ttb_entry_t pmap_kern_ttb;
 
 struct pmap kernel_pmap_store;
 LIST_HEAD(pmaplist, pmap);
 static struct pmaplist allpmaps;
 static struct mtx allpmaps_lock;
 
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 
 static vm_offset_t kernel_vm_end_new;
 vm_offset_t kernel_vm_end = KERNBASE + NKPT2PG * NPT2_IN_PG * PTE1_SIZE;
 vm_offset_t vm_max_kernel_address;
 vm_paddr_t kernel_l1pa;
 
 static struct rwlock __aligned(CACHE_LINE_SIZE) pvh_global_lock;
 
 /*
  *  Data for the pv entry allocation mechanism
  */
 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static struct md_page *pv_table; /* XXX: Is only the list in md_page used? */
 static int shpgperproc = PMAP_SHPGPERPROC;
 
 struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
 int pv_maxchunks;			/* How many chunks we have KVA for */
 vm_offset_t pv_vafree;			/* freelist stored in the PTE */
 
 vm_paddr_t first_managed_pa;
 #define	pa_to_pvh(pa)	(&pv_table[pte1_index(pa - first_managed_pa)])
 
 /*
  *  All those kernel PT submaps that BSD is so fond of
  */
 caddr_t _tmppt = 0;
 
 /*
  *  Crashdump maps.
  */
 static caddr_t crashdumpmap;
 
 static pt2_entry_t *PMAP1 = NULL, *PMAP2;
 static pt2_entry_t *PADDR1 = NULL, *PADDR2;
 #ifdef DDB
 static pt2_entry_t *PMAP3;
 static pt2_entry_t *PADDR3;
 static int PMAP3cpu __unused; /* for SMP only */
 #endif
 #ifdef SMP
 static int PMAP1cpu;
 static int PMAP1changedcpu;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
     &PMAP1changedcpu, 0,
     "Number of times pmap_pte2_quick changed CPU with same PMAP1");
 #endif
 static int PMAP1changed;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
     &PMAP1changed, 0,
     "Number of times pmap_pte2_quick changed PMAP1");
 static int PMAP1unchanged;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
     &PMAP1unchanged, 0,
     "Number of times pmap_pte2_quick didn't change PMAP1");
 static struct mtx PMAP2mutex;
 
 /*
  * Internal flags for pmap_enter()'s helper functions.
  */
 #define	PMAP_ENTER_NORECLAIM	0x1000000	/* Don't reclaim PV entries. */
 #define	PMAP_ENTER_NOREPLACE	0x2000000	/* Don't replace mappings. */
 
 static __inline void pt2_wirecount_init(vm_page_t m);
 static boolean_t pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p,
     vm_offset_t va);
 static int pmap_enter_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t pte1,
     u_int flags, vm_page_t m);
 void cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size);
 
 /*
  *  Function to set the debug level of the pmap code.
  */
 #ifdef PMAP_DEBUG
 void
 pmap_debug(int level)
 {
 
 	pmap_debug_level = level;
 	dprintf("pmap_debug: level=%d\n", pmap_debug_level);
 }
 #endif /* PMAP_DEBUG */
 
 /*
  *  This table must correspond to the memory attribute configuration in vm.h.
  *  The first entry is used for the normal system mapping.
  *
  *  Device memory is always marked as shared.
  *  Normal memory is shared only in the SMP case.
  *  Not-outer-shareable bits are not used yet.
  *  Class 6 cannot be used on ARM11.
  */
 #define TEXDEF_TYPE_SHIFT	0
 #define TEXDEF_TYPE_MASK	0x3
 #define TEXDEF_INNER_SHIFT	2
 #define TEXDEF_INNER_MASK	0x3
 #define TEXDEF_OUTER_SHIFT	4
 #define TEXDEF_OUTER_MASK	0x3
 #define TEXDEF_NOS_SHIFT	6
 #define TEXDEF_NOS_MASK		0x1
 
 #define TEX(t, i, o, s) 			\
 		((t) << TEXDEF_TYPE_SHIFT) |	\
 		((i) << TEXDEF_INNER_SHIFT) |	\
 		((o) << TEXDEF_OUTER_SHIFT) |	\
 		((s) << TEXDEF_NOS_SHIFT)
 
 static uint32_t tex_class[8] = {
 /*	    type      inner cache outer cache */
 	TEX(PRRR_MEM, NMRR_WB_WA, NMRR_WB_WA, 0),  /* 0 - ATTR_WB_WA	*/
 	TEX(PRRR_MEM, NMRR_NC,	  NMRR_NC,    0),  /* 1 - ATTR_NOCACHE	*/
 	TEX(PRRR_DEV, NMRR_NC,	  NMRR_NC,    0),  /* 2 - ATTR_DEVICE	*/
 	TEX(PRRR_SO,  NMRR_NC,	  NMRR_NC,    0),  /* 3 - ATTR_SO	*/
 	TEX(PRRR_MEM, NMRR_WT,	  NMRR_WT,    0),  /* 4 - ATTR_WT	*/
 	TEX(PRRR_MEM, NMRR_NC,	  NMRR_NC,    0),  /* 5 - NOT USED YET	*/
 	TEX(PRRR_MEM, NMRR_NC,	  NMRR_NC,    0),  /* 6 - NOT USED YET	*/
 	TEX(PRRR_MEM, NMRR_NC,	  NMRR_NC,    0),  /* 7 - NOT USED YET	*/
 };
 #undef TEX
 
 static uint32_t pte2_attr_tab[8] = {
 	PTE2_ATTR_WB_WA,	/* 0 - VM_MEMATTR_WB_WA */
 	PTE2_ATTR_NOCACHE,	/* 1 - VM_MEMATTR_NOCACHE */
 	PTE2_ATTR_DEVICE,	/* 2 - VM_MEMATTR_DEVICE */
 	PTE2_ATTR_SO,		/* 3 - VM_MEMATTR_SO */
 	PTE2_ATTR_WT,		/* 4 - VM_MEMATTR_WRITE_THROUGH */
 	0,			/* 5 - NOT USED YET */
 	0,			/* 6 - NOT USED YET */
 	0			/* 7 - NOT USED YET */
 };
 CTASSERT(VM_MEMATTR_WB_WA == 0);
 CTASSERT(VM_MEMATTR_NOCACHE == 1);
 CTASSERT(VM_MEMATTR_DEVICE == 2);
 CTASSERT(VM_MEMATTR_SO == 3);
 CTASSERT(VM_MEMATTR_WRITE_THROUGH == 4);
 #define	VM_MEMATTR_END	(VM_MEMATTR_WRITE_THROUGH + 1)
 
 boolean_t
 pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
 {
 
 	return (mode >= 0 && mode < VM_MEMATTR_END);
 }
 
 static inline uint32_t
 vm_memattr_to_pte2(vm_memattr_t ma)
 {
 
 	KASSERT((u_int)ma < VM_MEMATTR_END,
 	    ("%s: bad vm_memattr_t %d", __func__, ma));
 	return (pte2_attr_tab[(u_int)ma]);
 }
 
 static inline uint32_t
 vm_page_pte2_attr(vm_page_t m)
 {
 
 	return (vm_memattr_to_pte2(m->md.pat_mode));
 }
 
 /*
  * Convert TEX definition entry to TTB flags.
  */
 static uint32_t
 encode_ttb_flags(int idx)
 {
 	uint32_t inner, outer, nos, reg;
 
 	inner = (tex_class[idx] >> TEXDEF_INNER_SHIFT) &
 		TEXDEF_INNER_MASK;
 	outer = (tex_class[idx] >> TEXDEF_OUTER_SHIFT) &
 		TEXDEF_OUTER_MASK;
 	nos = (tex_class[idx] >> TEXDEF_NOS_SHIFT) &
 		TEXDEF_NOS_MASK;
 
 	reg = nos << 5;
 	reg |= outer << 3;
 	if (cpuinfo.coherent_walk)
 		reg |= (inner & 0x1) << 6;
 	reg |= (inner & 0x2) >> 1;
 #ifdef SMP
 	ARM_SMP_UP(
 		reg |= 1 << 1,
 	);
 #endif
 	return reg;
 }
 
 /*
  *  Set TEX remapping registers in current CPU.
  */
 void
 pmap_set_tex(void)
 {
 	uint32_t prrr, nmrr;
 	uint32_t type, inner, outer, nos;
 	int i;
 
 #ifdef PMAP_PTE_NOCACHE
 	/* XXX fixme */
 	if (cpuinfo.coherent_walk) {
 		pt_memattr = VM_MEMATTR_WB_WA;
 		ttb_flags = encode_ttb_flags(0);
 	}
 	else {
 		pt_memattr = VM_MEMATTR_NOCACHE;
 		ttb_flags = encode_ttb_flags(1);
 	}
 #else
 	pt_memattr = VM_MEMATTR_WB_WA;
 	ttb_flags = encode_ttb_flags(0);
 #endif
 
 	prrr = 0;
 	nmrr = 0;
 
 	/* Build remapping register from TEX classes. */
 	for (i = 0; i < 8; i++) {
 		type = (tex_class[i] >> TEXDEF_TYPE_SHIFT) &
 			TEXDEF_TYPE_MASK;
 		inner = (tex_class[i] >> TEXDEF_INNER_SHIFT) &
 			TEXDEF_INNER_MASK;
 		outer = (tex_class[i] >> TEXDEF_OUTER_SHIFT) &
 			TEXDEF_OUTER_MASK;
 		nos = (tex_class[i] >> TEXDEF_NOS_SHIFT) &
 			TEXDEF_NOS_MASK;
 
 		prrr |= type  << (i * 2);
 		prrr |= nos   << (i + 24);
 		nmrr |= inner << (i * 2);
 		nmrr |= outer << (i * 2 + 16);
 	}
 	/* Add shareable bits for device memory. */
 	prrr |= PRRR_DS0 | PRRR_DS1;
 
 	/* Add shareable bits for normal memory in SMP case. */
 #ifdef SMP
 	ARM_SMP_UP(
 		prrr |= PRRR_NS1,
 	);
 #endif
 	cp15_prrr_set(prrr);
 	cp15_nmrr_set(nmrr);
 
 	/* Caches are disabled, so full TLB flush should be enough. */
 	tlb_flush_all_local();
 }
 
 /*
  * Remap one vm_memattr class to another one. This can be useful as a
  * workaround for SoC errata, e.g. if devices must be accessed using the
  * SO memory class.
  *
  * !!! Please note that this function is an absolute last resort.
  * It should not be used under normal circumstances. !!!
  *
  * Usage rules:
  * - It shall be called after pmap_bootstrap_prepare() and before
  *   cpu_mp_start() (thus only on the boot CPU). In practice, it's expected
  *   to be called from platform_attach() or platform_late_init().
  *
  * - If the remapping doesn't change the caching mode, or an uncached class
  *   is remapped to some kind of cached one, no other restriction exists.
  *
  * - If pmap_remap_vm_attr() changes the caching mode, but both the original
  *   and the remapped classes remain cached, then the caller is responsible
  *   for calling dcache_wbinv_poc_all().
  *
  * - Remapping any kind of cached class to an uncached one is not permitted.
  *
  * An illustrative usage sketch follows the function below.
  */
 void
 pmap_remap_vm_attr(vm_memattr_t old_attr, vm_memattr_t new_attr)
 {
 	int old_idx, new_idx;
 
 	/* Map VM memattrs to indexes to tex_class table. */
 	old_idx = PTE2_ATTR2IDX(pte2_attr_tab[(int)old_attr]);
 	new_idx = PTE2_ATTR2IDX(pte2_attr_tab[(int)new_attr]);
 
 	/* Replace TEX attribute and apply it. */
 	tex_class[old_idx] = tex_class[new_idx];
 	pmap_set_tex();
 }
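 
 /*
  * Illustrative sketch only (a hypothetical example, not part of this file):
  * a SoC's platform_late_init() working around an erratum by forcing the
  * device memory class to strongly-ordered.  Device and SO are both uncached,
  * so no cache maintenance is needed per the rules above.  Kept under
  * "#if 0" as it is for illustration only.
  */
 #if 0
 static void
 example_platform_late_init(void)
 {
 
 	/* Must run on the boot CPU, before cpu_mp_start(). */
 	pmap_remap_vm_attr(VM_MEMATTR_DEVICE, VM_MEMATTR_SO);
 }
 #endif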
 
 /*
  * KERNBASE must be a multiple of NPT2_IN_PG * PTE1_SIZE. In other words,
  * KERNBASE is mapped by the first L2 page table in an L2 page table page.
  * PT2MAP meets the same constraint, as it is placed just below KERNBASE.
  */
 CTASSERT((KERNBASE & (NPT2_IN_PG * PTE1_SIZE - 1)) == 0);
 CTASSERT((KERNBASE - VM_MAXUSER_ADDRESS) >= PT2MAP_SIZE);
 
 /*
  *  In crazy dreams, PAGE_SIZE could be a multiple of PTE2_SIZE in general.
  *  For now, however, the following check must hold.
  */
 CTASSERT(PAGE_SIZE == PTE2_SIZE);
 /*
  *  We don't want to mess up MI code with all MMU and PMAP definitions,
  *  so some things that depend on others are defined independently.
  *  Now it is time to check that we haven't screwed something up.
  */
 CTASSERT(PDRSHIFT == PTE1_SHIFT);
 /*
  *  Check L1 and L2 page table entries definitions consistency.
  */
 CTASSERT(NB_IN_PT1 == (sizeof(pt1_entry_t) * NPTE1_IN_PT1));
 CTASSERT(NB_IN_PT2 == (sizeof(pt2_entry_t) * NPTE2_IN_PT2));
 /*
  *  Check L2 page tables page consistency.
  */
 CTASSERT(PAGE_SIZE == (NPT2_IN_PG * NB_IN_PT2));
 CTASSERT((1 << PT2PG_SHIFT) == NPT2_IN_PG);
 /*
  *  Check PT2TAB consistency.
  *  PT2TAB_ENTRIES is defined as NPTE1_IN_PT1 divided by NPT2_IN_PG.
  *  The division must have no remainder.
  */
 CTASSERT(NPTE1_IN_PT1 == (PT2TAB_ENTRIES * NPT2_IN_PG));
 
 /*
  *	A PT2MAP magic.
  *
  *  All level 2 page tables (PT2s) are mapped continuously and accordingly
  *  into PT2MAP address space. As PT2 size is less than PAGE_SIZE, this can
  *  be done only if PAGE_SIZE is a multiple of PT2 size. All PT2s in one page
  *  must be used together, but not necessary at once. The first PT2 in a page
  *  must map things on correctly aligned address and the others must follow
  *  in right order.
  */
 #define NB_IN_PT2TAB	(PT2TAB_ENTRIES * sizeof(pt2_entry_t))
 #define NPT2_IN_PT2TAB	(NB_IN_PT2TAB / NB_IN_PT2)
 #define NPG_IN_PT2TAB	(NB_IN_PT2TAB / PAGE_SIZE)
 
 /*
  *  Check PT2TAB consistency.
  *  NPT2_IN_PT2TAB is defined as NB_IN_PT2TAB divided by NB_IN_PT2.
  *  NPG_IN_PT2TAB is defined as NB_IN_PT2TAB divided by PAGE_SIZE.
  *  Both divisions must have no remainder.
  */
 CTASSERT(NB_IN_PT2TAB == (NPT2_IN_PT2TAB * NB_IN_PT2));
 CTASSERT(NB_IN_PT2TAB == (NPG_IN_PT2TAB * PAGE_SIZE));
 /*
  *  The implementation was made general; however, it was written with the
  *  assumption below in mind. Should NPG_IN_PT2TAB ever get another value,
  *  the code should be rechecked.
  */
 CTASSERT(NPG_IN_PT2TAB == 1);
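 
 /*
  *  Worked example (a sketch only; it assumes the usual ARMv6/v7
  *  short-descriptor geometry of 4 KB pages, a 16 KB L1 table with 4096
  *  entries, and 1 KB L2 tables with 256 four-byte entries, which is not
  *  guaranteed here and hence kept under "#if 0"):
  *
  *    NPT2_IN_PG     = PAGE_SIZE / NB_IN_PT2      = 4096 / 1024 = 4
  *    PT2TAB_ENTRIES = NPTE1_IN_PT1 / NPT2_IN_PG  = 4096 / 4    = 1024
  *    NB_IN_PT2TAB   = PT2TAB_ENTRIES * 4 bytes   = 4096 = PAGE_SIZE
  *    NPT2_IN_PT2TAB = NB_IN_PT2TAB / NB_IN_PT2   = 4
  *    NPG_IN_PT2TAB  = NB_IN_PT2TAB / PAGE_SIZE   = 1
  */
 #if 0
 CTASSERT(NPT2_IN_PG == 4);
 CTASSERT(PT2TAB_ENTRIES == 1024);
 CTASSERT(NB_IN_PT2TAB == PAGE_SIZE);
 CTASSERT(NPT2_IN_PT2TAB == 4);
 #endif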
 
 /*
  *  Get offset of PT2 in a page
  *  associated with given PT1 index.
  */
 static __inline u_int
 page_pt2off(u_int pt1_idx)
 {
 
 	return ((pt1_idx & PT2PG_MASK) * NB_IN_PT2);
 }
 
 /*
  *  Get physical address of PT2
  *  associated with given PT2s page and PT1 index.
  */
 static __inline vm_paddr_t
 page_pt2pa(vm_paddr_t pgpa, u_int pt1_idx)
 {
 
 	return (pgpa + page_pt2off(pt1_idx));
 }
 
 /*
  *  Get first entry of PT2
  *  associated with given PT2s page and PT1 index.
  */
 static __inline pt2_entry_t *
 page_pt2(vm_offset_t pgva, u_int pt1_idx)
 {
 
 	return ((pt2_entry_t *)(pgva + page_pt2off(pt1_idx)));
 }
 
 /*
  *  Get virtual address of PT2s page (mapped in PT2MAP)
  *  which holds PT2 which holds entry which maps given virtual address.
  */
 static __inline vm_offset_t
 pt2map_pt2pg(vm_offset_t va)
 {
 
 	va &= ~(NPT2_IN_PG * PTE1_SIZE - 1);
 	return ((vm_offset_t)pt2map_entry(va));
 }
 
 /*****************************************************************************
  *
  *     THREE pmap initialization milestones exist:
  *
  *  locore.S
  *    -> fundamental init (including MMU) in ASM
  *
  *  initarm()
  *    -> fundamental init continues in C
  *    -> first available physical address is known
  *
  *    pmap_bootstrap_prepare() -> FIRST PMAP MILESTONE (first epoch begins)
  *      -> basic (safe) interface for physical address allocation is made
  *      -> basic (safe) interface for virtual mapping is made
  *      -> limited, not SMP-coherent work is possible
  *
  *    -> more fundamental init continues in C
  *    -> locks and some more things are available
  *    -> all fundamental allocations and mappings are done
  *
  *    pmap_bootstrap() -> SECOND PMAP MILESTONE (second epoch begins)
  *      -> phys_avail[] and virtual_avail are set
  *      -> control is passed to vm subsystem
  *      -> physical and virtual address allocation are off limits
  *      -> low level mapping functions, some of them SMP coherent,
  *         are available; they cannot be used before the vm subsystem
  *         initialization is under way
  *
  *  mi_startup()
  *    -> vm subsystem is being inited
  *
  *      pmap_init() -> THIRD PMAP MILESTONE (third epoch begins)
  *        -> pmap is fully inited
  *
  *****************************************************************************/
 
 /*****************************************************************************
  *
  *	PMAP first stage initialization and utility functions
  *	for pre-bootstrap epoch.
  *
  *  After pmap_bootstrap_prepare() is called, the following functions
  *  can be used:
  *
  *  (1) strictly for this stage only, functions for physical page allocation,
  *      virtual space allocation, and mappings:
  *
  *  vm_paddr_t pmap_preboot_get_pages(u_int num);
  *  void pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num);
  *  vm_offset_t pmap_preboot_reserve_pages(u_int num);
  *  vm_offset_t pmap_preboot_get_vpages(u_int num);
  *  void pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size,
  *      vm_prot_t prot, vm_memattr_t attr);
  *
  *  (2) for all stages:
  *
  *  vm_paddr_t pmap_kextract(vm_offset_t va);
  *
  *  NOTE: This is not an SMP-coherent stage.
  *
  *****************************************************************************/
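 
 /*
  * Illustrative sketch only (hypothetical code, not part of this file): how
  * a platform's early init might use the pre-bootstrap interface after
  * pmap_bootstrap_prepare().  The device physical address below is a made-up
  * placeholder; the sketch is kept under "#if 0".
  */
 #if 0
 static vm_offset_t example_early_uart_va;
 
 static void
 example_platform_early_init(void)
 {
 
 	/* Reserve one page of KVA and map a (hypothetical) UART as device. */
 	example_early_uart_va = pmap_preboot_reserve_pages(1);
 	pmap_preboot_map_attr(0x12340000, example_early_uart_va, PAGE_SIZE,
 	    VM_PROT_READ | VM_PROT_WRITE, VM_MEMATTR_DEVICE);
 
 	/* Allocate, map, and zero two pages of early scratch memory. */
 	(void)pmap_preboot_get_vpages(2);
 }
 #endif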
 
 #define KERNEL_P2V(pa) \
     ((vm_offset_t)((pa) - arm_physmem_kernaddr + KERNVIRTADDR))
 #define KERNEL_V2P(va) \
     ((vm_paddr_t)((va) - KERNVIRTADDR + arm_physmem_kernaddr))
 
 static vm_paddr_t last_paddr;
 
 /*
  *  Pre-bootstrap epoch page allocator.
  */
 vm_paddr_t
 pmap_preboot_get_pages(u_int num)
 {
 	vm_paddr_t ret;
 
 	ret = last_paddr;
 	last_paddr += num * PAGE_SIZE;
 
 	return (ret);
 }
 
 /*
  *	The fundamental initialization of PMAP stuff.
  *
  *  Some things already happened in locore.S and some things could happen
  *  before pmap_bootstrap_prepare() is called, so let's recall what is done:
  *  1. Caches are disabled.
  *  2. We are running on virtual addresses already with 'boot_pt1'
  *     as L1 page table.
  *  3. So far, all virtual addresses can be converted to physical ones and
  *     vice versa by the following macros:
  *       KERNEL_P2V(pa) .... physical to virtual ones,
  *       KERNEL_V2P(va) .... virtual to physical ones.
  *
  *  What is done herein:
  *  1. The 'boot_pt1' is replaced by the real kernel L1 page table 'kern_pt1'.
  *  2. The PT2MAP magic is brought to life.
  *  3. Basic preboot functions for page allocations and mappings can be used.
  *  4. Everything is prepared for L1 cache enabling.
  *
  *  Variations:
  *  1. Use the second TTB register, so that kernel and user page tables are
  *     separated. This way process forking - pmap_pinit() - could be faster,
  *     it saves physical pages and KVA per process, and it's a simple change.
  *     However, due to the hardware design, it leads to one of the following:
  *     (a) 2G of space for the kernel and 2G for userland, or
  *     (b) 1G of space for the kernel in low addresses and 3G for userland
  *         above it.
  *     The question is: Is case (b) really an option? Note that case (b)
  *     saves neither physical memory nor KVA.
  */
 void
 pmap_bootstrap_prepare(vm_paddr_t last)
 {
 	vm_paddr_t pt2pg_pa, pt2tab_pa, pa, size;
 	vm_offset_t pt2pg_va;
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 	u_int i;
 	uint32_t l1_attr;
 
 	/*
 	 * Now, we are going to make real kernel mapping. Note that we are
 	 * already running on some mapping made in locore.S and we expect
 	 * that it's large enough to ensure nofault access to physical memory
 	 * allocated herein before switch.
 	 *
 	 * As kernel image and everything needed before are and will be mapped
 	 * by section mappings, we align last physical address to PTE1_SIZE.
 	 */
 	last_paddr = pte1_roundup(last);
 
 	/*
 	 * Allocate and zero page(s) for kernel L1 page table.
 	 *
 	 * Note that it's first allocation on space which was PTE1_SIZE
 	 * aligned and as such base_pt1 is aligned to NB_IN_PT1 too.
 	 */
 	base_pt1 = pmap_preboot_get_pages(NPG_IN_PT1);
 	kern_pt1 = (pt1_entry_t *)KERNEL_P2V(base_pt1);
 	bzero((void*)kern_pt1, NB_IN_PT1);
 	pte1_sync_range(kern_pt1, NB_IN_PT1);
 
 	/* Allocate and zero page(s) for kernel PT2TAB. */
 	pt2tab_pa = pmap_preboot_get_pages(NPG_IN_PT2TAB);
 	kern_pt2tab = (pt2_entry_t *)KERNEL_P2V(pt2tab_pa);
 	bzero(kern_pt2tab, NB_IN_PT2TAB);
 	pte2_sync_range(kern_pt2tab, NB_IN_PT2TAB);
 
 	/* Allocate and zero page(s) for kernel L2 page tables. */
 	pt2pg_pa = pmap_preboot_get_pages(NKPT2PG);
 	pt2pg_va = KERNEL_P2V(pt2pg_pa);
 	size = NKPT2PG * PAGE_SIZE;
 	bzero((void*)pt2pg_va, size);
 	pte2_sync_range((pt2_entry_t *)pt2pg_va, size);
 
 	/*
 	 * Add a physical memory segment (vm_phys_seg) corresponding to the
 	 * preallocated pages for kernel L2 page tables so that vm_page
 	 * structures representing these pages will be created. The vm_page
 	 * structures are required for promotion of the corresponding kernel
 	 * virtual addresses to section mappings.
 	 */
 	vm_phys_add_seg(pt2tab_pa, pmap_preboot_get_pages(0));
 
 	/*
 	 * Insert allocated L2 page table pages to PT2TAB and make
 	 * link to all PT2s in L1 page table. See how kernel_vm_end
 	 * is initialized.
 	 *
 	 * We play it simple and safe. So every KVA will have an underlying
 	 * L2 page table, even the kernel image mapped by sections.
 	 */
 	pte2p = kern_pt2tab_entry(KERNBASE);
 	for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += PTE2_SIZE)
 		pt2tab_store(pte2p++, PTE2_KPT(pa));
 
 	pte1p = kern_pte1(KERNBASE);
 	for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += NB_IN_PT2)
 		pte1_store(pte1p++, PTE1_LINK(pa));
 
 	/* Make section mappings for kernel. */
 	l1_attr = ATTR_TO_L1(PTE2_ATTR_DEFAULT);
 	pte1p = kern_pte1(KERNBASE);
 	for (pa = KERNEL_V2P(KERNBASE); pa < last; pa += PTE1_SIZE)
 		pte1_store(pte1p++, PTE1_KERN(pa, PTE1_AP_KRW, l1_attr));
 
 	/*
 	 * Get free and aligned space for PT2MAP and make L1 page table links
 	 * to L2 page tables held in PT2TAB.
 	 *
 	 * Note that pages holding PT2s are stored in PT2TAB as pt2_entry_t
 	 * descriptors and PT2TAB page(s) itself is(are) used as PT2s. Thus
 	 * each entry in PT2TAB maps all PT2s in a page. This implies that
 	 * virtual address of PT2MAP must be aligned to NPT2_IN_PG * PTE1_SIZE.
 	 */
 	PT2MAP = (pt2_entry_t *)(KERNBASE - PT2MAP_SIZE);
 	pte1p = kern_pte1((vm_offset_t)PT2MAP);
 	for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) {
 		pte1_store(pte1p++, PTE1_LINK(pa));
 	}
 
 	/*
 	 * Store PT2TAB in PT2TAB itself, i.e. a self-reference mapping.
 	 * Each pmap will hold its own PT2TAB, so the mapping should not be
 	 * global.
 	 */
 	pte2p = kern_pt2tab_entry((vm_offset_t)PT2MAP);
 	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) {
 		pt2tab_store(pte2p++, PTE2_KPT_NG(pa));
 	}
 
 	/*
 	 * Choose the correct L2 page table and make mappings for the
 	 * allocations made herein, which will replace the temporary locore.S
 	 * mappings after a while.
 	 * Note that PT2MAP cannot be used until we switch to kern_pt1.
 	 *
 	 * Note that these allocations started aligned on a 1M section and
 	 * the kernel PT1 was allocated first. The mappings must be made in
 	 * the order of the physical allocations, as we've used the
 	 * KERNEL_P2V() macro for virtual address resolution.
 	 */
 	pte2p = kern_pt2tab_entry((vm_offset_t)kern_pt1);
 	pt2pg_va = KERNEL_P2V(pte2_pa(pte2_load(pte2p)));
 
 	pte2p = page_pt2(pt2pg_va, pte1_index((vm_offset_t)kern_pt1));
 
 	/* Make mapping for kernel L1 page table. */
 	for (pa = base_pt1, i = 0; i < NPG_IN_PT1; i++, pa += PTE2_SIZE)
 		pte2_store(pte2p++, PTE2_KPT(pa));
 
 	/* Make mapping for kernel PT2TAB. */
 	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE)
 		pte2_store(pte2p++, PTE2_KPT(pa));
 
 	/* Finally, switch from 'boot_pt1' to 'kern_pt1'. */
 	pmap_kern_ttb = base_pt1 | ttb_flags;
 	cpuinfo_reinit_mmu(pmap_kern_ttb);
 	/*
 	 * Initialize the first available KVA. As kernel image is mapped by
 	 * sections, we are leaving some gap behind.
 	 */
 	virtual_avail = (vm_offset_t)kern_pt2tab + NPG_IN_PT2TAB * PAGE_SIZE;
 }
 
 /*
  *  Setup L2 page table page for given KVA.
  *  Used in pre-bootstrap epoch.
  *
  *  Note that we have allocated NKPT2PG pages for L2 page tables in advance
  *  and used them for mapping KVA starting from KERNBASE. However, this is not
  *  enough. Vectors and devices need L2 page tables too. Note that they are
  *  even above VM_MAX_KERNEL_ADDRESS.
  */
 static __inline vm_paddr_t
 pmap_preboot_pt2pg_setup(vm_offset_t va)
 {
 	pt2_entry_t *pte2p, pte2;
 	vm_paddr_t pt2pg_pa;
 
 	/* Get associated entry in PT2TAB. */
 	pte2p = kern_pt2tab_entry(va);
 
 	/* Just return, if PT2s page exists already. */
 	pte2 = pt2tab_load(pte2p);
 	if (pte2_is_valid(pte2))
 		return (pte2_pa(pte2));
 
 	KASSERT(va >= VM_MAX_KERNEL_ADDRESS,
 	    ("%s: NKPT2PG too small", __func__));
 
 	/*
 	 * Allocate page for PT2s and insert it to PT2TAB.
 	 * In other words, map it into PT2MAP space.
 	 */
 	pt2pg_pa = pmap_preboot_get_pages(1);
 	pt2tab_store(pte2p, PTE2_KPT(pt2pg_pa));
 
 	/* Zero all PT2s in allocated page. */
 	bzero((void*)pt2map_pt2pg(va), PAGE_SIZE);
 	pte2_sync_range((pt2_entry_t *)pt2map_pt2pg(va), PAGE_SIZE);
 
 	return (pt2pg_pa);
 }
 
 /*
  *  Setup L2 page table for given KVA.
  *  Used in pre-bootstrap epoch.
  */
 static void
 pmap_preboot_pt2_setup(vm_offset_t va)
 {
 	pt1_entry_t *pte1p;
 	vm_paddr_t pt2pg_pa, pt2_pa;
 
 	/* Setup PT2's page. */
 	pt2pg_pa = pmap_preboot_pt2pg_setup(va);
 	pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(va));
 
 	/* Insert PT2 to PT1. */
 	pte1p = kern_pte1(va);
 	pte1_store(pte1p, PTE1_LINK(pt2_pa));
 }
 
 /*
  *  Get L2 page entry associated with given KVA.
  *  Used in pre-bootstrap epoch.
  */
 static __inline pt2_entry_t*
 pmap_preboot_vtopte2(vm_offset_t va)
 {
 	pt1_entry_t *pte1p;
 
 	/* Setup PT2 if needed. */
 	pte1p = kern_pte1(va);
 	if (!pte1_is_valid(pte1_load(pte1p))) /* XXX - sections ?! */
 		pmap_preboot_pt2_setup(va);
 
 	return (pt2map_entry(va));
 }
 
 /*
  *  Pre-bootstrap epoch page(s) mapping(s).
  */
 void
 pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num)
 {
 	u_int i;
 	pt2_entry_t *pte2p;
 
 	/* Map all the pages. */
 	for (i = 0; i < num; i++) {
 		pte2p = pmap_preboot_vtopte2(va);
 		pte2_store(pte2p, PTE2_KRW(pa));
 		va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 	}
 }
 
 /*
  *  Pre-bootstrap epoch virtual space allocator.
  */
 vm_offset_t
 pmap_preboot_reserve_pages(u_int num)
 {
 	u_int i;
 	vm_offset_t start, va;
 	pt2_entry_t *pte2p;
 
 	/* Allocate virtual space. */
 	start = va = virtual_avail;
 	virtual_avail += num * PAGE_SIZE;
 
 	/* Zero the mapping. */
 	for (i = 0; i < num; i++) {
 		pte2p = pmap_preboot_vtopte2(va);
 		pte2_store(pte2p, 0);
 		va += PAGE_SIZE;
 	}
 
 	return (start);
 }
 
 /*
  *  Pre-bootstrap epoch page(s) allocation and mapping(s).
  */
 vm_offset_t
 pmap_preboot_get_vpages(u_int num)
 {
 	vm_paddr_t  pa;
 	vm_offset_t va;
 
 	/* Allocate physical page(s). */
 	pa = pmap_preboot_get_pages(num);
 
 	/* Allocate virtual space. */
 	va = virtual_avail;
 	virtual_avail += num * PAGE_SIZE;
 
 	/* Map and zero all. */
 	pmap_preboot_map_pages(pa, va, num);
 	bzero((void *)va, num * PAGE_SIZE);
 
 	return (va);
 }
 
 /*
  *  Pre-bootstrap epoch page mapping(s) with attributes.
  */
 void
 pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size,
     vm_prot_t prot, vm_memattr_t attr)
 {
 	u_int num;
 	u_int l1_attr, l1_prot, l2_prot, l2_attr;
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 
 	l2_prot = prot & VM_PROT_WRITE ? PTE2_AP_KRW : PTE2_AP_KR;
 	l2_prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX;
 	l2_attr = vm_memattr_to_pte2(attr);
 	l1_prot = ATTR_TO_L1(l2_prot);
 	l1_attr = ATTR_TO_L1(l2_attr);
 
 	/* Map all the pages. */
 	num = round_page(size);
 	while (num > 0) {
 		if ((((va | pa) & PTE1_OFFSET) == 0) && (num >= PTE1_SIZE)) {
 			pte1p = kern_pte1(va);
 			pte1_store(pte1p, PTE1_KERN(pa, l1_prot, l1_attr));
 			va += PTE1_SIZE;
 			pa += PTE1_SIZE;
 			num -= PTE1_SIZE;
 		} else {
 			pte2p = pmap_preboot_vtopte2(va);
 			pte2_store(pte2p, PTE2_KERN(pa, l2_prot, l2_attr));
 			va += PAGE_SIZE;
 			pa += PAGE_SIZE;
 			num -= PAGE_SIZE;
 		}
 	}
 }
 
 /*
  *  Extract from the kernel page table the physical address
  *  that is mapped by the given virtual address "va".
  */
 vm_paddr_t
 pmap_kextract(vm_offset_t va)
 {
 	vm_paddr_t pa;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 
 	pte1 = pte1_load(kern_pte1(va));
 	if (pte1_is_section(pte1)) {
 		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
 	} else if (pte1_is_link(pte1)) {
 		/*
 		 * We should beware of concurrent promotion that changes
 		 * pte1 at this point. However, it's not a problem, as the PT2
 		 * page is preserved by the promotion in PT2TAB. So even if
 		 * it happens, using PT2MAP is still safe.
 		 *
 		 * QQQ: However, concurrent removal is a problem which
 		 *      ends in an abort on the PT2MAP space. Locking must be
 		 *      used to deal with this.
 		 */
 		pte2 = pte2_load(pt2map_entry(va));
 		pa = pte2_pa(pte2) | (va & PTE2_OFFSET);
 	} else {
 		panic("%s: va %#x pte1 %#x", __func__, va, pte1);
 	}
 	return (pa);
 }
 
 /*
  *  Extract from the kernel page table the physical address
  *  that is mapped by the given virtual address "va". Also
  *  return L2 page table entry which maps the address.
  *
  *  This is only intended to be used for panic dumps.
  */
 vm_paddr_t
 pmap_dump_kextract(vm_offset_t va, pt2_entry_t *pte2p)
 {
 	vm_paddr_t pa;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 
 	pte1 = pte1_load(kern_pte1(va));
 	if (pte1_is_section(pte1)) {
 		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
 		pte2 = pa | ATTR_TO_L2(pte1) | PTE2_V;
 	} else if (pte1_is_link(pte1)) {
 		pte2 = pte2_load(pt2map_entry(va));
 		pa = pte2_pa(pte2);
 	} else {
 		pte2 = 0;
 		pa = 0;
 	}
 	if (pte2p != NULL)
 		*pte2p = pte2;
 	return (pa);
 }
 
 /*****************************************************************************
  *
  *	PMAP second stage initialization and utility functions
  *	for bootstrap epoch.
  *
  *  After pmap_bootstrap() is called, the following functions for
  *  mappings can be used:
  *
  *  void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
  *  void pmap_kremove(vm_offset_t va);
  *  vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end,
  *      int prot);
  *
  *  NOTE: This is not an SMP-coherent stage, and physical page allocation is
  *        not allowed during this stage.
  *
  *****************************************************************************/
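 
 /*
  * Illustrative sketch only (hypothetical code, not part of this file): a
  * typical bootstrap-epoch use of pmap_map(), mapping an already known
  * physical range into KVA starting at virtual_avail.  The helper name is
  * made up; the sketch is kept under "#if 0".
  */
 #if 0
 static vm_offset_t
 example_bootstrap_map_range(vm_paddr_t start, vm_paddr_t end)
 {
 	vm_offset_t va;
 
 	/* pmap_map() advances virtual_avail past the new mapping. */
 	va = pmap_map(&virtual_avail, start, end,
 	    VM_PROT_READ | VM_PROT_WRITE);
 	return (va);
 }
 #endif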
 
 /*
  *  Initialize kernel PMAP locks and lists, kernel_pmap itself, and
  *  reserve various virtual spaces for temporary mappings.
  */
 void
 pmap_bootstrap(vm_offset_t firstaddr)
 {
 	pt2_entry_t *unused __unused;
 	struct pcpu *pc;
 
 	/*
 	 * Initialize the kernel pmap (which is statically allocated).
 	 */
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_l1pa = (vm_paddr_t)kern_pt1;  /* for libkvm */
 	kernel_pmap->pm_pt1 = kern_pt1;
 	kernel_pmap->pm_pt2tab = kern_pt2tab;
 	CPU_FILL(&kernel_pmap->pm_active);  /* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 
 	/*
 	 * Initialize the global pv list lock.
 	 */
 	rw_init(&pvh_global_lock, "pmap pv global");
 
 	LIST_INIT(&allpmaps);
 
 	/*
 	 * Request a spin mutex so that changes to allpmaps cannot be
 	 * preempted by smp_rendezvous_cpus().
 	 */
 	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 
 	/*
 	 * Reserve some special page table entries/VA space for temporary
 	 * mapping of pages.
 	 */
 #define	SYSMAP(c, p, v, n)  do {		\
 	v = (c)pmap_preboot_reserve_pages(n);	\
 	p = pt2map_entry((vm_offset_t)v);	\
 	} while (0)
 
 	/*
 	 * Local CMAP1/CMAP2 are used for zeroing and copying pages.
 	 * Local CMAP2 is also used for data cache cleaning.
 	 */
 	pc = get_pcpu();
 	mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF);
 	SYSMAP(caddr_t, pc->pc_cmap1_pte2p, pc->pc_cmap1_addr, 1);
 	SYSMAP(caddr_t, pc->pc_cmap2_pte2p, pc->pc_cmap2_addr, 1);
 	SYSMAP(vm_offset_t, pc->pc_qmap_pte2p, pc->pc_qmap_addr, 1);
 
 	/*
 	 * Crashdump maps.
 	 */
 	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS);
 
 	/*
 	 * _tmppt is used for reading arbitrary physical pages via /dev/mem.
 	 */
 	SYSMAP(caddr_t, unused, _tmppt, 1);
 
 	/*
 	 * PADDR1 and PADDR2 are used by pmap_pte2_quick() and pmap_pte2(),
 	 * respectively. PADDR3 is used by pmap_pte2_ddb().
 	 */
 	SYSMAP(pt2_entry_t *, PMAP1, PADDR1, 1);
 	SYSMAP(pt2_entry_t *, PMAP2, PADDR2, 1);
 #ifdef DDB
 	SYSMAP(pt2_entry_t *, PMAP3, PADDR3, 1);
 #endif
 	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
 
 	/*
 	 * Note that very shortly, in initarm(), we are going to initialize
 	 * the phys_avail[] array, and no further page allocation can happen
 	 * after that until the vm subsystem is initialized.
 	 */
 	kernel_vm_end_new = kernel_vm_end;
 	virtual_end = vm_max_kernel_address;
 }
 
 static void
 pmap_init_reserved_pages(void)
 {
 	struct pcpu *pc;
 	vm_offset_t pages;
 	int i;
 
 	CPU_FOREACH(i) {
 		pc = pcpu_find(i);
 		/*
 		 * Skip if the mapping has already been initialized,
 		 * i.e. this is the BSP.
 		 */
 		if (pc->pc_cmap1_addr != 0)
 			continue;
 		mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF);
 		pages = kva_alloc(PAGE_SIZE * 3);
 		if (pages == 0)
 			panic("%s: unable to allocate KVA", __func__);
 		pc->pc_cmap1_pte2p = pt2map_entry(pages);
 		pc->pc_cmap2_pte2p = pt2map_entry(pages + PAGE_SIZE);
 		pc->pc_qmap_pte2p = pt2map_entry(pages + (PAGE_SIZE * 2));
 		pc->pc_cmap1_addr = (caddr_t)pages;
 		pc->pc_cmap2_addr = (caddr_t)(pages + PAGE_SIZE);
 		pc->pc_qmap_addr = pages + (PAGE_SIZE * 2);
 	}
 }
 SYSINIT(rpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_reserved_pages, NULL);
 
 /*
  *  This function can already be used in the second initialization stage.
  *  As such, it DOES NOT call pmap_growkernel(), where PT2 allocation can
  *  happen. So if it is used, be sure that the PT2 for the given virtual
  *  address is already allocated!
  *
  *  Add a wired page to the kva.
  *  Note: not SMP coherent.
  */
 static __inline void
 pmap_kenter_prot_attr(vm_offset_t va, vm_paddr_t pa, uint32_t prot,
     uint32_t attr)
 {
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 
 	pte1p = kern_pte1(va);
 	if (!pte1_is_valid(pte1_load(pte1p))) { /* XXX - sections ?! */
 		/*
 		 * This is a very low level function, so the PT2 and
 		 * particularly the PT2PG associated with the given virtual
 		 * address must already be allocated. It's a pain mainly
 		 * during the pmap initialization stage. However, calling it
 		 * after pmap initialization with a virtual address that is
 		 * not below kernel_vm_end will lead to the same misery.
 		 */
 		if (!pte2_is_valid(pte2_load(kern_pt2tab_entry(va))))
 			panic("%s: kernel PT2 not allocated!", __func__);
 	}
 
 	pte2p = pt2map_entry(va);
 	pte2_store(pte2p, PTE2_KERN(pa, prot, attr));
 }
 
 PMAP_INLINE void
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
 
 	pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT);
 }
 
 /*
  *  Remove a page from the kernel pagetables.
  *  Note: not SMP coherent.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 
 	pte1p = kern_pte1(va);
 	if (pte1_is_section(pte1_load(pte1p))) {
 		pte1_clear(pte1p);
 	} else {
 		pte2p = pt2map_entry(va);
 		pte2_clear(pte2p);
 	}
 }
 
 /*
  *  Share new kernel PT2PG with all pmaps.
  *  The caller is responsible for maintaining TLB consistency.
  */
 static void
 pmap_kenter_pt2tab(vm_offset_t va, pt2_entry_t npte2)
 {
 	pmap_t pmap;
 	pt2_entry_t *pte2p;
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		pte2p = pmap_pt2tab_entry(pmap, va);
 		pt2tab_store(pte2p, npte2);
 	}
 	mtx_unlock_spin(&allpmaps_lock);
 }
 
 /*
  *  Share new kernel PTE1 with all pmaps.
  *  The caller is responsible for maintaining TLB consistency.
  */
 static void
 pmap_kenter_pte1(vm_offset_t va, pt1_entry_t npte1)
 {
 	pmap_t pmap;
 	pt1_entry_t *pte1p;
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		pte1p = pmap_pte1(pmap, va);
 		pte1_store(pte1p, npte1);
 	}
 	mtx_unlock_spin(&allpmaps_lock);
 }
 
 /*
  *  Used to map a range of physical addresses into kernel
  *  virtual address space.
  *
  *  The value passed in '*virt' is a suggested virtual address for
  *  the mapping. Architectures which can support a direct-mapped
  *  physical to virtual region can return the appropriate address
  *  within that region, leaving '*virt' unchanged. Other
  *  architectures should map the pages starting at '*virt' and
  *  update '*virt' with the first usable address after the mapped
  *  region.
  *
  *  NOTE: Read the comments above pmap_kenter_prot_attr() as
  *        the function is used herein!
  */
 vm_offset_t
 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 {
 	vm_offset_t va, sva;
 	vm_paddr_t pte1_offset;
 	pt1_entry_t npte1;
 	uint32_t l1prot, l2prot;
 	uint32_t l1attr, l2attr;
 
 	PDEBUG(1, printf("%s: virt = %#x, start = %#x, end = %#x (size = %#x),"
 	    " prot = %d\n", __func__, *virt, start, end, end - start,  prot));
 
 	l2prot = (prot & VM_PROT_WRITE) ? PTE2_AP_KRW : PTE2_AP_KR;
 	l2prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX;
 	l1prot = ATTR_TO_L1(l2prot);
 
 	l2attr = PTE2_ATTR_DEFAULT;
 	l1attr = ATTR_TO_L1(l2attr);
 
 	va = *virt;
 	/*
 	 * Does the physical address range's size and alignment permit at
 	 * least one section mapping to be created?
 	 */
 	pte1_offset = start & PTE1_OFFSET;
 	if ((end - start) - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) >=
 	    PTE1_SIZE) {
 		/*
 		 * Increase the starting virtual address so that its alignment
 		 * does not preclude the use of section mappings.
 		 */
 		if ((va & PTE1_OFFSET) < pte1_offset)
 			va = pte1_trunc(va) + pte1_offset;
 		else if ((va & PTE1_OFFSET) > pte1_offset)
 			va = pte1_roundup(va) + pte1_offset;
 	}
 	sva = va;
 	while (start < end) {
 		if ((start & PTE1_OFFSET) == 0 && end - start >= PTE1_SIZE) {
 			KASSERT((va & PTE1_OFFSET) == 0,
 			    ("%s: misaligned va %#x", __func__, va));
 			npte1 = PTE1_KERN(start, l1prot, l1attr);
 			pmap_kenter_pte1(va, npte1);
 			va += PTE1_SIZE;
 			start += PTE1_SIZE;
 		} else {
 			pmap_kenter_prot_attr(va, start, l2prot, l2attr);
 			va += PAGE_SIZE;
 			start += PAGE_SIZE;
 		}
 	}
 	tlb_flush_range(sva, va - sva);
 	*virt = va;
 	return (sva);
 }
 
 /*
  *  Make a temporary mapping for a physical address.
  *  This is only intended to be used for panic dumps.
  */
 void *
 pmap_kenter_temporary(vm_paddr_t pa, int i)
 {
 	vm_offset_t va;
 
 	/* QQQ: 'i' should be less than or equal to MAXDUMPPGS. */
 
 	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 	pmap_kenter(va, pa);
 	tlb_flush_local(va);
 	return ((void *)crashdumpmap);
 }
 
 
 /*************************************
  *
  *  TLB & cache maintenance routines.
  *
  *************************************/
 
 /*
  *  We inline these within pmap.c for speed.
  */
 PMAP_INLINE void
 pmap_tlb_flush(pmap_t pmap, vm_offset_t va)
 {
 
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		tlb_flush(va);
 }
 
 PMAP_INLINE void
 pmap_tlb_flush_range(pmap_t pmap, vm_offset_t sva, vm_size_t size)
 {
 
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		tlb_flush_range(sva, size);
 }
 
 /*
  *  Abuse the pte2 nodes for unmapped kva to thread a kva freelist through.
  *  Requirements:
  *   - Must deal with pages in order to ensure that none of the PTE2_* bits
  *     are ever set, PTE2_V in particular.
  *   - Assumes we can write to pte2s without pte2_store() atomic ops.
  *   - Assumes nothing will ever test these addresses for 0 to indicate
  *     no mapping instead of correctly checking PTE2_V.
  *   - Assumes a vm_offset_t will fit in a pte2 (true for arm).
  *  Because PTE2_V is never set, there can be no mappings to invalidate.
  */
 static vm_offset_t
 pmap_pte2list_alloc(vm_offset_t *head)
 {
 	pt2_entry_t *pte2p;
 	vm_offset_t va;
 
 	va = *head;
 	if (va == 0)
 		panic("pmap_ptelist_alloc: exhausted ptelist KVA");
 	pte2p = pt2map_entry(va);
 	*head = *pte2p;
 	if (*head & PTE2_V)
 		panic("%s: va with PTE2_V set!", __func__);
 	*pte2p = 0;
 	return (va);
 }
 
 static void
 pmap_pte2list_free(vm_offset_t *head, vm_offset_t va)
 {
 	pt2_entry_t *pte2p;
 
 	if (va & PTE2_V)
 		panic("%s: freeing va with PTE2_V set!", __func__);
 	pte2p = pt2map_entry(va);
 	*pte2p = *head;		/* virtual! PTE2_V is 0 though */
 	*head = va;
 }
 
 static void
 pmap_pte2list_init(vm_offset_t *head, void *base, int npages)
 {
 	int i;
 	vm_offset_t va;
 
 	*head = 0;
 	for (i = npages - 1; i >= 0; i--) {
 		va = (vm_offset_t)base + i * PAGE_SIZE;
 		pmap_pte2list_free(head, va);
 	}
 }
 
 /*****************************************************************************
  *
  *	PMAP third and final stage initialization.
  *
  *  After pmap_init() is called, PMAP subsystem is fully initialized.
  *
  *****************************************************************************/
 
 SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
     "Max number of PV entries");
 SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
     "Page share factor per proc");
 
 static u_long nkpt2pg = NKPT2PG;
 SYSCTL_ULONG(_vm_pmap, OID_AUTO, nkpt2pg, CTLFLAG_RD,
     &nkpt2pg, 0, "Pre-allocated pages for kernel PT2s");
 
 static int sp_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &sp_enabled, 0, "Are large page mappings enabled?");
 
 bool
 pmap_ps_enabled(pmap_t pmap __unused)
 {
 
 	return (sp_enabled != 0);
 }
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, pte1, CTLFLAG_RD, 0,
     "1MB page mapping counters");
 
 static u_long pmap_pte1_demotions;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, demotions, CTLFLAG_RD,
     &pmap_pte1_demotions, 0, "1MB page demotions");
 
 static u_long pmap_pte1_mappings;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, mappings, CTLFLAG_RD,
     &pmap_pte1_mappings, 0, "1MB page mappings");
 
 static u_long pmap_pte1_p_failures;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, p_failures, CTLFLAG_RD,
     &pmap_pte1_p_failures, 0, "1MB page promotion failures");
 
 static u_long pmap_pte1_promotions;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, promotions, CTLFLAG_RD,
     &pmap_pte1_promotions, 0, "1MB page promotions");
 
 static u_long pmap_pte1_kern_demotions;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_demotions, CTLFLAG_RD,
     &pmap_pte1_kern_demotions, 0, "1MB page kernel demotions");
 
 static u_long pmap_pte1_kern_promotions;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_promotions, CTLFLAG_RD,
     &pmap_pte1_kern_promotions, 0, "1MB page kernel promotions");
 
 static __inline ttb_entry_t
 pmap_ttb_get(pmap_t pmap)
 {
 
 	return (vtophys(pmap->pm_pt1) | ttb_flags);
 }
 
 /*
  *  Initialize a vm_page's machine-dependent fields.
  *
  *  Variations:
  *  1. Pages for L2 page tables are never managed, so pv_list and
  *     pt2_wirecount can share the same physical space. However, proper
  *     initialization on page allocation for page tables and reinitialization
  *     on page free must be ensured.
  */
 void
 pmap_page_init(vm_page_t m)
 {
 
 	TAILQ_INIT(&m->md.pv_list);
 	pt2_wirecount_init(m);
 	m->md.pat_mode = VM_MEMATTR_DEFAULT;
 }
 
 /*
  *  Virtualization hook for a faster way to zero a whole page.
  */
 static __inline void
 pagezero(void *page)
 {
 
 	bzero(page, PAGE_SIZE);
 }
 
 /*
  *  Zero L2 page table page.
  *  Use same KVA as in pmap_zero_page().
  */
 static __inline vm_paddr_t
 pmap_pt2pg_zero(vm_page_t m)
 {
 	pt2_entry_t *cmap2_pte2p;
 	vm_paddr_t pa;
 	struct pcpu *pc;
 
 	pa = VM_PAGE_TO_PHYS(m);
 
 	/*
 	 * XXX: For now, we map whole page even if it's already zero,
 	 *      to sync it even if the sync is only DSB.
 	 */
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	/*  Even VM_ALLOC_ZERO request is only advisory. */
 	if ((m->flags & PG_ZERO) == 0)
 		pagezero(pc->pc_cmap2_addr);
 	pte2_sync_range((pt2_entry_t *)pc->pc_cmap2_addr, PAGE_SIZE);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 
 	/*
 	 * Unpin the thread before releasing the lock.  Otherwise the thread
 	 * could be rescheduled while still bound to the current CPU, only
 	 * to unpin itself immediately upon resuming execution.
 	 */
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 
 	return (pa);
 }
 
 /*
  *  Initialize a just-allocated page as an L2 page table(s) holder
  *  and return its physical address.
  */
 static __inline vm_paddr_t
 pmap_pt2pg_init(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	vm_paddr_t pa;
 	pt2_entry_t *pte2p;
 
 	/* Check page attributes. */
 	if (m->md.pat_mode != pt_memattr)
 		pmap_page_set_memattr(m, pt_memattr);
 
 	/* Zero page and init wire counts. */
 	pa = pmap_pt2pg_zero(m);
 	pt2_wirecount_init(m);
 
 	/*
 	 * Map page to PT2MAP address space for given pmap.
 	 * Note that PT2MAP space is shared with all pmaps.
 	 */
 	if (pmap == kernel_pmap)
 		pmap_kenter_pt2tab(va, PTE2_KPT(pa));
 	else {
 		pte2p = pmap_pt2tab_entry(pmap, va);
 		pt2tab_store(pte2p, PTE2_KPT_NG(pa));
 	}
 
 	return (pa);
 }
 
 /*
  *  Initialize the pmap module.
  *  Called by vm_init, to initialize any structures that the pmap
  *  system needs to map virtual memory.
  */
 void
 pmap_init(void)
 {
 	vm_size_t s;
 	pt2_entry_t *pte2p, pte2;
 	u_int i, pte1_idx, pv_npg;
 
 	PDEBUG(1, printf("%s: phys_start = %#x\n", __func__, PHYSADDR));
 
 	/*
 	 * Initialize the vm page array entries for kernel pmap's
 	 * L2 page table pages allocated in advance.
 	 */
 	pte1_idx = pte1_index(KERNBASE - PT2MAP_SIZE);
 	pte2p = kern_pt2tab_entry(KERNBASE - PT2MAP_SIZE);
 	for (i = 0; i < nkpt2pg + NPG_IN_PT2TAB; i++, pte2p++) {
 		vm_paddr_t pa;
 		vm_page_t m;
 
 		pte2 = pte2_load(pte2p);
 		KASSERT(pte2_is_valid(pte2), ("%s: no valid entry", __func__));
 
 		pa = pte2_pa(pte2);
 		m = PHYS_TO_VM_PAGE(pa);
 		KASSERT(m >= vm_page_array &&
 		    m < &vm_page_array[vm_page_array_size],
 		    ("%s: L2 page table page is out of range", __func__));
 
 		m->pindex = pte1_idx;
 		m->phys_addr = pa;
 		pte1_idx += NPT2_IN_PG;
 	}
 
 	/*
 	 * Initialize the address space (zone) for the pv entries.  Set a
 	 * high water mark so that the system can recover from excessive
 	 * numbers of pv entries.
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
 	pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
 	pv_entry_max = roundup(pv_entry_max, _NPCPV);
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
 	 * Are large page mappings enabled?
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled);
 	if (sp_enabled) {
 		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
 		    ("%s: can't assign to pagesizes[1]", __func__));
 		pagesizes[1] = PTE1_SIZE;
 	}
 
 	/*
 	 * Calculate the size of the pv head table for sections.
 	 * Handle the possibility that "vm_phys_segs[...].end" is zero.
 	 * Note that the table is only for sections which could be promoted.
 	 */
 	first_managed_pa = pte1_trunc(vm_phys_segs[0].start);
 	pv_npg = (pte1_trunc(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE)
 	    - first_managed_pa) / PTE1_SIZE + 1;
 
 	/*
 	 * Allocate memory for the pv head table for sections.
 	 */
 	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
 	s = round_page(s);
 	pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
 
 	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
 	pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
 	if (pv_chunkbase == NULL)
 		panic("%s: not enough kvm for pv chunks", __func__);
 	pmap_pte2list_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
 }
 
 /*
  *  Add a list of wired pages to the kva.
  *  This routine is only used for temporary
  *  kernel mappings that do not need to have
  *  page modification or references recorded.
  *  Note that old mappings are simply written
  *  over.  The page *must* be wired.
  *  Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
 	u_int anychanged;
 	pt2_entry_t *epte2p, *pte2p, pte2;
 	vm_page_t m;
 	vm_paddr_t pa;
 
 	anychanged = 0;
 	pte2p = pt2map_entry(sva);
 	epte2p = pte2p + count;
 	while (pte2p < epte2p) {
 		m = *ma++;
 		pa = VM_PAGE_TO_PHYS(m);
 		pte2 = pte2_load(pte2p);
 		if ((pte2_pa(pte2) != pa) ||
 		    (pte2_attr(pte2) != vm_page_pte2_attr(m))) {
 			anychanged++;
 			pte2_store(pte2p, PTE2_KERN(pa, PTE2_AP_KRW,
 			    vm_page_pte2_attr(m)));
 		}
 		pte2p++;
 	}
 	if (__predict_false(anychanged))
 		tlb_flush_range(sva, count * PAGE_SIZE);
 }
 
 /*
  *  This routine tears out page mappings from the
  *  kernel -- it is meant only for temporary mappings.
  *  Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
 	vm_offset_t va;
 
 	va = sva;
 	while (count-- > 0) {
 		pmap_kremove(va);
 		va += PAGE_SIZE;
 	}
 	tlb_flush_range(sva, va - sva);
 }
 
 /*
  *  Are we current address space or kernel?
  */
 static __inline int
 pmap_is_current(pmap_t pmap)
 {
 
 	return (pmap == kernel_pmap ||
 		(pmap == vmspace_pmap(curthread->td_proc->p_vmspace)));
 }
 
 /*
  *  If the given pmap is not the current or kernel pmap, the returned
  *  pte2 must be released by passing it to pmap_pte2_release().
  */
 static pt2_entry_t *
 pmap_pte2(pmap_t pmap, vm_offset_t va)
 {
 	pt1_entry_t pte1;
 	vm_paddr_t pt2pg_pa;
 
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (pte1_is_section(pte1))
 		panic("%s: attempt to map PTE1", __func__);
 	if (pte1_is_link(pte1)) {
 		/* Are we current address space or kernel? */
 		if (pmap_is_current(pmap))
 			return (pt2map_entry(va));
 		/* Note that L2 page table size is not equal to PAGE_SIZE. */
 		pt2pg_pa = trunc_page(pte1_link_pa(pte1));
 		mtx_lock(&PMAP2mutex);
 		if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) {
 			pte2_store(PMAP2, PTE2_KPT(pt2pg_pa));
 			tlb_flush((vm_offset_t)PADDR2);
 		}
 		return (PADDR2 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
 	}
 	return (NULL);
 }
 
 /*
  *  Releases a pte2 that was obtained from pmap_pte2().
  *  Be prepared for the pte2p being NULL.
  */
 static __inline void
 pmap_pte2_release(pt2_entry_t *pte2p)
 {
 
 	if ((pt2_entry_t *)(trunc_page((vm_offset_t)pte2p)) == PADDR2) {
 		mtx_unlock(&PMAP2mutex);
 	}
 }
 
 /*
  *  Super fast pmap_pte2 routine best used when scanning
  *  the pv lists.  This eliminates many coarse-grained
  *  invltlb calls.  Note that many of the pv list
  *  scans are across different pmaps.  It is very wasteful
  *  to do an entire tlb flush for checking a single mapping.
  *
  *  If the given pmap is not the current pmap, pvh_global_lock
  *  must be held and curthread pinned to a CPU.
  */
 static pt2_entry_t *
 pmap_pte2_quick(pmap_t pmap, vm_offset_t va)
 {
 	pt1_entry_t pte1;
 	vm_paddr_t pt2pg_pa;
 
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (pte1_is_section(pte1))
 		panic("%s: attempt to map PTE1", __func__);
 	if (pte1_is_link(pte1)) {
 		/* Are we current address space or kernel? */
 		if (pmap_is_current(pmap))
 			return (pt2map_entry(va));
 		rw_assert(&pvh_global_lock, RA_WLOCKED);
 		KASSERT(curthread->td_pinned > 0,
 		    ("%s: curthread not pinned", __func__));
 		/* Note that L2 page table size is not equal to PAGE_SIZE. */
 		pt2pg_pa = trunc_page(pte1_link_pa(pte1));
 		if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) {
 			pte2_store(PMAP1, PTE2_KPT(pt2pg_pa));
 #ifdef SMP
 			PMAP1cpu = PCPU_GET(cpuid);
 #endif
 			tlb_flush_local((vm_offset_t)PADDR1);
 			PMAP1changed++;
 		} else
 #ifdef SMP
 		if (PMAP1cpu != PCPU_GET(cpuid)) {
 			PMAP1cpu = PCPU_GET(cpuid);
 			tlb_flush_local((vm_offset_t)PADDR1);
 			PMAP1changedcpu++;
 		} else
 #endif
 			PMAP1unchanged++;
 		return (PADDR1 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
 	}
 	return (NULL);
 }
 
 /*
  *  Routine: pmap_extract
  *  Function:
  * 	Extract the physical page address associated
  *	with the given map/virtual_address pair.
  */
 vm_paddr_t
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
 	vm_paddr_t pa;
 	pt1_entry_t pte1;
 	pt2_entry_t *pte2p;
 
 	PMAP_LOCK(pmap);
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (pte1_is_section(pte1))
 		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
 	else if (pte1_is_link(pte1)) {
 		pte2p = pmap_pte2(pmap, va);
 		pa = pte2_pa(pte2_load(pte2p)) | (va & PTE2_OFFSET);
 		pmap_pte2_release(pte2p);
 	} else
 		pa = 0;
 	PMAP_UNLOCK(pmap);
 	return (pa);
 }
 
 /*
  *  Routine: pmap_extract_and_hold
  *  Function:
  *	Atomically extract and hold the physical page
  *	with the given pmap and virtual address pair
  *	if that mapping permits the given protection.
  */
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
 	vm_paddr_t pa, lockpa;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2, *pte2p;
 	vm_page_t m;
 
 	lockpa = 0;
 	m = NULL;
 	PMAP_LOCK(pmap);
 retry:
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (pte1_is_section(pte1)) {
 		if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) {
 			pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
 			if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
 				goto retry;
 			m = PHYS_TO_VM_PAGE(pa);
 			vm_page_hold(m);
 		}
 	} else if (pte1_is_link(pte1)) {
 		pte2p = pmap_pte2(pmap, va);
 		pte2 = pte2_load(pte2p);
 		pmap_pte2_release(pte2p);
 		if (pte2_is_valid(pte2) &&
 		    (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) {
 			pa = pte2_pa(pte2);
 			if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
 				goto retry;
 			m = PHYS_TO_VM_PAGE(pa);
 			vm_page_hold(m);
 		}
 	}
 	PA_UNLOCK_COND(lockpa);
 	PMAP_UNLOCK(pmap);
 	return (m);
 }
 
 /*
  *  Grow the number of kernel L2 page table entries, if needed.
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	vm_page_t m;
 	vm_paddr_t pt2pg_pa, pt2_pa;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 
 	PDEBUG(1, printf("%s: addr = %#x\n", __func__, addr));
 	/*
 	 * At all times kernel_vm_end is the first KVA for which the underlying
 	 * L2 page table is either not allocated or not linked from the L1 page
 	 * table (not considering sections). There are two possible exceptions:
 	 *
 	 *   (1) in the very beginning, as long as pmap_growkernel() has
 	 *       not been called, it could be the first unused KVA (which is
 	 *       not rounded up to PTE1_SIZE),
 	 *
 	 *   (2) when all KVA space is mapped and the kernel_map->max_offset
 	 *       address is not rounded up to PTE1_SIZE. (For example,
 	 *       it could be 0xFFFFFFFF.)
 	 */
 	kernel_vm_end = pte1_roundup(kernel_vm_end);
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 	addr = roundup2(addr, PTE1_SIZE);
 	if (addr - 1 >= kernel_map->max_offset)
 		addr = kernel_map->max_offset;
 	while (kernel_vm_end < addr) {
 		pte1 = pte1_load(kern_pte1(kernel_vm_end));
 		if (pte1_is_valid(pte1)) {
 			kernel_vm_end += PTE1_SIZE;
 			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 				kernel_vm_end = kernel_map->max_offset;
 				break;
 			}
 			continue;
 		}
 
 		/*
 		 * kernel_vm_end_new is used in pmap_pinit() when kernel
 		 * mappings are entered into a new pmap all at once, to avoid
 		 * a race between pmap_kenter_pte1() and the kernel_vm_end
 		 * increase. The same applies to pmap_kenter_pt2tab().
 		 */
 		kernel_vm_end_new = kernel_vm_end + PTE1_SIZE;
 
 		pte2 = pt2tab_load(kern_pt2tab_entry(kernel_vm_end));
 		if (!pte2_is_valid(pte2)) {
 			/*
 			 * Install new PT2s page into kernel PT2TAB.
 			 */
 			m = vm_page_alloc(NULL,
 			    pte1_index(kernel_vm_end) & ~PT2PG_MASK,
 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 			if (m == NULL)
 				panic("%s: no memory to grow kernel", __func__);
 			/*
 			 * QQQ: Linking all new L2 page tables from the L1
 			 *      page table now, and thus pmap_kenter_pte1()ing
 			 *      them at once together with
 			 *      pmap_kenter_pt2tab(), could be a nice speedup.
 			 *      However, pmap_growkernel() does not happen
 			 *      that often...
 			 * QQQ: The other TTBR is another option.
 			 */
 			pt2pg_pa = pmap_pt2pg_init(kernel_pmap, kernel_vm_end,
 			    m);
 		} else
 			pt2pg_pa = pte2_pa(pte2);
 
 		pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(kernel_vm_end));
 		pmap_kenter_pte1(kernel_vm_end, PTE1_LINK(pt2_pa));
 
 		kernel_vm_end = kernel_vm_end_new;
 		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 			kernel_vm_end = kernel_map->max_offset;
 			break;
 		}
 	}
 }
 
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long ksize = vm_max_kernel_address - KERNBASE;
 
 	return (sysctl_handle_long(oidp, &ksize, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
     0, 0, kvm_size, "IU", "Size of KVM");
 
 static int
 kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long kfree = vm_max_kernel_address - kernel_vm_end;
 
 	return (sysctl_handle_long(oidp, &kfree, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
     0, 0, kvm_free, "IU", "Amount of KVM free");
 
 /***********************************************
  *
  *  Pmap allocation/deallocation routines.
  *
  ***********************************************/
 
 /*
  *  Initialize the pmap for the swapper process.
  */
 void
 pmap_pinit0(pmap_t pmap)
 {
 	PDEBUG(1, printf("%s: pmap = %p\n", __func__, pmap));
 
 	PMAP_LOCK_INIT(pmap);
 
 	/*
 	 * The kernel page table directory and the pmap machinery around it
 	 * are already initialized; we are using them right here and now.
 	 * So, only finish the PMAP structure initialization for process0 ...
 	 *
 	 * Since the L1 page table and PT2TAB are shared with the kernel pmap,
 	 * which is already included in the list "allpmaps", this pmap does
 	 * not need to be inserted into that list.
 	 */
 	pmap->pm_pt1 = kern_pt1;
 	pmap->pm_pt2tab = kern_pt2tab;
 	CPU_ZERO(&pmap->pm_active);
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 	CPU_SET(0, &pmap->pm_active);
 }
 
 static __inline void
 pte1_copy_nosync(pt1_entry_t *spte1p, pt1_entry_t *dpte1p, vm_offset_t sva,
     vm_offset_t eva)
 {
 	u_int idx, count;
 
 	idx = pte1_index(sva);
 	count = (pte1_index(eva) - idx + 1) * sizeof(pt1_entry_t);
 	bcopy(spte1p + idx, dpte1p + idx, count);
 }
 
 static __inline void
 pt2tab_copy_nosync(pt2_entry_t *spte2p, pt2_entry_t *dpte2p, vm_offset_t sva,
     vm_offset_t eva)
 {
 	u_int idx, count;
 
 	idx = pt2tab_index(sva);
 	count = (pt2tab_index(eva) - idx + 1) * sizeof(pt2_entry_t);
 	bcopy(spte2p + idx, dpte2p + idx, count);
 }
 
 /*
  *  Initialize a preallocated and zeroed pmap structure,
  *  such as one in a vmspace structure.
  */
 int
 pmap_pinit(pmap_t pmap)
 {
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 	vm_paddr_t pa, pt2tab_pa;
 	u_int i;
 
 	PDEBUG(6, printf("%s: pmap = %p, pm_pt1 = %p\n", __func__, pmap,
 	    pmap->pm_pt1));
 
 	/*
 	 * No need to allocate L2 page table space yet but we do need
 	 * a valid L1 page table and PT2TAB table.
 	 *
 	 * Install shared kernel mappings to these tables. It's a little
 	 * tricky as some parts of KVA are reserved for vectors, devices,
 	 * and whatever else. These parts are supposed to be above
 	 * vm_max_kernel_address. Thus two regions should be installed:
 	 *
 	 *   (1) <KERNBASE, kernel_vm_end),
 	 *   (2) <vm_max_kernel_address, 0xFFFFFFFF>.
 	 *
 	 * QQQ: The second region should be stable enough to be installed
 	 *      only once in time when the tables are allocated.
 	 * QQQ: Maybe copy of both regions at once could be faster ...
 	 * QQQ: Maybe the other TTBR is an option.
 	 *
 	 * Finally, install own PT2TAB table to these tables.
 	 */
 
 	if (pmap->pm_pt1 == NULL) {
 		pmap->pm_pt1 = (pt1_entry_t *)kmem_alloc_contig(NB_IN_PT1,
 		    M_NOWAIT | M_ZERO, 0, -1UL, NB_IN_PT1, 0, pt_memattr);
 		if (pmap->pm_pt1 == NULL)
 			return (0);
 	}
 	if (pmap->pm_pt2tab == NULL) {
 		/*
 		 * QQQ: (1) PT2TAB must be contiguous. If PT2TAB is only one
 		 *      page, which should be the only size on 32-bit systems,
 		 *      then we could allocate it with vm_page_alloc() and all
 		 *      the related machinery as for other L2 page table pages.
 		 *      (2) Note that a process PT2TAB is a special L2 page
 		 *      table page. Its mapping in kernel_arena is permanent
 		 *      and can be used no matter which process is current.
 		 *      Its mapping in PT2MAP can be used only for the current
 		 *      process.
 		 */
 		pmap->pm_pt2tab = (pt2_entry_t *)kmem_alloc_attr(NB_IN_PT2TAB,
 		    M_NOWAIT | M_ZERO, 0, -1UL, pt_memattr);
 		if (pmap->pm_pt2tab == NULL) {
 			/*
 			 * QQQ: As struct pmap is allocated from UMA with
 			 *      UMA_ZONE_NOFREE flag, it's important to leave
 			 *      no allocation in pmap if initialization failed.
 			 */
-			kmem_free(kernel_arena, (vm_offset_t)pmap->pm_pt1,
-			    NB_IN_PT1);
+			kmem_free((vm_offset_t)pmap->pm_pt1, NB_IN_PT1);
 			pmap->pm_pt1 = NULL;
 			return (0);
 		}
 		/*
 		 * QQQ: Each L2 page table page vm_page_t has its pindex set
 		 *      to the pte1 index of the virtual address mapped by
 		 *      that page. This is not valid for non-kernel PT2TABs
 		 *      themselves. The pindex of these pages cannot be
 		 *      altered because of the way they are allocated now.
 		 *      However, it should not be a problem.
 		 */
 	}
 
 	mtx_lock_spin(&allpmaps_lock);
 	/*
 	 * To avoid race with pmap_kenter_pte1() and pmap_kenter_pt2tab(),
 	 * kernel_vm_end_new is used here instead of kernel_vm_end.
 	 */
 	pte1_copy_nosync(kern_pt1, pmap->pm_pt1, KERNBASE,
 	    kernel_vm_end_new - 1);
 	pte1_copy_nosync(kern_pt1, pmap->pm_pt1, vm_max_kernel_address,
 	    0xFFFFFFFF);
 	pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, KERNBASE,
 	    kernel_vm_end_new - 1);
 	pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, vm_max_kernel_address,
 	    0xFFFFFFFF);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 
 	/*
 	 * Store PT2MAP PT2 pages (a.k.a. PT2TAB) in PT2TAB itself,
 	 * i.e. a self-reference mapping.  The PT2TAB is private; however, it
 	 * is mapped into the shared PT2MAP space, so the mapping should not
 	 * be global.
 	 */
 	pt2tab_pa = vtophys(pmap->pm_pt2tab);
 	pte2p = pmap_pt2tab_entry(pmap, (vm_offset_t)PT2MAP);
 	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) {
 		pt2tab_store(pte2p++, PTE2_KPT_NG(pa));
 	}
 
 	/* Insert PT2MAP PT2s into pmap PT1. */
 	pte1p = pmap_pte1(pmap, (vm_offset_t)PT2MAP);
 	for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) {
 		pte1_store(pte1p++, PTE1_LINK(pa));
 	}
 
 	/*
	 * Now synchronize the new mappings made above.
 	 */
 	pte1_sync_range(pmap->pm_pt1, NB_IN_PT1);
 	pte2_sync_range(pmap->pm_pt2tab, NB_IN_PT2TAB);
 
 	CPU_ZERO(&pmap->pm_active);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 
 	return (1);
 }
 
 #ifdef INVARIANTS
 static boolean_t
 pt2tab_user_is_empty(pt2_entry_t *tab)
 {
 	u_int i, end;
 
 	end = pt2tab_index(VM_MAXUSER_ADDRESS);
	for (i = 0; i < end; i++)
		if (tab[i] != 0)
			return (FALSE);
 	return (TRUE);
 }
 #endif
 /*
  *  Release any resources held by the given physical map.
  *  Called when a pmap initialized by pmap_pinit is being released.
  *  Should only be called if the map contains no valid mappings.
  */
 void
 pmap_release(pmap_t pmap)
 {
 #ifdef INVARIANTS
 	vm_offset_t start, end;
 #endif
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("%s: pmap resident count %ld != 0", __func__,
 	    pmap->pm_stats.resident_count));
 	KASSERT(pt2tab_user_is_empty(pmap->pm_pt2tab),
 	    ("%s: has allocated user PT2(s)", __func__));
 	KASSERT(CPU_EMPTY(&pmap->pm_active),
 	    ("%s: pmap %p is active on some CPU(s)", __func__, pmap));
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_REMOVE(pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 
 #ifdef INVARIANTS
 	start = pte1_index(KERNBASE) * sizeof(pt1_entry_t);
 	end = (pte1_index(0xFFFFFFFF) + 1) * sizeof(pt1_entry_t);
 	bzero((char *)pmap->pm_pt1 + start, end - start);
 
 	start = pt2tab_index(KERNBASE) * sizeof(pt2_entry_t);
 	end = (pt2tab_index(0xFFFFFFFF) + 1) * sizeof(pt2_entry_t);
 	bzero((char *)pmap->pm_pt2tab + start, end - start);
 #endif
 	/*
	 * We are leaving PT1 and PT2TAB allocated in the released pmap,
	 * so the UMA vmspace_zone should always be initialized with the
	 * UMA_ZONE_NOFREE flag.
 	 */
 }
 
 /*********************************************************
  *
  *  L2 table pages and their pages management routines.
  *
  *********************************************************/
 
 /*
  *  Virtual interface for L2 page table wire counting.
  *
 *  Each L2 page table in a page has its own counter, which counts the number
 *  of valid mappings in the table. A global per-page counter counts the
 *  mappings in all tables in the page plus the single mapping of the page
 *  itself in PT2TAB.
 *
 *  During a promotion we leave the associated L2 page table counter
 *  untouched, so the table (strictly speaking, the page which holds it)
 *  is never freed if promoted.
 *
 *  If a page's m->wire_count == 1, then no valid mappings exist in any L2
 *  page table in the page and the page itself is only mapped in PT2TAB.
  */
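 
/*
 *  Editor's illustrative sketch, not part of the original source: the
 *  relation between the counters can be checked directly. The helper name
 *  below is hypothetical and the block is disabled; it only restates the
 *  invariant described above (ignoring the one transient extra reference
 *  noted in pt2_wirecount_inc()).
 */
#if 0
static __inline boolean_t
pt2pg_wirecount_is_consistent(vm_page_t m)
{
	uint32_t i, sum;

	sum = 0;
	/* Sum the mappings recorded for each L2 page table in the page. */
	for (i = 0; i < NPT2_IN_PG; i++)
		sum += m->md.pt2_wirecount[i];
	/* The extra 1 is the page's own mapping in PT2TAB. */
	return (m->wire_count == sum + 1);
}
#endif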
 
 static __inline void
 pt2_wirecount_init(vm_page_t m)
 {
 	u_int i;
 
 	/*
	 * Note: A page m is allocated with the VM_ALLOC_WIRED flag and
	 *       m->wire_count should already be set correctly, so there
	 *       is no need to set it again herein.
 	 */
 	for (i = 0; i < NPT2_IN_PG; i++)
 		m->md.pt2_wirecount[i] = 0;
 }
 
 static __inline void
 pt2_wirecount_inc(vm_page_t m, uint32_t pte1_idx)
 {
 
 	/*
	 * Note: A pte2 that is just being modified (i.e. already allocated)
	 *       acquires one extra reference, which must be explicitly
	 *       cleared. This influences the KASSERTs herein.
	 *       All L2 page tables in a page always belong to the same
	 *       pmap, so we allow only one extra reference for the page.
 	 */
 	KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] < (NPTE2_IN_PT2 + 1),
 	    ("%s: PT2 is overflowing ...", __func__));
 	KASSERT(m->wire_count <= (NPTE2_IN_PG + 1),
 	    ("%s: PT2PG is overflowing ...", __func__));
 
 	m->wire_count++;
 	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]++;
 }
 
 static __inline void
 pt2_wirecount_dec(vm_page_t m, uint32_t pte1_idx)
 {
 
 	KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] != 0,
 	    ("%s: PT2 is underflowing ...", __func__));
 	KASSERT(m->wire_count > 1,
 	    ("%s: PT2PG is underflowing ...", __func__));
 
 	m->wire_count--;
 	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]--;
 }
 
 static __inline void
 pt2_wirecount_set(vm_page_t m, uint32_t pte1_idx, uint16_t count)
 {
 
 	KASSERT(count <= NPTE2_IN_PT2,
 	    ("%s: invalid count %u", __func__, count));
 	KASSERT(m->wire_count >  m->md.pt2_wirecount[pte1_idx & PT2PG_MASK],
 	    ("%s: PT2PG corrupting (%u, %u) ...", __func__, m->wire_count,
 	    m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]));
 
 	m->wire_count -= m->md.pt2_wirecount[pte1_idx & PT2PG_MASK];
 	m->wire_count += count;
 	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] = count;
 
 	KASSERT(m->wire_count <= (NPTE2_IN_PG + 1),
 	    ("%s: PT2PG is overflowed (%u) ...", __func__, m->wire_count));
 }
 
 static __inline uint32_t
 pt2_wirecount_get(vm_page_t m, uint32_t pte1_idx)
 {
 
 	return (m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]);
 }
 
 static __inline boolean_t
 pt2_is_empty(vm_page_t m, vm_offset_t va)
 {
 
 	return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] == 0);
 }
 
 static __inline boolean_t
 pt2_is_full(vm_page_t m, vm_offset_t va)
 {
 
 	return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] ==
 	    NPTE2_IN_PT2);
 }
 
 static __inline boolean_t
 pt2pg_is_empty(vm_page_t m)
 {
 
 	return (m->wire_count == 1);
 }
 
 /*
  *  This routine is called if the L2 page table
  *  is not mapped correctly.
  */
 static vm_page_t
 _pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
 {
 	uint32_t pte1_idx;
 	pt1_entry_t *pte1p;
 	pt2_entry_t pte2;
 	vm_page_t  m;
 	vm_paddr_t pt2pg_pa, pt2_pa;
 
 	pte1_idx = pte1_index(va);
 	pte1p = pmap->pm_pt1 + pte1_idx;
 
 	KASSERT(pte1_load(pte1p) == 0,
 	    ("%s: pm_pt1[%#x] is not zero: %#x", __func__, pte1_idx,
 	    pte1_load(pte1p)));
 
 	pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, va));
 	if (!pte2_is_valid(pte2)) {
 		/*
 		 * Install new PT2s page into pmap PT2TAB.
 		 */
 		m = vm_page_alloc(NULL, pte1_idx & ~PT2PG_MASK,
 		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 		if (m == NULL) {
 			if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
 				PMAP_UNLOCK(pmap);
 				rw_wunlock(&pvh_global_lock);
 				vm_wait(NULL);
 				rw_wlock(&pvh_global_lock);
 				PMAP_LOCK(pmap);
 			}
 
 			/*
 			 * Indicate the need to retry.  While waiting,
 			 * the L2 page table page may have been allocated.
 			 */
 			return (NULL);
 		}
 		pmap->pm_stats.resident_count++;
 		pt2pg_pa = pmap_pt2pg_init(pmap, va, m);
 	} else {
 		pt2pg_pa = pte2_pa(pte2);
 		m = PHYS_TO_VM_PAGE(pt2pg_pa);
 	}
 
 	pt2_wirecount_inc(m, pte1_idx);
 	pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx);
 	pte1_store(pte1p, PTE1_LINK(pt2_pa));
 
 	return (m);
 }
 
 static vm_page_t
 pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
 {
 	u_int pte1_idx;
 	pt1_entry_t *pte1p, pte1;
 	vm_page_t m;
 
 	pte1_idx = pte1_index(va);
 retry:
 	pte1p = pmap->pm_pt1 + pte1_idx;
 	pte1 = pte1_load(pte1p);
 
 	/*
 	 * This supports switching from a 1MB page to a
 	 * normal 4K page.
 	 */
 	if (pte1_is_section(pte1)) {
 		(void)pmap_demote_pte1(pmap, pte1p, va);
 		/*
 		 * Reload pte1 after demotion.
 		 *
		 * Note: Demotion can even fail, either because no PT2 is found
		 *       for the virtual address or because a PT2PG cannot be
		 *       allocated.
 		 */
 		pte1 = pte1_load(pte1p);
 	}
 
 	/*
 	 * If the L2 page table page is mapped, we just increment the
 	 * hold count, and activate it.
 	 */
 	if (pte1_is_link(pte1)) {
 		m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
 		pt2_wirecount_inc(m, pte1_idx);
 	} else  {
 		/*
 		 * Here if the PT2 isn't mapped, or if it has
 		 * been deallocated.
 		 */
 		m = _pmap_allocpte2(pmap, va, flags);
 		if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
 			goto retry;
 	}
 
 	return (m);
 }
 
 /*
  *  Schedule the specified unused L2 page table page to be freed. Specifically,
  *  add the page to the specified list of pages that will be released to the
  *  physical memory manager after the TLB has been updated.
  */
 static __inline void
 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free)
 {
 
 	/*
	 * Put the page on a list so that it is released only after
	 * *ALL* TLB shootdown is done.
 	 */
 #ifdef PMAP_DEBUG
 	pmap_zero_page_check(m);
 #endif
 	m->flags |= PG_ZERO;
 	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
 }
 
 /*
  *  Unwire L2 page tables page.
  */
 static void
 pmap_unwire_pt2pg(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pt1_entry_t *pte1p, opte1 __unused;
 	pt2_entry_t *pte2p;
 	uint32_t i;
 
 	KASSERT(pt2pg_is_empty(m),
 	    ("%s: pmap %p PT2PG %p wired", __func__, pmap, m));
 
 	/*
 	 * Unmap all L2 page tables in the page from L1 page table.
 	 *
	 * QQQ: Individual L2 page tables (except the last one) can be unmapped
	 * earlier. However, we do it this way for now.
 	 */
 	KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK),
 	    ("%s: pmap %p va %#x PT2PG %p bad index", __func__, pmap, va, m));
 	pte1p = pmap->pm_pt1 + m->pindex;
 	for (i = 0; i < NPT2_IN_PG; i++, pte1p++) {
 		KASSERT(m->md.pt2_wirecount[i] == 0,
 		    ("%s: pmap %p PT2 %u (PG %p) wired", __func__, pmap, i, m));
 		opte1 = pte1_load(pte1p);
 		if (pte1_is_link(opte1)) {
 			pte1_clear(pte1p);
 			/*
 			 * Flush intermediate TLB cache.
 			 */
 			pmap_tlb_flush(pmap, (m->pindex + i) << PTE1_SHIFT);
 		}
 #ifdef INVARIANTS
 		else
 			KASSERT((opte1 == 0) || pte1_is_section(opte1),
 			    ("%s: pmap %p va %#x bad pte1 %x at %u", __func__,
 			    pmap, va, opte1, i));
 #endif
 	}
 
 	/*
 	 * Unmap the page from PT2TAB.
 	 */
 	pte2p = pmap_pt2tab_entry(pmap, va);
 	(void)pt2tab_load_clear(pte2p);
 	pmap_tlb_flush(pmap, pt2map_pt2pg(va));
 
 	m->wire_count = 0;
 	pmap->pm_stats.resident_count--;
 
 	/*
 	 * This barrier is so that the ordinary store unmapping
 	 * the L2 page table page is globally performed before TLB shoot-
 	 * down is begun.
 	 */
 	wmb();
 	vm_wire_sub(1);
 }
 
 /*
  *  Decrements a L2 page table page's wire count, which is used to record the
  *  number of valid page table entries within the page.  If the wire count
  *  drops to zero, then the page table page is unmapped.  Returns TRUE if the
  *  page table page was unmapped and FALSE otherwise.
  */
 static __inline boolean_t
 pmap_unwire_pt2(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 {
 	pt2_wirecount_dec(m, pte1_index(va));
 	if (pt2pg_is_empty(m)) {
 		/*
		 * QQQ: The wire count is zero, so the whole page should be zero
		 *      and we can set the PG_ZERO flag on it.
		 *      Note that when promotion is enabled, it takes some
		 *      more effort. See pmap_unwire_pt2_all() below.
 		 */
 		pmap_unwire_pt2pg(pmap, va, m);
 		pmap_add_delayed_free_list(m, free);
 		return (TRUE);
 	} else
 		return (FALSE);
 }
 
 /*
  *  Drop a L2 page table page's wire count at once, which is used to record
  *  the number of valid L2 page table entries within the page. If the wire
  *  count drops to zero, then the L2 page table page is unmapped.
  */
 static __inline void
 pmap_unwire_pt2_all(pmap_t pmap, vm_offset_t va, vm_page_t m,
     struct spglist *free)
 {
 	u_int pte1_idx = pte1_index(va);
 
 	KASSERT(m->pindex == (pte1_idx & ~PT2PG_MASK),
 		("%s: PT2 page's pindex is wrong", __func__));
 	KASSERT(m->wire_count > pt2_wirecount_get(m, pte1_idx),
 	    ("%s: bad pt2 wire count %u > %u", __func__, m->wire_count,
 	    pt2_wirecount_get(m, pte1_idx)));
 
 	/*
 	 * It's possible that the L2 page table was never used.
	 * This happens when a section is created without promotion.
 	 */
 	if (pt2_is_full(m, va)) {
 		pt2_wirecount_set(m, pte1_idx, 0);
 
 		/*
		 * QQQ: We clear the L2 page table now, so when the L2 page
		 *      table page is going to be freed, we can set the PG_ZERO
		 *      flag on it ...
		 *      This function is called only on section mappings, so
		 *      hopefully it's not too big an overhead.
 		 *
 		 * XXX: If pmap is current, existing PT2MAP mapping could be
 		 *      used for zeroing.
 		 */
 		pmap_zero_page_area(m, page_pt2off(pte1_idx), NB_IN_PT2);
 	}
 #ifdef INVARIANTS
 	else
 		KASSERT(pt2_is_empty(m, va), ("%s: PT2 is not empty (%u)",
 		    __func__, pt2_wirecount_get(m, pte1_idx)));
 #endif
 	if (pt2pg_is_empty(m)) {
 		pmap_unwire_pt2pg(pmap, va, m);
 		pmap_add_delayed_free_list(m, free);
 	}
 }
 
 /*
  *  After removing a L2 page table entry, this routine is used to
  *  conditionally free the page, and manage the hold/wire counts.
  */
 static boolean_t
 pmap_unuse_pt2(pmap_t pmap, vm_offset_t va, struct spglist *free)
 {
 	pt1_entry_t pte1;
 	vm_page_t mpte;
 
 	if (va >= VM_MAXUSER_ADDRESS)
 		return (FALSE);
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	mpte = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
 	return (pmap_unwire_pt2(pmap, va, mpte, free));
 }
 
 /*************************************
  *
  *  Page management routines.
  *
  *************************************/
 
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 CTASSERT(_NPCM == 11);
 CTASSERT(_NPCPV == 336);
 
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 
 	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
 }
 
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 
 #define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
 #define	PC_FREE10	0x0000fffful	/* Free values for index 10 */
 
 static const uint32_t pc_freemask[_NPCM] = {
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE10
 };
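 
/*
 *  Editor's note, an illustrative sketch not from the original source: the
 *  freemask values follow from _NPCPV == 336 == 10 * 32 + 16. Fields 0
 *  through 9 of pc_map[] track 32 pv entries each, so their free value is
 *  0xffffffff, while field 10 tracks only the remaining 16 entries, giving
 *  0x0000ffff. A set bit means the corresponding entry of pc_pventry[] is
 *  free. Since sizeof(struct pv_chunk) == PAGE_SIZE and chunks are mapped
 *  page aligned, pv_to_chunk() recovers the owning chunk by masking a pv
 *  entry pointer with ~PAGE_MASK. The helper below is hypothetical and
 *  disabled; it only restates the field/bit mapping used by free_pv_entry().
 */
#if 0
static __inline bool
pv_entry_is_free(struct pv_chunk *pc, int idx)
{

	return ((pc->pc_map[idx / 32] & (1ul << (idx % 32))) != 0);
}
#endif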
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 	"Current number of pv entries");
 
 #ifdef PV_STATS
 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
     "Current number of pv entry chunks");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
     "Current number of pv entry chunks allocated");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
     "Current number of pv entry chunks frees");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail,
     0, "Number of times tried to get a chunk page but failed.");
 
 static long pv_entry_frees, pv_entry_allocs;
 static int pv_entry_spare;
 
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
     "Current number of pv entry frees");
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs,
     0, "Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
     "Current number of spare pv entries");
 #endif
 
 /*
  *  Is given page managed?
  */
 static __inline bool
 is_managed(vm_paddr_t pa)
 {
 	vm_page_t m;
 
 	m = PHYS_TO_VM_PAGE(pa);
 	if (m == NULL)
 		return (false);
 	return ((m->oflags & VPO_UNMANAGED) == 0);
 }
 
 static __inline bool
 pte1_is_managed(pt1_entry_t pte1)
 {
 
 	return (is_managed(pte1_pa(pte1)));
 }
 
 static __inline bool
 pte2_is_managed(pt2_entry_t pte2)
 {
 
 	return (is_managed(pte2_pa(pte2)));
 }
 
 /*
  *  We are in a serious low memory condition.  Resort to
  *  drastic measures to free some pages so we can allocate
  *  another pv entry chunk.
  */
 static vm_page_t
 pmap_pv_reclaim(pmap_t locked_pmap)
 {
 	struct pch newtail;
 	struct pv_chunk *pc;
 	struct md_page *pvh;
 	pt1_entry_t *pte1p;
 	pmap_t pmap;
 	pt2_entry_t *pte2p, tpte2;
 	pv_entry_t pv;
 	vm_offset_t va;
 	vm_page_t m, m_pc;
 	struct spglist free;
 	uint32_t inuse;
 	int bit, field, freed;
 
 	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
 	pmap = NULL;
 	m_pc = NULL;
 	SLIST_INIT(&free);
 	TAILQ_INIT(&newtail);
 	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
 	    SLIST_EMPTY(&free))) {
 		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 		if (pmap != pc->pc_pmap) {
 			if (pmap != NULL) {
 				if (pmap != locked_pmap)
 					PMAP_UNLOCK(pmap);
 			}
 			pmap = pc->pc_pmap;
 			/* Avoid deadlock and lock recursion. */
 			if (pmap > locked_pmap)
 				PMAP_LOCK(pmap);
 			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
 				pmap = NULL;
 				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
 				continue;
 			}
 		}
 
 		/*
 		 * Destroy every non-wired, 4 KB page mapping in the chunk.
 		 */
 		freed = 0;
 		for (field = 0; field < _NPCM; field++) {
 			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
 			    inuse != 0; inuse &= ~(1UL << bit)) {
 				bit = ffs(inuse) - 1;
 				pv = &pc->pc_pventry[field * 32 + bit];
 				va = pv->pv_va;
 				pte1p = pmap_pte1(pmap, va);
 				if (pte1_is_section(pte1_load(pte1p)))
 					continue;
 				pte2p = pmap_pte2(pmap, va);
 				tpte2 = pte2_load(pte2p);
 				if ((tpte2 & PTE2_W) == 0)
 					tpte2 = pte2_load_clear(pte2p);
 				pmap_pte2_release(pte2p);
 				if ((tpte2 & PTE2_W) != 0)
 					continue;
 				KASSERT(tpte2 != 0,
 				    ("pmap_pv_reclaim: pmap %p va %#x zero pte",
 				    pmap, va));
 				pmap_tlb_flush(pmap, va);
 				m = PHYS_TO_VM_PAGE(pte2_pa(tpte2));
 				if (pte2_is_dirty(tpte2))
 					vm_page_dirty(m);
 				if ((tpte2 & PTE2_A) != 0)
 					vm_page_aflag_set(m, PGA_REFERENCED);
 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 				if (TAILQ_EMPTY(&m->md.pv_list) &&
 				    (m->flags & PG_FICTITIOUS) == 0) {
 					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 					if (TAILQ_EMPTY(&pvh->pv_list)) {
 						vm_page_aflag_clear(m,
 						    PGA_WRITEABLE);
 					}
 				}
 				pc->pc_map[field] |= 1UL << bit;
 				pmap_unuse_pt2(pmap, va, &free);
 				freed++;
 			}
 		}
 		if (freed == 0) {
 			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
 			continue;
 		}
 		/* Every freed mapping is for a 4 KB page. */
 		pmap->pm_stats.resident_count -= freed;
 		PV_STAT(pv_entry_frees += freed);
 		PV_STAT(pv_entry_spare += freed);
 		pv_entry_count -= freed;
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		for (field = 0; field < _NPCM; field++)
 			if (pc->pc_map[field] != pc_freemask[field]) {
 				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
 				    pc_list);
 				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
 
 				/*
 				 * One freed pv entry in locked_pmap is
 				 * sufficient.
 				 */
 				if (pmap == locked_pmap)
 					goto out;
 				break;
 			}
 		if (field == _NPCM) {
 			PV_STAT(pv_entry_spare -= _NPCPV);
 			PV_STAT(pc_chunk_count--);
 			PV_STAT(pc_chunk_frees++);
 			/* Entire chunk is free; return it. */
 			m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 			pmap_qremove((vm_offset_t)pc, 1);
 			pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc);
 			break;
 		}
 	}
 out:
 	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
 	if (pmap != NULL) {
 		if (pmap != locked_pmap)
 			PMAP_UNLOCK(pmap);
 	}
 	if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) {
 		m_pc = SLIST_FIRST(&free);
 		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
 		/* Recycle a freed page table page. */
 		m_pc->wire_count = 1;
 		vm_wire_add(1);
 	}
 	vm_page_free_pages_toq(&free, false);
 	return (m_pc);
 }
 
 static void
 free_pv_chunk(struct pv_chunk *pc)
 {
 	vm_page_t m;
 
 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 	PV_STAT(pv_entry_spare -= _NPCPV);
 	PV_STAT(pc_chunk_count--);
 	PV_STAT(pc_chunk_frees++);
 	/* entire chunk is free, return it */
 	m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 	pmap_qremove((vm_offset_t)pc, 1);
 	vm_page_unwire(m, PQ_NONE);
 	vm_page_free(m);
 	pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc);
 }
 
 /*
  *  Free the pv_entry back to the free list.
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
 	struct pv_chunk *pc;
 	int idx, field, bit;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(pv_entry_frees++);
 	PV_STAT(pv_entry_spare++);
 	pv_entry_count--;
 	pc = pv_to_chunk(pv);
 	idx = pv - &pc->pc_pventry[0];
 	field = idx / 32;
 	bit = idx % 32;
 	pc->pc_map[field] |= 1ul << bit;
 	for (idx = 0; idx < _NPCM; idx++)
 		if (pc->pc_map[idx] != pc_freemask[idx]) {
 			/*
 			 * 98% of the time, pc is already at the head of the
 			 * list.  If it isn't already, move it to the head.
 			 */
 			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
 			    pc)) {
 				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
 				    pc_list);
 			}
 			return;
 		}
 	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 	free_pv_chunk(pc);
 }
 
 /*
  *  Get a new pv_entry, allocating a block from the system
  *  when needed.
  */
 static pv_entry_t
 get_pv_entry(pmap_t pmap, boolean_t try)
 {
 	static const struct timeval printinterval = { 60, 0 };
 	static struct timeval lastprint;
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
 	vm_page_t m;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(pv_entry_allocs++);
 	pv_entry_count++;
 	if (pv_entry_count > pv_entry_high_water)
 		if (ratecheck(&lastprint, &printinterval))
 			printf("Approaching the limit on PV entries, consider "
 			    "increasing either the vm.pmap.shpgperproc or the "
 			    "vm.pmap.pv_entry_max tunable.\n");
 retry:
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
 		for (field = 0; field < _NPCM; field++) {
 			if (pc->pc_map[field]) {
 				bit = ffs(pc->pc_map[field]) - 1;
 				break;
 			}
 		}
 		if (field < _NPCM) {
 			pv = &pc->pc_pventry[field * 32 + bit];
 			pc->pc_map[field] &= ~(1ul << bit);
 			/* If this was the last item, move it to tail */
 			for (field = 0; field < _NPCM; field++)
 				if (pc->pc_map[field] != 0) {
 					PV_STAT(pv_entry_spare--);
 					return (pv);	/* not full, return */
 				}
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 			PV_STAT(pv_entry_spare--);
 			return (pv);
 		}
 	}
 	/*
 	 * Access to the pte2list "pv_vafree" is synchronized by the pvh
 	 * global lock.  If "pv_vafree" is currently non-empty, it will
 	 * remain non-empty until pmap_pte2list_alloc() completes.
 	 */
 	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 		if (try) {
 			pv_entry_count--;
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
 		m = pmap_pv_reclaim(pmap);
 		if (m == NULL)
 			goto retry;
 	}
 	PV_STAT(pc_chunk_count++);
 	PV_STAT(pc_chunk_allocs++);
 	pc = (struct pv_chunk *)pmap_pte2list_alloc(&pv_vafree);
 	pmap_qenter((vm_offset_t)pc, &m, 1);
 	pc->pc_pmap = pmap;
 	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
 	for (field = 1; field < _NPCM; field++)
 		pc->pc_map[field] = pc_freemask[field];
 	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(pv_entry_spare += _NPCPV - 1);
 	return (pv);
 }
 
 /*
  *  Create a pv entry for page at pa for
  *  (pmap, va).
  */
 static void
 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pv = get_pv_entry(pmap, FALSE);
 	pv->pv_va = va;
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 }
 
 static __inline pv_entry_t
 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 			break;
 		}
 	}
 	return (pv);
 }
 
 static void
 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t pv;
 
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
 	free_pv_entry(pmap, pv);
 }
 
 static void
 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 {
 	struct md_page *pvh;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	pmap_pvh_free(&m->md, pmap, va);
 	if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		if (TAILQ_EMPTY(&pvh->pv_list))
 			vm_page_aflag_clear(m, PGA_WRITEABLE);
 	}
 }
 
 static void
 pmap_pv_demote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_offset_t va_last;
 	vm_page_t m;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	KASSERT((pa & PTE1_OFFSET) == 0,
 	    ("pmap_pv_demote_pte1: pa is not 1mpage aligned"));
 
 	/*
 	 * Transfer the 1mpage's pv entry for this mapping to the first
 	 * page's pv list.
 	 */
 	pvh = pa_to_pvh(pa);
 	va = pte1_trunc(va);
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_demote_pte1: pv not found"));
 	m = PHYS_TO_VM_PAGE(pa);
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 	/* Instantiate the remaining NPTE2_IN_PT2 - 1 pv entries. */
 	va_last = va + PTE1_SIZE - PAGE_SIZE;
 	do {
 		m++;
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("pmap_pv_demote_pte1: page %p is not managed", m));
 		va += PAGE_SIZE;
 		pmap_insert_entry(pmap, va, m);
 	} while (va < va_last);
 }
 
 #if VM_NRESERVLEVEL > 0
 static void
 pmap_pv_promote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_offset_t va_last;
 	vm_page_t m;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	KASSERT((pa & PTE1_OFFSET) == 0,
 	    ("pmap_pv_promote_pte1: pa is not 1mpage aligned"));
 
 	/*
 	 * Transfer the first page's pv entry for this mapping to the
 	 * 1mpage's pv list.  Aside from avoiding the cost of a call
 	 * to get_pv_entry(), a transfer avoids the possibility that
 	 * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim()
 	 * removes one of the mappings that is being promoted.
 	 */
 	m = PHYS_TO_VM_PAGE(pa);
 	va = pte1_trunc(va);
 	pv = pmap_pvh_remove(&m->md, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_promote_pte1: pv not found"));
 	pvh = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 	/* Free the remaining NPTE2_IN_PT2 - 1 pv entries. */
 	va_last = va + PTE1_SIZE - PAGE_SIZE;
 	do {
 		m++;
 		va += PAGE_SIZE;
 		pmap_pvh_free(&m->md, pmap, va);
 	} while (va < va_last);
 }
 #endif
 
 /*
  *  Conditionally create a pv entry.
  */
 static boolean_t
 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	if (pv_entry_count < pv_entry_high_water &&
 	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 		pv->pv_va = va;
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		return (TRUE);
 	} else
 		return (FALSE);
 }
 
 /*
  *  Create the pv entries for each of the pages within a section.
  */
 static bool
 pmap_pv_insert_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t pte1, u_int flags)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	bool noreclaim;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	noreclaim = (flags & PMAP_ENTER_NORECLAIM) != 0;
 	if ((noreclaim && pv_entry_count >= pv_entry_high_water) ||
 	    (pv = get_pv_entry(pmap, noreclaim)) == NULL)
 		return (false);
 	pv->pv_va = va;
 	pvh = pa_to_pvh(pte1_pa(pte1));
 	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 	return (true);
 }
 
 static inline void
 pmap_tlb_flush_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t npte1)
 {
 
 	/* Kill all the small mappings or the big one only. */
 	if (pte1_is_section(npte1))
 		pmap_tlb_flush_range(pmap, pte1_trunc(va), PTE1_SIZE);
 	else
 		pmap_tlb_flush(pmap, pte1_trunc(va));
 }
 
 /*
  *  Update kernel pte1 on all pmaps.
  *
 *  The following function is called on only one CPU with interrupts disabled.
 *  In the SMP case, smp_rendezvous_cpus() is used to stop the other CPUs.
 *  This way nobody can invoke an explicit hardware table walk during the
 *  update of the pte1. Unsolicited hardware table walks can still happen,
 *  triggered by speculative data or instruction prefetch or even by a
 *  speculative hardware table walk.
 *
 *  The break-before-make approach should be implemented here. However, it's
 *  not so easy to do for kernel mappings, as the kernel would be unmapping
 *  itself unexpectedly yet voluntarily.
  */
 static void
 pmap_update_pte1_kernel(vm_offset_t va, pt1_entry_t npte1)
 {
 	pmap_t pmap;
 	pt1_entry_t *pte1p;
 
 	/*
 	 * Get current pmap. Interrupts should be disabled here
 	 * so PCPU_GET() is done atomically.
 	 */
 	pmap = PCPU_GET(curpmap);
 	if (pmap == NULL)
 		pmap = kernel_pmap;
 
 	/*
 	 * (1) Change pte1 on current pmap.
 	 * (2) Flush all obsolete TLB entries on current CPU.
 	 * (3) Change pte1 on all pmaps.
 	 * (4) Flush all obsolete TLB entries on all CPUs in SMP case.
 	 */
 
 	pte1p = pmap_pte1(pmap, va);
 	pte1_store(pte1p, npte1);
 
 	/* Kill all the small mappings or the big one only. */
 	if (pte1_is_section(npte1)) {
 		pmap_pte1_kern_promotions++;
 		tlb_flush_range_local(pte1_trunc(va), PTE1_SIZE);
 	} else {
 		pmap_pte1_kern_demotions++;
 		tlb_flush_local(pte1_trunc(va));
 	}
 
 	/*
	 * In the SMP case, this function is called while all CPUs are at the
	 * smp rendezvous, so there is no need to take 'allpmaps_lock' here.
	 * In the UP case, the function is called with this lock held.
 	 */
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		pte1p = pmap_pte1(pmap, va);
 		pte1_store(pte1p, npte1);
 	}
 
 #ifdef SMP
 	/* Kill all the small mappings or the big one only. */
 	if (pte1_is_section(npte1))
 		tlb_flush_range(pte1_trunc(va), PTE1_SIZE);
 	else
 		tlb_flush(pte1_trunc(va));
 #endif
 }
 
 #ifdef SMP
 struct pte1_action {
 	vm_offset_t va;
 	pt1_entry_t npte1;
 	u_int update;		/* CPU that updates the PTE1 */
 };
 
 static void
 pmap_update_pte1_action(void *arg)
 {
 	struct pte1_action *act = arg;
 
 	if (act->update == PCPU_GET(cpuid))
 		pmap_update_pte1_kernel(act->va, act->npte1);
 }
 
 /*
  *  Change pte1 on current pmap.
  *  Note that kernel pte1 must be changed on all pmaps.
  *
  *  According to the architecture reference manual published by ARM,
  *  the behaviour is UNPREDICTABLE when two or more TLB entries map the same VA.
  *  According to this manual, UNPREDICTABLE behaviours must never happen in
 *  a viable system. In contrast, on x86 processors it is not specified which
 *  of the TLB entries mapping the virtual address will be used, but the MMU
 *  doesn't generate a bogus translation the way it does on Cortex-A8 rev 2
 *  (Beaglebone Black).
 *
 *  This is a problem when either promotion or demotion is being done: in
 *  general, the pte1 update and the appropriate TLB flush must be done
 *  atomically.
  */
 static void
 pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va,
     pt1_entry_t npte1)
 {
 
 	if (pmap == kernel_pmap) {
 		struct pte1_action act;
 
 		sched_pin();
 		act.va = va;
 		act.npte1 = npte1;
 		act.update = PCPU_GET(cpuid);
 		smp_rendezvous_cpus(all_cpus, smp_no_rendezvous_barrier,
 		    pmap_update_pte1_action, NULL, &act);
 		sched_unpin();
 	} else {
 		register_t cspr;
 
 		/*
		 * Use the break-before-make approach for changing userland
		 * mappings. It can cause L1 translation aborts on other
		 * cores in the SMP case, so special treatment is implemented
		 * in pmap_fault(). To reduce the likelihood that another core
		 * will be affected by the broken mapping, disable interrupts
		 * until the mapping change is completed.
 		 */
 		cspr = disable_interrupts(PSR_I | PSR_F);
 		pte1_clear(pte1p);
 		pmap_tlb_flush_pte1(pmap, va, npte1);
 		pte1_store(pte1p, npte1);
 		restore_interrupts(cspr);
 	}
 }
 #else
 static void
 pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va,
     pt1_entry_t npte1)
 {
 
 	if (pmap == kernel_pmap) {
 		mtx_lock_spin(&allpmaps_lock);
 		pmap_update_pte1_kernel(va, npte1);
 		mtx_unlock_spin(&allpmaps_lock);
 	} else {
 		register_t cspr;
 
 		/*
		 * Use the break-before-make approach for changing userland
		 * mappings. It's absolutely safe in the UP case when
		 * interrupts are disabled.
 		 */
 		cspr = disable_interrupts(PSR_I | PSR_F);
 		pte1_clear(pte1p);
 		pmap_tlb_flush_pte1(pmap, va, npte1);
 		pte1_store(pte1p, npte1);
 		restore_interrupts(cspr);
 	}
 }
 #endif
 
 #if VM_NRESERVLEVEL > 0
 /*
  *  Tries to promote the NPTE2_IN_PT2, contiguous 4KB page mappings that are
  *  within a single page table page (PT2) to a single 1MB page mapping.
  *  For promotion to occur, two conditions must be met: (1) the 4KB page
  *  mappings must map aligned, contiguous physical memory and (2) the 4KB page
  *  mappings must have identical characteristics.
  *
  *  Managed (PG_MANAGED) mappings within the kernel address space are not
  *  promoted.  The reason is that kernel PTE1s are replicated in each pmap but
  *  pmap_remove_write(), pmap_clear_modify(), and pmap_clear_reference() only
  *  read the PTE1 from the kernel pmap.
  */
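 
/*
 *  Editor's illustrative example, not from the original source: assuming
 *  NPTE2_IN_PT2 == 256, a PT2 whose 256 PTE2s map the 4KB frames at pa,
 *  pa + 4KB, ..., pa + 1MB - 4KB (with pa 1MB aligned) and carry identical
 *  PTE2_PROMOTE attributes collapses into a single PTE1 section entry for
 *  pa. A single PTE2 that breaks the pattern aborts the attempt below and
 *  bumps pmap_pte1_p_failures.
 */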
 static void
 pmap_promote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
 {
 	pt1_entry_t npte1;
 	pt2_entry_t *fpte2p, fpte2, fpte2_fav;
 	pt2_entry_t *pte2p, pte2;
 	vm_offset_t pteva __unused;
 	vm_page_t m __unused;
 
 	PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__,
 	    pmap, va, pte1_load(pte1p), pte1p));
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * Examine the first PTE2 in the specified PT2. Abort if this PTE2 is
 	 * either invalid, unused, or does not map the first 4KB physical page
 	 * within a 1MB page.
 	 */
 	fpte2p = pmap_pte2_quick(pmap, pte1_trunc(va));
 	fpte2 = pte2_load(fpte2p);
 	if ((fpte2 & ((PTE2_FRAME & PTE1_OFFSET) | PTE2_A | PTE2_V)) !=
 	    (PTE2_A | PTE2_V)) {
 		pmap_pte1_p_failures++;
 		CTR3(KTR_PMAP, "%s: failure(1) for va %#x in pmap %p",
 		    __func__, va, pmap);
 		return;
 	}
 	if (pte2_is_managed(fpte2) && pmap == kernel_pmap) {
 		pmap_pte1_p_failures++;
 		CTR3(KTR_PMAP, "%s: failure(2) for va %#x in pmap %p",
 		    __func__, va, pmap);
 		return;
 	}
 	if ((fpte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) {
 		/*
 		 * When page is not modified, PTE2_RO can be set without
 		 * a TLB invalidation.
 		 */
 		fpte2 |= PTE2_RO;
 		pte2_store(fpte2p, fpte2);
 	}
 
 	/*
 	 * Examine each of the other PTE2s in the specified PT2. Abort if this
 	 * PTE2 maps an unexpected 4KB physical page or does not have identical
 	 * characteristics to the first PTE2.
 	 */
 	fpte2_fav = (fpte2 & (PTE2_FRAME | PTE2_A | PTE2_V));
 	fpte2_fav += PTE1_SIZE - PTE2_SIZE; /* examine from the end */
 	for (pte2p = fpte2p + NPTE2_IN_PT2 - 1; pte2p > fpte2p; pte2p--) {
 		pte2 = pte2_load(pte2p);
 		if ((pte2 & (PTE2_FRAME | PTE2_A | PTE2_V)) != fpte2_fav) {
 			pmap_pte1_p_failures++;
 			CTR3(KTR_PMAP, "%s: failure(3) for va %#x in pmap %p",
 			    __func__, va, pmap);
 			return;
 		}
 		if ((pte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) {
 			/*
 			 * When page is not modified, PTE2_RO can be set
 			 * without a TLB invalidation. See note above.
 			 */
 			pte2 |= PTE2_RO;
 			pte2_store(pte2p, pte2);
 			pteva = pte1_trunc(va) | (pte2 & PTE1_OFFSET &
 			    PTE2_FRAME);
 			CTR3(KTR_PMAP, "%s: protect for va %#x in pmap %p",
 			    __func__, pteva, pmap);
 		}
 		if ((pte2 & PTE2_PROMOTE) != (fpte2 & PTE2_PROMOTE)) {
 			pmap_pte1_p_failures++;
 			CTR3(KTR_PMAP, "%s: failure(4) for va %#x in pmap %p",
 			    __func__, va, pmap);
 			return;
 		}
 
 		fpte2_fav -= PTE2_SIZE;
 	}
 	/*
 	 * The page table page in its current state will stay in PT2TAB
 	 * until the PTE1 mapping the section is demoted by pmap_demote_pte1()
 	 * or destroyed by pmap_remove_pte1().
 	 *
 	 * Note that L2 page table size is not equal to PAGE_SIZE.
 	 */
 	m = PHYS_TO_VM_PAGE(trunc_page(pte1_link_pa(pte1_load(pte1p))));
 	KASSERT(m >= vm_page_array && m < &vm_page_array[vm_page_array_size],
 	    ("%s: PT2 page is out of range", __func__));
 	KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK),
 	    ("%s: PT2 page's pindex is wrong", __func__));
 
 	/*
 	 * Get pte1 from pte2 format.
 	 */
 	npte1 = (fpte2 & PTE1_FRAME) | ATTR_TO_L1(fpte2) | PTE1_V;
 
 	/*
 	 * Promote the pv entries.
 	 */
 	if (pte2_is_managed(fpte2))
 		pmap_pv_promote_pte1(pmap, va, pte1_pa(npte1));
 
 	/*
 	 * Promote the mappings.
 	 */
 	pmap_change_pte1(pmap, pte1p, va, npte1);
 
 	pmap_pte1_promotions++;
 	CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p",
 	    __func__, va, pmap);
 
 	PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n",
 	    __func__, pmap, va, npte1, pte1_load(pte1p), pte1p));
 }
 #endif /* VM_NRESERVLEVEL > 0 */
 
 /*
  *  Zero L2 page table page.
  */
 static __inline void
 pmap_clear_pt2(pt2_entry_t *fpte2p)
 {
 	pt2_entry_t *pte2p;
 
 	for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++)
 		pte2_clear(pte2p);
 
 }
 
 /*
  *  Removes a 1MB page mapping from the kernel pmap.
  */
 static void
 pmap_remove_kernel_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
 {
 	vm_page_t m;
 	uint32_t pte1_idx;
 	pt2_entry_t *fpte2p;
 	vm_paddr_t pt2_pa;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	m = pmap_pt2_page(pmap, va);
 	if (m == NULL)
 		/*
 		 * QQQ: Is this function called only on promoted pte1?
 		 *      We certainly do section mappings directly
 		 *      (without promotion) in kernel !!!
 		 */
 		panic("%s: missing pt2 page", __func__);
 
 	pte1_idx = pte1_index(va);
 
 	/*
 	 * Initialize the L2 page table.
 	 */
 	fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx);
 	pmap_clear_pt2(fpte2p);
 
 	/*
 	 * Remove the mapping.
 	 */
 	pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(m), pte1_idx);
 	pmap_kenter_pte1(va, PTE1_LINK(pt2_pa));
 
 	/*
 	 * QQQ: We do not need to invalidate PT2MAP mapping
 	 * as we did not change it. I.e. the L2 page table page
 	 * was and still is mapped the same way.
 	 */
 }
 
 /*
  *  Do the things to unmap a section in a process
  */
 static void
 pmap_remove_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva,
     struct spglist *free)
 {
 	pt1_entry_t opte1;
 	struct md_page *pvh;
 	vm_offset_t eva, va;
 	vm_page_t m;
 
 	PDEBUG(6, printf("%s(%p): va %#x pte1 %#x at %p\n", __func__, pmap, sva,
 	    pte1_load(pte1p), pte1p));
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & PTE1_OFFSET) == 0,
 	    ("%s: sva is not 1mpage aligned", __func__));
 
 	/*
	 * Clear and invalidate the mapping. It should occupy one and only one
	 * TLB entry, so pmap_tlb_flush() called with an aligned address should
	 * be sufficient.
 	 */
 	opte1 = pte1_load_clear(pte1p);
 	pmap_tlb_flush(pmap, sva);
 
 	if (pte1_is_wired(opte1))
 		pmap->pm_stats.wired_count -= PTE1_SIZE / PAGE_SIZE;
 	pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE;
 	if (pte1_is_managed(opte1)) {
 		pvh = pa_to_pvh(pte1_pa(opte1));
 		pmap_pvh_free(pvh, pmap, sva);
 		eva = sva + PTE1_SIZE;
 		for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1));
 		    va < eva; va += PAGE_SIZE, m++) {
 			if (pte1_is_dirty(opte1))
 				vm_page_dirty(m);
 			if (opte1 & PTE1_A)
 				vm_page_aflag_set(m, PGA_REFERENCED);
 			if (TAILQ_EMPTY(&m->md.pv_list) &&
 			    TAILQ_EMPTY(&pvh->pv_list))
 				vm_page_aflag_clear(m, PGA_WRITEABLE);
 		}
 	}
 	if (pmap == kernel_pmap) {
 		/*
		 * L2 page table(s) can't be removed from the kernel map as
		 * the kernel counts on it (stuff around pmap_growkernel()).
		 */
		pmap_remove_kernel_pte1(pmap, pte1p, sva);
 	} else {
 		/*
 		 * Get associated L2 page table page.
 		 * It's possible that the page was never allocated.
 		 */
 		m = pmap_pt2_page(pmap, sva);
 		if (m != NULL)
 			pmap_unwire_pt2_all(pmap, sva, m, free);
 	}
 }
 
 /*
  *  Fills L2 page table page with mappings to consecutive physical pages.
  */
 static __inline void
 pmap_fill_pt2(pt2_entry_t *fpte2p, pt2_entry_t npte2)
 {
 	pt2_entry_t *pte2p;
 
 	for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) {
 		pte2_store(pte2p, npte2);
 		npte2 += PTE2_SIZE;
 	}
 }
 
 /*
  *  Tries to demote a 1MB page mapping. If demotion fails, the
  *  1MB page mapping is invalidated.
  */
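 
/*
 *  Editor's illustrative sketch, not from the original source: demotion
 *  replaces one PTE1 section entry with a link to an L2 page table whose
 *  NPTE2_IN_PT2 entries reproduce the same translation, i.e. for a section
 *  mapping physical address pa the new table is seeded roughly as
 *
 *      pte2[i] = (pa + i * PTE2_SIZE) | ATTR_TO_L2(old pte1) | PTE2_V
 *
 *  which is what pmap_fill_pt2() above does by stepping npte2 by PTE2_SIZE.
 */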
 static boolean_t
 pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
 {
 	pt1_entry_t opte1, npte1;
 	pt2_entry_t *fpte2p, npte2;
 	vm_paddr_t pt2pg_pa, pt2_pa;
 	vm_page_t m;
 	struct spglist free;
 	uint32_t pte1_idx, isnew = 0;
 
 	PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__,
 	    pmap, va, pte1_load(pte1p), pte1p));
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	opte1 = pte1_load(pte1p);
 	KASSERT(pte1_is_section(opte1), ("%s: opte1 not a section", __func__));
 
 	if ((opte1 & PTE1_A) == 0 || (m = pmap_pt2_page(pmap, va)) == NULL) {
 		KASSERT(!pte1_is_wired(opte1),
 		    ("%s: PT2 page for a wired mapping is missing", __func__));
 
 		/*
 		 * Invalidate the 1MB page mapping and return
 		 * "failure" if the mapping was never accessed or the
 		 * allocation of the new page table page fails.
 		 */
 		if ((opte1 & PTE1_A) == 0 || (m = vm_page_alloc(NULL,
 		    pte1_index(va) & ~PT2PG_MASK, VM_ALLOC_NOOBJ |
 		    VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) {
 			SLIST_INIT(&free);
 			pmap_remove_pte1(pmap, pte1p, pte1_trunc(va), &free);
 			vm_page_free_pages_toq(&free, false);
 			CTR3(KTR_PMAP, "%s: failure for va %#x in pmap %p",
 			    __func__, va, pmap);
 			return (FALSE);
 		}
 		if (va < VM_MAXUSER_ADDRESS)
 			pmap->pm_stats.resident_count++;
 
 		isnew = 1;
 
 		/*
		 * We init all L2 page tables in the page even though
		 * we are going to overwrite everything for one L2 page
		 * table in a moment.
 		 */
 		pt2pg_pa = pmap_pt2pg_init(pmap, va, m);
 	} else {
 		if (va < VM_MAXUSER_ADDRESS) {
 			if (pt2_is_empty(m, va))
 				isnew = 1; /* Demoting section w/o promotion. */
 #ifdef INVARIANTS
 			else
 				KASSERT(pt2_is_full(m, va), ("%s: bad PT2 wire"
 				    " count %u", __func__,
 				    pt2_wirecount_get(m, pte1_index(va))));
 #endif
 		}
 	}
 
 	pt2pg_pa = VM_PAGE_TO_PHYS(m);
 	pte1_idx = pte1_index(va);
 	/*
 	 * If the pmap is current, then the PT2MAP can provide access to
 	 * the page table page (promoted L2 page tables are not unmapped).
 	 * Otherwise, temporarily map the L2 page table page (m) into
 	 * the kernel's address space at either PADDR1 or PADDR2.
 	 *
 	 * Note that L2 page table size is not equal to PAGE_SIZE.
 	 */
 	if (pmap_is_current(pmap))
 		fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx);
 	else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) {
 		if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) {
 			pte2_store(PMAP1, PTE2_KPT(pt2pg_pa));
 #ifdef SMP
 			PMAP1cpu = PCPU_GET(cpuid);
 #endif
 			tlb_flush_local((vm_offset_t)PADDR1);
 			PMAP1changed++;
 		} else
 #ifdef SMP
 		if (PMAP1cpu != PCPU_GET(cpuid)) {
 			PMAP1cpu = PCPU_GET(cpuid);
 			tlb_flush_local((vm_offset_t)PADDR1);
 			PMAP1changedcpu++;
 		} else
 #endif
 			PMAP1unchanged++;
 		fpte2p = page_pt2((vm_offset_t)PADDR1, pte1_idx);
 	} else {
 		mtx_lock(&PMAP2mutex);
 		if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) {
 			pte2_store(PMAP2, PTE2_KPT(pt2pg_pa));
 			tlb_flush((vm_offset_t)PADDR2);
 		}
 		fpte2p = page_pt2((vm_offset_t)PADDR2, pte1_idx);
 	}
 	pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx);
 	npte1 = PTE1_LINK(pt2_pa);
 
 	KASSERT((opte1 & PTE1_A) != 0,
 	    ("%s: opte1 is missing PTE1_A", __func__));
 	KASSERT((opte1 & (PTE1_NM | PTE1_RO)) != PTE1_NM,
 	    ("%s: opte1 has PTE1_NM", __func__));
 
	/*
	 * Get pte2 from pte1 format.
	 */
 	npte2 = pte1_pa(opte1) | ATTR_TO_L2(opte1) | PTE2_V;
 
 	/*
 	 * If the L2 page table page is new, initialize it. If the mapping
 	 * has changed attributes, update the page table entries.
 	 */
 	if (isnew != 0) {
 		pt2_wirecount_set(m, pte1_idx, NPTE2_IN_PT2);
 		pmap_fill_pt2(fpte2p, npte2);
 	} else if ((pte2_load(fpte2p) & PTE2_PROMOTE) !=
 		    (npte2 & PTE2_PROMOTE))
 		pmap_fill_pt2(fpte2p, npte2);
 
 	KASSERT(pte2_pa(pte2_load(fpte2p)) == pte2_pa(npte2),
 	    ("%s: fpte2p and npte2 map different physical addresses",
 	    __func__));
 
 	if (fpte2p == PADDR2)
 		mtx_unlock(&PMAP2mutex);
 
 	/*
	 * Demote the mapping. This pmap is locked. The old PTE1 has
	 * PTE1_A set. If the old PTE1 does not have PTE1_RO set, it also
	 * does not have PTE1_NM set. Thus, there is no danger of a race with
	 * another processor changing the setting of PTE1_A and/or PTE1_NM
	 * between the read above and the store below.
 	 */
 	pmap_change_pte1(pmap, pte1p, va, npte1);
 
 	/*
 	 * Demote the pv entry. This depends on the earlier demotion
 	 * of the mapping. Specifically, the (re)creation of a per-
 	 * page pv entry might trigger the execution of pmap_pv_reclaim(),
 	 * which might reclaim a newly (re)created per-page pv entry
 	 * and destroy the associated mapping. In order to destroy
 	 * the mapping, the PTE1 must have already changed from mapping
 	 * the 1mpage to referencing the page table page.
 	 */
 	if (pte1_is_managed(opte1))
 		pmap_pv_demote_pte1(pmap, va, pte1_pa(opte1));
 
 	pmap_pte1_demotions++;
 	CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p",
 	    __func__, va, pmap);
 
 	PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n",
 	    __func__, pmap, va, npte1, pte1_load(pte1p), pte1p));
 	return (TRUE);
 }
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  */
 int
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     u_int flags, int8_t psind)
 {
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 	pt2_entry_t npte2, opte2;
 	pv_entry_t pv;
 	vm_paddr_t opa, pa;
 	vm_page_t mpte2, om;
 	int rv;
 
 	va = trunc_page(va);
 	KASSERT(va <= vm_max_kernel_address, ("%s: toobig", __func__));
 	KASSERT(va < UPT2V_MIN_ADDRESS || va >= UPT2V_MAX_ADDRESS,
 	    ("%s: invalid to pmap_enter page table pages (va: 0x%x)", __func__,
 	    va));
 	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
 	    va >= kmi.clean_eva,
 	    ("%s: managed mapping within the clean submap", __func__));
 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
 	KASSERT((flags & PMAP_ENTER_RESERVED) == 0,
 	    ("%s: flags %u has reserved bits set", __func__, flags));
 	pa = VM_PAGE_TO_PHYS(m);
 	npte2 = PTE2(pa, PTE2_A, vm_page_pte2_attr(m));
 	if ((flags & VM_PROT_WRITE) == 0)
 		npte2 |= PTE2_NM;
 	if ((prot & VM_PROT_WRITE) == 0)
 		npte2 |= PTE2_RO;
 	KASSERT((npte2 & (PTE2_NM | PTE2_RO)) != PTE2_RO,
 	    ("%s: flags includes VM_PROT_WRITE but prot doesn't", __func__));
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		npte2 |= PTE2_NX;
 	if ((flags & PMAP_ENTER_WIRED) != 0)
 		npte2 |= PTE2_W;
 	if (va < VM_MAXUSER_ADDRESS)
 		npte2 |= PTE2_U;
 	if (pmap != kernel_pmap)
 		npte2 |= PTE2_NG;
 
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	sched_pin();
 	if (psind == 1) {
 		/* Assert the required virtual and physical alignment. */
 		KASSERT((va & PTE1_OFFSET) == 0,
 		    ("%s: va unaligned", __func__));
 		KASSERT(m->psind > 0, ("%s: m->psind < psind", __func__));
 		rv = pmap_enter_pte1(pmap, va, PTE1_PA(pa) | ATTR_TO_L1(npte2) |
 		    PTE1_V, flags, m);
 		goto out;
 	}
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		mpte2 = pmap_allocpte2(pmap, va, flags);
 		if (mpte2 == NULL) {
 			KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
 			    ("pmap_allocpte2 failed with sleep allowed"));
 			rv = KERN_RESOURCE_SHORTAGE;
 			goto out;
 		}
 	} else
 		mpte2 = NULL;
 	pte1p = pmap_pte1(pmap, va);
 	if (pte1_is_section(pte1_load(pte1p)))
 		panic("%s: attempted on 1MB page", __func__);
 	pte2p = pmap_pte2_quick(pmap, va);
 	if (pte2p == NULL)
 		panic("%s: invalid L1 page table entry va=%#x", __func__, va);
 
 	om = NULL;
 	opte2 = pte2_load(pte2p);
 	opa = pte2_pa(opte2);
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
 	 */
 	if (pte2_is_valid(opte2) && (opa == pa)) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT2 pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT2 page will be also.
 		 */
 		if (pte2_is_wired(npte2) && !pte2_is_wired(opte2))
 			pmap->pm_stats.wired_count++;
 		else if (!pte2_is_wired(npte2) && pte2_is_wired(opte2))
 			pmap->pm_stats.wired_count--;
 
 		/*
 		 * Remove extra pte2 reference
 		 */
 		if (mpte2)
 			pt2_wirecount_dec(mpte2, pte1_index(va));
 		if ((m->oflags & VPO_UNMANAGED) == 0)
 			om = m;
 		goto validate;
 	}
 
 	/*
	 * QQQ: We think that changing the physical address of a writeable
	 *      mapping is not safe. Well, maybe in the kernel address space
	 *      with correct locking it can make sense. However, we have no
	 *      idea why anyone would do that in user address space. Are we
	 *      wrong?
 	 */
 	KASSERT((opa == 0) || (opa == pa) ||
 	    !pte2_is_valid(opte2) || ((opte2 & PTE2_RO) != 0),
 	    ("%s: pmap %p va %#x(%#x) opa %#x pa %#x - gotcha %#x %#x!",
 	    __func__, pmap, va, opte2, opa, pa, flags, prot));
 
 	pv = NULL;
 
 	/*
 	 * Mapping has changed, invalidate old range and fall through to
 	 * handle validating new mapping.
 	 */
 	if (opa) {
 		if (pte2_is_wired(opte2))
 			pmap->pm_stats.wired_count--;
 		om = PHYS_TO_VM_PAGE(opa);
 		if (om != NULL && (om->oflags & VPO_UNMANAGED) != 0)
 			om = NULL;
 		if (om != NULL)
 			pv = pmap_pvh_remove(&om->md, pmap, va);
 
 		/*
 		 * Remove extra pte2 reference
 		 */
 		if (mpte2 != NULL)
 			pt2_wirecount_dec(mpte2, va >> PTE1_SHIFT);
 	} else
 		pmap->pm_stats.resident_count++;
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		if (pv == NULL) {
 			pv = get_pv_entry(pmap, FALSE);
 			pv->pv_va = va;
 		}
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 	} else if (pv != NULL)
 		free_pv_entry(pmap, pv);
 
 	/*
 	 * Increment counters
 	 */
 	if (pte2_is_wired(npte2))
 		pmap->pm_stats.wired_count++;
 
 validate:
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
 	if (prot & VM_PROT_WRITE) {
 		if ((m->oflags & VPO_UNMANAGED) == 0)
 			vm_page_aflag_set(m, PGA_WRITEABLE);
 	}
 
 	/*
 	 * If the mapping or permission bits are different, we need
 	 * to update the pte2.
 	 *
 	 * QQQ: Think again and again what to do
 	 *      if the mapping is going to be changed!
 	 */
 	if ((opte2 & ~(PTE2_NM | PTE2_A)) != (npte2 & ~(PTE2_NM | PTE2_A))) {
 		/*
		 * Sync the icache if exec permission and the attribute
		 * VM_MEMATTR_WB_WA are set. Do it now, before the mapping is
		 * stored and made valid for hardware table walk. If done
		 * later, there is a race for other threads of the current
		 * process in the lazy-loading case.
		 * Don't do it for kernel memory, which is mapped with exec
		 * permission even if the memory isn't going to hold executable
		 * code. The only time an icache sync is needed is after a
		 * kernel module is loaded and the relocation info is
		 * processed, and that's done in elf_cpu_load_file().
		 *
		 * QQQ: (1) Is there any better way to sync the icache?
		 *      (2) For now, we do it on a page basis.
 		 */
 		if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
 		    m->md.pat_mode == VM_MEMATTR_WB_WA &&
 		    (opa != pa || (opte2 & PTE2_NX)))
 			cache_icache_sync_fresh(va, pa, PAGE_SIZE);
 
 		if (opte2 & PTE2_V) {
 			/* Change mapping with break-before-make approach. */
 			opte2 = pte2_load_clear(pte2p);
 			pmap_tlb_flush(pmap, va);
 			pte2_store(pte2p, npte2);
 			if (om != NULL) {
 				KASSERT((om->oflags & VPO_UNMANAGED) == 0,
 				    ("%s: om %p unmanaged", __func__, om));
 				if ((opte2 & PTE2_A) != 0)
 					vm_page_aflag_set(om, PGA_REFERENCED);
 				if (pte2_is_dirty(opte2))
 					vm_page_dirty(om);
 				if (TAILQ_EMPTY(&om->md.pv_list) &&
 				    ((om->flags & PG_FICTITIOUS) != 0 ||
 				    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
 					vm_page_aflag_clear(om, PGA_WRITEABLE);
 			}
 		} else
 			pte2_store(pte2p, npte2);
 	}
 #if 0
 	else {
 		/*
		 * QQQ: Now that both the access and not-modified bits are
		 *      emulated by software, this should not happen. Some
		 *      analysis is needed if this really happens. A missing
		 *      TLB flush somewhere could be the reason.
 		 */
 		panic("%s: pmap %p va %#x opte2 %x npte2 %x !!", __func__, pmap,
 		    va, opte2, npte2);
 	}
 #endif
 
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * If both the L2 page table page and the reservation are fully
 	 * populated, then attempt promotion.
 	 */
 	if ((mpte2 == NULL || pt2_is_full(mpte2, va)) &&
 	    sp_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
 	    vm_reserv_level_iffullpop(m) == 0)
 		pmap_promote_pte1(pmap, pte1p, va);
 #endif
 
 	rv = KERN_SUCCESS;
 out:
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
 
 /*
  *  Do the things to unmap a page in a process.
  */
 static int
 pmap_remove_pte2(pmap_t pmap, pt2_entry_t *pte2p, vm_offset_t va,
     struct spglist *free)
 {
 	pt2_entry_t opte2;
 	vm_page_t m;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/* Clear and invalidate the mapping. */
 	opte2 = pte2_load_clear(pte2p);
 	pmap_tlb_flush(pmap, va);
 
 	KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %#x not link pte2 %#x",
 	    __func__, pmap, va, opte2));
 
 	if (opte2 & PTE2_W)
 		pmap->pm_stats.wired_count -= 1;
 	pmap->pm_stats.resident_count -= 1;
 	if (pte2_is_managed(opte2)) {
 		m = PHYS_TO_VM_PAGE(pte2_pa(opte2));
 		if (pte2_is_dirty(opte2))
 			vm_page_dirty(m);
 		if (opte2 & PTE2_A)
 			vm_page_aflag_set(m, PGA_REFERENCED);
 		pmap_remove_entry(pmap, m, va);
 	}
 	return (pmap_unuse_pt2(pmap, va, free));
 }
 
 /*
  *  Remove a single page from a process address space.
  */
 static void
 pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free)
 {
 	pt2_entry_t *pte2p;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	KASSERT(curthread->td_pinned > 0,
 	    ("%s: curthread not pinned", __func__));
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	if ((pte2p = pmap_pte2_quick(pmap, va)) == NULL ||
 	    !pte2_is_valid(pte2_load(pte2p)))
 		return;
 	pmap_remove_pte2(pmap, pte2p, va, free);
 }
 
 /*
  *  Remove the given range of addresses from the specified map.
  *
  *  It is assumed that the start and end are properly
  *  rounded to the page size.
  */
 void
 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t nextva;
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 	struct spglist free;
 
 	/*
 	 * Perform an unsynchronized read. This is, however, safe.
 	 */
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
 	SLIST_INIT(&free);
 
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	PMAP_LOCK(pmap);
 
 	/*
 	 * Special handling of removing one page. A very common
 	 * operation and easy to short circuit some code.
 	 */
 	if (sva + PAGE_SIZE == eva) {
 		pte1 = pte1_load(pmap_pte1(pmap, sva));
 		if (pte1_is_link(pte1)) {
 			pmap_remove_page(pmap, sva, &free);
 			goto out;
 		}
 	}
 
 	for (; sva < eva; sva = nextva) {
 		/*
 		 * Calculate address for next L2 page table.
 		 */
 		nextva = pte1_trunc(sva + PTE1_SIZE);
 		if (nextva < sva)
 			nextva = eva;
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
 		pte1p = pmap_pte1(pmap, sva);
 		pte1 = pte1_load(pte1p);
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the L1 page
 		 * table is always allocated, and in kernel virtual.
 		 */
 		if (pte1 == 0)
 			continue;
 
 		if (pte1_is_section(pte1)) {
 			/*
 			 * Are we removing the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + PTE1_SIZE == nextva && eva >= nextva) {
 				pmap_remove_pte1(pmap, pte1p, sva, &free);
 				continue;
 			} else if (!pmap_demote_pte1(pmap, pte1p, sva)) {
 				/* The large page mapping was destroyed. */
 				continue;
 			}
 #ifdef INVARIANTS
 			else {
 				/* Update pte1 after demotion. */
 				pte1 = pte1_load(pte1p);
 			}
 #endif
 		}
 
 		KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
 		    " is not link", __func__, pmap, sva, pte1, pte1p));
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current L2 page table page, or to the end of the
 		 * range being removed.
 		 */
 		if (nextva > eva)
 			nextva = eva;
 
 		for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva;
 		    pte2p++, sva += PAGE_SIZE) {
 			pte2 = pte2_load(pte2p);
 			if (!pte2_is_valid(pte2))
 				continue;
 			if (pmap_remove_pte2(pmap, pte2p, sva, &free))
 				break;
 		}
 	}
 out:
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, false);
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  */
 
 void
 pmap_remove_all(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	pmap_t pmap;
 	pt2_entry_t *pte2p, opte2;
 	pt1_entry_t *pte1p;
 	vm_offset_t va;
 	struct spglist free;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	SLIST_INIT(&free);
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		goto small_mappings;
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, va);
 		(void)pmap_demote_pte1(pmap, pte1p, va);
 		PMAP_UNLOCK(pmap);
 	}
 small_mappings:
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pmap->pm_stats.resident_count--;
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found "
 		    "a 1mpage in page %p's pv list", __func__, m));
 		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
 		opte2 = pte2_load_clear(pte2p);
 		pmap_tlb_flush(pmap, pv->pv_va);
 		KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %x zero pte2",
 		    __func__, pmap, pv->pv_va));
 		if (pte2_is_wired(opte2))
 			pmap->pm_stats.wired_count--;
 		if (opte2 & PTE2_A)
 			vm_page_aflag_set(m, PGA_REFERENCED);
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if (pte2_is_dirty(opte2))
 			vm_page_dirty(m);
 		pmap_unuse_pt2(pmap, pv->pv_va, &free);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	vm_page_free_pages_toq(&free, false);
 }
 
 /*
 *  Just a subroutine for pmap_remove_pages() to reasonably satisfy
  *  good coding style, a.k.a. 80 character line width limit hell.
  */
 static __inline void
 pmap_remove_pte1_quick(pmap_t pmap, pt1_entry_t pte1, pv_entry_t pv,
     struct spglist *free)
 {
 	vm_paddr_t pa;
 	vm_page_t m, mt, mpt2pg;
 	struct md_page *pvh;
 
 	pa = pte1_pa(pte1);
 	m = PHYS_TO_VM_PAGE(pa);
 
 	KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x",
 	    __func__, m, m->phys_addr, pa));
 	KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
 	    m < &vm_page_array[vm_page_array_size],
 	    ("%s: bad pte1 %#x", __func__, pte1));
 
 	if (pte1_is_dirty(pte1)) {
 		for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
 			vm_page_dirty(mt);
 	}
 
 	pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE;
 	pvh = pa_to_pvh(pa);
 	TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 	if (TAILQ_EMPTY(&pvh->pv_list)) {
 		for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
 			if (TAILQ_EMPTY(&mt->md.pv_list))
 				vm_page_aflag_clear(mt, PGA_WRITEABLE);
 	}
 	mpt2pg = pmap_pt2_page(pmap, pv->pv_va);
 	if (mpt2pg != NULL)
 		pmap_unwire_pt2_all(pmap, pv->pv_va, mpt2pg, free);
 }
 
 /*
  *  Just subroutine for pmap_remove_pages() to reasonably satisfy
  *  good coding style, a.k.a. 80 character line width limit hell.
  */
 static __inline void
 pmap_remove_pte2_quick(pmap_t pmap, pt2_entry_t pte2, pv_entry_t pv,
     struct spglist *free)
 {
 	vm_paddr_t pa;
 	vm_page_t m;
 	struct md_page *pvh;
 
 	pa = pte2_pa(pte2);
 	m = PHYS_TO_VM_PAGE(pa);
 
 	KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x",
 	    __func__, m, m->phys_addr, pa));
 	KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
 	    m < &vm_page_array[vm_page_array_size],
 	    ("%s: bad pte2 %#x", __func__, pte2));
 
 	if (pte2_is_dirty(pte2))
 		vm_page_dirty(m);
 
 	pmap->pm_stats.resident_count--;
 	TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 	if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(pa);
 		if (TAILQ_EMPTY(&pvh->pv_list))
 			vm_page_aflag_clear(m, PGA_WRITEABLE);
 	}
 	pmap_unuse_pt2(pmap, pv->pv_va, free);
 }
 
/*
 *  Remove all pages from the specified address space; this aids process
 *  exit speeds.  Also, this code is special cased for the current process
 *  only, but can have the more generic (and slightly slower) mode enabled.
 *  This is much faster than pmap_remove in the case of running down
 *  an entire address space.
 */
 void
 pmap_remove_pages(pmap_t pmap)
 {
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 	pv_entry_t pv;
 	struct pv_chunk *pc, *npc;
 	struct spglist free;
 	int field, idx;
 	int32_t bit;
 	uint32_t inuse, bitmask;
 	boolean_t allfree;
 
 	/*
 	 * Assert that the given pmap is only active on the current
 	 * CPU.  Unfortunately, we cannot block another CPU from
 	 * activating the pmap while this function is executing.
 	 */
 	KASSERT(pmap == vmspace_pmap(curthread->td_proc->p_vmspace),
 	    ("%s: non-current pmap %p", __func__, pmap));
 #if defined(SMP) && defined(INVARIANTS)
 	{
 		cpuset_t other_cpus;
 
 		sched_pin();
 		other_cpus = pmap->pm_active;
 		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 		sched_unpin();
 		KASSERT(CPU_EMPTY(&other_cpus),
 		    ("%s: pmap %p active on other cpus", __func__, pmap));
 	}
 #endif
 	SLIST_INIT(&free);
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	sched_pin();
 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 		KASSERT(pc->pc_pmap == pmap, ("%s: wrong pmap %p %p",
 		    __func__, pmap, pc->pc_pmap));
 		allfree = TRUE;
 		for (field = 0; field < _NPCM; field++) {
 			inuse = (~(pc->pc_map[field])) & pc_freemask[field];
 			while (inuse != 0) {
 				bit = ffs(inuse) - 1;
 				bitmask = 1UL << bit;
 				idx = field * 32 + bit;
 				pv = &pc->pc_pventry[idx];
 				inuse &= ~bitmask;
 
 				/*
 				 * Note that we cannot remove wired pages
 				 * from a process' mapping at this time
 				 */
 				pte1p = pmap_pte1(pmap, pv->pv_va);
 				pte1 = pte1_load(pte1p);
 				if (pte1_is_section(pte1)) {
 					if (pte1_is_wired(pte1))  {
 						allfree = FALSE;
 						continue;
 					}
 					pte1_clear(pte1p);
 					pmap_remove_pte1_quick(pmap, pte1, pv,
 					    &free);
				} else if (pte1_is_link(pte1)) {
 					pte2p = pt2map_entry(pv->pv_va);
 					pte2 = pte2_load(pte2p);
 
 					if (!pte2_is_valid(pte2)) {
 						printf("%s: pmap %p va %#x "
 						    "pte2 %#x\n", __func__,
 						    pmap, pv->pv_va, pte2);
 						panic("bad pte2");
 					}
 
 					if (pte2_is_wired(pte2))   {
 						allfree = FALSE;
 						continue;
 					}
 					pte2_clear(pte2p);
 					pmap_remove_pte2_quick(pmap, pte2, pv,
 					    &free);
 				} else {
 					printf("%s: pmap %p va %#x pte1 %#x\n",
 					    __func__, pmap, pv->pv_va, pte1);
 					panic("bad pte1");
 				}
 
 				/* Mark free */
 				PV_STAT(pv_entry_frees++);
 				PV_STAT(pv_entry_spare++);
 				pv_entry_count--;
 				pc->pc_map[field] |= bitmask;
 			}
 		}
 		if (allfree) {
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			free_pv_chunk(pc);
 		}
 	}
 	tlb_flush_all_ng_local();
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, false);
 }
 
/*
 *  This code makes some *MAJOR* assumptions:
 *  1. The pmap exists and is the current pmap.
 *  2. Not wired.
 *  3. Read access.
 *  4. No L2 page table pages.
 *  but is *MUCH* faster than pmap_enter...
 */
 static vm_page_t
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpt2pg)
 {
 	pt2_entry_t *pte2p, pte2;
 	vm_paddr_t pa;
 	struct spglist free;
 	uint32_t l2prot;
 
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->oflags & VPO_UNMANAGED) != 0,
 	    ("%s: managed mapping within the clean submap", __func__));
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
	/*
	 * In the case that an L2 page table page is not
	 * resident, we create it here.
	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		u_int pte1_idx;
 		pt1_entry_t pte1, *pte1p;
 		vm_paddr_t pt2_pa;
 
 		/*
 		 * Get L1 page table things.
 		 */
 		pte1_idx = pte1_index(va);
 		pte1p = pmap_pte1(pmap, va);
 		pte1 = pte1_load(pte1p);
 
 		if (mpt2pg && (mpt2pg->pindex == (pte1_idx & ~PT2PG_MASK))) {
			/*
			 * Each of the NPT2_IN_PG L2 page tables on the page
			 * can come here. Make sure that the associated L1
			 * page table link is established.
			 *
			 * QQQ: It turns out that we don't establish all links
			 *      to L2 page tables for a newly allocated L2
			 *      page table page.
			 */
 			KASSERT(!pte1_is_section(pte1),
 			    ("%s: pte1 %#x is section", __func__, pte1));
 			if (!pte1_is_link(pte1)) {
 				pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(mpt2pg),
 				    pte1_idx);
 				pte1_store(pte1p, PTE1_LINK(pt2_pa));
 			}
 			pt2_wirecount_inc(mpt2pg, pte1_idx);
 		} else {
 			/*
 			 * If the L2 page table page is mapped, we just
 			 * increment the hold count, and activate it.
 			 */
 			if (pte1_is_section(pte1)) {
 				return (NULL);
 			} else if (pte1_is_link(pte1)) {
 				mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
 				pt2_wirecount_inc(mpt2pg, pte1_idx);
 			} else {
 				mpt2pg = _pmap_allocpte2(pmap, va,
 				    PMAP_ENTER_NOSLEEP);
 				if (mpt2pg == NULL)
 					return (NULL);
 			}
 		}
 	} else {
 		mpt2pg = NULL;
 	}
 
 	/*
 	 * This call to pt2map_entry() makes the assumption that we are
 	 * entering the page into the current pmap.  In order to support
 	 * quick entry into any pmap, one would likely use pmap_pte2_quick().
 	 * But that isn't as quick as pt2map_entry().
 	 */
 	pte2p = pt2map_entry(va);
 	pte2 = pte2_load(pte2p);
 	if (pte2_is_valid(pte2)) {
 		if (mpt2pg != NULL) {
 			/*
 			 * Remove extra pte2 reference
 			 */
 			pt2_wirecount_dec(mpt2pg, pte1_index(va));
 			mpt2pg = NULL;
 		}
 		return (NULL);
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m)) {
 		if (mpt2pg != NULL) {
 			SLIST_INIT(&free);
 			if (pmap_unwire_pt2(pmap, va, mpt2pg, &free)) {
 				pmap_tlb_flush(pmap, va);
 				vm_page_free_pages_toq(&free, false);
 			}
 
 			mpt2pg = NULL;
 		}
 		return (NULL);
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 
 	/*
 	 * Now validate mapping with RO protection
 	 */
 	pa = VM_PAGE_TO_PHYS(m);
 	l2prot = PTE2_RO | PTE2_NM;
 	if (va < VM_MAXUSER_ADDRESS)
 		l2prot |= PTE2_U | PTE2_NG;
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		l2prot |= PTE2_NX;
 	else if (m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap) {
		/*
		 * Sync the icache if exec permission is requested and the
		 * attribute VM_MEMATTR_WB_WA is set.
		 * QQQ: For more info, see comments in pmap_enter().
		 */
 		cache_icache_sync_fresh(va, pa, PAGE_SIZE);
 	}
 	pte2_store(pte2p, PTE2(pa, l2prot, vm_page_pte2_attr(m)));
 
 	return (mpt2pg);
 }
 
 void
 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *  Tries to create a read- and/or execute-only 1 MB page mapping.  Returns
  *  true if successful.  Returns false if (1) a mapping already exists at the
  *  specified virtual address or (2) a PV entry cannot be allocated without
  *  reclaiming another PV entry.
  */
 static bool
 pmap_enter_1mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 	pt1_entry_t pte1;
 	vm_paddr_t pa;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pa = VM_PAGE_TO_PHYS(m);
 	pte1 = PTE1(pa, PTE1_NM | PTE1_RO, ATTR_TO_L1(vm_page_pte2_attr(m)));
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		pte1 |= PTE1_NX;
 	if (va < VM_MAXUSER_ADDRESS)
 		pte1 |= PTE1_U;
 	if (pmap != kernel_pmap)
 		pte1 |= PTE1_NG;
 	return (pmap_enter_pte1(pmap, va, pte1, PMAP_ENTER_NOSLEEP |
 	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, m) == KERN_SUCCESS);
 }
 
 /*
  *  Tries to create the specified 1 MB page mapping.  Returns KERN_SUCCESS if
  *  the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
  *  otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
  *  a mapping already exists at the specified virtual address.  Returns
  *  KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NORECLAIM was specified and PV entry
  *  allocation failed.
  */
 static int
 pmap_enter_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t pte1, u_int flags,
     vm_page_t m)
 {
 	struct spglist free;
 	pt1_entry_t opte1, *pte1p;
 	pt2_entry_t pte2, *pte2p;
 	vm_offset_t cur, end;
 	vm_page_t mt;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	KASSERT((pte1 & (PTE1_NM | PTE1_RO)) == 0 ||
 	    (pte1 & (PTE1_NM | PTE1_RO)) == (PTE1_NM | PTE1_RO),
 	    ("%s: pte1 has inconsistent NM and RO attributes", __func__));
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pte1p = pmap_pte1(pmap, va);
 	opte1 = pte1_load(pte1p);
 	if (pte1_is_valid(opte1)) {
 		if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
 			CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p",
 			    __func__, va, pmap);
 			return (KERN_FAILURE);
 		}
 		/* Break the existing mapping(s). */
 		SLIST_INIT(&free);
 		if (pte1_is_section(opte1)) {
 			/*
 			 * If the section resulted from a promotion, then a
 			 * reserved PT page could be freed.
 			 */
 			pmap_remove_pte1(pmap, pte1p, va, &free);
 		} else {
 			sched_pin();
 			end = va + PTE1_SIZE;
 			for (cur = va, pte2p = pmap_pte2_quick(pmap, va);
 			    cur != end; cur += PAGE_SIZE, pte2p++) {
 				pte2 = pte2_load(pte2p);
 				if (!pte2_is_valid(pte2))
 					continue;
 				if (pmap_remove_pte2(pmap, pte2p, cur, &free))
 					break;
 			}
 			sched_unpin();
 		}
 		vm_page_free_pages_toq(&free, false);
 	}
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		/*
 		 * Abort this mapping if its PV entry could not be created.
 		 */
 		if (!pmap_pv_insert_pte1(pmap, va, pte1, flags)) {
 			CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p",
 			    __func__, va, pmap);
 			return (KERN_RESOURCE_SHORTAGE);
 		}
 		if ((pte1 & PTE1_RO) == 0) {
 			for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
 				vm_page_aflag_set(mt, PGA_WRITEABLE);
 		}
 	}
 
 	/*
 	 * Increment counters.
 	 */
 	if (pte1_is_wired(pte1))
 		pmap->pm_stats.wired_count += PTE1_SIZE / PAGE_SIZE;
 	pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE;
 
	/*
	 * Sync the icache if exec permission is requested and the attribute
	 * VM_MEMATTR_WB_WA is set.
	 * QQQ: For more info, see comments in pmap_enter().
	 */
 	if ((pte1 & PTE1_NX) == 0 && m->md.pat_mode == VM_MEMATTR_WB_WA &&
 	    pmap != kernel_pmap && (!pte1_is_section(opte1) ||
 	    pte1_pa(opte1) != VM_PAGE_TO_PHYS(m) || (opte1 & PTE2_NX) != 0))
 		cache_icache_sync_fresh(va, VM_PAGE_TO_PHYS(m), PTE1_SIZE);
 
 	/*
 	 * Map the section.
 	 */
 	pte1_store(pte1p, pte1);
 
 	pmap_pte1_mappings++;
 	CTR3(KTR_PMAP, "%s: success for va %#lx in pmap %p", __func__, va,
 	    pmap);
 	return (KERN_SUCCESS);
 }
 
 /*
  *  Maps a sequence of resident pages belonging to the same object.
  *  The sequence begins with the given page m_start.  This page is
  *  mapped at the given virtual address start.  Each subsequent page is
  *  mapped at a virtual address that is offset from start by the same
  *  amount as the page is offset from m_start within the object.  The
  *  last page in the sequence is the page with the largest offset from
  *  m_start that can be mapped at a virtual address less than the given
  *  virtual address end.  Not every virtual page between start and end
  *  is mapped; only those for which a resident page exists with the
  *  corresponding offset from m_start are mapped.
  */
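/*
 *  For example (illustrative numbers, assuming 4 KB pages): with three
 *  resident pages starting at m_start, start = 0x20000000, and end beyond
 *  0x20003000, the pages are mapped at 0x20000000, 0x20001000, and
 *  0x20002000; a virtual page whose corresponding offset has no resident
 *  page is simply left unmapped.
 */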
 void
 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
     vm_page_t m_start, vm_prot_t prot)
 {
 	vm_offset_t va;
 	vm_page_t m, mpt2pg;
 	vm_pindex_t diff, psize;
 
	PDEBUG(6, printf("%s: pmap %p start %#x end %#x m %p prot %#x\n",
 	    __func__, pmap, start, end, m_start, prot));
 
 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
 	psize = atop(end - start);
 	mpt2pg = NULL;
 	m = m_start;
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 		va = start + ptoa(diff);
 		if ((va & PTE1_OFFSET) == 0 && va + PTE1_SIZE <= end &&
 		    m->psind == 1 && sp_enabled &&
 		    pmap_enter_1mpage(pmap, va, m, prot))
 			m = &m[PTE1_SIZE / PAGE_SIZE - 1];
 		else
 			mpt2pg = pmap_enter_quick_locked(pmap, va, m, prot,
 			    mpt2pg);
 		m = TAILQ_NEXT(m, listq);
 	}
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *  This code maps large physical mmap regions into the
  *  processor address space.  Note that some shortcuts
  *  are taken, but the code works.
  */
 void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
 	pt1_entry_t *pte1p;
 	vm_paddr_t pa, pte2_pa;
 	vm_page_t p;
 	vm_memattr_t pat_mode;
 	u_int l1attr, l1prot;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("%s: non-device object", __func__));
 	if ((addr & PTE1_OFFSET) == 0 && (size & PTE1_OFFSET) == 0) {
 		if (!vm_object_populate(object, pindex, pindex + atop(size)))
 			return;
 		p = vm_page_lookup(object, pindex);
 		KASSERT(p->valid == VM_PAGE_BITS_ALL,
 		    ("%s: invalid page %p", __func__, p));
 		pat_mode = p->md.pat_mode;
 
 		/*
 		 * Abort the mapping if the first page is not physically
 		 * aligned to a 1MB page boundary.
 		 */
 		pte2_pa = VM_PAGE_TO_PHYS(p);
 		if (pte2_pa & PTE1_OFFSET)
 			return;
 
 		/*
 		 * Skip the first page. Abort the mapping if the rest of
 		 * the pages are not physically contiguous or have differing
 		 * memory attributes.
 		 */
 		p = TAILQ_NEXT(p, listq);
 		for (pa = pte2_pa + PAGE_SIZE; pa < pte2_pa + size;
 		    pa += PAGE_SIZE) {
 			KASSERT(p->valid == VM_PAGE_BITS_ALL,
 			    ("%s: invalid page %p", __func__, p));
 			if (pa != VM_PAGE_TO_PHYS(p) ||
 			    pat_mode != p->md.pat_mode)
 				return;
 			p = TAILQ_NEXT(p, listq);
 		}
 
 		/*
 		 * Map using 1MB pages.
 		 *
		 * QQQ: Since we are mapping a section, the same conditions
		 * must hold as during promotion. It looks like only RW
		 * mappings are created here, so read-only mappings must be
		 * handled elsewhere.
 		 */
 		l1prot = PTE1_U | PTE1_NG | PTE1_RW | PTE1_M | PTE1_A;
 		l1attr = ATTR_TO_L1(vm_memattr_to_pte2(pat_mode));
 		PMAP_LOCK(pmap);
 		for (pa = pte2_pa; pa < pte2_pa + size; pa += PTE1_SIZE) {
 			pte1p = pmap_pte1(pmap, addr);
 			if (!pte1_is_valid(pte1_load(pte1p))) {
 				pte1_store(pte1p, PTE1(pa, l1prot, l1attr));
 				pmap->pm_stats.resident_count += PTE1_SIZE /
 				    PAGE_SIZE;
 				pmap_pte1_mappings++;
 			}
 			/* Else continue on if the PTE1 is already valid. */
 			addr += PTE1_SIZE;
 		}
 		PMAP_UNLOCK(pmap);
 	}
 }
 
 /*
 *  Apply the given protection to a 1mpage mapping in a process.
  */
 static void
 pmap_protect_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva,
     vm_prot_t prot)
 {
 	pt1_entry_t npte1, opte1;
 	vm_offset_t eva, va;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & PTE1_OFFSET) == 0,
 	    ("%s: sva is not 1mpage aligned", __func__));
 
 	opte1 = npte1 = pte1_load(pte1p);
 	if (pte1_is_managed(opte1) && pte1_is_dirty(opte1)) {
 		eva = sva + PTE1_SIZE;
 		for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1));
 		    va < eva; va += PAGE_SIZE, m++)
 			vm_page_dirty(m);
 	}
 	if ((prot & VM_PROT_WRITE) == 0)
 		npte1 |= PTE1_RO | PTE1_NM;
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		npte1 |= PTE1_NX;
 
	/*
	 * QQQ: Herein, execute permission is never set.
	 *      It can only be cleared, so no icache
	 *      syncing is needed.
	 */
 
 	if (npte1 != opte1) {
 		pte1_store(pte1p, npte1);
 		pmap_tlb_flush(pmap, sva);
 	}
 }
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  */
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	boolean_t pv_lists_locked;
 	vm_offset_t nextva;
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, opte2, npte2;
 
 	KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
 	if (prot == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
 	if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
 	    (VM_PROT_WRITE | VM_PROT_EXECUTE))
 		return;
 
 	if (pmap_is_current(pmap))
 		pv_lists_locked = FALSE;
 	else {
 		pv_lists_locked = TRUE;
 resume:
 		rw_wlock(&pvh_global_lock);
 		sched_pin();
 	}
 
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = nextva) {
 		/*
 		 * Calculate address for next L2 page table.
 		 */
 		nextva = pte1_trunc(sva + PTE1_SIZE);
 		if (nextva < sva)
 			nextva = eva;
 
 		pte1p = pmap_pte1(pmap, sva);
 		pte1 = pte1_load(pte1p);
 
		/*
		 * Weed out invalid mappings. Note: we assume that the L1 page
		 * table is always allocated, and in kernel virtual.
		 */
 		if (pte1 == 0)
 			continue;
 
 		if (pte1_is_section(pte1)) {
 			/*
 			 * Are we protecting the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + PTE1_SIZE == nextva && eva >= nextva) {
 				pmap_protect_pte1(pmap, pte1p, sva, prot);
 				continue;
 			} else {
 				if (!pv_lists_locked) {
 					pv_lists_locked = TRUE;
 					if (!rw_try_wlock(&pvh_global_lock)) {
 						PMAP_UNLOCK(pmap);
 						goto resume;
 					}
 					sched_pin();
 				}
 				if (!pmap_demote_pte1(pmap, pte1p, sva)) {
 					/*
 					 * The large page mapping
 					 * was destroyed.
 					 */
 					continue;
 				}
 #ifdef INVARIANTS
 				else {
 					/* Update pte1 after demotion */
 					pte1 = pte1_load(pte1p);
 				}
 #endif
 			}
 		}
 
 		KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
 		    " is not link", __func__, pmap, sva, pte1, pte1p));
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current L2 page table page, or to the end of the
 		 * range being protected.
 		 */
 		if (nextva > eva)
 			nextva = eva;
 
 		for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++,
 		    sva += PAGE_SIZE) {
 			vm_page_t m;
 
 			opte2 = npte2 = pte2_load(pte2p);
 			if (!pte2_is_valid(opte2))
 				continue;
 
 			if ((prot & VM_PROT_WRITE) == 0) {
 				if (pte2_is_managed(opte2) &&
 				    pte2_is_dirty(opte2)) {
 					m = PHYS_TO_VM_PAGE(pte2_pa(opte2));
 					vm_page_dirty(m);
 				}
 				npte2 |= PTE2_RO | PTE2_NM;
 			}
 
 			if ((prot & VM_PROT_EXECUTE) == 0)
 				npte2 |= PTE2_NX;
 
			/*
			 * QQQ: Herein, execute permission is never set.
			 *      It can only be cleared, so no icache
			 *      syncing is needed.
			 */
 
 			if (npte2 != opte2) {
 				pte2_store(pte2p, npte2);
 				pmap_tlb_flush(pmap, sva);
 			}
 		}
 	}
 	if (pv_lists_locked) {
 		sched_unpin();
 		rw_wunlock(&pvh_global_lock);
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	pmap_pvh_wired_mappings:
  *
  *	Return the updated number "count" of managed mappings that are wired.
  */
 static int
 pmap_pvh_wired_mappings(struct md_page *pvh, int count)
 {
 	pmap_t pmap;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	sched_pin();
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va));
 		if (pte1_is_section(pte1)) {
 			if (pte1_is_wired(pte1))
 				count++;
 		} else {
 			KASSERT(pte1_is_link(pte1),
 			    ("%s: pte1 %#x is not link", __func__, pte1));
 			pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va));
 			if (pte2_is_wired(pte2))
 				count++;
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	sched_unpin();
 	return (count);
 }
 
 /*
  *	pmap_page_wired_mappings:
  *
  *	Return the number of managed mappings to the given physical page
  *	that are wired.
  */
 int
 pmap_page_wired_mappings(vm_page_t m)
 {
 	int count;
 
 	count = 0;
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (count);
 	rw_wlock(&pvh_global_lock);
 	count = pmap_pvh_wired_mappings(&m->md, count);
 	if ((m->flags & PG_FICTITIOUS) == 0) {
 		count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)),
 		    count);
 	}
 	rw_wunlock(&pvh_global_lock);
 	return (count);
 }
 
 /*
  *  Returns TRUE if any of the given mappings were used to modify
  *  physical memory.  Otherwise, returns FALSE.  Both page and 1mpage
  *  mappings are supported.
  */
 static boolean_t
 pmap_is_modified_pvh(struct md_page *pvh)
 {
 	pv_entry_t pv;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	pmap_t pmap;
 	boolean_t rv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	rv = FALSE;
 	sched_pin();
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va));
 		if (pte1_is_section(pte1)) {
 			rv = pte1_is_dirty(pte1);
 		} else {
 			KASSERT(pte1_is_link(pte1),
 			    ("%s: pte1 %#x is not link", __func__, pte1));
 			pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va));
 			rv = pte2_is_dirty(pte2);
 		}
 		PMAP_UNLOCK(pmap);
 		if (rv)
 			break;
 	}
 	sched_unpin();
 	return (rv);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Return whether or not the specified physical page was modified
  *	in any physical maps.
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 	boolean_t rv;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 
 	/*
 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTE2s can have PG_M set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (FALSE);
 	rw_wlock(&pvh_global_lock);
 	rv = pmap_is_modified_pvh(&m->md) ||
 	    ((m->flags & PG_FICTITIOUS) == 0 &&
 	    pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
 	rw_wunlock(&pvh_global_lock);
 	return (rv);
 }
 
 /*
  *	pmap_is_prefaultable:
  *
  *	Return whether or not the specified virtual address is eligible
  *	for prefault.
  */
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	boolean_t rv;
 
 	rv = FALSE;
 	PMAP_LOCK(pmap);
 	pte1 = pte1_load(pmap_pte1(pmap, addr));
 	if (pte1_is_link(pte1)) {
 		pte2 = pte2_load(pt2map_entry(addr));
		rv = !pte2_is_valid(pte2);
 	}
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
 
 /*
  *  Returns TRUE if any of the given mappings were referenced and FALSE
  *  otherwise. Both page and 1mpage mappings are supported.
  */
 static boolean_t
 pmap_is_referenced_pvh(struct md_page *pvh)
 {
 
 	pv_entry_t pv;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	pmap_t pmap;
 	boolean_t rv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	rv = FALSE;
 	sched_pin();
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va));
 		if (pte1_is_section(pte1)) {
 			rv = (pte1 & (PTE1_A | PTE1_V)) == (PTE1_A | PTE1_V);
 		} else {
 			pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va));
 			rv = (pte2 & (PTE2_A | PTE2_V)) == (PTE2_A | PTE2_V);
 		}
 		PMAP_UNLOCK(pmap);
 		if (rv)
 			break;
 	}
 	sched_unpin();
 	return (rv);
 }
 
 /*
  *	pmap_is_referenced:
  *
  *	Return whether or not the specified physical page was referenced
  *	in any physical maps.
  */
 boolean_t
 pmap_is_referenced(vm_page_t m)
 {
 	boolean_t rv;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	rw_wlock(&pvh_global_lock);
 	rv = pmap_is_referenced_pvh(&m->md) ||
 	    ((m->flags & PG_FICTITIOUS) == 0 &&
 	    pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
 	rw_wunlock(&pvh_global_lock);
 	return (rv);
 }
 
 /*
  *	pmap_ts_referenced:
  *
  *	Return a count of reference bits for a page, clearing those bits.
  *	It is not necessary for every reference bit to be cleared, but it
  *	is necessary that 0 only be returned when there are truly no
  *	reference bits set.
  *
  *	As an optimization, update the page's dirty field if a modified bit is
  *	found while counting reference bits.  This opportunistic update can be
  *	performed at low cost and can eliminate the need for some future calls
  *	to pmap_is_modified().  However, since this function stops after
  *	finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
  *	dirty pages.  Those dirty pages will only be detected by a future call
  *	to pmap_is_modified().
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv, pvf;
 	pmap_t pmap;
 	pt1_entry_t  *pte1p, opte1;
 	pt2_entry_t *pte2p, opte2;
 	vm_paddr_t pa;
 	int rtval = 0;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	pa = VM_PAGE_TO_PHYS(m);
 	pvh = pa_to_pvh(pa);
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	if ((m->flags & PG_FICTITIOUS) != 0 ||
 	    (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
 		goto small_mappings;
 	pv = pvf;
 	do {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		opte1 = pte1_load(pte1p);
 		if (pte1_is_dirty(opte1)) {
 			/*
 			 * Although "opte1" is mapping a 1MB page, because
 			 * this function is called at a 4KB page granularity,
 			 * we only update the 4KB page under test.
 			 */
 			vm_page_dirty(m);
 		}
 		if ((opte1 & PTE1_A) != 0) {
 			/*
 			 * Since this reference bit is shared by 256 4KB pages,
 			 * it should not be cleared every time it is tested.
 			 * Apply a simple "hash" function on the physical page
 			 * number, the virtual section number, and the pmap
 			 * address to select one 4KB page out of the 256
 			 * on which testing the reference bit will result
 			 * in clearing that bit. This function is designed
 			 * to avoid the selection of the same 4KB page
 			 * for every 1MB page mapping.
 			 *
 			 * On demotion, a mapping that hasn't been referenced
 			 * is simply destroyed.  To avoid the possibility of a
 			 * subsequent page fault on a demoted wired mapping,
 			 * always leave its reference bit set.  Moreover,
 			 * since the section is wired, the current state of
 			 * its reference bit won't affect page replacement.
 			 */
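			/*
			 * Concretely, the check below XORs the page frame
			 * number, the section index of the virtual address,
			 * and the pmap pointer, and clears PTE1_A only when
			 * the low bits of that result are all zero (and the
			 * section is not wired).
			 */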
			if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PTE1_SHIFT) ^
 			    (uintptr_t)pmap) & (NPTE2_IN_PG - 1)) == 0 &&
 			    !pte1_is_wired(opte1)) {
 				pte1_clear_bit(pte1p, PTE1_A);
 				pmap_tlb_flush(pmap, pv->pv_va);
 			}
 			rtval++;
 		}
 		PMAP_UNLOCK(pmap);
 		/* Rotate the PV list if it has more than one entry. */
 		if (TAILQ_NEXT(pv, pv_next) != NULL) {
 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 			TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 		}
 		if (rtval >= PMAP_TS_REFERENCED_MAX)
 			goto out;
 	} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
 small_mappings:
 	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
 		goto out;
 	pv = pvf;
 	do {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		KASSERT(pte1_is_link(pte1_load(pte1p)),
 		    ("%s: not found a link in page %p's pv list", __func__, m));
 
 		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
 		opte2 = pte2_load(pte2p);
 		if (pte2_is_dirty(opte2))
 			vm_page_dirty(m);
 		if ((opte2 & PTE2_A) != 0) {
 			pte2_clear_bit(pte2p, PTE2_A);
 			pmap_tlb_flush(pmap, pv->pv_va);
 			rtval++;
 		}
 		PMAP_UNLOCK(pmap);
 		/* Rotate the PV list if it has more than one entry. */
 		if (TAILQ_NEXT(pv, pv_next) != NULL) {
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		}
 	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval <
 	    PMAP_TS_REFERENCED_MAX);
 out:
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	return (rtval);
 }
 
 /*
  *	Clear the wired attribute from the mappings for the specified range of
  *	addresses in the given pmap.  Every valid mapping within that range
  *	must have the wired attribute set.  In contrast, invalid mappings
  *	cannot have the wired attribute set, so they are ignored.
  *
  *	The wired attribute of the page table entry is not a hardware feature,
  *	so there is no need to invalidate any TLB entries.
  */
 void
 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t nextva;
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 	boolean_t pv_lists_locked;
 
 	if (pmap_is_current(pmap))
 		pv_lists_locked = FALSE;
 	else {
 		pv_lists_locked = TRUE;
 resume:
 		rw_wlock(&pvh_global_lock);
 		sched_pin();
 	}
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = nextva) {
 		nextva = pte1_trunc(sva + PTE1_SIZE);
 		if (nextva < sva)
 			nextva = eva;
 
 		pte1p = pmap_pte1(pmap, sva);
 		pte1 = pte1_load(pte1p);
 
		/*
		 * Weed out invalid mappings. Note: we assume that the L1 page
		 * table is always allocated, and in kernel virtual.
		 */
 		if (pte1 == 0)
 			continue;
 
 		if (pte1_is_section(pte1)) {
 			if (!pte1_is_wired(pte1))
 				panic("%s: pte1 %#x not wired", __func__, pte1);
 
 			/*
 			 * Are we unwiring the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + PTE1_SIZE == nextva && eva >= nextva) {
 				pte1_clear_bit(pte1p, PTE1_W);
 				pmap->pm_stats.wired_count -= PTE1_SIZE /
 				    PAGE_SIZE;
 				continue;
 			} else {
 				if (!pv_lists_locked) {
 					pv_lists_locked = TRUE;
 					if (!rw_try_wlock(&pvh_global_lock)) {
 						PMAP_UNLOCK(pmap);
 						/* Repeat sva. */
 						goto resume;
 					}
 					sched_pin();
 				}
 				if (!pmap_demote_pte1(pmap, pte1p, sva))
 					panic("%s: demotion failed", __func__);
 #ifdef INVARIANTS
 				else {
 					/* Update pte1 after demotion */
 					pte1 = pte1_load(pte1p);
 				}
 #endif
 			}
 		}
 
 		KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
 		    " is not link", __func__, pmap, sva, pte1, pte1p));
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current L2 page table page, or to the end of the
		 * range being unwired.
 		 */
 		if (nextva > eva)
 			nextva = eva;
 
 		for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++,
 		    sva += PAGE_SIZE) {
 			pte2 = pte2_load(pte2p);
 			if (!pte2_is_valid(pte2))
 				continue;
 			if (!pte2_is_wired(pte2))
 				panic("%s: pte2 %#x is missing PTE2_W",
 				    __func__, pte2);
 
 			/*
 			 * PTE2_W must be cleared atomically. Although the pmap
 			 * lock synchronizes access to PTE2_W, another processor
 			 * could be changing PTE2_NM and/or PTE2_A concurrently.
 			 */
 			pte2_clear_bit(pte2p, PTE2_W);
 			pmap->pm_stats.wired_count--;
 		}
 	}
 	if (pv_lists_locked) {
 		sched_unpin();
 		rw_wunlock(&pvh_global_lock);
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *  Clear the write and modified bits in each of the given page's mappings.
  */
 void
 pmap_remove_write(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t next_pv, pv;
 	pmap_t pmap;
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p, opte2;
 	vm_offset_t va;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 
 	/*
 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 	 * set by another thread while the object is locked.  Thus,
 	 * if PGA_WRITEABLE is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		goto small_mappings;
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, va);
 		if (!(pte1_load(pte1p) & PTE1_RO))
 			(void)pmap_demote_pte1(pmap, pte1p, va);
 		PMAP_UNLOCK(pmap);
 	}
 small_mappings:
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found"
 		    " a section in page %p's pv list", __func__, m));
 		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
 		opte2 = pte2_load(pte2p);
 		if (!(opte2 & PTE2_RO)) {
 			pte2_store(pte2p, opte2 | PTE2_RO | PTE2_NM);
 			if (pte2_is_dirty(opte2))
 				vm_page_dirty(m);
 			pmap_tlb_flush(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 }
 
 /*
  *	Apply the given advice to the specified range of addresses within the
  *	given pmap.  Depending on the advice, clear the referenced and/or
  *	modified flags in each mapping and set the mapped page's dirty field.
  */
 void
 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
 {
 	pt1_entry_t *pte1p, opte1;
 	pt2_entry_t *pte2p, pte2;
 	vm_offset_t pdnxt;
 	vm_page_t m;
 	boolean_t pv_lists_locked;
 
 	if (advice != MADV_DONTNEED && advice != MADV_FREE)
 		return;
 	if (pmap_is_current(pmap))
 		pv_lists_locked = FALSE;
 	else {
 		pv_lists_locked = TRUE;
 resume:
 		rw_wlock(&pvh_global_lock);
 		sched_pin();
 	}
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = pdnxt) {
 		pdnxt = pte1_trunc(sva + PTE1_SIZE);
 		if (pdnxt < sva)
 			pdnxt = eva;
 		pte1p = pmap_pte1(pmap, sva);
 		opte1 = pte1_load(pte1p);
 		if (!pte1_is_valid(opte1)) /* XXX */
 			continue;
 		else if (pte1_is_section(opte1)) {
 			if (!pte1_is_managed(opte1))
 				continue;
 			if (!pv_lists_locked) {
 				pv_lists_locked = TRUE;
 				if (!rw_try_wlock(&pvh_global_lock)) {
 					PMAP_UNLOCK(pmap);
 					goto resume;
 				}
 				sched_pin();
 			}
 			if (!pmap_demote_pte1(pmap, pte1p, sva)) {
 				/*
 				 * The large page mapping was destroyed.
 				 */
 				continue;
 			}
 
 			/*
 			 * Unless the page mappings are wired, remove the
 			 * mapping to a single page so that a subsequent
 			 * access may repromote.  Since the underlying L2 page
 			 * table is fully populated, this removal never
			 * frees an L2 page table page.
 			 */
 			if (!pte1_is_wired(opte1)) {
 				pte2p = pmap_pte2_quick(pmap, sva);
 				KASSERT(pte2_is_valid(pte2_load(pte2p)),
 				    ("%s: invalid PTE2", __func__));
 				pmap_remove_pte2(pmap, pte2p, sva, NULL);
 			}
 		}
 		if (pdnxt > eva)
 			pdnxt = eva;
 		for (pte2p = pmap_pte2_quick(pmap, sva); sva != pdnxt; pte2p++,
 		    sva += PAGE_SIZE) {
 			pte2 = pte2_load(pte2p);
 			if (!pte2_is_valid(pte2) || !pte2_is_managed(pte2))
 				continue;
 			else if (pte2_is_dirty(pte2)) {
 				if (advice == MADV_DONTNEED) {
 					/*
 					 * Future calls to pmap_is_modified()
 					 * can be avoided by making the page
 					 * dirty now.
 					 */
 					m = PHYS_TO_VM_PAGE(pte2_pa(pte2));
 					vm_page_dirty(m);
 				}
 				pte2_set_bit(pte2p, PTE2_NM);
 				pte2_clear_bit(pte2p, PTE2_A);
 			} else if ((pte2 & PTE2_A) != 0)
 				pte2_clear_bit(pte2p, PTE2_A);
 			else
 				continue;
 			pmap_tlb_flush(pmap, sva);
 		}
 	}
 	if (pv_lists_locked) {
 		sched_unpin();
 		rw_wunlock(&pvh_global_lock);
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	Clear the modify bits on the specified physical page.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t next_pv, pv;
 	pmap_t pmap;
 	pt1_entry_t *pte1p, opte1;
 	pt2_entry_t *pte2p, opte2;
 	vm_offset_t va;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	KASSERT(!vm_page_xbusied(m),
 	    ("%s: page %p is exclusive busy", __func__, m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTE2s can have PTE2_NM
 	 * cleared. If the object containing the page is locked and the page
 	 * is not exclusive busied, then PGA_WRITEABLE cannot be concurrently
 	 * set.
 	 */
	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		goto small_mappings;
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, va);
 		opte1 = pte1_load(pte1p);
 		if (!(opte1 & PTE1_RO)) {
 			if (pmap_demote_pte1(pmap, pte1p, va) &&
 			    !pte1_is_wired(opte1)) {
 				/*
 				 * Write protect the mapping to a
 				 * single page so that a subsequent
 				 * write access may repromote.
 				 */
 				va += VM_PAGE_TO_PHYS(m) - pte1_pa(opte1);
 				pte2p = pmap_pte2_quick(pmap, va);
 				opte2 = pte2_load(pte2p);
 				if ((opte2 & PTE2_V)) {
 					pte2_set_bit(pte2p, PTE2_NM | PTE2_RO);
 					vm_page_dirty(m);
 					pmap_tlb_flush(pmap, va);
 				}
 			}
 		}
 		PMAP_UNLOCK(pmap);
 	}
 small_mappings:
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found"
 		    " a section in page %p's pv list", __func__, m));
 		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
 		if (pte2_is_dirty(pte2_load(pte2p))) {
 			pte2_set_bit(pte2p, PTE2_NM);
 			pmap_tlb_flush(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
}

 /*
  *  Sets the memory attribute for the specified page.
  */
 void
 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 {
 	pt2_entry_t *cmap2_pte2p;
 	vm_memattr_t oma;
 	vm_paddr_t pa;
 	struct pcpu *pc;
 
 	oma = m->md.pat_mode;
 	m->md.pat_mode = ma;
 
 	CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d", __func__, m,
 	    VM_PAGE_TO_PHYS(m), oma, ma);
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return;
 #if 0
 	/*
 	 * If "m" is a normal page, flush it from the cache.
 	 *
	 * First, try to find an existing mapping of the page through an sf
	 * buffer; sf_buf_invalidate_cache() modifies the mapping and
	 * flushes the cache.
 	 */
 	if (sf_buf_invalidate_cache(m, oma))
 		return;
 #endif
	/*
	 * If the page is not mapped by an sf buffer, map it
	 * transiently and do the invalidation.
	 */
 	if (ma != oma) {
 		pa = VM_PAGE_TO_PHYS(m);
 		sched_pin();
 		pc = get_pcpu();
 		cmap2_pte2p = pc->pc_cmap2_pte2p;
 		mtx_lock(&pc->pc_cmap_lock);
 		if (pte2_load(cmap2_pte2p) != 0)
 			panic("%s: CMAP2 busy", __func__);
 		pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW,
 		    vm_memattr_to_pte2(ma)));
 		dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE);
 		pte2_clear(cmap2_pte2p);
 		tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 		sched_unpin();
 		mtx_unlock(&pc->pc_cmap_lock);
 	}
 }
 
 /*
  *  Miscellaneous support routines follow
  */
 
 /*
  *  Returns TRUE if the given page is mapped individually or as part of
  *  a 1mpage.  Otherwise, returns FALSE.
  */
 boolean_t
 pmap_page_is_mapped(vm_page_t m)
 {
 	boolean_t rv;
 
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (FALSE);
 	rw_wlock(&pvh_global_lock);
 	rv = !TAILQ_EMPTY(&m->md.pv_list) ||
 	    ((m->flags & PG_FICTITIOUS) == 0 &&
 	    !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
 	rw_wunlock(&pvh_global_lock);
 	return (rv);
 }
 
 /*
  *  Returns true if the pmap's pv is one of the first
  *  16 pvs linked to from this page.  This count may
  *  be changed upwards or downwards in the future; it
  *  is only necessary that true be returned for a small
  *  subset of pmaps for proper page aging.
  */
 boolean_t
 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	int loops = 0;
 	boolean_t rv;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	rv = FALSE;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		if (PV_PMAP(pv) == pmap) {
 			rv = TRUE;
 			break;
 		}
 		loops++;
 		if (loops >= 16)
 			break;
 	}
 	if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 			if (PV_PMAP(pv) == pmap) {
 				rv = TRUE;
 				break;
 			}
 			loops++;
 			if (loops >= 16)
 				break;
 		}
 	}
 	rw_wunlock(&pvh_global_lock);
 	return (rv);
 }
 
/*
 *	pmap_zero_page zeros the specified hardware page by mapping
 *	the page into KVM and clearing its contents with pagezero().
 */
 void
 pmap_zero_page(vm_page_t m)
 {
 	pt2_entry_t *cmap2_pte2p;
 	struct pcpu *pc;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	pagezero(pc->pc_cmap2_addr);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 /*
  *	pmap_zero_page_area zeros the specified hardware page by mapping
  *	the page into KVM and using bzero to clear its contents.
  *
  *	off and size may not cover an area beyond a single hardware page.
  */
 void
 pmap_zero_page_area(vm_page_t m, int off, int size)
 {
 	pt2_entry_t *cmap2_pte2p;
 	struct pcpu *pc;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	if (off == 0 && size == PAGE_SIZE)
 		pagezero(pc->pc_cmap2_addr);
 	else
 		bzero(pc->pc_cmap2_addr + off, size);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 /*
  *	pmap_copy_page copies the specified (machine independent)
  *	page by mapping the page into virtual memory and using
  *	bcopy to copy the page, one machine dependent page at a
  *	time.
  */
 void
 pmap_copy_page(vm_page_t src, vm_page_t dst)
 {
 	pt2_entry_t *cmap1_pte2p, *cmap2_pte2p;
 	struct pcpu *pc;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap1_pte2p = pc->pc_cmap1_pte2p;
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap1_pte2p) != 0)
 		panic("%s: CMAP1 busy", __func__);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(src),
 	    PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(src)));
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(dst),
 	    PTE2_AP_KRW, vm_page_pte2_attr(dst)));
 	bcopy(pc->pc_cmap1_addr, pc->pc_cmap2_addr, PAGE_SIZE);
 	pte2_clear(cmap1_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap1_addr);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 int unmapped_buf_allowed = 1;
 
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)
 {
 	pt2_entry_t *cmap1_pte2p, *cmap2_pte2p;
 	vm_page_t a_pg, b_pg;
 	char *a_cp, *b_cp;
 	vm_offset_t a_pg_offset, b_pg_offset;
 	struct pcpu *pc;
 	int cnt;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap1_pte2p = pc->pc_cmap1_pte2p;
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap1_pte2p) != 0)
 		panic("pmap_copy_pages: CMAP1 busy");
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("pmap_copy_pages: CMAP2 busy");
 	while (xfersize > 0) {
 		a_pg = ma[a_offset >> PAGE_SHIFT];
 		a_pg_offset = a_offset & PAGE_MASK;
 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 		b_pg = mb[b_offset >> PAGE_SHIFT];
 		b_pg_offset = b_offset & PAGE_MASK;
 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
 		pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(a_pg),
 		    PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(a_pg)));
 		tlb_flush_local((vm_offset_t)pc->pc_cmap1_addr);
 		pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(b_pg),
 		    PTE2_AP_KRW, vm_page_pte2_attr(b_pg)));
 		tlb_flush_local((vm_offset_t)pc->pc_cmap2_addr);
 		a_cp = pc->pc_cmap1_addr + a_pg_offset;
 		b_cp = pc->pc_cmap2_addr + b_pg_offset;
 		bcopy(a_cp, b_cp, cnt);
 		a_offset += cnt;
 		b_offset += cnt;
 		xfersize -= cnt;
 	}
 	pte2_clear(cmap1_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap1_addr);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 vm_offset_t
 pmap_quick_enter_page(vm_page_t m)
 {
 	struct pcpu *pc;
 	pt2_entry_t *pte2p;
 
 	critical_enter();
 	pc = get_pcpu();
 	pte2p = pc->pc_qmap_pte2p;
 
 	KASSERT(pte2_load(pte2p) == 0, ("%s: PTE2 busy", __func__));
 
 	pte2_store(pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	return (pc->pc_qmap_addr);
 }
 
 void
 pmap_quick_remove_page(vm_offset_t addr)
 {
 	struct pcpu *pc;
 	pt2_entry_t *pte2p;
 
 	pc = get_pcpu();
 	pte2p = pc->pc_qmap_pte2p;
 
 	KASSERT(addr == pc->pc_qmap_addr, ("%s: invalid address", __func__));
 	KASSERT(pte2_load(pte2p) != 0, ("%s: PTE2 not in use", __func__));
 
 	pte2_clear(pte2p);
 	tlb_flush(pc->pc_qmap_addr);
 	critical_exit();
 }
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything.
  */
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
     vm_offset_t src_addr)
 {
 	struct spglist free;
 	vm_offset_t addr;
 	vm_offset_t end_addr = src_addr + len;
 	vm_offset_t nextva;
 
 	if (dst_addr != src_addr)
 		return;
 
 	if (!pmap_is_current(src_pmap))
 		return;
 
 	rw_wlock(&pvh_global_lock);
 	if (dst_pmap < src_pmap) {
 		PMAP_LOCK(dst_pmap);
 		PMAP_LOCK(src_pmap);
 	} else {
 		PMAP_LOCK(src_pmap);
 		PMAP_LOCK(dst_pmap);
 	}
 	sched_pin();
 	for (addr = src_addr; addr < end_addr; addr = nextva) {
 		pt2_entry_t *src_pte2p, *dst_pte2p;
 		vm_page_t dst_mpt2pg, src_mpt2pg;
 		pt1_entry_t src_pte1;
 		u_int pte1_idx;
 
 		KASSERT(addr < VM_MAXUSER_ADDRESS,
 		    ("%s: invalid to pmap_copy page tables", __func__));
 
 		nextva = pte1_trunc(addr + PTE1_SIZE);
 		if (nextva < addr)
 			nextva = end_addr;
 
 		pte1_idx = pte1_index(addr);
 		src_pte1 = src_pmap->pm_pt1[pte1_idx];
 		if (pte1_is_section(src_pte1)) {
 			if ((addr & PTE1_OFFSET) != 0 ||
 			    (addr + PTE1_SIZE) > end_addr)
 				continue;
 			if (dst_pmap->pm_pt1[pte1_idx] == 0 &&
 			    (!pte1_is_managed(src_pte1) ||
 			    pmap_pv_insert_pte1(dst_pmap, addr, src_pte1,
 			    PMAP_ENTER_NORECLAIM))) {
 				dst_pmap->pm_pt1[pte1_idx] = src_pte1 &
 				    ~PTE1_W;
 				dst_pmap->pm_stats.resident_count +=
 				    PTE1_SIZE / PAGE_SIZE;
 				pmap_pte1_mappings++;
 			}
 			continue;
 		} else if (!pte1_is_link(src_pte1))
 			continue;
 
 		src_mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(src_pte1));
 
		/*
		 * We leave PT2s linked from the PT1 even if they are not
		 * referenced, until all PT2s in a page are without reference.
		 *
		 * QQQ: This could be changed ...
		 */
 #if 0 /* single_pt2_link_is_cleared */
 		KASSERT(pt2_wirecount_get(src_mpt2pg, pte1_idx) > 0,
 		    ("%s: source page table page is unused", __func__));
 #else
 		if (pt2_wirecount_get(src_mpt2pg, pte1_idx) == 0)
 			continue;
 #endif
 		if (nextva > end_addr)
 			nextva = end_addr;
 
 		src_pte2p = pt2map_entry(addr);
 		while (addr < nextva) {
			pt2_entry_t temp_pte2;

			temp_pte2 = pte2_load(src_pte2p);
			/*
			 * We only virtually copy managed pages.
			 */
 			if (pte2_is_managed(temp_pte2)) {
 				dst_mpt2pg = pmap_allocpte2(dst_pmap, addr,
 				    PMAP_ENTER_NOSLEEP);
 				if (dst_mpt2pg == NULL)
 					goto out;
 				dst_pte2p = pmap_pte2_quick(dst_pmap, addr);
 				if (!pte2_is_valid(pte2_load(dst_pte2p)) &&
 				    pmap_try_insert_pv_entry(dst_pmap, addr,
 				    PHYS_TO_VM_PAGE(pte2_pa(temp_pte2)))) {
 					/*
 					 * Clear the wired, modified, and
 					 * accessed (referenced) bits
 					 * during the copy.
 					 */
					temp_pte2 &= ~(PTE2_W | PTE2_A);
 					temp_pte2 |= PTE2_NM;
 					pte2_store(dst_pte2p, temp_pte2);
 					dst_pmap->pm_stats.resident_count++;
 				} else {
 					SLIST_INIT(&free);
 					if (pmap_unwire_pt2(dst_pmap, addr,
 					    dst_mpt2pg, &free)) {
 						pmap_tlb_flush(dst_pmap, addr);
 						vm_page_free_pages_toq(&free,
 						    false);
 					}
 					goto out;
 				}
 				if (pt2_wirecount_get(dst_mpt2pg, pte1_idx) >=
 				    pt2_wirecount_get(src_mpt2pg, pte1_idx))
 					break;
 			}
 			addr += PAGE_SIZE;
 			src_pte2p++;
 		}
 	}
 out:
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(src_pmap);
 	PMAP_UNLOCK(dst_pmap);
 }
 
 /*
  *	Increase the starting virtual address of the given mapping if a
  *	different alignment might result in more section mappings.
  */
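/*
 *	Illustrative example (assuming 1 MB sections and a sufficiently
 *	large mapping): an object offset of 0x2345000 gives a pte1_offset
 *	of 0x45000, so a requested start address of 0x10010000 is moved up
 *	to 0x10045000.  Object offsets and virtual addresses then share the
 *	same alignment within a section, which allows later promotion to
 *	section mappings.
 */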
 void
 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t size)
 {
 	vm_offset_t pte1_offset;
 
 	if (size < PTE1_SIZE)
 		return;
 	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 		offset += ptoa(object->pg_color);
 	pte1_offset = offset & PTE1_OFFSET;
 	if (size - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) < PTE1_SIZE ||
 	    (*addr & PTE1_OFFSET) == pte1_offset)
 		return;
 	if ((*addr & PTE1_OFFSET) < pte1_offset)
 		*addr = pte1_trunc(*addr) + pte1_offset;
 	else
 		*addr = pte1_roundup(*addr) + pte1_offset;
 }
 
 void
 pmap_activate(struct thread *td)
 {
 	pmap_t pmap, oldpmap;
 	u_int cpuid, ttb;
 
 	PDEBUG(9, printf("%s: td = %08x\n", __func__, (uint32_t)td));
 
 	critical_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
 	cpuid = PCPU_GET(cpuid);
 
 #if defined(SMP)
 	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
 	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
 #else
 	CPU_CLR(cpuid, &oldpmap->pm_active);
 	CPU_SET(cpuid, &pmap->pm_active);
 #endif
 
 	ttb = pmap_ttb_get(pmap);
 
	/*
	 * pmap_activate() is called for the current thread on the current CPU.
	 */
 	td->td_pcb->pcb_pagedir = ttb;
 	cp15_ttbr_set(ttb);
 	PCPU_SET(curpmap, pmap);
 	critical_exit();
 }
 
 /*
  *  Perform the pmap work for mincore.
  */
 int
 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
 {
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 	vm_paddr_t pa;
 	bool managed;
 	int val;
 
 	PMAP_LOCK(pmap);
 retry:
 	pte1p = pmap_pte1(pmap, addr);
 	pte1 = pte1_load(pte1p);
 	if (pte1_is_section(pte1)) {
 		pa = trunc_page(pte1_pa(pte1) | (addr & PTE1_OFFSET));
 		managed = pte1_is_managed(pte1);
 		val = MINCORE_SUPER | MINCORE_INCORE;
 		if (pte1_is_dirty(pte1))
 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 		if (pte1 & PTE1_A)
 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 	} else if (pte1_is_link(pte1)) {
 		pte2p = pmap_pte2(pmap, addr);
 		pte2 = pte2_load(pte2p);
 		pmap_pte2_release(pte2p);
 		pa = pte2_pa(pte2);
 		managed = pte2_is_managed(pte2);
 		val = MINCORE_INCORE;
 		if (pte2_is_dirty(pte2))
 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 		if (pte2 & PTE2_A)
 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 	} else {
 		managed = false;
 		val = 0;
 	}
 	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
 	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
 		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
 		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
 			goto retry;
 	} else
 		PA_UNLOCK_COND(*locked_pa);
 	PMAP_UNLOCK(pmap);
 	return (val);
 }
 
 void
 pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa)
 {
 	vm_offset_t sva;
 	uint32_t l2attr;
 
 	KASSERT((size & PAGE_MASK) == 0,
 	    ("%s: device mapping not page-sized", __func__));
 
 	sva = va;
 	l2attr = vm_memattr_to_pte2(VM_MEMATTR_DEVICE);
 	while (size != 0) {
 		pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, l2attr);
 		va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 	tlb_flush_range(sva, va - sva);
 }
 
 void
 pmap_kremove_device(vm_offset_t va, vm_size_t size)
 {
 	vm_offset_t sva;
 
 	KASSERT((size & PAGE_MASK) == 0,
 	    ("%s: device mapping not page-sized", __func__));
 
 	sva = va;
 	while (size != 0) {
 		pmap_kremove(va);
 		va += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 	tlb_flush_range(sva, va - sva);
 }
 
 void
 pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb)
 {
 
 	pcb->pcb_pagedir = pmap_ttb_get(pmap);
}

 /*
  *  Clean L1 data cache range by physical address.
  *  The range must be within a single page.
  */
 static void
 pmap_dcache_wb_pou(vm_paddr_t pa, vm_size_t size, uint32_t attr)
 {
 	pt2_entry_t *cmap2_pte2p;
 	struct pcpu *pc;
 
 	KASSERT(((pa & PAGE_MASK) + size) <= PAGE_SIZE,
 	    ("%s: not on single page", __func__));
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, attr));
 	dcache_wb_pou((vm_offset_t)pc->pc_cmap2_addr + (pa & PAGE_MASK), size);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 /*
  *  Sync an instruction cache range which is not mapped yet.
  */
 void
 cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
 {
 	uint32_t len, offset;
 	vm_page_t m;
 
 	/* Write back d-cache on given address range. */
 	offset = pa & PAGE_MASK;
 	for ( ; size != 0; size -= len, pa += len, offset = 0) {
 		len = min(PAGE_SIZE - offset, size);
 		m = PHYS_TO_VM_PAGE(pa);
 		KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
 		  __func__, pa));
 		pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m));
 	}
 	/*
 	 * The I-cache is VIPT. The only way to flush all virtual mappings
 	 * of a given physical address is to invalidate the entire i-cache.
 	 */
 	icache_inv_all();
 }
 
 void
 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t size)
 {
 
 	/* Write back d-cache on given address range. */
 	if (va >= VM_MIN_KERNEL_ADDRESS) {
 		dcache_wb_pou(va, size);
 	} else {
 		uint32_t len, offset;
 		vm_paddr_t pa;
 		vm_page_t m;
 
 		offset = va & PAGE_MASK;
 		for ( ; size != 0; size -= len, va += len, offset = 0) {
 			pa = pmap_extract(pmap, va); /* offset is preserved */
 			len = min(PAGE_SIZE - offset, size);
 			m = PHYS_TO_VM_PAGE(pa);
 			KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
 				__func__, pa));
 			pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m));
 		}
 	}
 	/*
 	 * The I-cache is VIPT. The only way to flush all virtual mappings
 	 * of a given physical address is to invalidate the entire i-cache.
 	 */
 	icache_inv_all();
 }
 
 /*
  *  The implementation of pmap_fault() uses the IN_RANGE2() macro, which
  *  depends on the fact that the given range size is a power of 2.
  */
 CTASSERT(powerof2(NB_IN_PT1));
 CTASSERT(powerof2(PT2MAP_SIZE));
 
 #define IN_RANGE2(addr, start, size)	\
     ((vm_offset_t)(start) == ((vm_offset_t)(addr) & ~((size) - 1)))
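 
 /*
  * Worked example (editorial, hypothetical values): because "size" is a
  * power of 2 and "start" is size-aligned, masking with ~(size - 1) rounds
  * "addr" down to the start of its size-aligned block, so the test is one
  * AND plus one compare.  For a 1 MiB range starting at 0xC0100000:
  *
  *	IN_RANGE2(0xC01F0000, 0xC0100000, 0x00100000)
  *	    -> 0xC0100000 == (0xC01F0000 & ~0x000FFFFF)	-> true
  *	IN_RANGE2(0xC0200000, 0xC0100000, 0x00100000)	-> false
  */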
 
 /*
  *  Handle access and R/W emulation faults.
  */
 int
 pmap_fault(pmap_t pmap, vm_offset_t far, uint32_t fsr, int idx, bool usermode)
 {
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 
 	if (pmap == NULL)
 		pmap = kernel_pmap;
 
 	/*
 	 * In the kernel, we should never take an abort with a FAR that lies
 	 * in the range of the pmap->pm_pt1 or PT2MAP address spaces. If it
 	 * happens, stop here, print a useful abort message and even drop
 	 * into the debugger; otherwise it likely ends in a never-ending
 	 * loop of aborts.
 	 */
 	if (__predict_false(IN_RANGE2(far, pmap->pm_pt1, NB_IN_PT1))) {
 		/*
 		 * All L1 tables should always be mapped and present.
 		 * However, only the current one is checked here.  For user
 		 * mode, the only non-fatal aborts are a permission abort
 		 * from a malicious user and an alignment abort, which may
 		 * have higher priority.
 		 */
 		if (!usermode || (idx != FAULT_ALIGN && idx != FAULT_PERM_L2)) {
 			CTR4(KTR_PMAP, "%s: pmap %#x pm_pt1 %#x far %#x",
 			    __func__, pmap, pmap->pm_pt1, far);
 			panic("%s: pm_pt1 abort", __func__);
 		}
 		return (KERN_INVALID_ADDRESS);
 	}
 	if (__predict_false(IN_RANGE2(far, PT2MAP, PT2MAP_SIZE))) {
 		/*
 		 * PT2MAP should always be mapped and present in the current
 		 * L1 table.  However, only existing L2 tables are mapped in
 		 * PT2MAP.  For user mode, the only non-fatal aborts are an
 		 * L2 translation abort, a permission abort from a malicious
 		 * user, and an alignment abort, which may have higher
 		 * priority.
 		 */
 		if (!usermode || (idx != FAULT_ALIGN &&
 		    idx != FAULT_TRAN_L2 && idx != FAULT_PERM_L2)) {
 			CTR4(KTR_PMAP, "%s: pmap %#x PT2MAP %#x far %#x",
 			    __func__, pmap, PT2MAP, far);
 			panic("%s: PT2MAP abort", __func__);
 		}
 		return (KERN_INVALID_ADDRESS);
 	}
 
 	/*
 	 * The pmap lock is used below for handling access and R/W emulation
 	 * aborts. They were handled by atomic operations before, so some
 	 * analysis of the new situation is needed to answer the following
 	 * question: is it safe to use the lock even for these aborts?
 	 *
 	 * In general, two cases can arise:
 	 *
 	 * (1) An abort while the pmap lock is already held - this should not
 	 * happen, as the pmap lock is not recursive. However, under the pmap
 	 * lock only internal kernel data should be accessed, and such data
 	 * should be mapped with the A bit set and the NM bit cleared. If a
 	 * double abort does happen, the mapping of the data which caused it
 	 * must be fixed. Further, all new mappings are always made with the
 	 * A bit set, and the bit can be cleared only on managed mappings.
 	 *
 	 * (2) An abort while another lock (or locks) is held - this can
 	 * already happen. However, it makes no difference here whether it is
 	 * an access or R/W emulation abort, or some other abort.
 	 */
 
 	PMAP_LOCK(pmap);
 #ifdef INVARIANTS
 	pte1 = pte1_load(pmap_pte1(pmap, far));
 	if (pte1_is_link(pte1)) {
 		/*
 		 * Check in advance that the associated L2 page table is
 		 * mapped into PT2MAP space. Note that a faulty access to an
 		 * unmapped L2 page table is caught by the more general check
 		 * above, which verifies that "far" does not lie in PT2MAP
 		 * space. Note also that the L1 page table and PT2TAB always
 		 * exist and are mapped.
 		 */
 		pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, far));
 		if (!pte2_is_valid(pte2))
 			panic("%s: missing L2 page table (%p, %#x)",
 			    __func__, pmap, far);
 	}
 #endif
 #ifdef SMP
 	/*
 	 * Special treatment is required because of the break-before-make
 	 * approach used when pte1 is updated for a userland mapping during
 	 * section promotion or demotion. If not caught here, pmap_enter()
 	 * can find a section mapping on the faulting address, which is not
 	 * allowed.
 	 */
 	if (idx == FAULT_TRAN_L1 && usermode && cp15_ats1cur_check(far) == 0) {
 		PMAP_UNLOCK(pmap);
 		return (KERN_SUCCESS);
 	}
 #endif
 	/*
 	 * Access bits for page and section. Note that the entry
 	 * is not in the TLB yet, so a TLB flush is not necessary.
 	 *
 	 * QQQ: This is hardware emulation; we do not call userret()
 	 *      for aborts from user mode.
 	 */
 	if (idx == FAULT_ACCESS_L2) {
 		pte1 = pte1_load(pmap_pte1(pmap, far));
 		if (pte1_is_link(pte1)) {
 			/* L2 page table should exist and be mapped. */
 			pte2p = pt2map_entry(far);
 			pte2 = pte2_load(pte2p);
 			if (pte2_is_valid(pte2)) {
 				pte2_store(pte2p, pte2 | PTE2_A);
 				PMAP_UNLOCK(pmap);
 				return (KERN_SUCCESS);
 			}
 		} else {
 			/*
 			 * We got an L2 access fault, but PTE1 is not a link.
 			 * Probably some race happened; do nothing.
 			 */
 			CTR3(KTR_PMAP, "%s: FAULT_ACCESS_L2 - pmap %#x far %#x",
 			    __func__, pmap, far);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		}
 	}
 	if (idx == FAULT_ACCESS_L1) {
 		pte1p = pmap_pte1(pmap, far);
 		pte1 = pte1_load(pte1p);
 		if (pte1_is_section(pte1)) {
 			pte1_store(pte1p, pte1 | PTE1_A);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		} else {
 			/*
 			 * We got an L1 access fault, but PTE1 is not a
 			 * section mapping. Probably some race happened;
 			 * do nothing.
 			 */
 			CTR3(KTR_PMAP, "%s: FAULT_ACCESS_L1 - pmap %#x far %#x",
 			    __func__, pmap, far);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		}
 	}
 
 	/*
 	 * Handle modify bits for page and section. Note that the modify
 	 * bit is emulated by software, so PTEx_RO is the software read-only
 	 * bit and the PTEx_NM flag is the real hardware read-only bit.
 	 *
 	 * QQQ: This is hardware emulation; we do not call userret()
 	 *      for aborts from user mode.
 	 */
 	if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L2)) {
 		pte1 = pte1_load(pmap_pte1(pmap, far));
 		if (pte1_is_link(pte1)) {
 			/* L2 page table should exist and be mapped. */
 			pte2p = pt2map_entry(far);
 			pte2 = pte2_load(pte2p);
 			if (pte2_is_valid(pte2) && !(pte2 & PTE2_RO) &&
 			    (pte2 & PTE2_NM)) {
 				pte2_store(pte2p, pte2 & ~PTE2_NM);
 				tlb_flush(trunc_page(far));
 				PMAP_UNLOCK(pmap);
 				return (KERN_SUCCESS);
 			}
 		} else {
 			/*
 			 * We got an L2 permission fault, but PTE1 is not a
 			 * link. Probably some race happened; do nothing.
 			 */
 			CTR3(KTR_PMAP, "%s: FAULT_PERM_L2 - pmap %#x far %#x",
 			    __func__, pmap, far);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		}
 	}
 	if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L1)) {
 		pte1p = pmap_pte1(pmap, far);
 		pte1 = pte1_load(pte1p);
 		if (pte1_is_section(pte1)) {
 			if (!(pte1 & PTE1_RO) && (pte1 & PTE1_NM)) {
 				pte1_store(pte1p, pte1 & ~PTE1_NM);
 				tlb_flush(pte1_trunc(far));
 				PMAP_UNLOCK(pmap);
 				return (KERN_SUCCESS);
 			}
 		} else {
 			/*
 			 * We got an L1 permission fault, but PTE1 is not a
 			 * section mapping. Probably some race happened;
 			 * do nothing.
 			 */
 			CTR3(KTR_PMAP, "%s: FAULT_PERM_L1 - pmap %#x far %#x",
 			    __func__, pmap, far);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		}
 	}
 
 	/*
 	 * QQQ: The previous code, mainly the fast handling of access and
 	 *      modify bit aborts, could be moved to assembly. From here on
 	 *      we are dealing with the non-fast aborts.
 	 */
 	PMAP_UNLOCK(pmap);
 	return (KERN_FAILURE);
 }
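 
 /*
  * Editorial summary (not from the original source) of the software
  * dirty-bit emulation handled above, for a small page:
  *
  *	PTE2_RO	PTE2_NM	hardware view	meaning
  *	  0	  1	read-only	writable, but not dirty yet
  *	  0	  0	read/write	writable and dirty
  *	  1	  1	read-only	truly read-only
  *
  * The first store to a clean writable page raises a permission fault;
  * the handler above clears PTE2_NM and flushes the stale TLB entry, so
  * the page is treated as modified from then on.  Sections behave the
  * same way with PTE1_RO/PTE1_NM.
  */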
 
 #if defined(PMAP_DEBUG)
 /*
  *  Reuses the KVA that is used by the pmap_zero_page() function!
  */
 static void
 pmap_zero_page_check(vm_page_t m)
 {
 	pt2_entry_t *cmap2_pte2p;
 	uint32_t *p, *end;
 	struct pcpu *pc;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	end = (uint32_t*)(pc->pc_cmap2_addr + PAGE_SIZE);
 	for (p = (uint32_t*)pc->pc_cmap2_addr; p < end; p++)
 		if (*p != 0)
 			panic("%s: page %p not zero, va: %p", __func__, m,
 			    pc->pc_cmap2_addr);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 int
 pmap_pid_dump(int pid)
 {
 	pmap_t pmap;
 	struct proc *p;
 	int npte2 = 0;
 	int i, j, index;
 
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (p->p_pid != pid || p->p_vmspace == NULL)
 			continue;
 		index = 0;
 		pmap = vmspace_pmap(p->p_vmspace);
 		for (i = 0; i < NPTE1_IN_PT1; i++) {
 			pt1_entry_t pte1;
 			pt2_entry_t *pte2p, pte2;
 			vm_offset_t base, va;
 			vm_paddr_t pa;
 			vm_page_t m;
 
 			base = i << PTE1_SHIFT;
 			pte1 = pte1_load(&pmap->pm_pt1[i]);
 
 			if (pte1_is_section(pte1)) {
 				/*
 				 * QQQ: Do something here!
 				 */
 			} else if (pte1_is_link(pte1)) {
 				for (j = 0; j < NPTE2_IN_PT2; j++) {
 					va = base + (j << PAGE_SHIFT);
 					if (va >= VM_MIN_KERNEL_ADDRESS) {
 						if (index) {
 							index = 0;
 							printf("\n");
 						}
 						sx_sunlock(&allproc_lock);
 						return (npte2);
 					}
 					pte2p = pmap_pte2(pmap, va);
 					pte2 = pte2_load(pte2p);
 					pmap_pte2_release(pte2p);
 					if (!pte2_is_valid(pte2))
 						continue;
 
 					pa = pte2_pa(pte2);
 					m = PHYS_TO_VM_PAGE(pa);
 					printf("va: 0x%x, pa: 0x%x, h: %d, w:"
 					    " %d, f: 0x%x", va, pa,
 					    m->hold_count, m->wire_count,
 					    m->flags);
 					npte2++;
 					index++;
 					if (index >= 2) {
 						index = 0;
 						printf("\n");
 					} else {
 						printf(" ");
 					}
 				}
 			}
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	return (npte2);
 }
 
 #endif
 
 #ifdef DDB
 static pt2_entry_t *
 pmap_pte2_ddb(pmap_t pmap, vm_offset_t va)
 {
 	pt1_entry_t pte1;
 	vm_paddr_t pt2pg_pa;
 
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (!pte1_is_link(pte1))
 		return (NULL);
 
 	if (pmap_is_current(pmap))
 		return (pt2map_entry(va));
 
 	/* Note that L2 page table size is not equal to PAGE_SIZE. */
 	pt2pg_pa = trunc_page(pte1_link_pa(pte1));
 	if (pte2_pa(pte2_load(PMAP3)) != pt2pg_pa) {
 		pte2_store(PMAP3, PTE2_KPT(pt2pg_pa));
 #ifdef SMP
 		PMAP3cpu = PCPU_GET(cpuid);
 #endif
 		tlb_flush_local((vm_offset_t)PADDR3);
 	}
 #ifdef SMP
 	else if (PMAP3cpu != PCPU_GET(cpuid)) {
 		PMAP3cpu = PCPU_GET(cpuid);
 		tlb_flush_local((vm_offset_t)PADDR3);
 	}
 #endif
 	return (PADDR3 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
 }
 
 static void
 dump_pmap(pmap_t pmap)
 {
 
 	printf("pmap %p\n", pmap);
 	printf("  pm_pt1: %p\n", pmap->pm_pt1);
 	printf("  pm_pt2tab: %p\n", pmap->pm_pt2tab);
 	printf("  pm_active: 0x%08lX\n", pmap->pm_active.__bits[0]);
 }
 
 DB_SHOW_COMMAND(pmaps, pmap_list_pmaps)
 {
 
 	pmap_t pmap;
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		dump_pmap(pmap);
 	}
 }
 
 static int
 pte2_class(pt2_entry_t pte2)
 {
 	int cls;
 
 	cls = (pte2 >> 2) & 0x03;
 	cls |= (pte2 >> 4) & 0x04;
 	return (cls);
 }
 
 static void
 dump_section(pmap_t pmap, uint32_t pte1_idx)
 {
 }
 
 static void
 dump_link(pmap_t pmap, uint32_t pte1_idx, boolean_t invalid_ok)
 {
 	uint32_t i;
 	vm_offset_t va;
 	pt2_entry_t *pte2p, pte2;
 	vm_page_t m;
 
 	va = pte1_idx << PTE1_SHIFT;
 	pte2p = pmap_pte2_ddb(pmap, va);
 	for (i = 0; i < NPTE2_IN_PT2; i++, pte2p++, va += PAGE_SIZE) {
 		pte2 = pte2_load(pte2p);
 		if (pte2 == 0)
 			continue;
 		if (!pte2_is_valid(pte2)) {
 			printf(" 0x%08X: 0x%08X", va, pte2);
 			if (!invalid_ok)
 				printf(" - not valid !!!");
 			printf("\n");
 			continue;
 		}
 		m = PHYS_TO_VM_PAGE(pte2_pa(pte2));
 		printf(" 0x%08X: 0x%08X, TEX%d, s:%d, g:%d, m:%p", va , pte2,
 		    pte2_class(pte2), !!(pte2 & PTE2_S), !(pte2 & PTE2_NG), m);
 		if (m != NULL) {
 			printf(" v:%d h:%d w:%d f:0x%04X\n", m->valid,
 			    m->hold_count, m->wire_count, m->flags);
 		} else {
 			printf("\n");
 		}
 	}
 }
 
 static __inline boolean_t
 is_pv_chunk_space(vm_offset_t va)
 {
 
 	if ((((vm_offset_t)pv_chunkbase) <= va) &&
 	    (va < ((vm_offset_t)pv_chunkbase + PAGE_SIZE * pv_maxchunks)))
 		return (TRUE);
 	return (FALSE);
 }
 
 DB_SHOW_COMMAND(pmap, pmap_pmap_print)
 {
 	/* XXX convert args. */
 	pmap_t pmap = (pmap_t)addr;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	vm_offset_t va, eva;
 	vm_page_t m;
 	uint32_t i;
 	boolean_t invalid_ok, dump_link_ok, dump_pv_chunk;
 
 	if (have_addr) {
 		pmap_t pm;
 
 		LIST_FOREACH(pm, &allpmaps, pm_list)
 			if (pm == pmap) break;
 		if (pm == NULL) {
 			printf("given pmap %p is not in allpmaps list\n", pmap);
 			return;
 		}
 	} else
 		pmap = PCPU_GET(curpmap);
 
 	eva = (modif[0] == 'u') ? VM_MAXUSER_ADDRESS : 0xFFFFFFFF;
 	dump_pv_chunk = FALSE; /* XXX evaluate from modif[] */
 
 	printf("pmap: 0x%08X\n", (uint32_t)pmap);
 	printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP);
 	printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab);
 
 	for(i = 0; i < NPTE1_IN_PT1; i++) {
 		pte1 = pte1_load(&pmap->pm_pt1[i]);
 		if (pte1 == 0)
 			continue;
 		va = i << PTE1_SHIFT;
 		if (va >= eva)
 			break;
 
 		if (pte1_is_section(pte1)) {
 			printf("0x%08X: Section 0x%08X, s:%d g:%d\n", va, pte1,
 			    !!(pte1 & PTE1_S), !(pte1 & PTE1_NG));
 			dump_section(pmap, i);
 		} else if (pte1_is_link(pte1)) {
 			dump_link_ok = TRUE;
 			invalid_ok = FALSE;
 			pte2 = pte2_load(pmap_pt2tab_entry(pmap, va));
 			m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
 			printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X m: %p",
 			    va, pte1, pte2, m);
 			if (is_pv_chunk_space(va)) {
 				printf(" - pv_chunk space");
 				if (dump_pv_chunk)
 					invalid_ok = TRUE;
 				else
 					dump_link_ok = FALSE;
 			}
 			else if (m != NULL)
 				printf(" w:%d w2:%u", m->wire_count,
 				    pt2_wirecount_get(m, pte1_index(va)));
 			if (pte2 == 0)
 				printf(" !!! pt2tab entry is ZERO");
 			else if (pte2_pa(pte1) != pte2_pa(pte2))
 				printf(" !!! pt2tab entry is DIFFERENT - m: %p",
 				    PHYS_TO_VM_PAGE(pte2_pa(pte2)));
 			printf("\n");
 			if (dump_link_ok)
 				dump_link(pmap, i, invalid_ok);
 		} else
 			printf("0x%08X: Invalid entry 0x%08X\n", va, pte1);
 	}
 }
 
 static void
 dump_pt2tab(pmap_t pmap)
 {
 	uint32_t i;
 	pt2_entry_t pte2;
 	vm_offset_t va;
 	vm_paddr_t pa;
 	vm_page_t m;
 
 	printf("PT2TAB:\n");
 	for (i = 0; i < PT2TAB_ENTRIES; i++) {
 		pte2 = pte2_load(&pmap->pm_pt2tab[i]);
 		if (!pte2_is_valid(pte2))
 			continue;
 		va = i << PT2TAB_SHIFT;
 		pa = pte2_pa(pte2);
 		m = PHYS_TO_VM_PAGE(pa);
 		printf(" 0x%08X: 0x%08X, TEX%d, s:%d, m:%p", va, pte2,
 		    pte2_class(pte2), !!(pte2 & PTE2_S), m);
 		if (m != NULL)
 			printf(" , h: %d, w: %d, f: 0x%04X pidx: %lld",
 			    m->hold_count, m->wire_count, m->flags, m->pindex);
 		printf("\n");
 	}
 }
 
 DB_SHOW_COMMAND(pmap_pt2tab, pmap_pt2tab_print)
 {
 	/* XXX convert args. */
 	pmap_t pmap = (pmap_t)addr;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	vm_offset_t va;
 	uint32_t i, start;
 
 	if (have_addr) {
 		printf("supported only on current pmap\n");
 		return;
 	}
 
 	pmap = PCPU_GET(curpmap);
 	printf("curpmap: 0x%08X\n", (uint32_t)pmap);
 	printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP);
 	printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab);
 
 	start = pte1_index((vm_offset_t)PT2MAP);
 	for (i = start; i < (start + NPT2_IN_PT2TAB); i++) {
 		pte1 = pte1_load(&pmap->pm_pt1[i]);
 		if (pte1 == 0)
 			continue;
 		va = i << PTE1_SHIFT;
 		if (pte1_is_section(pte1)) {
 			printf("0x%08X: Section 0x%08X, s:%d\n", va, pte1,
 			    !!(pte1 & PTE1_S));
 			dump_section(pmap, i);
 		} else if (pte1_is_link(pte1)) {
 			pte2 = pte2_load(pmap_pt2tab_entry(pmap, va));
 			printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X\n", va,
 			    pte1, pte2);
 			if (pte2 == 0)
 				printf("  !!! pt2tab entry is ZERO\n");
 		} else
 			printf("0x%08X: Invalid entry 0x%08X\n", va, pte1);
 	}
 	dump_pt2tab(pmap);
 }
 #endif
Index: head/sys/arm/freescale/imx/imx6_sdma.c
===================================================================
--- head/sys/arm/freescale/imx/imx6_sdma.c	(revision 338317)
+++ head/sys/arm/freescale/imx/imx6_sdma.c	(revision 338318)
@@ -1,516 +1,515 @@
 /*-
  * Copyright (c) 2015 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * i.MX6 Smart Direct Memory Access Controller (sDMA)
  * Chapter 41, i.MX 6Dual/6Quad Applications Processor Reference Manual,
  * Rev. 1, 04/2013
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/malloc.h>
 #include <sys/endian.h>
 #include <sys/rman.h>
 #include <sys/timeet.h>
 #include <sys/timetc.h>
 #include <sys/firmware.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/pmap.h>
 
 #include <dev/ofw/openfirm.h>
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
 
 #include <machine/bus.h>
 #include <machine/cpu.h>
 #include <machine/intr.h>
 
 #include <arm/freescale/imx/imx6_sdma.h>
 
 #define	MAX_BD	(PAGE_SIZE / sizeof(struct sdma_buffer_descriptor))
 
 #define	READ4(_sc, _reg)	\
 	bus_space_read_4(_sc->bst, _sc->bsh, _reg)
 #define	WRITE4(_sc, _reg, _val)	\
 	bus_space_write_4(_sc->bst, _sc->bsh, _reg, _val)
 
 struct sdma_softc *sdma_sc;
 
 static struct resource_spec sdma_spec[] = {
 	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },
 	{ SYS_RES_IRQ,		0,	RF_ACTIVE },
 	{ -1, 0 }
 };
 
 static void
 sdma_intr(void *arg)
 {
 	struct sdma_buffer_descriptor *bd;
 	struct sdma_channel *channel;
 	struct sdma_conf *conf;
 	struct sdma_softc *sc;
 	int pending;
 	int i;
 	int j;
 
 	sc = arg;
 
 	pending = READ4(sc, SDMAARM_INTR);
 
 	/* Ack intr */
 	WRITE4(sc, SDMAARM_INTR, pending);
 
 	for (i = 0; i < SDMA_N_CHANNELS; i++) {
 		if ((pending & (1 << i)) == 0)
 			continue;
 		channel = &sc->channel[i];
 		conf = channel->conf;
 		if (!conf)
 			continue;
 		for (j = 0; j < conf->num_bd; j++) {
 			bd = &channel->bd[j];
 			bd->mode.status |= BD_DONE;
 			if (bd->mode.status & BD_RROR)
 				printf("sDMA error\n");
 		}
 
 		conf->ih(conf->ih_user, 1);
 
 		WRITE4(sc, SDMAARM_HSTART, (1 << i));
 	}
 }
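 
 /*
  * Editorial note: the handler above acknowledges the pending channels in
  * SDMAARM_INTR, re-arms every buffer descriptor of each completed channel
  * by setting BD_DONE again (reporting any BD_RROR the engine flagged),
  * invokes the per-channel callback conf->ih(conf->ih_user, 1), and
  * restarts the channel through SDMAARM_HSTART.
  */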
 
 static int
 sdma_probe(device_t dev)
 {
 
 	if (!ofw_bus_status_okay(dev))
 		return (ENXIO);
 
 	if (!ofw_bus_is_compatible(dev, "fsl,imx6q-sdma"))
 		return (ENXIO);
 
 	device_set_desc(dev, "i.MX6 Smart Direct Memory Access Controller");
 	return (BUS_PROBE_DEFAULT);
 }
 
 int
 sdma_start(int chn)
 {
 	struct sdma_softc *sc;
 
 	sc = sdma_sc;
 
 	WRITE4(sc, SDMAARM_HSTART, (1 << chn));
 
 	return (0);
 }
 
 int
 sdma_stop(int chn)
 {
 	struct sdma_softc *sc;
 
 	sc = sdma_sc;
 
 	WRITE4(sc, SDMAARM_STOP_STAT, (1 << chn));
 
 	return (0);
 }
 
 int
 sdma_alloc(void)
 {
 	struct sdma_channel *channel;
 	struct sdma_softc *sc;
 	int found;
 	int chn;
 	int i;
 
 	sc = sdma_sc;
 	found = 0;
 
 	/* Channel 0 can't be used */
 	for (i = 1; i < SDMA_N_CHANNELS; i++) {
 		channel = &sc->channel[i];
 		if (channel->in_use == 0) {
 			channel->in_use = 1;
 			found = 1;
 			break;
 		}
 	}
 
 	if (!found)
 		return (-1);
 
 	chn = i;
 
 	/* Allocate area for buffer descriptors */
 	channel->bd = (void *)kmem_alloc_contig(PAGE_SIZE, M_ZERO, 0, ~0,
 	    PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE);
 
 	return (chn);
 }
 
 int
 sdma_free(int chn)
 {
 	struct sdma_channel *channel;
 	struct sdma_softc *sc;
 
 	sc = sdma_sc;
 
 	channel = &sc->channel[chn];
 	channel->in_use = 0;
 
-	kmem_free(kernel_arena, (vm_offset_t)channel->bd,
-			PAGE_SIZE);
+	kmem_free((vm_offset_t)channel->bd, PAGE_SIZE);
 
 	return (0);
 }
 
 static int
 sdma_overrides(struct sdma_softc *sc, int chn,
 		int evt, int host, int dsp)
 {
 	int reg;
 
 	/* Ignore sDMA requests */
 	reg = READ4(sc, SDMAARM_EVTOVR);
 	if (evt)
 		reg |= (1 << chn);
 	else
 		reg &= ~(1 << chn);
 	WRITE4(sc, SDMAARM_EVTOVR, reg);
 
 	/* Ignore enable bit (HE) */
 	reg = READ4(sc, SDMAARM_HOSTOVR);
 	if (host)
 		reg |= (1 << chn);
 	else
 		reg &= ~(1 << chn);
 	WRITE4(sc, SDMAARM_HOSTOVR, reg);
 
 	/* Prevent sDMA channel from starting */
 	reg = READ4(sc, SDMAARM_DSPOVR);
 	if (!dsp)
 		reg |= (1 << chn);
 	else
 		reg &= ~(1 << chn);
 	WRITE4(sc, SDMAARM_DSPOVR, reg);
 
 	return (0);
 }
 
 int
 sdma_configure(int chn, struct sdma_conf *conf)
 {
 	struct sdma_buffer_descriptor *bd0;
 	struct sdma_buffer_descriptor *bd;
 	struct sdma_context_data *context;
 	struct sdma_channel *channel;
 	struct sdma_softc *sc;
 #if 0
 	int timeout;
 	int ret;
 #endif
 	int i;
 
 	sc = sdma_sc;
 
 	channel = &sc->channel[chn];
 	channel->conf = conf;
 
 	/* Ensure operation has stopped */
 	sdma_stop(chn);
 
 	/* Set priority and enable the channel */
 	WRITE4(sc, SDMAARM_SDMA_CHNPRI(chn), 1);
 	WRITE4(sc, SDMAARM_CHNENBL(conf->event), (1 << chn));
 
 	sdma_overrides(sc, chn, 0, 0, 0);
 
 	if (conf->num_bd > MAX_BD) {
 		device_printf(sc->dev, "Error: too many buffer"
 				" descriptors requested\n");
 		return (-1);
 	}
 
 	for (i = 0; i < conf->num_bd; i++) {
 		bd = &channel->bd[i];
 		bd->mode.command = conf->command;
 		bd->mode.status = BD_DONE | BD_EXTD | BD_CONT | BD_INTR;
 		if (i == (conf->num_bd - 1))
 			bd->mode.status |= BD_WRAP;
 		bd->mode.count = conf->period;
 		bd->buffer_addr = conf->saddr + (conf->period * i);
 		bd->ext_buffer_addr = 0;
 	}
 
 	sc->ccb[chn].base_bd_ptr = vtophys(channel->bd);
 	sc->ccb[chn].current_bd_ptr = vtophys(channel->bd);
 
 	/*
 	 * Load context.
 	 *
 	 * i.MX6 Reference Manual: Appendix A SDMA Scripts
 	 * A.3.1.7.1 (mcu_2_app)
 	 */
 
 	/*
 	 * TODO: allow using other scripts
 	 */
 	context = sc->context;
 	memset(context, 0, sizeof(*context));
 	context->channel_state.pc = sc->fw_scripts->mcu_2_app_addr;
 
 	/*
 	 * Tx FIFO 0 address (r6)
 	 * Event_mask (r1)
 	 * Event2_mask (r0)
 	 * Watermark level (r7)
 	 */
 
 	if (conf->event > 32) {
 		context->gReg[0] = (1 << (conf->event % 32));
 		context->gReg[1] = 0;
 	} else {
 		context->gReg[0] = 0;
 		context->gReg[1] = (1 << conf->event);
 	}
 
 	context->gReg[6] = conf->daddr;
 	context->gReg[7] = conf->word_length;
 
 	bd0 = sc->bd0;
 	bd0->mode.command = C0_SETDM;
 	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
 	bd0->mode.count = sizeof(*context) / 4;
 	bd0->buffer_addr = sc->context_phys;
 	bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * chn;
 
 	WRITE4(sc, SDMAARM_HSTART, 1);
 
 #if 0
 	/* Debug purposes */
 
 	timeout = 1000;
 	while (!(ret = READ4(sc, SDMAARM_INTR) & 1)) {
 		if (timeout-- <= 0)
 			break;
 		DELAY(10);
 	};
 
 	if (!ret) {
 		device_printf(sc->dev, "Failed to load context.\n");
 		return (-1);
 	}
 
 	WRITE4(sc, SDMAARM_INTR, ret);
 
 	device_printf(sc->dev, "Context loaded successfully.\n");
 #endif
 
 	return (0);
 }
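 
 /*
  * Editorial usage sketch (not part of the driver): how a hypothetical
  * client might drive this interface.  The struct sdma_conf field values
  * would come from the client; only functions and fields already used in
  * this file are assumed here.
  */
 #if 0
 static int
 example_start_channel(struct sdma_conf *conf)
 {
 	int chn;
 
 	chn = sdma_alloc();			/* reserve a channel (never 0) */
 	if (chn < 0)
 		return (-1);
 	if (sdma_configure(chn, conf) != 0) {	/* program BDs and context */
 		sdma_free(chn);
 		return (-1);
 	}
 	return (sdma_start(chn));		/* kick it via SDMAARM_HSTART */
 }
 #endif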
 
 static int
 load_firmware(struct sdma_softc *sc)
 {
 	const struct sdma_firmware_header *header;
 	const struct firmware *fp;
 
 	fp = firmware_get("sdma_fw");
 	if (fp == NULL) {
 		device_printf(sc->dev, "Can't get firmware.\n");
 		return (-1);
 	}
 
 	header = fp->data;
 	if (header->magic != FW_HEADER_MAGIC) {
 		device_printf(sc->dev, "Can't use firmware.\n");
 		return (-1);
 	}
 
 	sc->fw_header = header;
 	sc->fw_scripts = (const void *)((const char *)header +
 				header->script_addrs_start);
 
 	return (0);
 }
 
 static int
 boot_firmware(struct sdma_softc *sc)
 {
 	struct sdma_buffer_descriptor *bd0;
 	const uint32_t *ram_code;
 	int timeout;
 	int ret;
 	int chn;
 	int sz;
 	int i;
 
 	ram_code = (const void *)((const char *)sc->fw_header +
 			sc->fw_header->ram_code_start);
 
 	/* Make sure SDMA has not started yet */
 	WRITE4(sc, SDMAARM_MC0PTR, 0);
 
 	sz = SDMA_N_CHANNELS * sizeof(struct sdma_channel_control) + \
 	    sizeof(struct sdma_context_data);
 	sc->ccb = (void *)kmem_alloc_contig(sz, M_ZERO, 0, ~0, PAGE_SIZE, 0,
 	    VM_MEMATTR_UNCACHEABLE);
 	sc->ccb_phys = vtophys(sc->ccb);
 
 	sc->context = (void *)((char *)sc->ccb + \
 	    SDMA_N_CHANNELS * sizeof(struct sdma_channel_control));
 	sc->context_phys = vtophys(sc->context);
 
 	/* Disable all the channels */
 	for (i = 0; i < SDMA_N_EVENTS; i++)
 		WRITE4(sc, SDMAARM_CHNENBL(i), 0);
 
 	/* All channels have priority 0 */
 	for (i = 0; i < SDMA_N_CHANNELS; i++)
 		WRITE4(sc, SDMAARM_SDMA_CHNPRI(i), 0);
 
 	/* Channel 0 is used for booting firmware */
 	chn = 0;
 
 	sc->bd0 = (void *)kmem_alloc_contig(PAGE_SIZE, M_ZERO, 0, ~0, PAGE_SIZE,
 	    0, VM_MEMATTR_UNCACHEABLE);
 	bd0 = sc->bd0;
 	sc->ccb[chn].base_bd_ptr = vtophys(bd0);
 	sc->ccb[chn].current_bd_ptr = vtophys(bd0);
 
 	WRITE4(sc, SDMAARM_SDMA_CHNPRI(chn), 1);
 
 	sdma_overrides(sc, chn, 1, 0, 0);
 
 	/* XXX: not sure what this is */
 	WRITE4(sc, SDMAARM_CHN0ADDR, 0x4050);
 
 	WRITE4(sc, SDMAARM_CONFIG, 0);
 	WRITE4(sc, SDMAARM_MC0PTR, sc->ccb_phys);
 	WRITE4(sc, SDMAARM_CONFIG, CONFIG_CSM);
 	WRITE4(sc, SDMAARM_SDMA_CHNPRI(chn), 1);
 
 	bd0->mode.command = C0_SETPM;
 	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
 	bd0->mode.count = sc->fw_header->ram_code_size / 2;
 	bd0->buffer_addr = vtophys(ram_code);
 	bd0->ext_buffer_addr = sc->fw_scripts->ram_code_start_addr;
 
 	WRITE4(sc, SDMAARM_HSTART, 1);
 
 	timeout = 100;
 	while (!(ret = READ4(sc, SDMAARM_INTR) & 1)) {
 		if (timeout-- <= 0)
 			break;
 		DELAY(10);
 	}
 
 	if (ret == 0) {
 		device_printf(sc->dev, "SDMA failed to boot\n");
 		return (-1);
 	}
 
 	WRITE4(sc, SDMAARM_INTR, ret);
 
 #if 0
 	device_printf(sc->dev, "SDMA booted successfully.\n");
 #endif
 
 	/* Debug is disabled */
 	WRITE4(sc, SDMAARM_ONCE_ENB, 0);
 
 	return (0);
 }
 
 static int
 sdma_attach(device_t dev)
 {
 	struct sdma_softc *sc;
 	int err;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 
 	if (bus_alloc_resources(dev, sdma_spec, sc->res)) {
 		device_printf(dev, "could not allocate resources\n");
 		return (ENXIO);
 	}
 
 	/* Memory interface */
 	sc->bst = rman_get_bustag(sc->res[0]);
 	sc->bsh = rman_get_bushandle(sc->res[0]);
 
 	sdma_sc = sc;
 
 	/* Setup interrupt handler */
 	err = bus_setup_intr(dev, sc->res[1], INTR_TYPE_MISC | INTR_MPSAFE,
 	    NULL, sdma_intr, sc, &sc->ih);
 	if (err) {
 		device_printf(dev, "Unable to alloc interrupt resource.\n");
 		return (ENXIO);
 	}
 
 	if (load_firmware(sc) == -1)
 		return (ENXIO);
 
 	if (boot_firmware(sc) == -1)
 		return (ENXIO);
 
 	return (0);
 }
 
 static device_method_t sdma_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		sdma_probe),
 	DEVMETHOD(device_attach,	sdma_attach),
 	{ 0, 0 }
 };
 
 static driver_t sdma_driver = {
 	"sdma",
 	sdma_methods,
 	sizeof(struct sdma_softc),
 };
 
 static devclass_t sdma_devclass;
 
 EARLY_DRIVER_MODULE(sdma, simplebus, sdma_driver, sdma_devclass, 0, 0,
     BUS_PASS_RESOURCE);
Index: head/sys/arm/nvidia/tegra_xhci.c
===================================================================
--- head/sys/arm/nvidia/tegra_xhci.c	(revision 338317)
+++ head/sys/arm/nvidia/tegra_xhci.c	(revision 338318)
@@ -1,1160 +1,1160 @@
 /*-
  * Copyright (c) 2016 Michal Meloun <mmel@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * XHCI driver for Tegra SoCs.
  */
 #include "opt_bus.h"
 #include "opt_platform.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/clock.h>
 #include <sys/condvar.h>
 #include <sys/firmware.h>
 #include <sys/rman.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/pmap.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 
 
 #include <dev/extres/clk/clk.h>
 #include <dev/extres/hwreset/hwreset.h>
 #include <dev/extres/phy/phy.h>
 #include <dev/extres/regulator/regulator.h>
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
 #include <dev/usb/usb.h>
 #include <dev/usb/usbdi.h>
 #include <dev/usb/usb_busdma.h>
 #include <dev/usb/usb_process.h>
 #include <dev/usb/usb_controller.h>
 #include <dev/usb/usb_bus.h>
 #include <dev/usb/controller/xhci.h>
 #include <dev/usb/controller/xhcireg.h>
 
 #include <arm/nvidia/tegra_pmc.h>
 
 #include "usbdevs.h"
 
 /* FPCI address space */
 #define	T_XUSB_CFG_0				0x000
 #define	T_XUSB_CFG_1				0x004
 #define	 CFG_1_BUS_MASTER				(1 << 2)
 #define	 CFG_1_MEMORY_SPACE				(1 << 1)
 #define	 CFG_1_IO_SPACE					(1 << 0)
 
 #define	T_XUSB_CFG_2				0x008
 #define	T_XUSB_CFG_3				0x00C
 #define	T_XUSB_CFG_4				0x010
 #define	 CFG_4_BASE_ADDRESS(x)				(((x) & 0x1FFFF) << 15)
 
 #define	T_XUSB_CFG_5				0x014
 #define	T_XUSB_CFG_ARU_MAILBOX_CMD		0x0E4
 #define  ARU_MAILBOX_CMD_INT_EN				(1U << 31)
 #define  ARU_MAILBOX_CMD_DEST_XHCI			(1  << 30)
 #define  ARU_MAILBOX_CMD_DEST_SMI			(1  << 29)
 #define  ARU_MAILBOX_CMD_DEST_PME			(1  << 28)
 #define  ARU_MAILBOX_CMD_DEST_FALC			(1  << 27)
 
 #define	T_XUSB_CFG_ARU_MAILBOX_DATA_IN		0x0E8
 #define	 ARU_MAILBOX_DATA_IN_DATA(x)			(((x) & 0xFFFFFF) <<  0)
 #define	 ARU_MAILBOX_DATA_IN_TYPE(x)			(((x) & 0x0000FF) << 24)
 
 #define	T_XUSB_CFG_ARU_MAILBOX_DATA_OUT		0x0EC
 #define	 ARU_MAILBOX_DATA_OUT_DATA(x)			(((x) >>  0) & 0xFFFFFF)
 #define	 ARU_MAILBOX_DATA_OUT_TYPE(x)			(((x) >> 24) & 0x0000FF)
 
 #define	T_XUSB_CFG_ARU_MAILBOX_OWNER		0x0F0
 #define	 ARU_MAILBOX_OWNER_SW				2
 #define	 ARU_MAILBOX_OWNER_FW				1
 #define	 ARU_MAILBOX_OWNER_NONE				0
 
 #define	XUSB_CFG_ARU_C11_CSBRANGE		0x41C	/* ! UNDOCUMENTED ! */
 #define	 ARU_C11_CSBRANGE_PAGE(x)			((x) >> 9)
 #define	 ARU_C11_CSBRANGE_ADDR(x)			(0x800 + ((x) & 0x1FF))
 #define	XUSB_CFG_ARU_SMI_INTR			0x428	/* ! UNDOCUMENTED ! */
 #define  ARU_SMI_INTR_EN				(1 << 3)
 #define  ARU_SMI_INTR_FW_HANG				(1 << 1)
 #define	XUSB_CFG_ARU_RST			0x42C	/* ! UNDOCUMENTED ! */
 #define	 ARU_RST_RESET					(1 << 0)
 
 #define	XUSB_HOST_CONFIGURATION			0x180
 #define	 CONFIGURATION_CLKEN_OVERRIDE			(1U<< 31)
 #define	 CONFIGURATION_PW_NO_DEVSEL_ERR_CYA		(1 << 19)
 #define	 CONFIGURATION_INITIATOR_READ_IDLE		(1 << 18)
 #define	 CONFIGURATION_INITIATOR_WRITE_IDLE		(1 << 17)
 #define	 CONFIGURATION_WDATA_LEAD_CYA			(1 << 15)
 #define	 CONFIGURATION_WR_INTRLV_CYA			(1 << 14)
 #define	 CONFIGURATION_TARGET_READ_IDLE			(1 << 11)
 #define	 CONFIGURATION_TARGET_WRITE_IDLE		(1 << 10)
 #define	 CONFIGURATION_MSI_VEC_EMPTY			(1 <<  9)
 #define	 CONFIGURATION_UFPCI_MSIAW			(1 <<  7)
 #define	 CONFIGURATION_UFPCI_PWPASSPW			(1 <<  6)
 #define	 CONFIGURATION_UFPCI_PASSPW			(1 <<  5)
 #define	 CONFIGURATION_UFPCI_PWPASSNPW			(1 <<  4)
 #define	 CONFIGURATION_DFPCI_PWPASSNPW			(1 <<  3)
 #define	 CONFIGURATION_DFPCI_RSPPASSPW			(1 <<  2)
 #define	 CONFIGURATION_DFPCI_PASSPW			(1 <<  1)
 #define	 CONFIGURATION_EN_FPCI				(1 <<  0)
 
 /* IPFS address space */
 #define	XUSB_HOST_FPCI_ERROR_MASKS		0x184
 #define	 FPCI_ERROR_MASTER_ABORT			(1 <<  2)
 #define	 FPCI_ERRORI_DATA_ERROR				(1 <<  1)
 #define	 FPCI_ERROR_TARGET_ABORT			(1 <<  0)
 
 #define	XUSB_HOST_INTR_MASK			0x188
 #define	 INTR_IP_INT_MASK				(1 << 16)
 #define	 INTR_MSI_MASK					(1 <<  8)
 #define	 INTR_INT_MASK					(1 <<  0)
 
 #define	XUSB_HOST_CLKGATE_HYSTERESIS		0x1BC
 
  /* CSB Falcon CPU */
 #define	XUSB_FALCON_CPUCTL			0x100
 #define	 CPUCTL_STOPPED					(1 << 5)
 #define	 CPUCTL_HALTED					(1 << 4)
 #define	 CPUCTL_HRESET					(1 << 3)
 #define	 CPUCTL_SRESET					(1 << 2)
 #define	 CPUCTL_STARTCPU				(1 << 1)
 #define	 CPUCTL_IINVAL					(1 << 0)
 
 #define	XUSB_FALCON_BOOTVEC			0x104
 #define	XUSB_FALCON_DMACTL			0x10C
 #define	XUSB_FALCON_IMFILLRNG1			0x154
 #define	 IMFILLRNG1_TAG_HI(x)				(((x) & 0xFFF) << 16)
 #define	 IMFILLRNG1_TAG_LO(x)				(((x) & 0xFFF) <<  0)
 #define	XUSB_FALCON_IMFILLCTL			0x158
 
 /* CSB mempool */
 #define	XUSB_CSB_MEMPOOL_APMAP			0x10181C
 #define	 APMAP_BOOTPATH					(1U << 31)
 
 #define	XUSB_CSB_MEMPOOL_ILOAD_ATTR		0x101A00
 #define	XUSB_CSB_MEMPOOL_ILOAD_BASE_LO		0x101A04
 #define	XUSB_CSB_MEMPOOL_ILOAD_BASE_HI		0x101A08
 #define	XUSB_CSB_MEMPOOL_L2IMEMOP_SIZE		0x101A10
 #define	 L2IMEMOP_SIZE_OFFSET(x)			(((x) & 0x3FF) <<  8)
 #define	 L2IMEMOP_SIZE_SIZE(x)				(((x) & 0x0FF) << 24)
 
 #define	XUSB_CSB_MEMPOOL_L2IMEMOP_TRIG		0x101A14
 #define	 L2IMEMOP_INVALIDATE_ALL			(0x40 << 24)
 #define	 L2IMEMOP_LOAD_LOCKED_RESULT			(0x11 << 24)
 
 #define	XUSB_CSB_MEMPOOL_L2IMEMOP_RESULT        0x101A18
 #define	 L2IMEMOP_RESULT_VLD       (1U << 31)
 
 #define XUSB_CSB_IMEM_BLOCK_SIZE	256
 
 #define	TEGRA_XHCI_SS_HIGH_SPEED	120000000
 #define	TEGRA_XHCI_SS_LOW_SPEED		 12000000
 
 /* MBOX commands. */
 #define	MBOX_CMD_MSG_ENABLED			 1
 #define	MBOX_CMD_INC_FALC_CLOCK			 2
 #define	MBOX_CMD_DEC_FALC_CLOCK			 3
 #define	MBOX_CMD_INC_SSPI_CLOCK			 4
 #define	MBOX_CMD_DEC_SSPI_CLOCK			 5
 #define	MBOX_CMD_SET_BW				 6
 #define	MBOX_CMD_SET_SS_PWR_GATING		 7
 #define	MBOX_CMD_SET_SS_PWR_UNGATING		 8
 #define	MBOX_CMD_SAVE_DFE_CTLE_CTX		 9
 #define	MBOX_CMD_AIRPLANE_MODE_ENABLED		10
 #define	MBOX_CMD_AIRPLANE_MODE_DISABLED		11
 #define	MBOX_CMD_START_HSIC_IDLE		12
 #define	MBOX_CMD_STOP_HSIC_IDLE			13
 #define	MBOX_CMD_DBC_WAKE_STACK			14
 #define	MBOX_CMD_HSIC_PRETEND_CONNECT		15
 #define	MBOX_CMD_RESET_SSPI			16
 #define	MBOX_CMD_DISABLE_SS_LFPS_DETECTION	17
 #define	MBOX_CMD_ENABLE_SS_LFPS_DETECTION	18
 
 /* MBOX responses. */
 #define	MBOX_CMD_ACK				(0x80 + 0)
 #define	MBOX_CMD_NAK				(0x80 + 1)
 
 
 #define	IPFS_WR4(_sc, _r, _v)	bus_write_4((_sc)->mem_res_ipfs, (_r), (_v))
 #define	IPFS_RD4(_sc, _r)	bus_read_4((_sc)->mem_res_ipfs, (_r))
 #define	FPCI_WR4(_sc, _r, _v)	bus_write_4((_sc)->mem_res_fpci, (_r), (_v))
 #define	FPCI_RD4(_sc, _r)	bus_read_4((_sc)->mem_res_fpci, (_r))
 
 #define	LOCK(_sc)		mtx_lock(&(_sc)->mtx)
 #define	UNLOCK(_sc)		mtx_unlock(&(_sc)->mtx)
 #define	SLEEP(_sc, timeout)						\
     mtx_sleep(sc, &sc->mtx, 0, "tegra_xhci", timeout);
 #define	LOCK_INIT(_sc)							\
     mtx_init(&_sc->mtx, device_get_nameunit(_sc->dev), "tegra_xhci", MTX_DEF)
 #define	LOCK_DESTROY(_sc)	mtx_destroy(&_sc->mtx)
 #define	ASSERT_LOCKED(_sc)	mtx_assert(&_sc->mtx, MA_OWNED)
 #define	ASSERT_UNLOCKED(_sc)	mtx_assert(&_sc->mtx, MA_NOTOWNED)
 
 struct tegra_xusb_fw_hdr {
 	uint32_t	boot_loadaddr_in_imem;
 	uint32_t	boot_codedfi_offset;
 	uint32_t	boot_codetag;
 	uint32_t	boot_codesize;
 
 	uint32_t	phys_memaddr;
 	uint16_t	reqphys_memsize;
 	uint16_t	alloc_phys_memsize;
 
 	uint32_t	rodata_img_offset;
 	uint32_t	rodata_section_start;
 	uint32_t	rodata_section_end;
 	uint32_t	main_fnaddr;
 
 	uint32_t	fwimg_cksum;
 	uint32_t	fwimg_created_time;
 
 	uint32_t	imem_resident_start;
 	uint32_t	imem_resident_end;
 	uint32_t	idirect_start;
 	uint32_t	idirect_end;
 	uint32_t	l2_imem_start;
 	uint32_t	l2_imem_end;
 	uint32_t	version_id;
 	uint8_t		init_ddirect;
 	uint8_t		reserved[3];
 	uint32_t	phys_addr_log_buffer;
 	uint32_t	total_log_entries;
 	uint32_t	dequeue_ptr;
 	uint32_t	dummy[2];
 	uint32_t	fwimg_len;
 	uint8_t		magic[8];
 	uint32_t	ss_low_power_entry_timeout;
 	uint8_t		num_hsic_port;
 	uint8_t		ss_portmap;
 	uint8_t		build;
 	uint8_t		padding[137]; /* Pad to 256 bytes */
 };
 
 /* Compatible devices. */
 static struct ofw_compat_data compat_data[] = {
 	{"nvidia,tegra124-xusb",	1},
 	{NULL,		 		0}
 };
 
 struct tegra_xhci_softc {
 	struct xhci_softc 	xhci_softc;
 	device_t		dev;
 	struct mtx		mtx;
 	struct resource		*mem_res_fpci;
 	struct resource		*mem_res_ipfs;
 	struct resource		*irq_res_mbox;
 	void			*irq_hdl_mbox;
 
 	clk_t			clk_xusb_host;
 	clk_t			clk_xusb_gate;
 	clk_t			clk_xusb_falcon_src;
 	clk_t			clk_xusb_ss;
 	clk_t			clk_xusb_hs_src;
 	clk_t			clk_xusb_fs_src;
 	hwreset_t		hwreset_xusb_host;
 	hwreset_t		hwreset_xusb_ss;
 	regulator_t		supply_avddio_pex;
 	regulator_t		supply_dvddio_pex;
 	regulator_t		supply_avdd_usb;
 	regulator_t		supply_avdd_pll_utmip;
 	regulator_t		supply_avdd_pll_erefe;
 	regulator_t		supply_avdd_usb_ss_pll;
 	regulator_t		supply_hvdd_usb_ss;
 	regulator_t		supply_hvdd_usb_ss_pll_e;
 	phy_t 			phy_usb2_0;
 	phy_t 			phy_usb2_1;
 	phy_t 			phy_usb2_2;
 	phy_t 			phy_usb3_0;
 
 	struct intr_config_hook	irq_hook;
 	bool			xhci_inited;
 	char			*fw_name;
 	vm_offset_t		fw_vaddr;
 	vm_size_t		fw_size;
 };
 
 static uint32_t
 CSB_RD4(struct tegra_xhci_softc *sc, uint32_t addr)
 {
 
 	FPCI_WR4(sc, XUSB_CFG_ARU_C11_CSBRANGE, ARU_C11_CSBRANGE_PAGE(addr));
 	return (FPCI_RD4(sc, ARU_C11_CSBRANGE_ADDR(addr)));
 }
 
 static void
 CSB_WR4(struct tegra_xhci_softc *sc, uint32_t addr, uint32_t val)
 {
 
 	FPCI_WR4(sc, XUSB_CFG_ARU_C11_CSBRANGE, ARU_C11_CSBRANGE_PAGE(addr));
 	FPCI_WR4(sc, ARU_C11_CSBRANGE_ADDR(addr), val);
 }
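 
 /*
  * Editorial note: CSB registers are reached indirectly.  The two helpers
  * above first program the 512-byte page (addr >> 9) into
  * XUSB_CFG_ARU_C11_CSBRANGE and then access the low 9 bits of the
  * address through the FPCI window at offset 0x800.
  */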
 
 static int
 get_fdt_resources(struct tegra_xhci_softc *sc, phandle_t node)
 {
 	int rv;
 
 	rv = regulator_get_by_ofw_property(sc->dev, 0, "avddio-pex-supply",
 	    &sc->supply_avddio_pex);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot get 'avddio-pex' regulator\n");
 		return (ENXIO);
 	}
 	rv = regulator_get_by_ofw_property(sc->dev, 0, "dvddio-pex-supply",
 	    &sc->supply_dvddio_pex);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot get 'dvddio-pex' regulator\n");
 		return (ENXIO);
 	}
 	rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-usb-supply",
 	    &sc->supply_avdd_usb);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot get 'avdd-usb' regulator\n");
 		return (ENXIO);
 	}
 	rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-pll-utmip-supply",
 	    &sc->supply_avdd_pll_utmip);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot get 'avdd-pll-utmip' regulator\n");
 		return (ENXIO);
 	}
 	rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-pll-erefe-supply",
 	    &sc->supply_avdd_pll_erefe);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot get 'avdd-pll-erefe' regulator\n");
 		return (ENXIO);
 	}
 	rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-usb-ss-pll-supply",
 	    &sc->supply_avdd_usb_ss_pll);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot get 'avdd-usb-ss-pll' regulator\n");
 		return (ENXIO);
 	}
 	rv = regulator_get_by_ofw_property(sc->dev, 0, "hvdd-usb-ss-supply",
 	    &sc->supply_hvdd_usb_ss);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot get 'hvdd-usb-ss' regulator\n");
 		return (ENXIO);
 	}
 	rv = regulator_get_by_ofw_property(sc->dev, 0,
 	    "hvdd-usb-ss-pll-e-supply", &sc->supply_hvdd_usb_ss_pll_e);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot get 'hvdd-usb-ss-pll-e' regulator\n");
 		return (ENXIO);
 	}
 
 	rv = hwreset_get_by_ofw_name(sc->dev, 0, "xusb_host",
 	    &sc->hwreset_xusb_host);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'xusb_host' reset\n");
 		return (ENXIO);
 	}
 	rv = hwreset_get_by_ofw_name(sc->dev, 0, "xusb_ss",
 	    &sc->hwreset_xusb_ss);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'xusb_ss' reset\n");
 		return (ENXIO);
 	}
 
 	rv = phy_get_by_ofw_name(sc->dev, 0, "usb2-0", &sc->phy_usb2_0);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'usb2-0' phy\n");
 		return (ENXIO);
 	}
 	rv = phy_get_by_ofw_name(sc->dev, 0, "usb2-1", &sc->phy_usb2_1);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'usb2-1' phy\n");
 		return (ENXIO);
 	}
 	rv = phy_get_by_ofw_name(sc->dev, 0, "usb2-2", &sc->phy_usb2_2);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'usb2-2' phy\n");
 		return (ENXIO);
 	}
 	rv = phy_get_by_ofw_name(sc->dev, 0, "usb3-0", &sc->phy_usb3_0);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'usb3-0' phy\n");
 		return (ENXIO);
 	}
 
 	rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_host",
 	    &sc->clk_xusb_host);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'xusb_host' clock\n");
 		return (ENXIO);
 	}
 	rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_falcon_src",
 	    &sc->clk_xusb_falcon_src);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'xusb_falcon_src' clock\n");
 		return (ENXIO);
 	}
 	rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_ss",
 	    &sc->clk_xusb_ss);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'xusb_ss' clock\n");
 		return (ENXIO);
 	}
 	rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_hs_src",
 	    &sc->clk_xusb_hs_src);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'xusb_hs_src' clock\n");
 		return (ENXIO);
 	}
 	rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_fs_src",
 	    &sc->clk_xusb_fs_src);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'xusb_fs_src' clock\n");
 		return (ENXIO);
 	}
 	rv = clk_get_by_ofw_index_prop(sc->dev, 0, "freebsd,clock-xusb-gate", 0,
 	    &sc->clk_xusb_gate);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot get 'xusb_gate' clock\n");
 		return (ENXIO);
 	}
 	return (0);
 }
 
 static int
 enable_fdt_resources(struct tegra_xhci_softc *sc)
 {
 	int rv;
 
 	rv = hwreset_assert(sc->hwreset_xusb_host);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot reset 'xusb_host' reset\n");
 		return (rv);
 	}
 	rv = hwreset_assert(sc->hwreset_xusb_ss);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot reset 'xusb_ss' reset\n");
 		return (rv);
 	}
 
 	rv = regulator_enable(sc->supply_avddio_pex);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'avddio_pex' regulator\n");
 		return (rv);
 	}
 	rv = regulator_enable(sc->supply_dvddio_pex);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'dvddio_pex' regulator\n");
 		return (rv);
 	}
 	rv = regulator_enable(sc->supply_avdd_usb);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'avdd_usb' regulator\n");
 		return (rv);
 	}
 	rv = regulator_enable(sc->supply_avdd_pll_utmip);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'avdd_pll_utmip-5v' regulator\n");
 		return (rv);
 	}
 	rv = regulator_enable(sc->supply_avdd_pll_erefe);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'avdd_pll_erefe' regulator\n");
 		return (rv);
 	}
 	rv = regulator_enable(sc->supply_avdd_usb_ss_pll);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'avdd_usb_ss_pll' regulator\n");
 		return (rv);
 	}
 	rv = regulator_enable(sc->supply_hvdd_usb_ss);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'hvdd_usb_ss' regulator\n");
 		return (rv);
 	}
 	rv = regulator_enable(sc->supply_hvdd_usb_ss_pll_e);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'hvdd_usb_ss_pll_e' regulator\n");
 		return (rv);
 	}
 
 	/* Power off XUSB host and XUSB SS domains. */
 	rv = tegra_powergate_power_off(TEGRA_POWERGATE_XUSBA);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot power down 'xusba' domain\n");
 		return (rv);
 	}
 	rv = tegra_powergate_power_off(TEGRA_POWERGATE_XUSBC);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot power down 'xusbc' domain\n");
 		return (rv);
 	}
 
 	/* Set up the XUSB ss_src clock first */
 	rv = clk_set_freq(sc->clk_xusb_ss, TEGRA_XHCI_SS_HIGH_SPEED, 0);
 	if (rv != 0)
 		return (rv);
 
 	/* The XUSB gate clock must be enabled before XUSBA can be powered. */
 	rv = clk_enable(sc->clk_xusb_gate);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'xusb_gate' clock\n");
 		return (rv);
 	}
 
 	/* Power on XUSB host and XUSB SS domains. */
 	rv = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_XUSBC,
 	    sc->clk_xusb_host, sc->hwreset_xusb_host);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot powerup 'xusbc' domain\n");
 		return (rv);
 	}
 	rv = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_XUSBA,
 	    sc->clk_xusb_ss, sc->hwreset_xusb_ss);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot powerup 'xusba' domain\n");
 		return (rv);
 	}
 
 	/* Enable rest of clocks */
 	rv = clk_enable(sc->clk_xusb_falcon_src);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'xusb_falcon_src' clock\n");
 		return (rv);
 	}
 	rv = clk_enable(sc->clk_xusb_fs_src);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'xusb_fs_src' clock\n");
 		return (rv);
 	}
 	rv = clk_enable(sc->clk_xusb_hs_src);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Cannot enable 'xusb_hs_src' clock\n");
 		return (rv);
 	}
 
 	rv = phy_enable(sc->phy_usb2_0);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot enable USB2_0 phy\n");
 		return (rv);
 	}
 	rv = phy_enable(sc->phy_usb2_1);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot enable USB2_1 phy\n");
 		return (rv);
 	}
 	rv = phy_enable(sc->phy_usb2_2);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot enable USB2_2 phy\n");
 		return (rv);
 	}
 	rv = phy_enable(sc->phy_usb3_0);
 	if (rv != 0) {
 		device_printf(sc->dev, "Cannot enable USB3_0 phy\n");
 		return (rv);
 	}
 
 	return (0);
 }
 
 /* Respond with an ACK/NAK back to the FW */
 static void
 mbox_send_ack(struct tegra_xhci_softc *sc, uint32_t cmd, uint32_t data)
 {
 	uint32_t reg;
 
 	reg = ARU_MAILBOX_DATA_IN_TYPE(cmd) | ARU_MAILBOX_DATA_IN_DATA(data);
 	FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_DATA_IN, reg);
 
 	reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD);
 	reg |= ARU_MAILBOX_CMD_DEST_FALC | ARU_MAILBOX_CMD_INT_EN;
 	FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD, reg);
 }
 
 /* Send a command to the FW */
 static int
 mbox_send_cmd(struct tegra_xhci_softc *sc, uint32_t cmd, uint32_t data)
 {
 	uint32_t reg;
 	int i;
 
 	reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER);
 	if (reg != ARU_MAILBOX_OWNER_NONE) {
 		device_printf(sc->dev,
 		    "CPU mailbox is busy: 0x%08X\n", reg);
 		return (EBUSY);
 	}
 	/* XXX Is this right? Retry loop? Wait before send? */
 	FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER, ARU_MAILBOX_OWNER_SW);
 	reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER);
 	if (reg != ARU_MAILBOX_OWNER_SW) {
 		device_printf(sc->dev,
 		    "Cannot acquire CPU mailbox: 0x%08X\n", reg);
 		return (EBUSY);
 	}
 	reg = ARU_MAILBOX_DATA_IN_TYPE(cmd) | ARU_MAILBOX_DATA_IN_DATA(data);
 	FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_DATA_IN, reg);
 
 	reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD);
 	reg |= ARU_MAILBOX_CMD_DEST_FALC | ARU_MAILBOX_CMD_INT_EN;
 	FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD, reg);
 
 	for (i = 250; i > 0; i--) {
 		reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER);
 		if (reg == ARU_MAILBOX_OWNER_NONE)
 			break;
 		DELAY(100);
 	}
 	if (i <= 0) {
 		device_printf(sc->dev,
 		    "Command response timeout: 0x%08X\n", reg);
 		return (ETIMEDOUT);
 	}
 
 	return(0);
 }
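 
 /*
  * Editorial note summarizing the handshake implemented above: the driver
  * claims the mailbox by writing ARU_MAILBOX_OWNER_SW, posts the request
  * through DATA_IN (8-bit type plus 24-bit payload), raises it toward the
  * Falcon with DEST_FALC | INT_EN, and polls the OWNER register until the
  * firmware hands it back as NONE.  Firmware-initiated requests travel the
  * other way: they arrive via DATA_OUT and the SMI interrupt and are
  * answered from intr_mbox() with mbox_send_ack() or by releasing the
  * mailbox.
  */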
 
 static void
 process_msg(struct tegra_xhci_softc *sc, uint32_t req_cmd, uint32_t req_data,
     uint32_t *resp_cmd, uint32_t *resp_data)
 {
 	uint64_t freq;
 	int rv;
 
 	/* In most cases, data are echoed back. */
 	*resp_data = req_data;
 	switch (req_cmd) {
 	case MBOX_CMD_INC_FALC_CLOCK:
 	case MBOX_CMD_DEC_FALC_CLOCK:
 		rv = clk_set_freq(sc->clk_xusb_falcon_src, req_data * 1000ULL,
 		    0);
 		if (rv == 0) {
 			rv = clk_get_freq(sc->clk_xusb_falcon_src, &freq);
 			*resp_data = (uint32_t)(freq / 1000);
 		}
 		*resp_cmd = rv == 0 ? MBOX_CMD_ACK: MBOX_CMD_NAK;
 		break;
 
 	case MBOX_CMD_INC_SSPI_CLOCK:
 	case MBOX_CMD_DEC_SSPI_CLOCK:
 		rv = clk_set_freq(sc->clk_xusb_ss, req_data * 1000ULL,
 		    0);
 		if (rv == 0) {
 			rv = clk_get_freq(sc->clk_xusb_ss, &freq);
 			*resp_data = (uint32_t)(freq / 1000);
 		}
 		*resp_cmd = rv == 0 ? MBOX_CMD_ACK: MBOX_CMD_NAK;
 		break;
 
 	case MBOX_CMD_SET_BW:
 		/* No response is expected. */
 		*resp_cmd = 0;
 		break;
 
 	case MBOX_CMD_SET_SS_PWR_GATING:
 	case MBOX_CMD_SET_SS_PWR_UNGATING:
 		*resp_cmd = MBOX_CMD_NAK;
 		break;
 
 	case MBOX_CMD_SAVE_DFE_CTLE_CTX:
 		/* Not implemented yet. */
 		*resp_cmd = MBOX_CMD_ACK;
 		break;
 
 
 	case MBOX_CMD_START_HSIC_IDLE:
 	case MBOX_CMD_STOP_HSIC_IDLE:
 		/* Not implemented yet. */
 		*resp_cmd = MBOX_CMD_NAK;
 		break;
 
 	case MBOX_CMD_DISABLE_SS_LFPS_DETECTION:
 	case MBOX_CMD_ENABLE_SS_LFPS_DETECTION:
 		/* Not implemented yet. */
 		*resp_cmd = MBOX_CMD_NAK;
 		break;
 
 	case MBOX_CMD_AIRPLANE_MODE_ENABLED:
 	case MBOX_CMD_AIRPLANE_MODE_DISABLED:
 	case MBOX_CMD_DBC_WAKE_STACK:
 	case MBOX_CMD_HSIC_PRETEND_CONNECT:
 	case MBOX_CMD_RESET_SSPI:
 		device_printf(sc->dev,
 		    "Received unused/unexpected command: %u\n", req_cmd);
 		*resp_cmd = 0;
 		break;
 
 	default:
 		device_printf(sc->dev,
 		    "Received unknown command: %u\n", req_cmd);
 	}
 }
 
 static void
 intr_mbox(void *arg)
 {
 	struct tegra_xhci_softc *sc;
 	uint32_t reg, msg, resp_cmd, resp_data;
 
 	sc = (struct tegra_xhci_softc *)arg;
 
 	/* Clear interrupt first */
 	reg = FPCI_RD4(sc, XUSB_CFG_ARU_SMI_INTR);
 	FPCI_WR4(sc, XUSB_CFG_ARU_SMI_INTR, reg);
 	if (reg & ARU_SMI_INTR_FW_HANG) {
 		device_printf(sc->dev,
 		    "XUSB CPU firmware hang!!! CPUCTL: 0x%08X\n",
 		    CSB_RD4(sc, XUSB_FALCON_CPUCTL));
 	}
 
 	msg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_DATA_OUT);
 	resp_cmd = 0;
 	process_msg(sc, ARU_MAILBOX_DATA_OUT_TYPE(msg),
 	   ARU_MAILBOX_DATA_OUT_DATA(msg), &resp_cmd, &resp_data);
 	if (resp_cmd != 0)
 		mbox_send_ack(sc, resp_cmd, resp_data);
 	else
 		FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER,
 		    ARU_MAILBOX_OWNER_NONE);
 
 	reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD);
 	reg &= ~ARU_MAILBOX_CMD_DEST_SMI;
 	FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD, reg);
 
 }
 
 static int
 load_fw(struct tegra_xhci_softc *sc)
 {
 	const struct firmware *fw;
 	const struct tegra_xusb_fw_hdr *fw_hdr;
 	vm_paddr_t fw_paddr, fw_base;
 	vm_offset_t fw_vaddr;
 	vm_size_t fw_size;
 	uint32_t code_tags, code_size;
 	struct clocktime fw_clock;
 	struct timespec	fw_timespec;
 	int i;
 
 	/* Reset ARU */
 	FPCI_WR4(sc, XUSB_CFG_ARU_RST, ARU_RST_RESET);
 	DELAY(3000);
 
 	/* Check if FALCON already runs */
 	if (CSB_RD4(sc, XUSB_CSB_MEMPOOL_ILOAD_BASE_LO) != 0) {
 		device_printf(sc->dev,
 		    "XUSB CPU is already loaded, CPUCTL: 0x%08X\n",
 			 CSB_RD4(sc, XUSB_FALCON_CPUCTL));
 		return (0);
 	}
 
 	fw = firmware_get(sc->fw_name);
 	if (fw == NULL) {
 		device_printf(sc->dev, "Cannot read xusb firmware\n");
 		return (ENOENT);
 	}
 
 	/* Allocate uncached memory and copy the firmware into it. */
 	fw_hdr = (const struct tegra_xusb_fw_hdr *)fw->data;
 	fw_size = fw_hdr->fwimg_len;
 
 	fw_vaddr = kmem_alloc_contig(fw_size, M_WAITOK, 0, -1UL, PAGE_SIZE, 0,
 	    VM_MEMATTR_UNCACHEABLE);
 	fw_paddr = vtophys(fw_vaddr);
 	fw_hdr = (const struct tegra_xusb_fw_hdr *)fw_vaddr;
 	memcpy((void *)fw_vaddr, fw->data, fw_size);
 
 	firmware_put(fw, FIRMWARE_UNLOAD);
 	sc->fw_vaddr = fw_vaddr;
 	sc->fw_size = fw_size;
 
 	/* Setup firmware physical address and size. */
 	fw_base = fw_paddr + sizeof(*fw_hdr);
 	CSB_WR4(sc, XUSB_CSB_MEMPOOL_ILOAD_ATTR, fw_size);
 	CSB_WR4(sc, XUSB_CSB_MEMPOOL_ILOAD_BASE_LO, fw_base & 0xFFFFFFFF);
 	CSB_WR4(sc, XUSB_CSB_MEMPOOL_ILOAD_BASE_HI, (uint64_t)fw_base >> 32);
 	CSB_WR4(sc, XUSB_CSB_MEMPOOL_APMAP, APMAP_BOOTPATH);
 
 	/* Invalidate full L2IMEM context. */
 	CSB_WR4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_TRIG,
 	    L2IMEMOP_INVALIDATE_ALL);
 
 	/* Program load of L2IMEM by boot code. */
 	code_tags = howmany(fw_hdr->boot_codetag, XUSB_CSB_IMEM_BLOCK_SIZE);
 	code_size = howmany(fw_hdr->boot_codesize, XUSB_CSB_IMEM_BLOCK_SIZE);
 	CSB_WR4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_SIZE,
 	    L2IMEMOP_SIZE_OFFSET(code_tags) |
 	    L2IMEMOP_SIZE_SIZE(code_size));
 
 	/* Execute L2IMEM boot code fetch. */
 	CSB_WR4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_TRIG,
 	    L2IMEMOP_LOAD_LOCKED_RESULT);
 
 	/* Program FALCON auto-fill range and block count */
 	CSB_WR4(sc, XUSB_FALCON_IMFILLCTL, code_size);
 	CSB_WR4(sc, XUSB_FALCON_IMFILLRNG1,
 	    IMFILLRNG1_TAG_LO(code_tags) |
 	    IMFILLRNG1_TAG_HI(code_tags + code_size));
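 	/*
 	 * Editorial worked example (hypothetical numbers): with
 	 * boot_codetag = 0x1800 and boot_codesize = 0x5000, the 256-byte
 	 * IMEM blocks give code_tags = 0x18 and code_size = 0x50, so the
 	 * auto-fill range above spans tags 0x18 through 0x68.
 	 */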
 
 	CSB_WR4(sc, XUSB_FALCON_DMACTL, 0);
 	/* Wait for the L2IMEM load operation to complete */
 	for (i = 500; i > 0; i--) {
 		if (CSB_RD4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_RESULT) &
 		     L2IMEMOP_RESULT_VLD)
 			break;
 		DELAY(100);
 	}
 	if (i <= 0) {
 		device_printf(sc->dev, "Timed out while waiting for DMA, "
 		    "state: 0x%08X\n",
 		    CSB_RD4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_RESULT));
 		return (ETIMEDOUT);
 	}
 
 	/* Boot FALCON cpu */
 	CSB_WR4(sc, XUSB_FALCON_BOOTVEC, fw_hdr->boot_codetag);
 	CSB_WR4(sc, XUSB_FALCON_CPUCTL, CPUCTL_STARTCPU);
 
 	/* Wait for CPU */
 	for (i = 50; i > 0; i--) {
 		if (CSB_RD4(sc, XUSB_FALCON_CPUCTL) == CPUCTL_STOPPED)
 			break;
 		DELAY(100);
 	}
 	if (i <= 0) {
 		device_printf(sc->dev, "Timed out while waiting for FALCON cpu, "
 		    "state: 0x%08X\n", CSB_RD4(sc, XUSB_FALCON_CPUCTL));
 		return (ETIMEDOUT);
 	}
 
 	fw_timespec.tv_sec = fw_hdr->fwimg_created_time;
 	fw_timespec.tv_nsec = 0;
 	clock_ts_to_ct(&fw_timespec, &fw_clock);
 	device_printf(sc->dev,
 	    " Falcon firmware version: %02X.%02X.%04X,"
 	    " (%d/%d/%d %d:%02d:%02d UTC)\n",
 	    (fw_hdr->version_id >> 24) & 0xFF,(fw_hdr->version_id >> 15) & 0xFF,
 	    fw_hdr->version_id & 0xFFFF,
 	    fw_clock.day, fw_clock.mon, fw_clock.year,
 	    fw_clock.hour, fw_clock.min, fw_clock.sec);
 
 	return (0);
 }
 
 static int
 init_hw(struct tegra_xhci_softc *sc)
 {
 	int rv;
 	uint32_t reg;
 	rman_res_t base_addr;
 
 	base_addr = rman_get_start(sc->xhci_softc.sc_io_res);
 
 	/* Enable FPCI access */
 	reg = IPFS_RD4(sc, XUSB_HOST_CONFIGURATION);
 	reg |= CONFIGURATION_EN_FPCI;
 	IPFS_WR4(sc, XUSB_HOST_CONFIGURATION, reg);
 	IPFS_RD4(sc, XUSB_HOST_CONFIGURATION);
 
 	/* Program bar for XHCI base address */
 	reg = FPCI_RD4(sc, T_XUSB_CFG_4);
 	reg &= ~CFG_4_BASE_ADDRESS(~0);
 	reg |= CFG_4_BASE_ADDRESS((uint32_t)base_addr >> 15);
 	FPCI_WR4(sc, T_XUSB_CFG_4, reg);
 	FPCI_WR4(sc, T_XUSB_CFG_5, (uint32_t)((uint64_t)(base_addr) >> 32));
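 	/*
 	 * Judging by the >> 15 shift above, the CFG_4 BASE_ADDRESS field
 	 * holds bits 31:15 of the MMIO base; the upper 32 bits go to CFG_5.
 	 */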
 
 	/* Enable bus master */
 	reg = FPCI_RD4(sc, T_XUSB_CFG_1);
 	reg |= CFG_1_IO_SPACE;
 	reg |= CFG_1_MEMORY_SPACE;
 	reg |= CFG_1_BUS_MASTER;
 	FPCI_WR4(sc, T_XUSB_CFG_1, reg);
 
 	/* Enable Interrupts */
 	reg = IPFS_RD4(sc, XUSB_HOST_INTR_MASK);
 	reg |= INTR_IP_INT_MASK;
 	IPFS_WR4(sc, XUSB_HOST_INTR_MASK, reg);
 
 	/* Set hysteresis */
 	IPFS_WR4(sc, XUSB_HOST_CLKGATE_HYSTERESIS, 128);
 
 	rv = load_fw(sc);
 	if (rv != 0)
 		return (rv);
 	return (0);
 }
 
 static int
 tegra_xhci_probe(device_t dev)
 {
 
 	if (!ofw_bus_status_okay(dev))
 		return (ENXIO);
 
 	if (ofw_bus_search_compatible(dev, compat_data)->ocd_data != 0) {
 		device_set_desc(dev, "Nvidia Tegra XHCI controller");
 		return (BUS_PROBE_DEFAULT);
 	}
 	return (ENXIO);
 }
 
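 /*
  * Detach also serves as the error path of tegra_xhci_attach(), so every
  * teardown step below must tolerate a partially initialized softc.
  */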
 static int
 tegra_xhci_detach(device_t dev)
 {
 	struct tegra_xhci_softc *sc;
 	struct xhci_softc *xsc;
 
 	sc = device_get_softc(dev);
 	xsc = &sc->xhci_softc;
 
 	/* During module unload there are lots of leftover children. */
 	device_delete_children(dev);
 	if (sc->xhci_inited) {
 		usb_callout_drain(&xsc->sc_callout);
 		xhci_halt_controller(xsc);
 	}
 
 	if (xsc->sc_irq_res && xsc->sc_intr_hdl) {
 		bus_teardown_intr(dev, xsc->sc_irq_res, xsc->sc_intr_hdl);
 		xsc->sc_intr_hdl = NULL;
 	}
 	if (xsc->sc_irq_res) {
 		bus_release_resource(dev, SYS_RES_IRQ,
 		    rman_get_rid(xsc->sc_irq_res), xsc->sc_irq_res);
 		xsc->sc_irq_res = NULL;
 	}
 	if (xsc->sc_io_res != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    rman_get_rid(xsc->sc_io_res), xsc->sc_io_res);
 		xsc->sc_io_res = NULL;
 	}
 	if (sc->xhci_inited)
 		xhci_uninit(xsc);
 	if (sc->irq_hdl_mbox != NULL)
 		bus_teardown_intr(dev, sc->irq_res_mbox, sc->irq_hdl_mbox);
 	if (sc->fw_vaddr != 0)
-		kmem_free(kernel_arena, sc->fw_vaddr, sc->fw_size);
+		kmem_free(sc->fw_vaddr, sc->fw_size);
 	LOCK_DESTROY(sc);
 	return (0);
 }
 
 static int
 tegra_xhci_attach(device_t dev)
 {
 	struct tegra_xhci_softc *sc;
 	struct xhci_softc *xsc;
 	int rv, rid;
 	phandle_t node;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 	sc->fw_name = "tegra124_xusb_fw";
 	node = ofw_bus_get_node(dev);
 	xsc = &sc->xhci_softc;
 	LOCK_INIT(sc);
 
 	rv = get_fdt_resources(sc, node);
 	if (rv != 0) {
 		rv = ENXIO;
 		goto error;
 	}
 	rv = enable_fdt_resources(sc);
 	if (rv != 0) {
 		rv = ENXIO;
 		goto error;
 	}
 
 	/* Allocate resources. */
 	rid = 0;
 	xsc->sc_io_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 	    RF_ACTIVE);
 	if (xsc->sc_io_res == NULL) {
 		device_printf(dev,
 		    "Could not allocate HCD memory resources\n");
 		rv = ENXIO;
 		goto error;
 	}
 	rid = 1;
 	sc->mem_res_fpci = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 	    RF_ACTIVE);
 	if (sc->mem_res_fpci == NULL) {
 		device_printf(dev,
 		    "Could not allocate FPCI memory resources\n");
 		rv = ENXIO;
 		goto error;
 	}
 	rid = 2;
 	sc->mem_res_ipfs = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 	    RF_ACTIVE);
 	if (sc->mem_res_ipfs == NULL) {
 		device_printf(dev,
 		    "Could not allocate IPFS memory resources\n");
 		rv = ENXIO;
 		goto error;
 	}
 
 	rid = 0;
 	xsc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 	    RF_ACTIVE);
 	if (xsc->sc_irq_res == NULL) {
 		device_printf(dev, "Could not allocate HCD IRQ resources\n");
 		rv = ENXIO;
 		goto error;
 	}
 	rid = 1;
 	sc->irq_res_mbox = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 	    RF_ACTIVE);
 	if (sc->irq_res_mbox == NULL) {
 		device_printf(dev, "Could not allocate MBOX IRQ resources\n");
 		rv = ENXIO;
 		goto error;
 	}
 
 	rv = init_hw(sc);
 	if (rv != 0) {
 		device_printf(dev, "Could not initialize  XUSB hardware\n");
 		goto error;
 	}
 
 	/* Wake up and enable the firmware. */
 	rv = mbox_send_cmd(sc, MBOX_CMD_MSG_ENABLED, 0);
 	if (rv != 0) {
 		device_printf(sc->dev, "Could not enable XUSB firmware\n");
 		goto error;
 	}
 
 	/* Fill data for XHCI driver. */
 	xsc->sc_bus.parent = dev;
 	xsc->sc_bus.devices = xsc->sc_devices;
 	xsc->sc_bus.devices_max = XHCI_MAX_DEVICES;
 
 	xsc->sc_io_tag = rman_get_bustag(xsc->sc_io_res);
 	xsc->sc_io_hdl = rman_get_bushandle(xsc->sc_io_res);
 	xsc->sc_io_size = rman_get_size(xsc->sc_io_res);
 	strlcpy(xsc->sc_vendor, "Nvidia", sizeof(xsc->sc_vendor));
 
 	/* Add USB bus device. */
 	xsc->sc_bus.bdev = device_add_child(sc->dev, "usbus", -1);
 	if (xsc->sc_bus.bdev == NULL) {
 		device_printf(sc->dev, "Could not add USB device\n");
 		rv = ENXIO;
 		goto error;
 	}
 	device_set_ivars(xsc->sc_bus.bdev, &xsc->sc_bus);
 	device_set_desc(xsc->sc_bus.bdev, "Nvidia USB 3.0 controller");
 
 	rv = xhci_init(xsc, sc->dev, 1);
 	if (rv != 0) {
 		device_printf(sc->dev, "USB init failed: %d\n", rv);
 		goto error;
 	}
 	sc->xhci_inited = true;
 	rv = xhci_start_controller(xsc);
 	if (rv != 0) {
 		device_printf(sc->dev,
 		    "Could not start XHCI controller: %d\n", rv);
 		goto error;
 	}
 
 	rv = bus_setup_intr(dev, sc->irq_res_mbox, INTR_TYPE_MISC | INTR_MPSAFE,
 	    NULL, intr_mbox, sc, &sc->irq_hdl_mbox);
 	if (rv != 0) {
 		device_printf(dev, "Could not setup error IRQ: %d\n",rv);
 		xsc->sc_intr_hdl = NULL;
 		goto error;
 	}
 
 	rv = bus_setup_intr(dev, xsc->sc_irq_res, INTR_TYPE_BIO | INTR_MPSAFE,
 	    NULL, (driver_intr_t *)xhci_interrupt, xsc, &xsc->sc_intr_hdl);
 	if (rv != 0) {
 		device_printf(dev, "Could not setup error IRQ: %d\n",rv);
 		xsc->sc_intr_hdl = NULL;
 		goto error;
 	}
 
 	/* Probe the bus. */
 	rv = device_probe_and_attach(xsc->sc_bus.bdev);
 	if (rv != 0) {
 		device_printf(sc->dev, "Could not initialize USB: %d\n", rv);
 		goto error;
 	}
 
 	return (0);
 
 error:
 panic("XXXXX");
 	tegra_xhci_detach(dev);
 	return (rv);
 }
 
 static device_method_t xhci_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, tegra_xhci_probe),
 	DEVMETHOD(device_attach, tegra_xhci_attach),
 	DEVMETHOD(device_detach, tegra_xhci_detach),
 	DEVMETHOD(device_suspend, bus_generic_suspend),
 	DEVMETHOD(device_resume, bus_generic_resume),
 	DEVMETHOD(device_shutdown, bus_generic_shutdown),
 
 	/* Bus interface */
 	DEVMETHOD(bus_print_child, bus_generic_print_child),
 
 	DEVMETHOD_END
 };
 
 static devclass_t xhci_devclass;
 static DEFINE_CLASS_0(xhci, xhci_driver, xhci_methods,
     sizeof(struct tegra_xhci_softc));
 DRIVER_MODULE(tegra_xhci, simplebus, xhci_driver, xhci_devclass, NULL, NULL);
 MODULE_DEPEND(tegra_xhci, usb, 1, 1, 1);
Index: head/sys/arm64/arm64/busdma_bounce.c
===================================================================
--- head/sys/arm64/arm64/busdma_bounce.c	(revision 338317)
+++ head/sys/arm64/arm64/busdma_bounce.c	(revision 338318)
@@ -1,1331 +1,1330 @@
 /*-
  * Copyright (c) 1997, 1998 Justin T. Gibbs.
  * Copyright (c) 2015-2016 The FreeBSD Foundation
  * All rights reserved.
  *
  * Portions of this software were developed by Andrew Turner
  * under sponsorship of the FreeBSD Foundation.
  *
  * Portions of this software were developed by Semihalf
  * under sponsorship of the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/md_var.h>
 #include <arm64/include/bus_dma_impl.h>
 
 #define MAX_BPAGES 4096
 
 enum {
 	BF_COULD_BOUNCE		= 0x01,
 	BF_MIN_ALLOC_COMP	= 0x02,
 	BF_KMEM_ALLOC		= 0x04,
 	BF_COHERENT		= 0x10,
 };
 
 struct bounce_zone;
 
 struct bus_dma_tag {
 	struct bus_dma_tag_common common;
 	int			map_count;
 	int			bounce_flags;
 	bus_dma_segment_t	*segments;
 	struct bounce_zone	*bounce_zone;
 };
 
 struct bounce_page {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	bus_addr_t	busaddr;	/* Physical address */
 	vm_offset_t	datavaddr;	/* kva of client data */
 	vm_page_t	datapage;	/* physical page of client data */
 	vm_offset_t	dataoffs;	/* page offset of client data */
 	bus_size_t	datacount;	/* client data count */
 	STAILQ_ENTRY(bounce_page) links;
 };
 
 int busdma_swi_pending;
 
 struct bounce_zone {
 	STAILQ_ENTRY(bounce_zone) links;
 	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
 	int		total_bpages;
 	int		free_bpages;
 	int		reserved_bpages;
 	int		active_bpages;
 	int		total_bounced;
 	int		total_deferred;
 	int		map_count;
 	bus_size_t	alignment;
 	bus_addr_t	lowaddr;
 	char		zoneid[8];
 	char		lowaddrid[20];
 	struct sysctl_ctx_list sysctl_tree;
 	struct sysctl_oid *sysctl_tree_top;
 };
 
 static struct mtx bounce_lock;
 static int total_bpages;
 static int busdma_zonecount;
 static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
 
 static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
 	   "Total bounce pages");
 
 struct sync_list {
 	vm_offset_t	vaddr;		/* kva of client data */
 	bus_addr_t	paddr;		/* physical address */
 	vm_page_t	pages;		/* starting page of client data */
 	bus_size_t	datacount;	/* client data count */
 };
 
 struct bus_dmamap {
 	struct bp_list	       bpages;
 	int		       pagesneeded;
 	int		       pagesreserved;
 	bus_dma_tag_t	       dmat;
 	struct memdesc	       mem;
 	bus_dmamap_callback_t *callback;
 	void		      *callback_arg;
 	STAILQ_ENTRY(bus_dmamap) links;
 	u_int			flags;
 #define	DMAMAP_COULD_BOUNCE	(1 << 0)
 #define	DMAMAP_FROM_DMAMEM	(1 << 1)
 	int			sync_count;
 	struct sync_list	slist[];
 };
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
 static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
 static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int commit);
 static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_offset_t vaddr, bus_addr_t addr, bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
 static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     pmap_t pmap, void *buf, bus_size_t buflen, int flags);
 static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags);
 static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int flags);
 
 /*
  * Allocate a device specific dma_tag.
  */
 static int
 bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	bus_dma_tag_t newtag;
 	int error;
 
 	*dmat = NULL;
 	error = common_bus_dma_tag_create(parent != NULL ? &parent->common :
 	    NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg,
 	    maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
 	    sizeof (struct bus_dma_tag), (void **)&newtag);
 	if (error != 0)
 		return (error);
 
 	newtag->common.impl = &bus_dma_bounce_impl;
 	newtag->map_count = 0;
 	newtag->segments = NULL;
 
 	if ((flags & BUS_DMA_COHERENT) != 0)
 		newtag->bounce_flags |= BF_COHERENT;
 
 	if (parent != NULL) {
 		if ((newtag->common.filter != NULL ||
 		    (parent->bounce_flags & BF_COULD_BOUNCE) != 0))
 			newtag->bounce_flags |= BF_COULD_BOUNCE;
 
 		/* Copy some flags from the parent */
 		newtag->bounce_flags |= parent->bounce_flags & BF_COHERENT;
 	}
 
 	if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) ||
 	    newtag->common.alignment > 1)
 		newtag->bounce_flags |= BF_COULD_BOUNCE;
 
 	if (((newtag->bounce_flags & BF_COULD_BOUNCE) != 0) &&
 	    (flags & BUS_DMA_ALLOCNOW) != 0) {
 		struct bounce_zone *bz;
 
 		/* Must bounce */
 		if ((error = alloc_bounce_zone(newtag)) != 0) {
 			free(newtag, M_DEVBUF);
 			return (error);
 		}
 		bz = newtag->bounce_zone;
 
 		if (ptoa(bz->total_bpages) < maxsize) {
 			int pages;
 
 			pages = atop(maxsize) - bz->total_bpages;
 
 			/* Add pages to our bounce pool */
 			if (alloc_bounce_pages(newtag, pages) < pages)
 				error = ENOMEM;
 		}
 		/* Performed initial allocation */
 		newtag->bounce_flags |= BF_MIN_ALLOC_COMP;
 	} else
 		error = 0;
 
 	if (error != 0)
 		free(newtag, M_DEVBUF);
 	else
 		*dmat = newtag;
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
 	    error);
 	return (error);
 }
 
 static int
 bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat)
 {
 	bus_dma_tag_t dmat_copy, parent;
 	int error;
 
 	error = 0;
 	dmat_copy = dmat;
 
 	if (dmat != NULL) {
 		if (dmat->map_count != 0) {
 			error = EBUSY;
 			goto out;
 		}
 		while (dmat != NULL) {
 			parent = (bus_dma_tag_t)dmat->common.parent;
 			atomic_subtract_int(&dmat->common.ref_count, 1);
 			if (dmat->common.ref_count == 0) {
 				if (dmat->segments != NULL)
 					free(dmat->segments, M_DEVBUF);
 				free(dmat, M_DEVBUF);
 				/*
 				 * Last reference count, so
 				 * release our reference
 				 * count on our parent.
 				 */
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 out:
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
 	return (error);
 }
 
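 /*
  * Allocate a bus_dmamap together with its trailing sync_list array, which
  * is sized by the tag's nsegments.
  */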
 static bus_dmamap_t
 alloc_dmamap(bus_dma_tag_t dmat, int flags)
 {
 	u_long mapsize;
 	bus_dmamap_t map;
 
 	mapsize = sizeof(*map);
 	mapsize += sizeof(struct sync_list) * dmat->common.nsegments;
 	map = malloc(mapsize, M_DEVBUF, flags | M_ZERO);
 	if (map == NULL)
 		return (NULL);
 
 	/* Initialize the new map */
 	STAILQ_INIT(&map->bpages);
 
 	return (map);
 }
 
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static int
 bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	struct bounce_zone *bz;
 	int error, maxpages, pages;
 
 	error = 0;
 
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc(
 		    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
 		    M_DEVBUF, M_NOWAIT);
 		if (dmat->segments == NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 
 	*mapp = alloc_dmamap(dmat, M_NOWAIT);
 	if (*mapp == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 		    __func__, dmat, ENOMEM);
 		return (ENOMEM);
 	}
 
 	/*
 	 * Bouncing might be required if the driver asks for an active
 	 * exclusion region, a data alignment that is stricter than 1, and/or
 	 * an active address boundary.
 	 */
 	if (dmat->bounce_flags & BF_COULD_BOUNCE) {
 		/* Must bounce */
 		if (dmat->bounce_zone == NULL) {
 			if ((error = alloc_bounce_zone(dmat)) != 0) {
 				free(*mapp, M_DEVBUF);
 				return (error);
 			}
 		}
 		bz = dmat->bounce_zone;
 
 		(*mapp)->flags = DMAMAP_COULD_BOUNCE;
 
 		/*
 		 * Attempt to add pages to our pool on a per-instance
 		 * basis up to a sane limit.
 		 */
 		if (dmat->common.alignment > 1)
 			maxpages = MAX_BPAGES;
 		else
 			maxpages = MIN(MAX_BPAGES, Maxmem -
 			    atop(dmat->common.lowaddr));
 		if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0 ||
 		    (bz->map_count > 0 && bz->total_bpages < maxpages)) {
 			pages = MAX(atop(dmat->common.maxsize), 1);
 			pages = MIN(maxpages - bz->total_bpages, pages);
 			pages = MAX(pages, 1);
 			if (alloc_bounce_pages(dmat, pages) < pages)
 				error = ENOMEM;
 			if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP)
 			    == 0) {
 				if (error == 0) {
 					dmat->bounce_flags |=
 					    BF_MIN_ALLOC_COMP;
 				}
 			} else
 				error = 0;
 		}
 		bz->map_count++;
 	}
 	if (error == 0)
 		dmat->map_count++;
 	else
 		free(*mapp, M_DEVBUF);
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->common.flags, error);
 	return (error);
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static int
 bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 
 	/* Check we are destroying the correct map type */
 	if ((map->flags & DMAMAP_FROM_DMAMEM) != 0)
 		panic("bounce_bus_dmamap_destroy: Invalid map freed\n");
 
 	if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY);
 		return (EBUSY);
 	}
 	if (dmat->bounce_zone) {
 		KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0,
 		    ("%s: Bounce zone when cannot bounce", __func__));
 		dmat->bounce_zone->map_count--;
 	}
 	free(map, M_DEVBUF);
 	dmat->map_count--;
 	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
 	return (0);
 }
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into
  * bus device space based on the constraints listed in the dma tag.
  * A dmamap for use with dmamap_load is also allocated.
  */
 static int
 bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	/*
 	 * XXX ARM64TODO:
 	 * This bus_dma implementation requires an IO-Coherent architecture.
 	 * If IO-Coherency is not guaranteed, the BUS_DMA_COHERENT flag has
 	 * to be implemented using non-cacheable memory.
 	 */
 
 	vm_memattr_t attr;
 	int mflags;
 
 	if (flags & BUS_DMA_NOWAIT)
 		mflags = M_NOWAIT;
 	else
 		mflags = M_WAITOK;
 
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc(
 		    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
 		    M_DEVBUF, mflags);
 		if (dmat->segments == NULL) {
 			CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 			    __func__, dmat, dmat->common.flags, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
 	if (flags & BUS_DMA_NOCACHE)
 		attr = VM_MEMATTR_UNCACHEABLE;
 	else if ((flags & BUS_DMA_COHERENT) != 0 &&
 	    (dmat->bounce_flags & BF_COHERENT) == 0)
 		/*
 		 * If we have a non-coherent tag, and are trying to allocate
 		 * a coherent block of memory it needs to be uncached.
 		 */
 		attr = VM_MEMATTR_UNCACHEABLE;
 	else
 		attr = VM_MEMATTR_DEFAULT;
 
 	/*
 	 * Create the map, but don't set the DMAMAP_COULD_BOUNCE flag, as
 	 * this allocation should never bounce.
 	 */
 	*mapp = alloc_dmamap(dmat, mflags);
 	if (*mapp == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->common.flags, ENOMEM);
 		return (ENOMEM);
 	}
 	(*mapp)->flags = DMAMAP_FROM_DMAMEM;
 
 	/*
 	 * Allocate the buffer from the malloc(9) allocator if...
 	 *  - It's small enough to fit into a single power of two sized bucket.
 	 *  - The alignment is less than or equal to the maximum size
 	 *  - The low address requirement is fulfilled.
 	 * else allocate non-contiguous pages if...
 	 *  - The page count that could get allocated doesn't exceed
 	 *    nsegments, even when the maximum segment size is less
 	 *    than PAGE_SIZE.
 	 *  - The alignment constraint isn't larger than a page boundary.
 	 *  - There are no boundary-crossing constraints.
 	 * else allocate a block of contiguous pages because one or more of the
 	 * constraints is something that only the contig allocator can fulfill.
 	 *
 	 * NOTE: The (dmat->common.alignment <= dmat->common.maxsize) check
 	 * below is just a quick hack. The exact alignment guarantees
 	 * of malloc(9) need to be nailed down, and the code below
 	 * should be rewritten to take that into account.
 	 *
 	 * In the meantime warn the user if malloc gets it wrong.
 	 */
 	if ((dmat->common.maxsize <= PAGE_SIZE) &&
 	   (dmat->common.alignment <= dmat->common.maxsize) &&
 	    dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
 	    attr == VM_MEMATTR_DEFAULT) {
 		*vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags);
 	} else if (dmat->common.nsegments >=
 	    howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) &&
 	    dmat->common.alignment <= PAGE_SIZE &&
 	    (dmat->common.boundary % PAGE_SIZE) == 0) {
 		/* Page-based multi-segment allocations allowed */
 		*vaddr = (void *)kmem_alloc_attr(dmat->common.maxsize, mflags,
 		    0ul, dmat->common.lowaddr, attr);
 		dmat->bounce_flags |= BF_KMEM_ALLOC;
 	} else {
 		*vaddr = (void *)kmem_alloc_contig(dmat->common.maxsize, mflags,
 		    0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ?
 		    dmat->common.alignment : 1ul, dmat->common.boundary, attr);
 		dmat->bounce_flags |= BF_KMEM_ALLOC;
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->common.flags, ENOMEM);
 		free(*mapp, M_DEVBUF);
 		return (ENOMEM);
 	} else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) {
 		printf("bus_dmamem_alloc failed to align memory properly.\n");
 	}
 	dmat->map_count++;
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->common.flags, 0);
 	return (0);
 }
 
 /*
  * Free a piece of memory and its associated dmamap that were allocated
  * via bus_dmamem_alloc.  Make the same free()/kmem_free() choice that
  * was made in bounce_bus_dmamem_alloc().
  */
 static void
 bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 
 	/*
 	 * Check the map came from bounce_bus_dmamem_alloc.  The BF_KMEM_ALLOC
 	 * flag is cleared if malloc() was used and set if kmem_alloc_attr()
 	 * or kmem_alloc_contig() was used.
 	 */
 	if ((map->flags & DMAMAP_FROM_DMAMEM) == 0)
 		panic("bus_dmamem_free: Invalid map freed\n");
 	if ((dmat->bounce_flags & BF_KMEM_ALLOC) == 0)
 		free(vaddr, M_DEVBUF);
 	else
-		kmem_free(kernel_arena, (vm_offset_t)vaddr,
-		    dmat->common.maxsize);
+		kmem_free((vm_offset_t)vaddr, dmat->common.maxsize);
 	free(map, M_DEVBUF);
 	dmat->map_count--;
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat,
 	    dmat->bounce_flags);
 }
 
 static void
 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags)
 {
 	bus_addr_t curaddr;
 	bus_size_t sgsize;
 
 	if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) {
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		curaddr = buf;
 		while (buflen != 0) {
 			sgsize = MIN(buflen, dmat->common.maxsegsz);
 			if (bus_dma_run_filter(&dmat->common, curaddr)) {
 				sgsize = MIN(sgsize,
 				    PAGE_SIZE - (curaddr & PAGE_MASK));
 				map->pagesneeded++;
 			}
 			curaddr += sgsize;
 			buflen -= sgsize;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
     void *buf, bus_size_t buflen, int flags)
 {
 	vm_offset_t vaddr;
 	vm_offset_t vendaddr;
 	bus_addr_t paddr;
 	bus_size_t sg_len;
 
 	if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) {
 		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
 		    "alignment= %d", dmat->common.lowaddr,
 		    ptoa((vm_paddr_t)Maxmem),
 		    dmat->common.boundary, dmat->common.alignment);
 		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map,
 		    map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		vaddr = (vm_offset_t)buf;
 		vendaddr = (vm_offset_t)buf + buflen;
 
 		while (vaddr < vendaddr) {
 			sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK);
 			if (pmap == kernel_pmap)
 				paddr = pmap_kextract(vaddr);
 			else
 				paddr = pmap_extract(pmap, vaddr);
 			if (bus_dma_run_filter(&dmat->common, paddr) != 0) {
 				sg_len = roundup2(sg_len,
 				    dmat->common.alignment);
 				map->pagesneeded++;
 			}
 			vaddr += sg_len;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static int
 _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
 {
 
 	/* Reserve Necessary Bounce Pages */
 	mtx_lock(&bounce_lock);
 	if (flags & BUS_DMA_NOWAIT) {
 		if (reserve_bounce_pages(dmat, map, 0) != 0) {
 			mtx_unlock(&bounce_lock);
 			return (ENOMEM);
 		}
 	} else {
 		if (reserve_bounce_pages(dmat, map, 1) != 0) {
 			/* Queue us for resources */
 			STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links);
 			mtx_unlock(&bounce_lock);
 			return (EINPROGRESS);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 
 	return (0);
 }
 
 /*
  * Add a single contiguous physical range to the segment list.
  */
 static int
 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
     bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t baddr, bmask;
 	int seg;
 
 	/*
 	 * Make sure we don't cross any boundaries.
 	 */
 	bmask = ~(dmat->common.boundary - 1);
 	if (dmat->common.boundary > 0) {
 		baddr = (curaddr + dmat->common.boundary) & bmask;
 		if (sgsize > (baddr - curaddr))
 			sgsize = (baddr - curaddr);
 	}
 
 	/*
 	 * Insert chunk into a segment, coalescing with
 	 * previous segment if possible.
 	 */
 	seg = *segp;
 	if (seg == -1) {
 		seg = 0;
 		segs[seg].ds_addr = curaddr;
 		segs[seg].ds_len = sgsize;
 	} else {
 		if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
 		    (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz &&
 		    (dmat->common.boundary == 0 ||
 		     (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
 			segs[seg].ds_len += sgsize;
 		else {
 			if (++seg >= dmat->common.nsegments)
 				return (0);
 			segs[seg].ds_addr = curaddr;
 			segs[seg].ds_len = sgsize;
 		}
 	}
 	*segp = seg;
 	return (sgsize);
 }
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 static int
 bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct sync_list *sl;
 	bus_size_t sgsize;
 	bus_addr_t curaddr, sl_end;
 	int error;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	sl = map->slist + map->sync_count - 1;
 	sl_end = 0;
 
 	while (buflen > 0) {
 		curaddr = buf;
 		sgsize = MIN(buflen, dmat->common.maxsegsz);
 		if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, curaddr)) {
 			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
 			curaddr = add_bounce_page(dmat, map, 0, curaddr,
 			    sgsize);
 		} else if ((dmat->bounce_flags & BF_COHERENT) == 0) {
 			if (map->sync_count > 0)
 				sl_end = sl->paddr + sl->datacount;
 
 			if (map->sync_count == 0 || curaddr != sl_end) {
 				if (++map->sync_count > dmat->common.nsegments)
 					break;
 				sl++;
 				sl->vaddr = 0;
 				sl->paddr = curaddr;
 				sl->datacount = sgsize;
 				sl->pages = PHYS_TO_VM_PAGE(curaddr);
 				KASSERT(sl->pages != NULL,
 				    ("%s: page at PA:0x%08lx is not in "
 				    "vm_page_array", __func__, curaddr));
 			} else
 				sl->datacount += sgsize;
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		buf += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 static int
 bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct sync_list *sl;
 	bus_size_t sgsize, max_sgsize;
 	bus_addr_t curaddr, sl_pend;
 	vm_offset_t kvaddr, vaddr, sl_vend;
 	int error;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	sl = map->slist + map->sync_count - 1;
 	vaddr = (vm_offset_t)buf;
 	sl_pend = 0;
 	sl_vend = 0;
 
 	while (buflen > 0) {
 		/*
 		 * Get the physical address for this segment.
 		 */
 		if (pmap == kernel_pmap) {
 			curaddr = pmap_kextract(vaddr);
 			kvaddr = vaddr;
 		} else {
 			curaddr = pmap_extract(pmap, vaddr);
 			kvaddr = 0;
 		}
 
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		max_sgsize = MIN(buflen, dmat->common.maxsegsz);
 		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 		if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, curaddr)) {
 			sgsize = roundup2(sgsize, dmat->common.alignment);
 			sgsize = MIN(sgsize, max_sgsize);
 			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
 			    sgsize);
 		} else if ((dmat->bounce_flags & BF_COHERENT) == 0) {
 			sgsize = MIN(sgsize, max_sgsize);
 			if (map->sync_count > 0) {
 				sl_pend = sl->paddr + sl->datacount;
 				sl_vend = sl->vaddr + sl->datacount;
 			}
 
 			if (map->sync_count == 0 ||
 			    (kvaddr != 0 && kvaddr != sl_vend) ||
 			    (curaddr != sl_pend)) {
 
 				if (++map->sync_count > dmat->common.nsegments)
 					goto cleanup;
 				sl++;
 				sl->vaddr = kvaddr;
 				sl->paddr = curaddr;
 				if (kvaddr != 0) {
 					sl->pages = NULL;
 				} else {
 					sl->pages = PHYS_TO_VM_PAGE(curaddr);
 					KASSERT(sl->pages != NULL,
 					    ("%s: page at PA:0x%08lx is not "
 					    "in vm_page_array", __func__,
 					    curaddr));
 				}
 				sl->datacount = sgsize;
 			} else
 				sl->datacount += sgsize;
 		} else {
 			sgsize = MIN(sgsize, max_sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		vaddr += sgsize;
 		buflen -= sgsize;
 	}
 
 cleanup:
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 static void
 bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
 {
 
 	if ((map->flags & DMAMAP_COULD_BOUNCE) == 0)
 		return;
 	map->mem = *mem;
 	map->dmat = dmat;
 	map->callback = callback;
 	map->callback_arg = callback_arg;
 }
 
 static bus_dma_segment_t *
 bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	if (segs == NULL)
 		segs = dmat->segments;
 	return (segs);
 }
 
 /*
  * Release the mapping held by map.
  */
 static void
 bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bounce_page *bpage;
 
 	while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		STAILQ_REMOVE_HEAD(&map->bpages, links);
 		free_bounce_page(dmat, bpage);
 	}
 
 	map->sync_count = 0;
 }
 
 static void
 dma_preread_safe(vm_offset_t va, vm_size_t size)
 {
 	/*
 	 * Write back any partial cachelines immediately before and
 	 * after the DMA region.
 	 */
 	if (va & (dcache_line_size - 1))
 		cpu_dcache_wb_range(va, 1);
 	if ((va + size) & (dcache_line_size - 1))
 		cpu_dcache_wb_range(va + size, 1);
 
 	cpu_dcache_inv_range(va, size);
 }
 
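 /*
  * Perform cache maintenance for a single sync_list entry.  Entries without
  * a kernel virtual address are mapped one page at a time with
  * pmap_quick_enter_page() before the dcache operation is issued.
  */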
 static void
 dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
 {
 	uint32_t len, offset;
 	vm_page_t m;
 	vm_paddr_t pa;
 	vm_offset_t va, tempva;
 	bus_size_t size;
 
 	offset = sl->paddr & PAGE_MASK;
 	m = sl->pages;
 	size = sl->datacount;
 	pa = sl->paddr;
 
 	for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
 		tempva = 0;
 		if (sl->vaddr == 0) {
 			len = min(PAGE_SIZE - offset, size);
 			tempva = pmap_quick_enter_page(m);
 			va = tempva | offset;
 			KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
 			    ("unexpected vm_page_t phys: 0x%16lx != 0x%16lx",
 			    VM_PAGE_TO_PHYS(m) | offset, pa));
 		} else {
 			len = sl->datacount;
 			va = sl->vaddr;
 		}
 
 		switch (op) {
 		case BUS_DMASYNC_PREWRITE:
 		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
 			cpu_dcache_wb_range(va, len);
 			break;
 		case BUS_DMASYNC_PREREAD:
 			/*
 			 * An mbuf may start in the middle of a cacheline. There
 			 * will be no cpu writes to the beginning of that line
 			 * (which contains the mbuf header) while dma is in
 			 * progress.  Handle that case by doing a writeback of
 			 * just the first cacheline before invalidating the
 			 * overall buffer.  Any mbuf in a chain may have this
 			 * misalignment.  Buffers which are not mbufs bounce if
 			 * they are not aligned to a cacheline.
 			 */
 			dma_preread_safe(va, len);
 			break;
 		case BUS_DMASYNC_POSTREAD:
 		case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
 			cpu_dcache_inv_range(va, len);
 			break;
 		default:
 			panic("unsupported combination of sync operations: "
                               "0x%08x\n", op);
 		}
 
 		if (tempva != 0)
 			pmap_quick_remove_page(tempva);
 	}
 }
 
 static void
 bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dmasync_op_t op)
 {
 	struct bounce_page *bpage;
 	struct sync_list *sl, *end;
 	vm_offset_t datavaddr, tempvaddr;
 
 	if (op == BUS_DMASYNC_POSTWRITE)
 		return;
 
 	if ((op & BUS_DMASYNC_POSTREAD) != 0) {
 		/*
 		 * Wait for any DMA operations to complete before the bcopy.
 		 */
 		dsb(sy);
 	}
 
 	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 		    "performing bounce", __func__, dmat, dmat->common.flags,
 		    op);
 
 		if ((op & BUS_DMASYNC_PREWRITE) != 0) {
 			while (bpage != NULL) {
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr | bpage->dataoffs;
 				}
 
 				bcopy((void *)datavaddr,
 				    (void *)bpage->vaddr, bpage->datacount);
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				if ((dmat->bounce_flags & BF_COHERENT) == 0)
 					cpu_dcache_wb_range(bpage->vaddr,
 					    bpage->datacount);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			dmat->bounce_zone->total_bounced++;
 		} else if ((op & BUS_DMASYNC_PREREAD) != 0) {
 			while (bpage != NULL) {
 				if ((dmat->bounce_flags & BF_COHERENT) == 0)
 					cpu_dcache_wbinv_range(bpage->vaddr,
 					    bpage->datacount);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 		}
 
 		if ((op & BUS_DMASYNC_POSTREAD) != 0) {
 			while (bpage != NULL) {
 				if ((dmat->bounce_flags & BF_COHERENT) == 0)
 					cpu_dcache_inv_range(bpage->vaddr,
 					    bpage->datacount);
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr | bpage->dataoffs;
 				}
 
 				bcopy((void *)bpage->vaddr,
 				    (void *)datavaddr, bpage->datacount);
 
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			dmat->bounce_zone->total_bounced++;
 		}
 	}
 
 	/*
 	 * Cache maintenance for normal (non-COHERENT non-bounce) buffers.
 	 */
 	if (map->sync_count != 0) {
 		sl = &map->slist[0];
 		end = &map->slist[map->sync_count];
 		CTR3(KTR_BUSDMA, "%s: tag %p op 0x%x "
 		    "performing sync", __func__, dmat, op);
 
 		for ( ; sl != end; ++sl)
 			dma_dcache_sync(sl, op);
 	}
 
 	if ((op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0) {
 		/*
 		 * Wait for the bcopy to complete before any DMA operations.
 		 */
 		dsb(sy);
 	}
 }
 
 static void
 init_bounce_pages(void *dummy __unused)
 {
 
 	total_bpages = 0;
 	STAILQ_INIT(&bounce_zone_list);
 	STAILQ_INIT(&bounce_map_waitinglist);
 	STAILQ_INIT(&bounce_map_callbacklist);
 	mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
 }
 SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
 
 	return (&bz->sysctl_tree);
 }
 
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
 
 	return (bz->sysctl_tree_top);
 }
 
 static int
 alloc_bounce_zone(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *bz;
 
 	/* Check to see if we already have a suitable zone */
 	STAILQ_FOREACH(bz, &bounce_zone_list, links) {
 		if ((dmat->common.alignment <= bz->alignment) &&
 		    (dmat->common.lowaddr >= bz->lowaddr)) {
 			dmat->bounce_zone = bz;
 			return (0);
 		}
 	}
 
 	if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF,
 	    M_NOWAIT | M_ZERO)) == NULL)
 		return (ENOMEM);
 
 	STAILQ_INIT(&bz->bounce_page_list);
 	bz->free_bpages = 0;
 	bz->reserved_bpages = 0;
 	bz->active_bpages = 0;
 	bz->lowaddr = dmat->common.lowaddr;
 	bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE);
 	bz->map_count = 0;
 	snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount);
 	busdma_zonecount++;
 	snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr);
 	STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links);
 	dmat->bounce_zone = bz;
 
 	sysctl_ctx_init(&bz->sysctl_tree);
 	bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree,
 	    SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid,
 	    CTLFLAG_RD, 0, "");
 	if (bz->sysctl_tree_top == NULL) {
 		sysctl_ctx_free(&bz->sysctl_tree);
 		return (0);	/* XXX error code? */
 	}
 
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0,
 	    "Total bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0,
 	    "Free bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0,
 	    "Reserved bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0,
 	    "Active bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0,
 	    "Total bounce requests");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0,
 	    "Total bounce requests that were deferred");
 	SYSCTL_ADD_STRING(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, "");
 	SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "alignment", CTLFLAG_RD, &bz->alignment, "");
 
 	return (0);
 }
 
 static int
 alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
 {
 	struct bounce_zone *bz;
 	int count;
 
 	bz = dmat->bounce_zone;
 	count = 0;
 	while (numpages > 0) {
 		struct bounce_page *bpage;
 
 		bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF,
 						     M_NOWAIT | M_ZERO);
 
 		if (bpage == NULL)
 			break;
 		bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF,
 		    M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0);
 		if (bpage->vaddr == 0) {
 			free(bpage, M_DEVBUF);
 			break;
 		}
 		bpage->busaddr = pmap_kextract(bpage->vaddr);
 		mtx_lock(&bounce_lock);
 		STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links);
 		total_bpages++;
 		bz->total_bpages++;
 		bz->free_bpages++;
 		mtx_unlock(&bounce_lock);
 		count++;
 		numpages--;
 	}
 	return (count);
 }
 
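 /*
  * Reserve free bounce pages for a map.  When commit is zero nothing is
  * reserved unless the whole request can be satisfied; the return value is
  * the number of pages still missing (zero on success).
  */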
 static int
 reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
 {
 	struct bounce_zone *bz;
 	int pages;
 
 	mtx_assert(&bounce_lock, MA_OWNED);
 	bz = dmat->bounce_zone;
 	pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved);
 	if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages))
 		return (map->pagesneeded - (map->pagesreserved + pages));
 	bz->free_bpages -= pages;
 	bz->reserved_bpages += pages;
 	map->pagesreserved += pages;
 	pages = map->pagesneeded - map->pagesreserved;
 
 	return (pages);
 }
 
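 /*
  * Assign a previously reserved bounce page to the given buffer chunk and
  * return the bus address the caller should use in the segment list.  With
  * BUS_DMA_KEEP_PG_OFFSET the original page offset is preserved.
  */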
 static bus_addr_t
 add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
 		bus_addr_t addr, bus_size_t size)
 {
 	struct bounce_zone *bz;
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
 	KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0,
 	    ("add_bounce_page: bad map %p", map));
 
 	bz = dmat->bounce_zone;
 	if (map->pagesneeded == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesneeded--;
 
 	if (map->pagesreserved == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesreserved--;
 
 	mtx_lock(&bounce_lock);
 	bpage = STAILQ_FIRST(&bz->bounce_page_list);
 	if (bpage == NULL)
 		panic("add_bounce_page: free page list is empty");
 
 	STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
 	bz->reserved_bpages--;
 	bz->active_bpages++;
 	mtx_unlock(&bounce_lock);
 
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/* Page offset needs to be preserved. */
 		bpage->vaddr |= addr & PAGE_MASK;
 		bpage->busaddr |= addr & PAGE_MASK;
 	}
 	bpage->datavaddr = vaddr;
 	bpage->datapage = PHYS_TO_VM_PAGE(addr);
 	bpage->dataoffs = addr & PAGE_MASK;
 	bpage->datacount = size;
 	STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
 	return (bpage->busaddr);
 }
 
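 /*
  * Return a bounce page to its zone.  If a deferred map on the waiting list
  * can now reserve all of its pages, queue it for busdma_swi() to finish
  * the load.
  */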
 static void
 free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
 {
 	struct bus_dmamap *map;
 	struct bounce_zone *bz;
 
 	bz = dmat->bounce_zone;
 	bpage->datavaddr = 0;
 	bpage->datacount = 0;
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * Reset the bounce page to start at offset 0.  Other uses
 		 * of this bounce page may need to store a full page of
 		 * data and/or assume it starts on a page boundary.
 		 */
 		bpage->vaddr &= ~PAGE_MASK;
 		bpage->busaddr &= ~PAGE_MASK;
 	}
 
 	mtx_lock(&bounce_lock);
 	STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links);
 	bz->free_bpages++;
 	bz->active_bpages--;
 	if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
 		if (reserve_bounce_pages(map->dmat, map, 1) == 0) {
 			STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
 			STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
 			    map, links);
 			busdma_swi_pending = 1;
 			bz->total_deferred++;
 			swi_sched(vm_ih, 0);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 }
 
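 /*
  * Software interrupt handler: finish the deferred bus_dmamap_load()
  * requests queued by free_bounce_page() once bounce pages become
  * available.
  */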
 void
 busdma_swi(void)
 {
 	bus_dma_tag_t dmat;
 	struct bus_dmamap *map;
 
 	mtx_lock(&bounce_lock);
 	while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
 		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
 		mtx_unlock(&bounce_lock);
 		dmat = map->dmat;
 		(dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK);
 		bus_dmamap_load_mem(map->dmat, map, &map->mem,
 		    map->callback, map->callback_arg, BUS_DMA_WAITOK);
 		(dmat->common.lockfunc)(dmat->common.lockfuncarg,
 		    BUS_DMA_UNLOCK);
 		mtx_lock(&bounce_lock);
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 struct bus_dma_impl bus_dma_bounce_impl = {
 	.tag_create = bounce_bus_dma_tag_create,
 	.tag_destroy = bounce_bus_dma_tag_destroy,
 	.map_create = bounce_bus_dmamap_create,
 	.map_destroy = bounce_bus_dmamap_destroy,
 	.mem_alloc = bounce_bus_dmamem_alloc,
 	.mem_free = bounce_bus_dmamem_free,
 	.load_phys = bounce_bus_dmamap_load_phys,
 	.load_buffer = bounce_bus_dmamap_load_buffer,
 	.load_ma = bus_dmamap_load_ma_triv,
 	.map_waitok = bounce_bus_dmamap_waitok,
 	.map_complete = bounce_bus_dmamap_complete,
 	.map_unload = bounce_bus_dmamap_unload,
 	.map_sync = bounce_bus_dmamap_sync
 };
Index: head/sys/arm64/arm64/mp_machdep.c
===================================================================
--- head/sys/arm64/arm64/mp_machdep.c	(revision 338317)
+++ head/sys/arm64/arm64/mp_machdep.c	(revision 338318)
@@ -1,895 +1,894 @@
 /*-
  * Copyright (c) 2015-2016 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Andrew Turner under
  * sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include "opt_acpi.h"
 #include "opt_kstack_pages.h"
 #include "opt_platform.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 
 #include <machine/machdep.h>
 #include <machine/intr.h>
 #include <machine/smp.h>
 #ifdef VFP
 #include <machine/vfp.h>
 #endif
 
 #ifdef DEV_ACPI
 #include <contrib/dev/acpica/include/acpi.h>
 #include <dev/acpica/acpivar.h>
 #endif
 
 #ifdef FDT
 #include <dev/ofw/openfirm.h>
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
 #include <dev/ofw/ofw_cpu.h>
 #endif
 
 #include <dev/psci/psci.h>
 
 #include "pic_if.h"
 
 #define	MP_QUIRK_CPULIST	0x01	/* The list of cpus may be wrong, */
 					/* don't panic if one fails to start */
 static uint32_t mp_quirks;
 
 #ifdef FDT
 static struct {
 	const char *compat;
 	uint32_t quirks;
 } fdt_quirks[] = {
 	{ "arm,foundation-aarch64",	MP_QUIRK_CPULIST },
 	{ "arm,fvp-base",		MP_QUIRK_CPULIST },
 	/* This is incorrect in some DTS files */
 	{ "arm,vfp-base",		MP_QUIRK_CPULIST },
 	{ NULL, 0 },
 };
 #endif
 
 typedef void intr_ipi_send_t(void *, cpuset_t, u_int);
 typedef void intr_ipi_handler_t(void *);
 
 #define INTR_IPI_NAMELEN	(MAXCOMLEN + 1)
 struct intr_ipi {
 	intr_ipi_handler_t *	ii_handler;
 	void *			ii_handler_arg;
 	intr_ipi_send_t *	ii_send;
 	void *			ii_send_arg;
 	char			ii_name[INTR_IPI_NAMELEN];
 	u_long *		ii_count;
 };
 
 static struct intr_ipi ipi_sources[INTR_IPI_COUNT];
 
 static struct intr_ipi *intr_ipi_lookup(u_int);
 static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *,
     void *);
 
 extern struct pcpu __pcpu[];
 
 static device_identify_t arm64_cpu_identify;
 static device_probe_t arm64_cpu_probe;
 static device_attach_t arm64_cpu_attach;
 
 static void ipi_ast(void *);
 static void ipi_hardclock(void *);
 static void ipi_preempt(void *);
 static void ipi_rendezvous(void *);
 static void ipi_stop(void *);
 
 struct mtx ap_boot_mtx;
 struct pcb stoppcbs[MAXCPU];
 
 static device_t cpu_list[MAXCPU];
 
 /*
  * Not all systems boot from the first CPU in the device tree. To work around
  * this we need to find which CPU we have booted from, so when we later
  * enable the secondary CPUs we skip this one.
  */
 static int cpu0 = -1;
 
 void mpentry(unsigned long cpuid);
 void init_secondary(uint64_t);
 
 uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 volatile int aps_ready = 0;
 
 /* Temporary variables for init_secondary()  */
 void *dpcpu[MAXCPU - 1];
 
 static device_method_t arm64_cpu_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	arm64_cpu_identify),
 	DEVMETHOD(device_probe,		arm64_cpu_probe),
 	DEVMETHOD(device_attach,	arm64_cpu_attach),
 
 	DEVMETHOD_END
 };
 
 static devclass_t arm64_cpu_devclass;
 static driver_t arm64_cpu_driver = {
 	"arm64_cpu",
 	arm64_cpu_methods,
 	0
 };
 
 DRIVER_MODULE(arm64_cpu, cpu, arm64_cpu_driver, arm64_cpu_devclass, 0, 0);
 
 static void
 arm64_cpu_identify(driver_t *driver, device_t parent)
 {
 
 	if (device_find_child(parent, "arm64_cpu", -1) != NULL)
 		return;
 	if (BUS_ADD_CHILD(parent, 0, "arm64_cpu", -1) == NULL)
 		device_printf(parent, "add child failed\n");
 }
 
 static int
 arm64_cpu_probe(device_t dev)
 {
 	u_int cpuid;
 
 	cpuid = device_get_unit(dev);
 	if (cpuid >= MAXCPU || cpuid > mp_maxid)
 		return (EINVAL);
 
 	device_quiet(dev);
 	return (0);
 }
 
 static int
 arm64_cpu_attach(device_t dev)
 {
 	const uint32_t *reg;
 	size_t reg_size;
 	u_int cpuid;
 	int i;
 
 	cpuid = device_get_unit(dev);
 
 	if (cpuid >= MAXCPU || cpuid > mp_maxid)
 		return (EINVAL);
 	KASSERT(cpu_list[cpuid] == NULL, ("Already have cpu %u", cpuid));
 
 	reg = cpu_get_cpuid(dev, &reg_size);
 	if (reg == NULL)
 		return (EINVAL);
 
 	if (bootverbose) {
 		device_printf(dev, "register <");
 		for (i = 0; i < reg_size; i++)
 			printf("%s%x", (i == 0) ? "" : " ", reg[i]);
 		printf(">\n");
 	}
 
 	/* Set the device to start it later */
 	cpu_list[cpuid] = dev;
 
 	return (0);
 }
 
 static void
 release_aps(void *dummy __unused)
 {
 	int i, started;
 
 	/* Only release CPUs if they exist */
 	if (mp_ncpus == 1)
 		return;
 
 	intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL);
 	intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL);
 	intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL);
 	intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL);
 	intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL);
 	intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL);
 
 	atomic_store_rel_int(&aps_ready, 1);
 	/* Wake up the other CPUs */
 	__asm __volatile(
 	    "dsb ishst	\n"
 	    "sev	\n"
 	    ::: "memory");
 
 	printf("Release APs...");
 
 	started = 0;
 	for (i = 0; i < 2000; i++) {
 		if (smp_started) {
 			printf("done\n");
 			return;
 		}
 		/*
 		 * Don't time out while we are making progress. Some large
 		 * systems can take a while to start all CPUs.
 		 */
 		if (smp_cpus > started) {
 			i = 0;
 			started = smp_cpus;
 		}
 		DELAY(1000);
 	}
 
 	printf("APs not started\n");
 }
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
 void
 init_secondary(uint64_t cpu)
 {
 	struct pcpu *pcpup;
 
 	pcpup = &__pcpu[cpu];
 	/*
 	 * Set the pcpu pointer with a backup in tpidr_el1 to be
 	 * loaded when entering the kernel from userland.
 	 */
 	__asm __volatile(
 	    "mov x18, %0 \n"
 	    "msr tpidr_el1, %0" :: "r"(pcpup));
 
 	/* Spin until the BSP releases the APs */
 	while (!aps_ready)
 		__asm __volatile("wfe");
 
 	/* Initialize curthread */
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	pcpup->pc_curthread = pcpup->pc_idlethread;
 	pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb;
 
 	/*
 	 * Identify current CPU. This is necessary to setup
 	 * affinity registers and to provide support for
 	 * runtime chip identification.
 	 */
 	identify_cpu();
 	install_cpu_errata();
 
 	intr_pic_init_secondary();
 
 	/* Start per-CPU event timers. */
 	cpu_initclocks_ap();
 
 #ifdef VFP
 	vfp_init();
 #endif
 
 	dbg_init();
 	pan_enable();
 
 	/* Enable interrupts */
 	intr_enable();
 
 	mtx_lock_spin(&ap_boot_mtx);
 
 	atomic_add_rel_32(&smp_cpus, 1);
 
 	if (smp_cpus == mp_ncpus) {
 		/* Enable IPIs, TLB shootdown, freezes, etc. */
 		atomic_store_rel_int(&smp_started, 1);
 	}
 
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* Enter the scheduler */
 	sched_throw(NULL);
 
 	panic("scheduler returned us to init_secondary");
 	/* NOTREACHED */
 }
 
 /*
  *  Send IPI thru interrupt controller.
  */
 static void
 pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi)
 {
 
 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
 	PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi);
 }
 
 /*
  *  Setup IPI handler on interrupt controller.
  *
  *  Not SMP coherent.
  */
 static void
 intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand,
     void *arg)
 {
 	struct intr_irqsrc *isrc;
 	struct intr_ipi *ii;
 	int error;
 
 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
 	KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi));
 
 	error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc);
 	if (error != 0)
 		return;
 
 	isrc->isrc_handlers++;
 
 	ii = intr_ipi_lookup(ipi);
 	KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi));
 
 	ii->ii_handler = hand;
 	ii->ii_handler_arg = arg;
 	ii->ii_send = pic_ipi_send;
 	ii->ii_send_arg = isrc;
 	strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN);
 	ii->ii_count = intr_ipi_setup_counters(name);
 }
 
 static void
 intr_ipi_send(cpuset_t cpus, u_int ipi)
 {
 	struct intr_ipi *ii;
 
 	ii = intr_ipi_lookup(ipi);
 	if (ii->ii_count == NULL)
 		panic("%s: not setup IPI %u", __func__, ipi);
 
 	ii->ii_send(ii->ii_send_arg, cpus, ipi);
 }
 
 static void
 ipi_ast(void *dummy __unused)
 {
 
 	CTR0(KTR_SMP, "IPI_AST");
 }
 
 static void
 ipi_hardclock(void *dummy __unused)
 {
 
 	CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
 	hardclockintr();
 }
 
 static void
 ipi_preempt(void *dummy __unused)
 {
 	CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
 	sched_preempt(curthread);
 }
 
 static void
 ipi_rendezvous(void *dummy __unused)
 {
 
 	CTR0(KTR_SMP, "IPI_RENDEZVOUS");
 	smp_rendezvous_action();
 }
 
 static void
 ipi_stop(void *dummy __unused)
 {
 	u_int cpu;
 
 	CTR0(KTR_SMP, "IPI_STOP");
 
 	cpu = PCPU_GET(cpuid);
 	savectx(&stoppcbs[cpu]);
 
 	/* Indicate we are stopped */
 	CPU_SET_ATOMIC(cpu, &stopped_cpus);
 
 	/* Wait for restart */
 	while (!CPU_ISSET(cpu, &started_cpus))
 		cpu_spinwait();
 
 	CPU_CLR_ATOMIC(cpu, &started_cpus);
 	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
 	CTR0(KTR_SMP, "IPI_STOP (restart)");
 }
 
 struct cpu_group *
 cpu_topo(void)
 {
 
 	return (smp_topo_none());
 }
 
 /* Determine if we are running on an MP machine */
 int
 cpu_mp_probe(void)
 {
 
 	/* ARM64TODO: Read the u bit of mpidr_el1 to determine this */
 	return (1);
 }
 
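 /*
  * Start a single secondary CPU, identified by its MPIDR value, via PSCI.
  * Returns false only when the requested id is out of range; a PSCI failure
  * is reported and cleaned up but still counts as handled.
  */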
 static bool
 start_cpu(u_int id, uint64_t target_cpu)
 {
 	struct pcpu *pcpup;
 	vm_paddr_t pa;
 	u_int cpuid;
 	int err;
 
 	/* Check we are able to start this cpu */
 	if (id > mp_maxid)
 		return (false);
 
 	KASSERT(id < MAXCPU, ("Too many CPUs"));
 
 	/* We are already running on cpu 0 */
 	if (id == cpu0)
 		return (true);
 
 	/*
 	 * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other
 	 * CPUs ordered as they are likely grouped into clusters, so it can be
 	 * useful to keep that property, e.g. for the GICv3 driver to send
 	 * an IPI to all CPUs in the cluster.
 	 */
 	cpuid = id;
 	if (cpuid < cpu0)
 		cpuid += mp_maxid + 1;
 	cpuid -= cpu0;
 
 	pcpup = &__pcpu[cpuid];
 	pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
 
 	dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
 	dpcpu_init(dpcpu[cpuid - 1], cpuid);
 
 	printf("Starting CPU %u (%lx)\n", cpuid, target_cpu);
 	pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry);
 
 	err = psci_cpu_on(target_cpu, pa, cpuid);
 	if (err != PSCI_RETVAL_SUCCESS) {
 		/*
 		 * Panic here if INVARIANTS are enabled and PSCI failed to
 	 * start the requested CPU. psci_cpu_on returns PSCI_MISSING
 	 * when it is unable to start the given CPU.
 		 */
 		KASSERT(err == PSCI_MISSING ||
 		    (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST,
 		    ("Failed to start CPU %u (%lx)\n", id, target_cpu));
 
 		pcpu_destroy(pcpup);
-		kmem_free(kernel_arena, (vm_offset_t)dpcpu[cpuid - 1],
-		    DPCPU_SIZE);
+		kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE);
 		dpcpu[cpuid - 1] = NULL;
 		mp_ncpus--;
 
 		/* Notify the user that the CPU failed to start */
 		printf("Failed to start CPU %u (%lx)\n", id, target_cpu);
 	} else
 		CPU_SET(cpuid, &all_cpus);
 
 	return (true);
 }
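
The rotation in start_cpu() maps the boot CPU's hardware ID to logical CPU 0 while preserving the cyclic order of the remaining IDs. A minimal standalone sketch (not part of this revision), with made-up values for cpu0 and mp_maxid:

/*
 * Illustrative only: reproduces the arithmetic of the "Rotate the CPU IDs"
 * block above for a hypothetical 4-CPU system that booted on hardware ID 2.
 */
#include <stdio.h>

static unsigned int
rotate_cpuid(unsigned int id, unsigned int cpu0, unsigned int mp_maxid)
{
	unsigned int cpuid = id;

	if (cpuid < cpu0)
		cpuid += mp_maxid + 1;	/* wrap IDs below the boot CPU */
	return (cpuid - cpu0);		/* shift so the boot CPU maps to 0 */
}

int
main(void)
{
	unsigned int mp_maxid = 3, cpu0 = 2;	/* assumed values */

	for (unsigned int id = 0; id <= mp_maxid; id++)
		printf("hw id %u -> logical cpu %u\n", id,
		    rotate_cpuid(id, cpu0, mp_maxid));
	/* prints: 0->2, 1->3, 2->0, 3->1 */
	return (0);
}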
 
 #ifdef DEV_ACPI
 static void
 madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
 {
 	ACPI_MADT_GENERIC_INTERRUPT *intr;
 	u_int *cpuid;
 
 	switch(entry->Type) {
 	case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
 		intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
 		cpuid = arg;
 
 		start_cpu((*cpuid), intr->ArmMpidr);
 		(*cpuid)++;
 		break;
 	default:
 		break;
 	}
 }
 
 static void
 cpu_init_acpi(void)
 {
 	ACPI_TABLE_MADT *madt;
 	vm_paddr_t physaddr;
 	u_int cpuid;
 
 	physaddr = acpi_find_table(ACPI_SIG_MADT);
 	if (physaddr == 0)
 		return;
 
 	madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
 	if (madt == NULL) {
 		printf("Unable to map the MADT, not starting APs\n");
 		return;
 	}
 
 	cpuid = 0;
 	acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
 	    madt_handler, &cpuid);
 
 	acpi_unmap_table(madt);
 }
 #endif
 
 #ifdef FDT
 static boolean_t
 cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 {
 	uint64_t target_cpu;
 	int domain;
 
 	target_cpu = reg[0];
 	if (addr_size == 2) {
 		target_cpu <<= 32;
 		target_cpu |= reg[1];
 	}
 
 	if (!start_cpu(id, target_cpu))
 		return (FALSE);
 
 	/* Try to read the numa node of this cpu */
 	if (OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) > 0) {
 		__pcpu[id].pc_domain = domain;
 		if (domain < MAXMEMDOM)
 			CPU_SET(id, &cpuset_domain[domain]);
 	}
 
 	return (TRUE);
 }
 #endif
 
 /* Initialize and fire up non-boot processors */
 void
 cpu_mp_start(void)
 {
 #ifdef FDT
 	phandle_t node;
 	int i;
 #endif
 
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	CPU_SET(0, &all_cpus);
 
 	switch(arm64_bus_method) {
 #ifdef DEV_ACPI
 	case ARM64_BUS_ACPI:
 		KASSERT(cpu0 >= 0, ("Current CPU was not found"));
 		cpu_init_acpi();
 		break;
 #endif
 #ifdef FDT
 	case ARM64_BUS_FDT:
 		node = OF_peer(0);
 		for (i = 0; fdt_quirks[i].compat != NULL; i++) {
 			if (ofw_bus_node_is_compatible(node,
 			    fdt_quirks[i].compat) != 0) {
 				mp_quirks = fdt_quirks[i].quirks;
 			}
 		}
 		KASSERT(cpu0 >= 0, ("Current CPU was not found"));
 		ofw_cpu_early_foreach(cpu_init_fdt, true);
 		break;
 #endif
 	default:
 		break;
 	}
 }
 
 /* Introduce rest of cores to the world */
 void
 cpu_mp_announce(void)
 {
 }
 
 #ifdef DEV_ACPI
 static void
 cpu_count_acpi_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
 {
 	ACPI_MADT_GENERIC_INTERRUPT *intr;
 	u_int *cores = arg;
 	uint64_t mpidr_reg;
 
 	switch(entry->Type) {
 	case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
 		intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
 		if (cpu0 < 0) {
 			mpidr_reg = READ_SPECIALREG(mpidr_el1);
 			if ((mpidr_reg & 0xff00fffffful) == intr->ArmMpidr)
 				cpu0 = *cores;
 		}
 		(*cores)++;
 		break;
 	default:
 		break;
 	}
 }
 
 static u_int
 cpu_count_acpi(void)
 {
 	ACPI_TABLE_MADT *madt;
 	vm_paddr_t physaddr;
 	u_int cores;
 
 	physaddr = acpi_find_table(ACPI_SIG_MADT);
 	if (physaddr == 0)
 		return (0);
 
 	madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
 	if (madt == NULL) {
 		printf("Unable to map the MADT, not starting APs\n");
 		return (0);
 	}
 
 	cores = 0;
 	acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
 	    cpu_count_acpi_handler, &cores);
 
 	acpi_unmap_table(madt);
 
 	return (cores);
 }
 #endif
 
 #ifdef FDT
 static boolean_t
 cpu_find_cpu0_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 {
 	uint64_t mpidr_fdt, mpidr_reg;
 
 	if (cpu0 < 0) {
 		mpidr_fdt = reg[0];
 		if (addr_size == 2) {
 			mpidr_fdt <<= 32;
 			mpidr_fdt |= reg[1];
 		}
 
 		mpidr_reg = READ_SPECIALREG(mpidr_el1);
 
 		if ((mpidr_reg & 0xff00fffffful) == mpidr_fdt)
 			cpu0 = id;
 	}
 
 	return (TRUE);
 }
 #endif
 
 void
 cpu_mp_setmaxid(void)
 {
 #if defined(DEV_ACPI) || defined(FDT)
 	int cores;
 #endif
 
 	switch(arm64_bus_method) {
 #ifdef DEV_ACPI
 	case ARM64_BUS_ACPI:
 		cores = cpu_count_acpi();
 		if (cores > 0) {
 			cores = MIN(cores, MAXCPU);
 			if (bootverbose)
 				printf("Found %d CPUs in the ACPI tables\n",
 				    cores);
 			mp_ncpus = cores;
 			mp_maxid = cores - 1;
 			return;
 		}
 		break;
 #endif
 #ifdef FDT
 	case ARM64_BUS_FDT:
 		cores = ofw_cpu_early_foreach(cpu_find_cpu0_fdt, false);
 		if (cores > 0) {
 			cores = MIN(cores, MAXCPU);
 			if (bootverbose)
 				printf("Found %d CPUs in the device tree\n",
 				    cores);
 			mp_ncpus = cores;
 			mp_maxid = cores - 1;
 			return;
 		}
 		break;
 #endif
 	default:
 		break;
 	}
 
 	if (bootverbose)
 		printf("No CPU data, limiting to 1 core\n");
 	mp_ncpus = 1;
 	mp_maxid = 0;
 }
 
 /*
  *  Lookup IPI source.
  */
 static struct intr_ipi *
 intr_ipi_lookup(u_int ipi)
 {
 
 	if (ipi >= INTR_IPI_COUNT)
 		panic("%s: no such IPI %u", __func__, ipi);
 
 	return (&ipi_sources[ipi]);
 }
 
 /*
  *  Interrupt controller dispatch function for IPIs. It should
  *  be called straight from the interrupt controller, when the associated
  *  interrupt source is learned, or from anybody who has an interrupt
  *  source mapped.
  */
 void
 intr_ipi_dispatch(u_int ipi, struct trapframe *tf)
 {
 	void *arg;
 	struct intr_ipi *ii;
 
 	ii = intr_ipi_lookup(ipi);
 	if (ii->ii_count == NULL)
 		panic("%s: not setup IPI %u", __func__, ipi);
 
 	intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid));
 
 	/*
 	 * Supply the IPI handler with the trapframe as its
 	 * argument if no handler argument is registered.
 	 */
 	arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf;
 	ii->ii_handler(arg);
 }
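
As the comment in intr_ipi_dispatch() notes, a handler registered without a private argument is handed the trapframe instead. A small standalone illustration of that fallback (not from this revision), using hypothetical stand-in types:

#include <stdio.h>

struct fake_trapframe { int dummy; };

static void
print_arg(void *arg)
{
	printf("handler argument: %p\n", arg);
}

static void
dispatch(void (*handler)(void *), void *handler_arg, struct fake_trapframe *tf)
{
	/* Same rule as intr_ipi_dispatch(): fall back to the trapframe. */
	void *arg = (handler_arg != NULL) ? handler_arg : tf;

	handler(arg);
}

int
main(void)
{
	struct fake_trapframe tf;
	int private_arg;

	dispatch(print_arg, &private_arg, &tf);	/* prints &private_arg */
	dispatch(print_arg, NULL, &tf);		/* prints &tf */
	return (0);
}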
 
 #ifdef notyet
 /*
  *  Map IPI into interrupt controller.
  *
  *  Not SMP coherent.
  */
 static int
 ipi_map(struct intr_irqsrc *isrc, u_int ipi)
 {
 	boolean_t is_percpu;
 	int error;
 
 	if (ipi >= INTR_IPI_COUNT)
 		panic("%s: no such IPI %u", __func__, ipi);
 
 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
 
 	isrc->isrc_type = INTR_ISRCT_NAMESPACE;
 	isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI;
 	isrc->isrc_nspc_num = ipi_next_num;
 
 	error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu);
 	if (error == 0) {
 		isrc->isrc_dev = intr_irq_root_dev;
 		ipi_next_num++;
 	}
 	return (error);
 }
 
 /*
  *  Setup IPI handler to interrupt source.
  *
  *  Note that there could be more ways to send and receive IPIs
  *  on a platform, for example fast interrupts. In that case,
  *  one can call this function with the AISHF_NOALLOC flag set and then
  *  call intr_ipi_dispatch() when appropriate.
  *
  *  Not SMP coherent.
  */
 int
 intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter,
     void *arg, u_int flags)
 {
 	struct intr_irqsrc *isrc;
 	int error;
 
 	if (filter == NULL)
 		return(EINVAL);
 
 	isrc = intr_ipi_lookup(ipi);
 	if (isrc->isrc_ipifilter != NULL)
 		return (EEXIST);
 
 	if ((flags & AISHF_NOALLOC) == 0) {
 		error = ipi_map(isrc, ipi);
 		if (error != 0)
 			return (error);
 	}
 
 	isrc->isrc_ipifilter = filter;
 	isrc->isrc_arg = arg;
 	isrc->isrc_handlers = 1;
 	isrc->isrc_count = intr_ipi_setup_counters(name);
 	isrc->isrc_index = 0; /* it should not be used in IPI case */
 
 	if (isrc->isrc_dev != NULL) {
 		PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
 		PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc);
 	}
 	return (0);
 }
 #endif
 
 /* Sending IPI */
 void
 ipi_all_but_self(u_int ipi)
 {
 	cpuset_t cpus;
 
 	cpus = all_cpus;
 	CPU_CLR(PCPU_GET(cpuid), &cpus);
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	intr_ipi_send(cpus, ipi);
 }
 
 void
 ipi_cpu(int cpu, u_int ipi)
 {
 	cpuset_t cpus;
 
 	CPU_ZERO(&cpus);
 	CPU_SET(cpu, &cpus);
 
 	CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi);
 	intr_ipi_send(cpus, ipi);
 }
 
 void
 ipi_selected(cpuset_t cpus, u_int ipi)
 {
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	intr_ipi_send(cpus, ipi);
 }
Index: head/sys/compat/linuxkpi/common/include/linux/dma-mapping.h
===================================================================
--- head/sys/compat/linuxkpi/common/include/linux/dma-mapping.h	(revision 338317)
+++ head/sys/compat/linuxkpi/common/include/linux/dma-mapping.h	(revision 338318)
@@ -1,282 +1,282 @@
 /*-
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2010 iX Systems, Inc.
  * Copyright (c) 2010 Panasas, Inc.
  * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 #ifndef	_LINUX_DMA_MAPPING_H_
 #define _LINUX_DMA_MAPPING_H_
 
 #include <linux/types.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/dma-attrs.h>
 #include <linux/scatterlist.h>
 #include <linux/mm.h>
 #include <linux/page.h>
 
 #include <sys/systm.h>
 #include <sys/malloc.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
 
 #include <machine/bus.h>
 
 enum dma_data_direction {
 	DMA_BIDIRECTIONAL = 0,
 	DMA_TO_DEVICE = 1,
 	DMA_FROM_DEVICE = 2,
 	DMA_NONE = 3,
 };
 
 struct dma_map_ops {
 	void* (*alloc_coherent)(struct device *dev, size_t size,
 	    dma_addr_t *dma_handle, gfp_t gfp);
 	void (*free_coherent)(struct device *dev, size_t size,
 	    void *vaddr, dma_addr_t dma_handle);
 	dma_addr_t (*map_page)(struct device *dev, struct page *page,
 	    unsigned long offset, size_t size, enum dma_data_direction dir,
 	    struct dma_attrs *attrs);
 	void (*unmap_page)(struct device *dev, dma_addr_t dma_handle,
 	    size_t size, enum dma_data_direction dir, struct dma_attrs *attrs);
 	int (*map_sg)(struct device *dev, struct scatterlist *sg,
 	    int nents, enum dma_data_direction dir, struct dma_attrs *attrs);
 	void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nents,
 	    enum dma_data_direction dir, struct dma_attrs *attrs);
 	void (*sync_single_for_cpu)(struct device *dev, dma_addr_t dma_handle,
 	    size_t size, enum dma_data_direction dir);
 	void (*sync_single_for_device)(struct device *dev,
 	    dma_addr_t dma_handle, size_t size, enum dma_data_direction dir);
 	void (*sync_single_range_for_cpu)(struct device *dev,
 	    dma_addr_t dma_handle, unsigned long offset, size_t size,
 	    enum dma_data_direction dir);
 	void (*sync_single_range_for_device)(struct device *dev,
 	    dma_addr_t dma_handle, unsigned long offset, size_t size,
 	    enum dma_data_direction dir);
 	void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg,
 	    int nents, enum dma_data_direction dir);
 	void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg,
 	    int nents, enum dma_data_direction dir);
 	int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
 	int (*dma_supported)(struct device *dev, u64 mask);
 	int is_phys;
 };
 
 #define	DMA_BIT_MASK(n)	((2ULL << ((n) - 1)) - 1ULL)
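
DMA_BIT_MASK() is written as (2ULL << (n - 1)) - 1 rather than (1ULL << n) - 1 so that n == 64 never shifts by the full type width: the shift count stays at 63 and the unsigned wrap-around still yields an all-ones mask. A standalone check (not part of this revision):

#include <inttypes.h>
#include <stdio.h>

#define	DMA_BIT_MASK(n)	((2ULL << ((n) - 1)) - 1ULL)

int
main(void)
{
	printf("32-bit mask: 0x%" PRIx64 "\n", (uint64_t)DMA_BIT_MASK(32));
	/* 0xffffffff */
	printf("64-bit mask: 0x%" PRIx64 "\n", (uint64_t)DMA_BIT_MASK(64));
	/* 0xffffffffffffffff: 2ULL << 63 wraps to 0 in unsigned arithmetic */
	return (0);
}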
 
 static inline int
 dma_supported(struct device *dev, u64 mask)
 {
 
 	/* XXX busdma takes care of this elsewhere. */
 	return (1);
 }
 
 static inline int
 dma_set_mask(struct device *dev, u64 dma_mask)
 {
 
 	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
 		return -EIO;
 
 	*dev->dma_mask = dma_mask;
 	return (0);
 }
 
 static inline int
 dma_set_coherent_mask(struct device *dev, u64 mask)
 {
 
 	if (!dma_supported(dev, mask))
 		return -EIO;
 	/* XXX Currently we don't support a separate coherent mask. */
 	return 0;
 }
 
 static inline void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
     gfp_t flag)
 {
 	vm_paddr_t high;
 	size_t align;
 	void *mem;
 
 	if (dev != NULL && dev->dma_mask)
 		high = *dev->dma_mask;
 	else if (flag & GFP_DMA32)
 		high = BUS_SPACE_MAXADDR_32BIT;
 	else
 		high = BUS_SPACE_MAXADDR;
 	align = PAGE_SIZE << get_order(size);
 	mem = (void *)kmem_alloc_contig(size, flag, 0, high, align, 0,
 	    VM_MEMATTR_DEFAULT);
 	if (mem)
 		*dma_handle = vtophys(mem);
 	else
 		*dma_handle = 0;
 	return (mem);
 }
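
dma_alloc_coherent() aligns the allocation to PAGE_SIZE << get_order(size), i.e. to the smallest power-of-two number of pages covering the request. A standalone sketch of that rounding; the get_order() below is an illustrative reimplementation, not the kernel's, and a 4 KB page size is assumed:

#include <stddef.h>
#include <stdio.h>

#define	PAGE_SIZE	4096UL		/* assumed page size */

static unsigned int
get_order(size_t size)
{
	unsigned int order = 0;
	size_t pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;

	while ((1UL << order) < pages)
		order++;
	return (order);
}

int
main(void)
{
	size_t sizes[] = { 1, 4096, 4097, 65536, 100000 };

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		size_t sz = sizes[i];
		unsigned int order = get_order(sz);

		printf("size %6zu -> order %u, alignment %lu\n",
		    sz, order, PAGE_SIZE << order);
	}
	return (0);
}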
 
 static inline void *
 dma_zalloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
     gfp_t flag)
 {
 
 	return (dma_alloc_coherent(dev, size, dma_handle, flag | __GFP_ZERO));
 }
 
 static inline void
 dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
     dma_addr_t dma_handle)
 {
 
-	kmem_free(kmem_arena, (vm_offset_t)cpu_addr, size);
+	kmem_free((vm_offset_t)cpu_addr, size);
 }
 
 /* XXX This only works with no iommu. */
 static inline dma_addr_t
 dma_map_single_attrs(struct device *dev, void *ptr, size_t size,
     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 
 	return vtophys(ptr);
 }
 
 static inline void
 dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, size_t size,
     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 }
 
 static inline int
 dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents,
     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct scatterlist *sg;
 	int i;
 
 	for_each_sg(sgl, sg, nents, i)
 		sg_dma_address(sg) = sg_phys(sg);
 
 	return (nents);
 }
 
 static inline void
 dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 }
 
 static inline dma_addr_t
 dma_map_page(struct device *dev, struct page *page,
     unsigned long offset, size_t size, enum dma_data_direction direction)
 {
 
 	return VM_PAGE_TO_PHYS(page) + offset;
 }
 
 static inline void
 dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
     enum dma_data_direction direction)
 {
 }
 
 static inline void
 dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
     enum dma_data_direction direction)
 {
 }
 
 static inline void
 dma_sync_single(struct device *dev, dma_addr_t addr, size_t size,
     enum dma_data_direction dir)
 {
 	dma_sync_single_for_cpu(dev, addr, size, dir);
 }
 
 static inline void
 dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
     size_t size, enum dma_data_direction direction)
 {
 }
 
 static inline void
 dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
     enum dma_data_direction direction)
 {
 }
 
 static inline void
 dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
     enum dma_data_direction direction)
 {
 }
 
 static inline void
 dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
     unsigned long offset, size_t size, int direction)
 {
 }
 
 static inline void
 dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
     unsigned long offset, size_t size, int direction)
 {
 }
 
 static inline int
 dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 
 	return (0);
 }
 
 static inline unsigned int dma_set_max_seg_size(struct device *dev,
     unsigned int size)
 {
 	return (0);
 }
 
 
 #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL)
 #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL)
 #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL)
 #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL)
 
 #define	DEFINE_DMA_UNMAP_ADDR(name)		dma_addr_t name
 #define	DEFINE_DMA_UNMAP_LEN(name)		__u32 name
 #define	dma_unmap_addr(p, name)			((p)->name)
 #define	dma_unmap_addr_set(p, name, v)		(((p)->name) = (v))
 #define	dma_unmap_len(p, name)			((p)->name)
 #define	dma_unmap_len_set(p, name, v)		(((p)->name) = (v))
 
 extern int uma_align_cache;
 #define	dma_get_cache_alignment()	uma_align_cache
 
 #endif	/* _LINUX_DMA_MAPPING_H_ */
Index: head/sys/compat/linuxkpi/common/src/linux_page.c
===================================================================
--- head/sys/compat/linuxkpi/common/src/linux_page.c	(revision 338317)
+++ head/sys/compat/linuxkpi/common/src/linux_page.c	(revision 338318)
@@ -1,387 +1,387 @@
 /*-
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io)
  * Copyright (c) 2017 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 
 #include <machine/bus.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_reserv.h>
 #include <vm/vm_extern.h>
 
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/preempt.h>
 #include <linux/fs.h>
 
 void *
 linux_page_address(struct page *page)
 {
 
 	if (page->object != kmem_object && page->object != kernel_object) {
 		return (PMAP_HAS_DMAP ?
 		    ((void *)(uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(page))) :
 		    NULL);
 	}
 	return ((void *)(uintptr_t)(VM_MIN_KERNEL_ADDRESS +
 	    IDX_TO_OFF(page->pindex)));
 }
 
 vm_page_t
 linux_alloc_pages(gfp_t flags, unsigned int order)
 {
 	vm_page_t page;
 
 	if (PMAP_HAS_DMAP) {
 		unsigned long npages = 1UL << order;
 		int req = (flags & M_ZERO) ? (VM_ALLOC_ZERO | VM_ALLOC_NOOBJ |
 		    VM_ALLOC_NORMAL) : (VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL);
 
 		if (order == 0 && (flags & GFP_DMA32) == 0) {
 			page = vm_page_alloc(NULL, 0, req);
 			if (page == NULL)
 				return (NULL);
 		} else {
 			vm_paddr_t pmax = (flags & GFP_DMA32) ?
 			    BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR;
 		retry:
 			page = vm_page_alloc_contig(NULL, 0, req,
 			    npages, 0, pmax, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
 
 			if (page == NULL) {
 				if (flags & M_WAITOK) {
 					if (!vm_page_reclaim_contig(req,
 					    npages, 0, pmax, PAGE_SIZE, 0)) {
 						vm_wait(NULL);
 					}
 					flags &= ~M_WAITOK;
 					goto retry;
 				}
 				return (NULL);
 			}
 		}
 		if (flags & M_ZERO) {
 			unsigned long x;
 
 			for (x = 0; x != npages; x++) {
 				vm_page_t pgo = page + x;
 
 				if ((pgo->flags & PG_ZERO) == 0)
 					pmap_zero_page(pgo);
 			}
 		}
 	} else {
 		vm_offset_t vaddr;
 
 		vaddr = linux_alloc_kmem(flags, order);
 		if (vaddr == 0)
 			return (NULL);
 
 		page = PHYS_TO_VM_PAGE(vtophys((void *)vaddr));
 
 		KASSERT(vaddr == (vm_offset_t)page_address(page),
 		    ("Page address mismatch"));
 	}
 
 	return (page);
 }
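
linux_alloc_pages() retries a failed contiguous allocation at most once: it attempts reclamation, sleeps only if reclamation fails, clears M_WAITOK, and loops back. A standalone sketch of that single-retry pattern (not from this revision), with hypothetical stand-in allocator and reclaim functions:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define	M_WAITOK	0x1

static int attempts;

static void *
try_alloc_contig(void)
{
	/* Pretend the first attempt fails and the retry succeeds. */
	return (++attempts == 1 ? NULL : malloc(64));
}

static bool
try_reclaim(void)
{
	return (true);	/* pretend reclamation freed something */
}

static void *
alloc_with_retry(int flags)
{
	void *p;

retry:
	p = try_alloc_contig();
	if (p == NULL) {
		if (flags & M_WAITOK) {
			if (!try_reclaim()) {
				/* would sleep here, as vm_wait() does */
			}
			flags &= ~M_WAITOK;	/* retry at most once */
			goto retry;
		}
		return (NULL);
	}
	return (p);
}

int
main(void)
{
	void *p = alloc_with_retry(M_WAITOK);

	printf("allocation %s after %d attempt(s)\n",
	    p != NULL ? "succeeded" : "failed", attempts);
	free(p);
	return (0);
}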
 
 void
 linux_free_pages(vm_page_t page, unsigned int order)
 {
 	if (PMAP_HAS_DMAP) {
 		unsigned long npages = 1UL << order;
 		unsigned long x;
 
 		for (x = 0; x != npages; x++) {
 			vm_page_t pgo = page + x;
 
 			vm_page_lock(pgo);
 			vm_page_free(pgo);
 			vm_page_unlock(pgo);
 		}
 	} else {
 		vm_offset_t vaddr;
 
 		vaddr = (vm_offset_t)page_address(page);
 
 		linux_free_kmem(vaddr, order);
 	}
 }
 
 vm_offset_t
 linux_alloc_kmem(gfp_t flags, unsigned int order)
 {
 	size_t size = ((size_t)PAGE_SIZE) << order;
 	vm_offset_t addr;
 
 	if ((flags & GFP_DMA32) == 0) {
 		addr = kmem_malloc(size, flags & GFP_NATIVE_MASK);
 	} else {
 		addr = kmem_alloc_contig(size, flags & GFP_NATIVE_MASK, 0,
 		    BUS_SPACE_MAXADDR_32BIT, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
 	}
 	return (addr);
 }
 
 void
 linux_free_kmem(vm_offset_t addr, unsigned int order)
 {
 	size_t size = ((size_t)PAGE_SIZE) << order;
 
-	kmem_free(kmem_arena, addr, size);
+	kmem_free(addr, size);
 }
 
 static int
 linux_get_user_pages_internal(vm_map_t map, unsigned long start, int nr_pages,
     int write, struct page **pages)
 {
 	vm_prot_t prot;
 	size_t len;
 	int count;
 	int i;
 
 	prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ;
 	len = ((size_t)nr_pages) << PAGE_SHIFT;
 	count = vm_fault_quick_hold_pages(map, start, len, prot, pages, nr_pages);
 	if (count == -1)
 		return (-EFAULT);
 
 	for (i = 0; i != nr_pages; i++) {
 		struct page *pg = pages[i];
 
 		vm_page_lock(pg);
 		vm_page_wire(pg);
 		vm_page_unhold(pg);
 		vm_page_unlock(pg);
 	}
 	return (nr_pages);
 }
 
 int
 __get_user_pages_fast(unsigned long start, int nr_pages, int write,
     struct page **pages)
 {
 	vm_map_t map;
 	vm_page_t *mp;
 	vm_offset_t va;
 	vm_offset_t end;
 	vm_prot_t prot;
 	int count;
 
 	if (nr_pages == 0 || in_interrupt())
 		return (0);
 
 	MPASS(pages != NULL);
 	va = start;
 	map = &curthread->td_proc->p_vmspace->vm_map;
 	end = start + (((size_t)nr_pages) << PAGE_SHIFT);
 	if (start < vm_map_min(map) || end > vm_map_max(map))
 		return (-EINVAL);
 	prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ;
 	for (count = 0, mp = pages, va = start; va < end;
 	    mp++, va += PAGE_SIZE, count++) {
 		*mp = pmap_extract_and_hold(map->pmap, va, prot);
 		if (*mp == NULL)
 			break;
 
 		vm_page_lock(*mp);
 		vm_page_wire(*mp);
 		vm_page_unhold(*mp);
 		vm_page_unlock(*mp);
 
 		if ((prot & VM_PROT_WRITE) != 0 &&
 		    (*mp)->dirty != VM_PAGE_BITS_ALL) {
 			/*
 			 * Explicitly dirty the physical page.  Otherwise, the
 			 * caller's changes may go unnoticed because they are
 			 * performed through an unmanaged mapping or by a DMA
 			 * operation.
 			 *
 			 * The object lock is not held here.
 			 * See vm_page_clear_dirty_mask().
 			 */
 			vm_page_dirty(*mp);
 		}
 	}
 	return (count);
 }
 
 long
 get_user_pages_remote(struct task_struct *task, struct mm_struct *mm,
     unsigned long start, unsigned long nr_pages, int gup_flags,
     struct page **pages, struct vm_area_struct **vmas)
 {
 	vm_map_t map;
 
 	map = &task->task_thread->td_proc->p_vmspace->vm_map;
 	return (linux_get_user_pages_internal(map, start, nr_pages,
 	    !!(gup_flags & FOLL_WRITE), pages));
 }
 
 long
 get_user_pages(unsigned long start, unsigned long nr_pages, int gup_flags,
     struct page **pages, struct vm_area_struct **vmas)
 {
 	vm_map_t map;
 
 	map = &curthread->td_proc->p_vmspace->vm_map;
 	return (linux_get_user_pages_internal(map, start, nr_pages,
 	    !!(gup_flags & FOLL_WRITE), pages));
 }
 
 int
 is_vmalloc_addr(const void *addr)
 {
 	return (vtoslab((vm_offset_t)addr & ~UMA_SLAB_MASK) != NULL);
 }
 
 struct page *
 linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp)
 {
 	vm_page_t page;
 	int rv;
 
 	if ((gfp & GFP_NOWAIT) != 0)
 		panic("GFP_NOWAIT is unimplemented");
 
 	VM_OBJECT_WLOCK(obj);
 	page = vm_page_grab(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
 	    VM_ALLOC_WIRED);
 	if (page->valid != VM_PAGE_BITS_ALL) {
 		vm_page_xbusy(page);
 		if (vm_pager_has_page(obj, pindex, NULL, NULL)) {
 			rv = vm_pager_get_pages(obj, &page, 1, NULL, NULL);
 			if (rv != VM_PAGER_OK) {
 				vm_page_lock(page);
 				vm_page_unwire(page, PQ_NONE);
 				vm_page_free(page);
 				vm_page_unlock(page);
 				VM_OBJECT_WUNLOCK(obj);
 				return (ERR_PTR(-EINVAL));
 			}
 			MPASS(page->valid == VM_PAGE_BITS_ALL);
 		} else {
 			pmap_zero_page(page);
 			page->valid = VM_PAGE_BITS_ALL;
 			page->dirty = 0;
 		}
 		vm_page_xunbusy(page);
 	}
 	VM_OBJECT_WUNLOCK(obj);
 	return (page);
 }
 
 struct linux_file *
 linux_shmem_file_setup(const char *name, loff_t size, unsigned long flags)
 {
 	struct fileobj {
 		struct linux_file file __aligned(sizeof(void *));
 		struct vnode vnode __aligned(sizeof(void *));
 	};
 	struct fileobj *fileobj;
 	struct linux_file *filp;
 	struct vnode *vp;
 	int error;
 
 	fileobj = kzalloc(sizeof(*fileobj), GFP_KERNEL);
 	if (fileobj == NULL) {
 		error = -ENOMEM;
 		goto err_0;
 	}
 	filp = &fileobj->file;
 	vp = &fileobj->vnode;
 
 	filp->f_count = 1;
 	filp->f_vnode = vp;
 	filp->f_shmem = vm_pager_allocate(OBJT_DEFAULT, NULL, size,
 	    VM_PROT_READ | VM_PROT_WRITE, 0, curthread->td_ucred);
 	if (filp->f_shmem == NULL) {
 		error = -ENOMEM;
 		goto err_1;
 	}
 	return (filp);
 err_1:
 	kfree(filp);
 err_0:
 	return (ERR_PTR(error));
 }
 
 static vm_ooffset_t
 linux_invalidate_mapping_pages_sub(vm_object_t obj, vm_pindex_t start,
     vm_pindex_t end, int flags)
 {
 	int start_count, end_count;
 
 	VM_OBJECT_WLOCK(obj);
 	start_count = obj->resident_page_count;
 	vm_object_page_remove(obj, start, end, flags);
 	end_count = obj->resident_page_count;
 	VM_OBJECT_WUNLOCK(obj);
 	return (start_count - end_count);
 }
 
 unsigned long
 linux_invalidate_mapping_pages(vm_object_t obj, pgoff_t start, pgoff_t end)
 {
 
 	return (linux_invalidate_mapping_pages_sub(obj, start, end, OBJPR_CLEANONLY));
 }
 
 void
 linux_shmem_truncate_range(vm_object_t obj, loff_t lstart, loff_t lend)
 {
 	vm_pindex_t start = OFF_TO_IDX(lstart + PAGE_SIZE - 1);
 	vm_pindex_t end = OFF_TO_IDX(lend + 1);
 
 	(void) linux_invalidate_mapping_pages_sub(obj, start, end, 0);
 }
Index: head/sys/dev/agp/agp.c
===================================================================
--- head/sys/dev/agp/agp.c	(revision 338317)
+++ head/sys/dev/agp/agp.c	(revision 338318)
@@ -1,1059 +1,1059 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2000 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_agp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/ioccom.h>
 #include <sys/agpio.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 
 #include <dev/agp/agppriv.h>
 #include <dev/agp/agpvar.h>
 #include <dev/agp/agpreg.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/pmap.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/rman.h>
 
 MODULE_VERSION(agp, 1);
 
 MALLOC_DEFINE(M_AGP, "agp", "AGP data structures");
 
 				/* agp_drv.c */
 static d_open_t agp_open;
 static d_close_t agp_close;
 static d_ioctl_t agp_ioctl;
 static d_mmap_t agp_mmap;
 
 static struct cdevsw agp_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	D_NEEDGIANT,
 	.d_open =	agp_open,
 	.d_close =	agp_close,
 	.d_ioctl =	agp_ioctl,
 	.d_mmap =	agp_mmap,
 	.d_name =	"agp",
 };
 
 static devclass_t agp_devclass;
 
 /* Helper functions for implementing chipset mini drivers. */
 
 u_int8_t
 agp_find_caps(device_t dev)
 {
 	int capreg;
 
 
 	if (pci_find_cap(dev, PCIY_AGP, &capreg) != 0)
 		capreg = 0;
 	return (capreg);
 }
 
 /*
  * Find an AGP display device (if any).
  */
 static device_t
 agp_find_display(void)
 {
 	devclass_t pci = devclass_find("pci");
 	device_t bus, dev = 0;
 	device_t *kids;
 	int busnum, numkids, i;
 
 	for (busnum = 0; busnum < devclass_get_maxunit(pci); busnum++) {
 		bus = devclass_get_device(pci, busnum);
 		if (!bus)
 			continue;
 		if (device_get_children(bus, &kids, &numkids) != 0)
 			continue;
 		for (i = 0; i < numkids; i++) {
 			dev = kids[i];
 			if (pci_get_class(dev) == PCIC_DISPLAY
 			    && pci_get_subclass(dev) == PCIS_DISPLAY_VGA)
 				if (agp_find_caps(dev)) {
 					free(kids, M_TEMP);
 					return dev;
 				}
 					
 		}
 		free(kids, M_TEMP);
 	}
 
 	return 0;
 }
 
 struct agp_gatt *
 agp_alloc_gatt(device_t dev)
 {
 	u_int32_t apsize = AGP_GET_APERTURE(dev);
 	u_int32_t entries = apsize >> AGP_PAGE_SHIFT;
 	struct agp_gatt *gatt;
 
 	if (bootverbose)
 		device_printf(dev,
 			      "allocating GATT for aperture of size %dM\n",
 			      apsize / (1024*1024));
 
 	if (entries == 0) {
 		device_printf(dev, "bad aperture size\n");
 		return NULL;
 	}
 
 	gatt = malloc(sizeof(struct agp_gatt), M_AGP, M_NOWAIT);
 	if (!gatt)
 		return 0;
 
 	gatt->ag_entries = entries;
 	gatt->ag_virtual = (void *)kmem_alloc_contig(entries *
 	    sizeof(u_int32_t), M_NOWAIT | M_ZERO, 0, ~0, PAGE_SIZE, 0,
 	    VM_MEMATTR_WRITE_COMBINING);
 	if (!gatt->ag_virtual) {
 		if (bootverbose)
 			device_printf(dev, "contiguous allocation failed\n");
 		free(gatt, M_AGP);
 		return 0;
 	}
 	gatt->ag_physical = vtophys((vm_offset_t) gatt->ag_virtual);
 
 	return gatt;
 }
 
 void
 agp_free_gatt(struct agp_gatt *gatt)
 {
-	kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual,
-	    gatt->ag_entries * sizeof(u_int32_t));
+	kmem_free((vm_offset_t)gatt->ag_virtual, gatt->ag_entries *
+	    sizeof(u_int32_t));
 	free(gatt, M_AGP);
 }
 
 static u_int agp_max[][2] = {
 	{0,	0},
 	{32,	4},
 	{64,	28},
 	{128,	96},
 	{256,	204},
 	{512,	440},
 	{1024,	942},
 	{2048,	1920},
 	{4096,	3932}
 };
 #define	AGP_MAX_SIZE	nitems(agp_max)
 
 /**
  * Sets the PCI resource which represents the AGP aperture.
  *
  * If not called, the default AGP aperture resource of AGP_APBASE will
  * be used.  Must be called before agp_generic_attach().
  */
 void
 agp_set_aperture_resource(device_t dev, int rid)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 
 	sc->as_aperture_rid = rid;
 }
 
 int
 agp_generic_attach(device_t dev)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 	int i;
 	u_int memsize;
 
 	/*
 	 * Find and map the aperture, RF_SHAREABLE for DRM but not RF_ACTIVE
 	 * because the kernel doesn't need to map it.
 	 */
 
 	if (sc->as_aperture_rid != -1) {
 		if (sc->as_aperture_rid == 0)
 			sc->as_aperture_rid = AGP_APBASE;
 
 		sc->as_aperture = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 		    &sc->as_aperture_rid, RF_SHAREABLE);
 		if (!sc->as_aperture)
 			return ENOMEM;
 	}
 
 	/*
 	 * Work out an upper bound for agp memory allocation. This
 	 * uses a heuristic table from the Linux driver.
 	 */
 	memsize = ptoa(realmem) >> 20;
 	for (i = 0; i < AGP_MAX_SIZE; i++) {
 		if (memsize <= agp_max[i][0])
 			break;
 	}
 	if (i == AGP_MAX_SIZE)
 		i = AGP_MAX_SIZE - 1;
 	sc->as_maxmem = agp_max[i][1] << 20U;
 
 	/*
 	 * The lock is used to prevent re-entry to
 	 * agp_generic_bind_memory() since that function can sleep.
 	 */
 	mtx_init(&sc->as_lock, "agp lock", NULL, MTX_DEF);
 
 	/*
 	 * Initialise stuff for the userland device.
 	 */
 	agp_devclass = devclass_find("agp");
 	TAILQ_INIT(&sc->as_memory);
 	sc->as_nextid = 1;
 
 	sc->as_devnode = make_dev(&agp_cdevsw,
 	    0, UID_ROOT, GID_WHEEL, 0600, "agpgart");
 	sc->as_devnode->si_drv1 = dev;
 
 	return 0;
 }
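
The heuristic in agp_generic_attach() caps AGP memory based on installed RAM via the agp_max[] table: the first row whose first column is >= the system memory size supplies the cap in its second column. A standalone sketch (not from this revision) with an assumed 384 MB of RAM:

#include <stdio.h>

static unsigned int agp_max[][2] = {
	{0,	0},
	{32,	4},
	{64,	28},
	{128,	96},
	{256,	204},
	{512,	440},
	{1024,	942},
	{2048,	1920},
	{4096,	3932}
};
#define	AGP_MAX_SIZE	(sizeof(agp_max) / sizeof(agp_max[0]))

int
main(void)
{
	unsigned int memsize = 384;	/* assumed installed memory in MB */
	unsigned int i;

	for (i = 0; i < AGP_MAX_SIZE; i++)
		if (memsize <= agp_max[i][0])
			break;
	if (i == AGP_MAX_SIZE)
		i = AGP_MAX_SIZE - 1;
	printf("%u MB of RAM -> at most %u MB of AGP memory\n",
	    memsize, agp_max[i][1]);	/* 384 MB rounds up to the 512 MB row: 440 MB */
	return (0);
}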
 
 void
 agp_free_cdev(device_t dev)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 
 	destroy_dev(sc->as_devnode);
 }
 
 void
 agp_free_res(device_t dev)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 
 	if (sc->as_aperture != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY, sc->as_aperture_rid,
 		    sc->as_aperture);
 	mtx_destroy(&sc->as_lock);
 }
 
 int
 agp_generic_detach(device_t dev)
 {
 
 	agp_free_cdev(dev);
 	agp_free_res(dev);
 	return 0;
 }
 
 /**
  * Default AGP aperture size detection which simply returns the size of
  * the aperture's PCI resource.
  */
 u_int32_t
 agp_generic_get_aperture(device_t dev)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 
 	return rman_get_size(sc->as_aperture);
 }
 
 /**
  * Default AGP aperture size setting function, which simply doesn't allow
  * changes to resource size.
  */
 int
 agp_generic_set_aperture(device_t dev, u_int32_t aperture)
 {
 	u_int32_t current_aperture;
 
 	current_aperture = AGP_GET_APERTURE(dev);
 	if (current_aperture != aperture)
 		return EINVAL;
 	else
 		return 0;
 }
 
 /*
  * This does the enable logic for v3, with the same topology
  * restrictions as in place for v2 -- one bus, one device on the bus.
  */
 static int
 agp_v3_enable(device_t dev, device_t mdev, u_int32_t mode)
 {
 	u_int32_t tstatus, mstatus;
 	u_int32_t command;
 	int rq, sba, fw, rate, arqsz, cal;
 
 	tstatus = pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4);
 	mstatus = pci_read_config(mdev, agp_find_caps(mdev) + AGP_STATUS, 4);
 
 	/* Set RQ to the min of mode, tstatus and mstatus */
 	rq = AGP_MODE_GET_RQ(mode);
 	if (AGP_MODE_GET_RQ(tstatus) < rq)
 		rq = AGP_MODE_GET_RQ(tstatus);
 	if (AGP_MODE_GET_RQ(mstatus) < rq)
 		rq = AGP_MODE_GET_RQ(mstatus);
 
 	/*
 	 * ARQSZ - Set the value to the maximum one.
 	 * Don't allow the mode register to override values.
 	 */
 	arqsz = AGP_MODE_GET_ARQSZ(mode);
 	if (AGP_MODE_GET_ARQSZ(tstatus) > rq)
 		rq = AGP_MODE_GET_ARQSZ(tstatus);
 	if (AGP_MODE_GET_ARQSZ(mstatus) > rq)
 		rq = AGP_MODE_GET_ARQSZ(mstatus);
 
 	/* Calibration cycle - don't allow override by mode register */
 	cal = AGP_MODE_GET_CAL(tstatus);
 	if (AGP_MODE_GET_CAL(mstatus) < cal)
 		cal = AGP_MODE_GET_CAL(mstatus);
 
 	/* SBA must be supported for AGP v3. */
 	sba = 1;
 
 	/* Set FW if all three support it. */
 	fw = (AGP_MODE_GET_FW(tstatus)
 	       & AGP_MODE_GET_FW(mstatus)
 	       & AGP_MODE_GET_FW(mode));
 	
 	/* Figure out the max rate */
 	rate = (AGP_MODE_GET_RATE(tstatus)
 		& AGP_MODE_GET_RATE(mstatus)
 		& AGP_MODE_GET_RATE(mode));
 	if (rate & AGP_MODE_V3_RATE_8x)
 		rate = AGP_MODE_V3_RATE_8x;
 	else
 		rate = AGP_MODE_V3_RATE_4x;
 	if (bootverbose)
 		device_printf(dev, "Setting AGP v3 mode %d\n", rate * 4);
 
 	pci_write_config(dev, agp_find_caps(dev) + AGP_COMMAND, 0, 4);
 
 	/* Construct the new mode word and tell the hardware */
 	command = 0;
 	command = AGP_MODE_SET_RQ(0, rq);
 	command = AGP_MODE_SET_ARQSZ(command, arqsz);
 	command = AGP_MODE_SET_CAL(command, cal);
 	command = AGP_MODE_SET_SBA(command, sba);
 	command = AGP_MODE_SET_FW(command, fw);
 	command = AGP_MODE_SET_RATE(command, rate);
 	command = AGP_MODE_SET_MODE_3(command, 1);
 	command = AGP_MODE_SET_AGP(command, 1);
 	pci_write_config(dev, agp_find_caps(dev) + AGP_COMMAND, command, 4);
 	pci_write_config(mdev, agp_find_caps(mdev) + AGP_COMMAND, command, 4);
 
 	return 0;
 }
 
 static int
 agp_v2_enable(device_t dev, device_t mdev, u_int32_t mode)
 {
 	u_int32_t tstatus, mstatus;
 	u_int32_t command;
 	int rq, sba, fw, rate;
 
 	tstatus = pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4);
 	mstatus = pci_read_config(mdev, agp_find_caps(mdev) + AGP_STATUS, 4);
 
 	/* Set RQ to the min of mode, tstatus and mstatus */
 	rq = AGP_MODE_GET_RQ(mode);
 	if (AGP_MODE_GET_RQ(tstatus) < rq)
 		rq = AGP_MODE_GET_RQ(tstatus);
 	if (AGP_MODE_GET_RQ(mstatus) < rq)
 		rq = AGP_MODE_GET_RQ(mstatus);
 
 	/* Set SBA if all three can deal with SBA */
 	sba = (AGP_MODE_GET_SBA(tstatus)
 	       & AGP_MODE_GET_SBA(mstatus)
 	       & AGP_MODE_GET_SBA(mode));
 
 	/* Similar for FW */
 	fw = (AGP_MODE_GET_FW(tstatus)
 	       & AGP_MODE_GET_FW(mstatus)
 	       & AGP_MODE_GET_FW(mode));
 
 	/* Figure out the max rate */
 	rate = (AGP_MODE_GET_RATE(tstatus)
 		& AGP_MODE_GET_RATE(mstatus)
 		& AGP_MODE_GET_RATE(mode));
 	if (rate & AGP_MODE_V2_RATE_4x)
 		rate = AGP_MODE_V2_RATE_4x;
 	else if (rate & AGP_MODE_V2_RATE_2x)
 		rate = AGP_MODE_V2_RATE_2x;
 	else
 		rate = AGP_MODE_V2_RATE_1x;
 	if (bootverbose)
 		device_printf(dev, "Setting AGP v2 mode %d\n", rate);
 
 	/* Construct the new mode word and tell the hardware */
 	command = 0;
 	command = AGP_MODE_SET_RQ(0, rq);
 	command = AGP_MODE_SET_SBA(command, sba);
 	command = AGP_MODE_SET_FW(command, fw);
 	command = AGP_MODE_SET_RATE(command, rate);
 	command = AGP_MODE_SET_AGP(command, 1);
 	pci_write_config(dev, agp_find_caps(dev) + AGP_COMMAND, command, 4);
 	pci_write_config(mdev, agp_find_caps(mdev) + AGP_COMMAND, command, 4);
 
 	return 0;
 }
 
 int
 agp_generic_enable(device_t dev, u_int32_t mode)
 {
 	device_t mdev = agp_find_display();
 	u_int32_t tstatus, mstatus;
 
 	if (!mdev) {
 		AGP_DPF("can't find display\n");
 		return ENXIO;
 	}
 
 	tstatus = pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4);
 	mstatus = pci_read_config(mdev, agp_find_caps(mdev) + AGP_STATUS, 4);
 
 	/*
 	 * Check display and bridge for AGP v3 support.  AGP v3 allows
 	 * more variety in topology than v2, e.g. multiple AGP devices
 	 * attached to one bridge, or multiple AGP bridges in one
 	 * system.  This doesn't attempt to address those situations,
 	 * but should work fine for a classic single AGP slot system
 	 * with AGP v3.
 	 */
 	if (AGP_MODE_GET_MODE_3(mode) &&
 	    AGP_MODE_GET_MODE_3(tstatus) &&
 	    AGP_MODE_GET_MODE_3(mstatus))
 		return (agp_v3_enable(dev, mdev, mode));
 	else
 		return (agp_v2_enable(dev, mdev, mode));	    
 }
 
 struct agp_memory *
 agp_generic_alloc_memory(device_t dev, int type, vm_size_t size)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 	struct agp_memory *mem;
 
 	if ((size & (AGP_PAGE_SIZE - 1)) != 0)
 		return 0;
 
 	if (size > sc->as_maxmem - sc->as_allocated)
 		return 0;
 
 	if (type != 0) {
 		printf("agp_generic_alloc_memory: unsupported type %d\n",
 		       type);
 		return 0;
 	}
 
 	mem = malloc(sizeof *mem, M_AGP, M_WAITOK);
 	mem->am_id = sc->as_nextid++;
 	mem->am_size = size;
 	mem->am_type = 0;
 	mem->am_obj = vm_object_allocate(OBJT_DEFAULT, atop(round_page(size)));
 	mem->am_physical = 0;
 	mem->am_offset = 0;
 	mem->am_is_bound = 0;
 	TAILQ_INSERT_TAIL(&sc->as_memory, mem, am_link);
 	sc->as_allocated += size;
 
 	return mem;
 }
 
 int
 agp_generic_free_memory(device_t dev, struct agp_memory *mem)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 
 	if (mem->am_is_bound)
 		return EBUSY;
 
 	sc->as_allocated -= mem->am_size;
 	TAILQ_REMOVE(&sc->as_memory, mem, am_link);
 	vm_object_deallocate(mem->am_obj);
 	free(mem, M_AGP);
 	return 0;
 }
 
 int
 agp_generic_bind_memory(device_t dev, struct agp_memory *mem,
 			vm_offset_t offset)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 	vm_offset_t i, j, k;
 	vm_page_t m;
 	int error;
 
 	/* Do some sanity checks first. */
 	if ((offset & (AGP_PAGE_SIZE - 1)) != 0 ||
 	    offset + mem->am_size > AGP_GET_APERTURE(dev)) {
 		device_printf(dev, "binding memory at bad offset %#x\n",
 		    (int)offset);
 		return EINVAL;
 	}
 
 	/*
 	 * Allocate the pages early, before acquiring the lock,
 	 * because vm_page_grab() may sleep and we can't hold a mutex
 	 * while sleeping.
 	 */
 	VM_OBJECT_WLOCK(mem->am_obj);
 	for (i = 0; i < mem->am_size; i += PAGE_SIZE) {
 		/*
 		 * Find a page from the object and wire it
 		 * down. This page will be mapped using one or more
 		 * entries in the GATT (assuming that PAGE_SIZE >=
 		 * AGP_PAGE_SIZE). If this is the first call to bind,
 		 * the pages will be allocated and zeroed.
 		 */
 		m = vm_page_grab(mem->am_obj, OFF_TO_IDX(i),
 		    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 		AGP_DPF("found page pa=%#jx\n", (uintmax_t)VM_PAGE_TO_PHYS(m));
 	}
 	VM_OBJECT_WUNLOCK(mem->am_obj);
 
 	mtx_lock(&sc->as_lock);
 
 	if (mem->am_is_bound) {
 		device_printf(dev, "memory already bound\n");
 		error = EINVAL;
 		VM_OBJECT_WLOCK(mem->am_obj);
 		i = 0;
 		goto bad;
 	}
 	
 	/*
 	 * Bind the individual pages and flush the chipset's
 	 * TLB.
 	 */
 	VM_OBJECT_WLOCK(mem->am_obj);
 	for (i = 0; i < mem->am_size; i += PAGE_SIZE) {
 		m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(i));
 
 		/*
 		 * Install entries in the GATT, making sure that if
 		 * AGP_PAGE_SIZE < PAGE_SIZE and mem->am_size is not
 		 * aligned to PAGE_SIZE, we don't modify too many GATT 
 		 * entries.
 		 */
 		for (j = 0; j < PAGE_SIZE && i + j < mem->am_size;
 		     j += AGP_PAGE_SIZE) {
 			vm_offset_t pa = VM_PAGE_TO_PHYS(m) + j;
 			AGP_DPF("binding offset %#jx to pa %#jx\n",
 				(uintmax_t)offset + i + j, (uintmax_t)pa);
 			error = AGP_BIND_PAGE(dev, offset + i + j, pa);
 			if (error) {
 				/*
 				 * Bail out. Reverse all the mappings
 				 * and unwire the pages.
 				 */
 				for (k = 0; k < i + j; k += AGP_PAGE_SIZE)
 					AGP_UNBIND_PAGE(dev, offset + k);
 				goto bad;
 			}
 		}
 		vm_page_xunbusy(m);
 	}
 	VM_OBJECT_WUNLOCK(mem->am_obj);
 
 	/*
 	 * Make sure the chipset gets the new mappings.
 	 */
 	AGP_FLUSH_TLB(dev);
 
 	mem->am_offset = offset;
 	mem->am_is_bound = 1;
 
 	mtx_unlock(&sc->as_lock);
 
 	return 0;
 bad:
 	mtx_unlock(&sc->as_lock);
 	VM_OBJECT_ASSERT_WLOCKED(mem->am_obj);
 	for (k = 0; k < mem->am_size; k += PAGE_SIZE) {
 		m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k));
 		if (k >= i)
 			vm_page_xunbusy(m);
 		vm_page_lock(m);
 		vm_page_unwire(m, PQ_INACTIVE);
 		vm_page_unlock(m);
 	}
 	VM_OBJECT_WUNLOCK(mem->am_obj);
 
 	return error;
 }
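
The nested loops in agp_generic_bind_memory() emit one GATT entry per AGP_PAGE_SIZE chunk, so a single system page can consume several GATT entries when PAGE_SIZE > AGP_PAGE_SIZE, and a partially covered final page only binds the chunks inside am_size. A standalone count of the entries generated (not from this revision), with assumed page sizes:

#include <stdio.h>

#define	PAGE_SIZE	8192UL	/* hypothetical large system page */
#define	AGP_PAGE_SIZE	4096UL
#define	MEM_SIZE	20480UL	/* 2.5 system pages worth of AGP memory */

int
main(void)
{
	unsigned long i, j, entries = 0;

	for (i = 0; i < MEM_SIZE; i += PAGE_SIZE)
		for (j = 0; j < PAGE_SIZE && i + j < MEM_SIZE;
		    j += AGP_PAGE_SIZE)
			entries++;
	printf("%lu bytes bound as %lu GATT entries\n", MEM_SIZE, entries);
	/* 20480 / 4096 = 5 entries, although only 3 system pages are touched */
	return (0);
}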
 
 int
 agp_generic_unbind_memory(device_t dev, struct agp_memory *mem)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 	vm_page_t m;
 	int i;
 
 	mtx_lock(&sc->as_lock);
 
 	if (!mem->am_is_bound) {
 		device_printf(dev, "memory is not bound\n");
 		mtx_unlock(&sc->as_lock);
 		return EINVAL;
 	}
 
 
 	/*
 	 * Unbind the individual pages and flush the chipset's
 	 * TLB. Unwire the pages so they can be swapped.
 	 */
 	for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE)
 		AGP_UNBIND_PAGE(dev, mem->am_offset + i);
 
 	AGP_FLUSH_TLB(dev);
 
 	VM_OBJECT_WLOCK(mem->am_obj);
 	for (i = 0; i < mem->am_size; i += PAGE_SIZE) {
 		m = vm_page_lookup(mem->am_obj, atop(i));
 		vm_page_lock(m);
 		vm_page_unwire(m, PQ_INACTIVE);
 		vm_page_unlock(m);
 	}
 	VM_OBJECT_WUNLOCK(mem->am_obj);
 
 	mem->am_offset = 0;
 	mem->am_is_bound = 0;
 
 	mtx_unlock(&sc->as_lock);
 
 	return 0;
 }
 
 /* Helper functions for implementing user/kernel api */
 
 static int
 agp_acquire_helper(device_t dev, enum agp_acquire_state state)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 
 	if (sc->as_state != AGP_ACQUIRE_FREE)
 		return EBUSY;
 	sc->as_state = state;
 
 	return 0;
 }
 
 static int
 agp_release_helper(device_t dev, enum agp_acquire_state state)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 
 	if (sc->as_state == AGP_ACQUIRE_FREE)
 		return 0;
 
 	if (sc->as_state != state)
 		return EBUSY;
 
 	sc->as_state = AGP_ACQUIRE_FREE;
 	return 0;
 }
 
 static struct agp_memory *
 agp_find_memory(device_t dev, int id)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 	struct agp_memory *mem;
 
 	AGP_DPF("searching for memory block %d\n", id);
 	TAILQ_FOREACH(mem, &sc->as_memory, am_link) {
 		AGP_DPF("considering memory block %d\n", mem->am_id);
 		if (mem->am_id == id)
 			return mem;
 	}
 	return 0;
 }
 
 /* Implementation of the userland ioctl api */
 
 static int
 agp_info_user(device_t dev, agp_info *info)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 
 	bzero(info, sizeof *info);
 	info->bridge_id = pci_get_devid(dev);
 	info->agp_mode = 
 	    pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4);
 	if (sc->as_aperture)
 		info->aper_base = rman_get_start(sc->as_aperture);
 	else
 		info->aper_base = 0;
 	info->aper_size = AGP_GET_APERTURE(dev) >> 20;
 	info->pg_total = info->pg_system = sc->as_maxmem >> AGP_PAGE_SHIFT;
 	info->pg_used = sc->as_allocated >> AGP_PAGE_SHIFT;
 
 	return 0;
 }
 
 static int
 agp_setup_user(device_t dev, agp_setup *setup)
 {
 	return AGP_ENABLE(dev, setup->agp_mode);
 }
 
 static int
 agp_allocate_user(device_t dev, agp_allocate *alloc)
 {
 	struct agp_memory *mem;
 
 	mem = AGP_ALLOC_MEMORY(dev,
 			       alloc->type,
 			       alloc->pg_count << AGP_PAGE_SHIFT);
 	if (mem) {
 		alloc->key = mem->am_id;
 		alloc->physical = mem->am_physical;
 		return 0;
 	} else {
 		return ENOMEM;
 	}
 }
 
 static int
 agp_deallocate_user(device_t dev, int id)
 {
 	struct agp_memory *mem = agp_find_memory(dev, id);
 
 	if (mem) {
 		AGP_FREE_MEMORY(dev, mem);
 		return 0;
 	} else {
 		return ENOENT;
 	}
 }
 
 static int
 agp_bind_user(device_t dev, agp_bind *bind)
 {
 	struct agp_memory *mem = agp_find_memory(dev, bind->key);
 
 	if (!mem)
 		return ENOENT;
 
 	return AGP_BIND_MEMORY(dev, mem, bind->pg_start << AGP_PAGE_SHIFT);
 }
 
 static int
 agp_unbind_user(device_t dev, agp_unbind *unbind)
 {
 	struct agp_memory *mem = agp_find_memory(dev, unbind->key);
 
 	if (!mem)
 		return ENOENT;
 
 	return AGP_UNBIND_MEMORY(dev, mem);
 }
 
 static int
 agp_chipset_flush(device_t dev)
 {
 
 	return (AGP_CHIPSET_FLUSH(dev));
 }
 
 static int
 agp_open(struct cdev *kdev, int oflags, int devtype, struct thread *td)
 {
 	device_t dev = kdev->si_drv1;
 	struct agp_softc *sc = device_get_softc(dev);
 
 	if (!sc->as_isopen) {
 		sc->as_isopen = 1;
 		device_busy(dev);
 	}
 
 	return 0;
 }
 
 static int
 agp_close(struct cdev *kdev, int fflag, int devtype, struct thread *td)
 {
 	device_t dev = kdev->si_drv1;
 	struct agp_softc *sc = device_get_softc(dev);
 	struct agp_memory *mem;
 
 	/*
 	 * Clear the GATT and force release on last close
 	 */
 	while ((mem = TAILQ_FIRST(&sc->as_memory)) != NULL) {
 		if (mem->am_is_bound)
 			AGP_UNBIND_MEMORY(dev, mem);
 		AGP_FREE_MEMORY(dev, mem);
 	}
 	if (sc->as_state == AGP_ACQUIRE_USER)
 		agp_release_helper(dev, AGP_ACQUIRE_USER);
 	sc->as_isopen = 0;
 	device_unbusy(dev);
 
 	return 0;
 }
 
 static int
 agp_ioctl(struct cdev *kdev, u_long cmd, caddr_t data, int fflag, struct thread *td)
 {
 	device_t dev = kdev->si_drv1;
 
 	switch (cmd) {
 	case AGPIOC_INFO:
 		return agp_info_user(dev, (agp_info *) data);
 
 	case AGPIOC_ACQUIRE:
 		return agp_acquire_helper(dev, AGP_ACQUIRE_USER);
 
 	case AGPIOC_RELEASE:
 		return agp_release_helper(dev, AGP_ACQUIRE_USER);
 
 	case AGPIOC_SETUP:
 		return agp_setup_user(dev, (agp_setup *)data);
 
 	case AGPIOC_ALLOCATE:
 		return agp_allocate_user(dev, (agp_allocate *)data);
 
 	case AGPIOC_DEALLOCATE:
 		return agp_deallocate_user(dev, *(int *) data);
 
 	case AGPIOC_BIND:
 		return agp_bind_user(dev, (agp_bind *)data);
 
 	case AGPIOC_UNBIND:
 		return agp_unbind_user(dev, (agp_unbind *)data);
 
 	case AGPIOC_CHIPSET_FLUSH:
 		return agp_chipset_flush(dev);
 	}
 
 	return EINVAL;
 }
 
 static int
 agp_mmap(struct cdev *kdev, vm_ooffset_t offset, vm_paddr_t *paddr,
     int prot, vm_memattr_t *memattr)
 {
 	device_t dev = kdev->si_drv1;
 	struct agp_softc *sc = device_get_softc(dev);
 
 	if (offset > AGP_GET_APERTURE(dev))
 		return -1;
 	if (sc->as_aperture == NULL)
 		return -1;
 	*paddr = rman_get_start(sc->as_aperture) + offset;
 	return 0;
 }
 
 /* Implementation of the kernel api */
 
 device_t
 agp_find_device()
 {
 	device_t *children, child;
 	int i, count;
 
 	if (!agp_devclass)
 		return NULL;
 	if (devclass_get_devices(agp_devclass, &children, &count) != 0)
 		return NULL;
 	child = NULL;
 	for (i = 0; i < count; i++) {
 		if (device_is_attached(children[i])) {
 			child = children[i];
 			break;
 		}
 	}
 	free(children, M_TEMP);
 	return child;
 }
 
 enum agp_acquire_state
 agp_state(device_t dev)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 	return sc->as_state;
 }
 
 void
 agp_get_info(device_t dev, struct agp_info *info)
 {
 	struct agp_softc *sc = device_get_softc(dev);
 
 	info->ai_mode =
 		pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4);
 	if (sc->as_aperture != NULL)
 		info->ai_aperture_base = rman_get_start(sc->as_aperture);
 	else
 		info->ai_aperture_base = 0;
 	info->ai_aperture_size = AGP_GET_APERTURE(dev);
 	info->ai_memory_allowed = sc->as_maxmem;
 	info->ai_memory_used = sc->as_allocated;
 }
 
 int
 agp_acquire(device_t dev)
 {
 	return agp_acquire_helper(dev, AGP_ACQUIRE_KERNEL);
 }
 
 int
 agp_release(device_t dev)
 {
 	return agp_release_helper(dev, AGP_ACQUIRE_KERNEL);
 }
 
 int
 agp_enable(device_t dev, u_int32_t mode)
 {
 	return AGP_ENABLE(dev, mode);
 }
 
 void *agp_alloc_memory(device_t dev, int type, vm_size_t bytes)
 {
 	return  (void *) AGP_ALLOC_MEMORY(dev, type, bytes);
 }
 
 void agp_free_memory(device_t dev, void *handle)
 {
 	struct agp_memory *mem = (struct agp_memory *) handle;
 	AGP_FREE_MEMORY(dev, mem);
 }
 
 int agp_bind_memory(device_t dev, void *handle, vm_offset_t offset)
 {
 	struct agp_memory *mem = (struct agp_memory *) handle;
 	return AGP_BIND_MEMORY(dev, mem, offset);
 }
 
 int agp_unbind_memory(device_t dev, void *handle)
 {
 	struct agp_memory *mem = (struct agp_memory *) handle;
 	return AGP_UNBIND_MEMORY(dev, mem);
 }
 
 void agp_memory_info(device_t dev, void *handle, struct
 		     agp_memory_info *mi)
 {
 	struct agp_memory *mem = (struct agp_memory *) handle;
 
 	mi->ami_size = mem->am_size;
 	mi->ami_physical = mem->am_physical;
 	mi->ami_offset = mem->am_offset;
 	mi->ami_is_bound = mem->am_is_bound;
 }
 
 int
 agp_bind_pages(device_t dev, vm_page_t *pages, vm_size_t size,
     vm_offset_t offset)
 {
 	struct agp_softc *sc;
 	vm_offset_t i, j, k, pa;
 	vm_page_t m;
 	int error;
 
 	if ((size & (AGP_PAGE_SIZE - 1)) != 0 ||
 	    (offset & (AGP_PAGE_SIZE - 1)) != 0)
 		return (EINVAL);
 
 	sc = device_get_softc(dev);
 
 	mtx_lock(&sc->as_lock);
 	for (i = 0; i < size; i += PAGE_SIZE) {
 		m = pages[OFF_TO_IDX(i)];
 		KASSERT(m->wire_count > 0,
 		    ("agp_bind_pages: page %p hasn't been wired", m));
 
 		/*
 		 * Install entries in the GATT, making sure that if
 		 * AGP_PAGE_SIZE < PAGE_SIZE and size is not
 		 * aligned to PAGE_SIZE, we don't modify too many GATT 
 		 * entries.
 		 */
 		for (j = 0; j < PAGE_SIZE && i + j < size; j += AGP_PAGE_SIZE) {
 			pa = VM_PAGE_TO_PHYS(m) + j;
 			AGP_DPF("binding offset %#jx to pa %#jx\n",
 				(uintmax_t)offset + i + j, (uintmax_t)pa);
 			error = AGP_BIND_PAGE(dev, offset + i + j, pa);
 			if (error) {
 				/*
 				 * Bail out. Reverse all the mappings.
 				 */
 				for (k = 0; k < i + j; k += AGP_PAGE_SIZE)
 					AGP_UNBIND_PAGE(dev, offset + k);
 
 				mtx_unlock(&sc->as_lock);
 				return (error);
 			}
 		}
 	}
 
 	AGP_FLUSH_TLB(dev);
 
 	mtx_unlock(&sc->as_lock);
 	return (0);
 }
 
 int
 agp_unbind_pages(device_t dev, vm_size_t size, vm_offset_t offset)
 {
 	struct agp_softc *sc;
 	vm_offset_t i;
 
 	if ((size & (AGP_PAGE_SIZE - 1)) != 0 ||
 	    (offset & (AGP_PAGE_SIZE - 1)) != 0)
 		return (EINVAL);
 
 	sc = device_get_softc(dev);
 
 	mtx_lock(&sc->as_lock);
 	for (i = 0; i < size; i += AGP_PAGE_SIZE)
 		AGP_UNBIND_PAGE(dev, offset + i);
 
 	AGP_FLUSH_TLB(dev);
 
 	mtx_unlock(&sc->as_lock);
 	return (0);
 }
Index: head/sys/dev/agp/agp_amd.c
===================================================================
--- head/sys/dev/agp/agp_amd.c	(revision 338317)
+++ head/sys/dev/agp/agp_amd.c	(revision 338318)
@@ -1,412 +1,412 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2000 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 
 #include <dev/agp/agppriv.h>
 #include <dev/agp/agpreg.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/pmap.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/rman.h>
 
 MALLOC_DECLARE(M_AGP);
 
 #define READ2(off)	bus_space_read_2(sc->bst, sc->bsh, off)
 #define READ4(off)	bus_space_read_4(sc->bst, sc->bsh, off)
 #define WRITE2(off,v)	bus_space_write_2(sc->bst, sc->bsh, off, v)
 #define WRITE4(off,v)	bus_space_write_4(sc->bst, sc->bsh, off, v)
 
 struct agp_amd_gatt {
 	u_int32_t	ag_entries;
 	u_int32_t      *ag_virtual;	/* virtual address of gatt */
 	vm_offset_t     ag_physical;
 	u_int32_t      *ag_vdir;	/* virtual address of page dir */
 	vm_offset_t	ag_pdir;	/* physical address of page dir */
 };
 
 struct agp_amd_softc {
 	struct agp_softc	agp;
 	struct resource	       *regs;	/* memory mapped control registers */
 	bus_space_tag_t		bst;	/* bus_space tag */
 	bus_space_handle_t	bsh;	/* bus_space handle */
 	u_int32_t		initial_aperture; /* aperture size at startup */
 	struct agp_amd_gatt    *gatt;
 };
 
 static struct agp_amd_gatt *
 agp_amd_alloc_gatt(device_t dev)
 {
 	u_int32_t apsize = AGP_GET_APERTURE(dev);
 	u_int32_t entries = apsize >> AGP_PAGE_SHIFT;
 	struct agp_amd_gatt *gatt;
 	int i, npages, pdir_offset;
 
 	if (bootverbose)
 		device_printf(dev,
 			      "allocating GATT for aperture of size %dM\n",
 			      apsize / (1024*1024));
 
 	gatt = malloc(sizeof(struct agp_amd_gatt), M_AGP, M_NOWAIT);
 	if (!gatt)
 		return 0;
 
 	/*
 	 * The AMD751 uses a page directory to map a non-contiguous
 	 * gatt so we don't need to use kmem_alloc_contig.
 	 * Allocate individual GATT pages and map them into the page
 	 * directory.
 	 */
 	gatt->ag_entries = entries;
 	gatt->ag_virtual = (void *)kmem_alloc_attr(entries * sizeof(u_int32_t),
 	    M_NOWAIT | M_ZERO, 0, ~0, VM_MEMATTR_WRITE_COMBINING);
 	if (!gatt->ag_virtual) {
 		if (bootverbose)
 			device_printf(dev, "allocation failed\n");
 		free(gatt, M_AGP);
 		return 0;
 	}
 
 	/*
 	 * Allocate the page directory.
 	 */
 	gatt->ag_vdir = (void *)kmem_alloc_attr(AGP_PAGE_SIZE, M_NOWAIT |
 	    M_ZERO, 0, ~0, VM_MEMATTR_WRITE_COMBINING);
 	if (!gatt->ag_vdir) {
 		if (bootverbose)
 			device_printf(dev,
 				      "failed to allocate page directory\n");
-		kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual,
-		    entries * sizeof(u_int32_t));
+		kmem_free((vm_offset_t)gatt->ag_virtual, entries *
+		    sizeof(u_int32_t));
 		free(gatt, M_AGP);
 		return 0;
 	}
 
 	gatt->ag_pdir = vtophys((vm_offset_t) gatt->ag_vdir);
 	if (bootverbose)
 		device_printf(dev, "gatt -> ag_pdir %#lx\n",
 		    (u_long)gatt->ag_pdir);
 	/*
 	 * Allocate the gatt pages
 	 */
 	gatt->ag_entries = entries;
 	if (bootverbose)
 		device_printf(dev, "allocating GATT for %d AGP page entries\n", 
 			gatt->ag_entries);
 
 	gatt->ag_physical = vtophys((vm_offset_t) gatt->ag_virtual);
 
 	/*
 	 * Map the pages of the GATT into the page directory.
 	 *
 	 * The GATT page addresses are mapped into the directory offset by
 	 * an amount dependent on the base address of the aperture. This
 	 * is an offset into the page directory, not an offset added to
 	 * the addresses of the gatt pages.
 	 */
 
 	pdir_offset = pci_read_config(dev, AGP_AMD751_APBASE, 4) >> 22;
 
 	npages = ((entries * sizeof(u_int32_t) + AGP_PAGE_SIZE - 1)
 		  >> AGP_PAGE_SHIFT);
 
 	for (i = 0; i < npages; i++) {
 		vm_offset_t va;
 		vm_offset_t pa;
 
 		va = ((vm_offset_t) gatt->ag_virtual) + i * AGP_PAGE_SIZE;
 		pa = vtophys(va);
 		gatt->ag_vdir[i + pdir_offset] = pa | 1;
 	}
 
 	return gatt;
 }
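 /*
  * Editorial sketch, not part of the driver or of this commit: why
  * agp_amd_alloc_gatt() above shifts AGP_AMD751_APBASE right by 22 and
  * how many GATT pages it maps.  Each 4KB GATT page holds 1024 four-byte
  * entries and so covers 4MB (1 << 22) of aperture, which makes the
  * aperture base in 4MB units the first page-directory slot to fill.
  * The MODEL_* constants and helper names are illustrative assumptions.
  */
 #include <stdint.h>
 
 #define MODEL_AGP_PAGE_SIZE	4096u
 #define MODEL_AGP_PAGE_SHIFT	12
 
 static unsigned
 model_pdir_offset(uint32_t apbase)
 {
 	return (apbase >> 22);	/* first page-directory slot for aperture */
 }
 
 static unsigned
 model_gatt_pages(uint32_t aperture)
 {
 	uint32_t entries = aperture >> MODEL_AGP_PAGE_SHIFT;
 
 	/* Round the 4-byte-per-entry table up to whole GATT pages. */
 	return ((entries * sizeof(uint32_t) + MODEL_AGP_PAGE_SIZE - 1) /
 	    MODEL_AGP_PAGE_SIZE);
 }
 
 /* A 64MB aperture needs 16384 entries, i.e. 16 GATT pages. */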
 
 static void
 agp_amd_free_gatt(struct agp_amd_gatt *gatt)
 {
-	kmem_free(kernel_arena, (vm_offset_t)gatt->ag_vdir, AGP_PAGE_SIZE);
-	kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual,
-	    gatt->ag_entries * sizeof(u_int32_t));
+	kmem_free((vm_offset_t)gatt->ag_vdir, AGP_PAGE_SIZE);
+	kmem_free((vm_offset_t)gatt->ag_virtual, gatt->ag_entries *
+	    sizeof(u_int32_t));
 	free(gatt, M_AGP);
 }
 
 static const char*
 agp_amd_match(device_t dev)
 {
 	if (pci_get_class(dev) != PCIC_BRIDGE
 	    || pci_get_subclass(dev) != PCIS_BRIDGE_HOST)
 		return NULL;
 
 	if (agp_find_caps(dev) == 0)
 		return NULL;
 
 	switch (pci_get_devid(dev)) {
 	case 0x70061022:
 		return ("AMD 751 host to AGP bridge");
 	case 0x700e1022:
 		return ("AMD 761 host to AGP bridge");
 	case 0x700c1022:
 		return ("AMD 762 host to AGP bridge");
 	}
 
 	return NULL;
 }
 
 static int
 agp_amd_probe(device_t dev)
 {
 	const char *desc;
 
 	if (resource_disabled("agp", device_get_unit(dev)))
 		return (ENXIO);
 	desc = agp_amd_match(dev);
 	if (desc) {
 		device_set_desc(dev, desc);
 		return BUS_PROBE_DEFAULT;
 	}
 
 	return ENXIO;
 }
 
 static int
 agp_amd_attach(device_t dev)
 {
 	struct agp_amd_softc *sc = device_get_softc(dev);
 	struct agp_amd_gatt *gatt;
 	int error, rid;
 
 	error = agp_generic_attach(dev);
 	if (error)
 		return error;
 
 	rid = AGP_AMD751_REGISTERS;
 	sc->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 					  RF_ACTIVE);
 	if (!sc->regs) {
 		agp_generic_detach(dev);
 		return ENOMEM;
 	}
 
 	sc->bst = rman_get_bustag(sc->regs);
 	sc->bsh = rman_get_bushandle(sc->regs);
 
 	sc->initial_aperture = AGP_GET_APERTURE(dev);
 
 	for (;;) {
 		gatt = agp_amd_alloc_gatt(dev);
 		if (gatt)
 			break;
 
 		/*
 		 * Probably an allocation failure.  Try reducing the
 		 * aperture so that the GATT size shrinks.
 		 */
 		if (AGP_SET_APERTURE(dev, AGP_GET_APERTURE(dev) / 2))
 			return ENOMEM;
 	}
 	sc->gatt = gatt;
 
 	/* Install the gatt. */
 	WRITE4(AGP_AMD751_ATTBASE, gatt->ag_pdir);
 	
 	/* Enable synchronisation between host and agp. */
 	pci_write_config(dev,
 			 AGP_AMD751_MODECTRL,
 			 AGP_AMD751_MODECTRL_SYNEN, 1);
 
 	/* Set indexing mode for two-level and enable page dir cache */
 	pci_write_config(dev,
 			 AGP_AMD751_MODECTRL2,
 			 AGP_AMD751_MODECTRL2_GPDCE, 1);
 
 	/* Enable the TLB and flush */
 	WRITE2(AGP_AMD751_STATUS,
 	       READ2(AGP_AMD751_STATUS) | AGP_AMD751_STATUS_GCE);
 	AGP_FLUSH_TLB(dev);
 
 	return 0;
 }
 
 static int
 agp_amd_detach(device_t dev)
 {
 	struct agp_amd_softc *sc = device_get_softc(dev);
 
 	agp_free_cdev(dev);
 
 	/* Disable the TLB. */
 	WRITE2(AGP_AMD751_STATUS,
 	       READ2(AGP_AMD751_STATUS) & ~AGP_AMD751_STATUS_GCE);
 	
 	/* Disable host-agp sync */
 	pci_write_config(dev, AGP_AMD751_MODECTRL, 0x00, 1);
 	
 	/* Clear the GATT base */
 	WRITE4(AGP_AMD751_ATTBASE, 0);
 
 	/* Put the aperture back the way it started. */
 	AGP_SET_APERTURE(dev, sc->initial_aperture);
 
 	agp_amd_free_gatt(sc->gatt);
 	agp_free_res(dev);
 
 	bus_release_resource(dev, SYS_RES_MEMORY,
 			     AGP_AMD751_REGISTERS, sc->regs);
 
 	return 0;
 }
 
 static u_int32_t
 agp_amd_get_aperture(device_t dev)
 {
 	int vas;
 
 	/*
 	 * The aperture size is equal to 32M<<vas.
 	 */
 	vas = (pci_read_config(dev, AGP_AMD751_APCTRL, 1) & 0x06) >> 1;
 	return (32*1024*1024) << vas;
 }
 
 static int
 agp_amd_set_aperture(device_t dev, u_int32_t aperture)
 {
 	int vas;
 
 	/*
 	 * Check for a power of two and make sure it's within the
 	 * programmable range.
 	 */
 	if (aperture & (aperture - 1)
 	    || aperture < 32*1024*1024
 	    || aperture > 2U*1024*1024*1024)
 		return EINVAL;
 
 	vas = ffs(aperture / (32*1024*1024)) - 1;
 
 	/*
 	 * While the size register is bits 1-3 of APCTRL, bit 0 must be
 	 * set for the size value to be 'valid'.
 	 */
 	pci_write_config(dev, AGP_AMD751_APCTRL,
 			 (((pci_read_config(dev, AGP_AMD751_APCTRL, 1) & ~0x06)
 			  | ((vas << 1) | 1))), 1);
 
 	return 0;
 }
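 /*
  * Editorial sketch, not part of the driver or of this commit: the AMD751
  * encodes the aperture size as 32MB << vas in the APCTRL size field, so
  * the two conversions in agp_amd_get_aperture() and agp_amd_set_aperture()
  * above are inverses of each other.  ffs() is the standard find-first-set
  * from <strings.h>; the model_* names are illustrative assumptions.  Note
  * the parenthesized divisor: aperture / 32*1024*1024 would compute
  * (aperture / 32) * 1024 * 1024 instead.
  */
 #include <stdint.h>
 #include <strings.h>
 
 static uint32_t
 model_vas_to_aperture(int vas)
 {
 	return ((uint32_t)(32 * 1024 * 1024) << vas);
 }
 
 static int
 model_aperture_to_vas(uint32_t aperture)
 {
 	return (ffs(aperture / (32 * 1024 * 1024)) - 1);
 }
 
 /* 256MB <-> vas 3:  32M << 3 == 256M and ffs(256M / 32M) - 1 == 3. */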
 
 static int
 agp_amd_bind_page(device_t dev, vm_offset_t offset, vm_offset_t physical)
 {
 	struct agp_amd_softc *sc = device_get_softc(dev);
 
 	if (offset >= (sc->gatt->ag_entries << AGP_PAGE_SHIFT))
 		return EINVAL;
 
 	sc->gatt->ag_virtual[offset >> AGP_PAGE_SHIFT] = physical | 1;
 	return 0;
 }
 
 static int
 agp_amd_unbind_page(device_t dev, vm_offset_t offset)
 {
 	struct agp_amd_softc *sc = device_get_softc(dev);
 
 	if (offset >= (sc->gatt->ag_entries << AGP_PAGE_SHIFT))
 		return EINVAL;
 
 	sc->gatt->ag_virtual[offset >> AGP_PAGE_SHIFT] = 0;
 	return 0;
 }
 
 static void
 agp_amd_flush_tlb(device_t dev)
 {
 	struct agp_amd_softc *sc = device_get_softc(dev);
 
 	/* Set the cache invalidate bit and wait for the chipset to clear */
 	WRITE4(AGP_AMD751_TLBCTRL, 1);
 	do {
 		DELAY(1);
 	} while (READ4(AGP_AMD751_TLBCTRL));
 }
 
 static device_method_t agp_amd_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		agp_amd_probe),
 	DEVMETHOD(device_attach,	agp_amd_attach),
 	DEVMETHOD(device_detach,	agp_amd_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	DEVMETHOD(device_suspend,	bus_generic_suspend),
 	DEVMETHOD(device_resume,	bus_generic_resume),
 
 	/* AGP interface */
 	DEVMETHOD(agp_get_aperture,	agp_amd_get_aperture),
 	DEVMETHOD(agp_set_aperture,	agp_amd_set_aperture),
 	DEVMETHOD(agp_bind_page,	agp_amd_bind_page),
 	DEVMETHOD(agp_unbind_page,	agp_amd_unbind_page),
 	DEVMETHOD(agp_flush_tlb,	agp_amd_flush_tlb),
 	DEVMETHOD(agp_enable,		agp_generic_enable),
 	DEVMETHOD(agp_alloc_memory,	agp_generic_alloc_memory),
 	DEVMETHOD(agp_free_memory,	agp_generic_free_memory),
 	DEVMETHOD(agp_bind_memory,	agp_generic_bind_memory),
 	DEVMETHOD(agp_unbind_memory,	agp_generic_unbind_memory),
 
 	{ 0, 0 }
 };
 
 static driver_t agp_amd_driver = {
 	"agp",
 	agp_amd_methods,
 	sizeof(struct agp_amd_softc),
 };
 
 static devclass_t agp_devclass;
 
 DRIVER_MODULE(agp_amd, hostb, agp_amd_driver, agp_devclass, 0, 0);
 MODULE_DEPEND(agp_amd, agp, 1, 1, 1);
 MODULE_DEPEND(agp_amd, pci, 1, 1, 1);
Index: head/sys/dev/agp/agp_ati.c
===================================================================
--- head/sys/dev/agp/agp_ati.c	(revision 338317)
+++ head/sys/dev/agp/agp_ati.c	(revision 338318)
@@ -1,386 +1,386 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2005 Eric Anholt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Based on reading the Linux 2.6.8.1 driver by Dave Jones.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 
 #include <dev/agp/agppriv.h>
 #include <dev/agp/agpreg.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/pmap.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/rman.h>
 
 MALLOC_DECLARE(M_AGP);
 
 #define READ4(off)	bus_space_read_4(sc->bst, sc->bsh, off)
 #define WRITE4(off,v)	bus_space_write_4(sc->bst, sc->bsh, off, v)
 
 struct agp_ati_softc {
 	struct agp_softc agp;
 	struct resource *regs;	/* memory mapped control registers */
 	bus_space_tag_t bst;	/* bus_space tag */
 	bus_space_handle_t bsh;	/* bus_space handle */
 	u_int32_t	initial_aperture; /* aperture size at startup */
 	char		is_rs300;
 
 	/* The GATT */
 	u_int32_t	ag_entries;
 	u_int32_t      *ag_virtual;	/* virtual address of gatt */
 	u_int32_t      *ag_vdir;	/* virtual address of page dir */
 	vm_offset_t	ag_pdir;	/* physical address of page dir */
 };
 
 
 static const char*
 agp_ati_match(device_t dev)
 {
 	if (pci_get_class(dev) != PCIC_BRIDGE ||
 	    pci_get_subclass(dev) != PCIS_BRIDGE_HOST)
 		return NULL;
 
 	if (agp_find_caps(dev) == 0)
 		return NULL;
 
 	switch (pci_get_devid(dev)) {
 	case 0xcab01002:
 		return ("ATI RS100 AGP bridge");
 	case 0xcab21002:
 		return ("ATI RS200 AGP bridge");
 	case 0xcbb21002:
 		return ("ATI RS200M AGP bridge");
 	case 0xcab31002:
 		return ("ATI RS250 AGP bridge");
 	case 0x58301002:
 		return ("ATI RS300_100 AGP bridge");
 	case 0x58311002:
 		return ("ATI RS300_133 AGP bridge");
 	case 0x58321002:
 		return ("ATI RS300_166 AGP bridge");
 	case 0x58331002:
 		return ("ATI RS300_200 AGP bridge");
 	}
 
 	return NULL;
 }
 
 static int
 agp_ati_probe(device_t dev)
 {
 	const char *desc;
 
 	desc = agp_ati_match(dev);
 	if (desc) {
 		device_set_desc(dev, desc);
 		return 0;
 	}
 
 	return ENXIO;
 }
 
 static int
 agp_ati_alloc_gatt(device_t dev)
 {
 	struct agp_ati_softc *sc = device_get_softc(dev);
 	u_int32_t apsize = AGP_GET_APERTURE(dev);
 	u_int32_t entries = apsize >> AGP_PAGE_SHIFT;
 	u_int32_t apbase_offset;
 	int i;
 
 	/* Alloc the GATT -- pointers to pages of AGP memory */
 	sc->ag_entries = entries;
 	sc->ag_virtual = (void *)kmem_alloc_attr(entries * sizeof(u_int32_t),
 	    M_NOWAIT | M_ZERO, 0, ~0, VM_MEMATTR_WRITE_COMBINING);
 	if (sc->ag_virtual == NULL) {
 		if (bootverbose)
 			device_printf(dev, "GATT allocation failed\n");
 		return ENOMEM;
 	}
 
 	/* Alloc the page directory -- pointers to each page of the GATT */
 	sc->ag_vdir = (void *)kmem_alloc_attr(AGP_PAGE_SIZE, M_NOWAIT | M_ZERO,
 	    0, ~0, VM_MEMATTR_WRITE_COMBINING);
 	if (sc->ag_vdir == NULL) {
 		if (bootverbose)
 			device_printf(dev, "pagedir allocation failed\n");
-		kmem_free(kernel_arena, (vm_offset_t)sc->ag_virtual,
-		    entries * sizeof(u_int32_t));
+		kmem_free((vm_offset_t)sc->ag_virtual, entries *
+		    sizeof(u_int32_t));
 		return ENOMEM;
 	}
 	sc->ag_pdir = vtophys((vm_offset_t)sc->ag_vdir);
 
 	apbase_offset = pci_read_config(dev, AGP_APBASE, 4) >> 22;
 	/* Fill in the pagedir's pointers to GATT pages */
 	for (i = 0; i < sc->ag_entries / 1024; i++) {
 		vm_offset_t va;
 		vm_offset_t pa;
 
 		va = ((vm_offset_t)sc->ag_virtual) + i * AGP_PAGE_SIZE;
 		pa = vtophys(va);
 		sc->ag_vdir[apbase_offset + i] = pa | 1;
 	}
 
 	return 0;
 }
 
 
 static int
 agp_ati_attach(device_t dev)
 {
 	struct agp_ati_softc *sc = device_get_softc(dev);
 	int error, rid;
 	u_int32_t temp;
 	u_int32_t apsize_reg, agpmode_reg;
 
 	error = agp_generic_attach(dev);
 	if (error)
 		return error;
 
 	switch (pci_get_devid(dev)) {
 	case 0xcab01002: /* ATI RS100 AGP bridge */
 	case 0xcab21002: /* ATI RS200 AGP bridge */
 	case 0xcbb21002: /* ATI RS200M AGP bridge */
 	case 0xcab31002: /* ATI RS250 AGP bridge */
 		sc->is_rs300 = 0;
 		apsize_reg = ATI_RS100_APSIZE;
 		agpmode_reg = ATI_RS100_IG_AGPMODE;
 		break;
 	case 0x58301002: /* ATI RS300_100 AGP bridge */
 	case 0x58311002: /* ATI RS300_133 AGP bridge */
 	case 0x58321002: /* ATI RS300_166 AGP bridge */
 	case 0x58331002: /* ATI RS300_200 AGP bridge */
 		sc->is_rs300 = 1;
 		apsize_reg = ATI_RS300_APSIZE;
 		agpmode_reg = ATI_RS300_IG_AGPMODE;
 		break;
 	default:
 		/* Unknown chipset */
 		return EINVAL;
 	}
 
 	rid = ATI_GART_MMADDR;
 	sc->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
 	if (!sc->regs) {
 		agp_generic_detach(dev);
 		return ENOMEM;
 	}
 
 	sc->bst = rman_get_bustag(sc->regs);
 	sc->bsh = rman_get_bushandle(sc->regs);
 
 	sc->initial_aperture = AGP_GET_APERTURE(dev);
 
 	for (;;) {
 		if (agp_ati_alloc_gatt(dev) == 0)
 			break;
 
 		/*
 		 * Probably an allocation failure.  Try reducing the
 		 * aperture so that the GATT size shrinks.
 		 */
 		if (AGP_SET_APERTURE(dev, AGP_GET_APERTURE(dev) / 2))
 			return ENOMEM;
 	}
 
 	temp = pci_read_config(dev, apsize_reg, 4);
 	pci_write_config(dev, apsize_reg, temp | 1, 4);
 
 	pci_write_config(dev, agpmode_reg, 0x20000, 4);
 
 	WRITE4(ATI_GART_FEATURE_ID, 0x00060000);
 
 	temp = pci_read_config(dev, 4, 4);	/* XXX: Magic reg# */
 	pci_write_config(dev, 4, temp | (1 << 14), 4);
 
 	WRITE4(ATI_GART_BASE, sc->ag_pdir);
 
 	AGP_FLUSH_TLB(dev);
 
 	return 0;
 }
 
 static int
 agp_ati_detach(device_t dev)
 {
 	struct agp_ati_softc *sc = device_get_softc(dev);
 	u_int32_t apsize_reg, temp;
 
 	agp_free_cdev(dev);
 
 	if (sc->is_rs300)
 		apsize_reg = ATI_RS300_APSIZE;
 	else
 		apsize_reg = ATI_RS100_APSIZE;
 
 	/* Clear the GATT base */
 	WRITE4(ATI_GART_BASE, 0);
 
 	/* Put the aperture back the way it started. */
 	AGP_SET_APERTURE(dev, sc->initial_aperture);
 
 	temp = pci_read_config(dev, apsize_reg, 4);
 	pci_write_config(dev, apsize_reg, temp & ~1, 4);
 
-	kmem_free(kernel_arena, (vm_offset_t)sc->ag_vdir, AGP_PAGE_SIZE);
-	kmem_free(kernel_arena, (vm_offset_t)sc->ag_virtual,
-	    sc->ag_entries * sizeof(u_int32_t));
+	kmem_free((vm_offset_t)sc->ag_vdir, AGP_PAGE_SIZE);
+	kmem_free((vm_offset_t)sc->ag_virtual, sc->ag_entries *
+	    sizeof(u_int32_t));
 
 	bus_release_resource(dev, SYS_RES_MEMORY, ATI_GART_MMADDR, sc->regs);
 	agp_free_res(dev);
 
 	return 0;
 }
 
 static u_int32_t
 agp_ati_get_aperture(device_t dev)
 {
 	struct agp_ati_softc *sc = device_get_softc(dev);
 	int size_value;
 
 	if (sc->is_rs300)
 		size_value = pci_read_config(dev, ATI_RS300_APSIZE, 4);
 	else
 		size_value = pci_read_config(dev, ATI_RS100_APSIZE, 4);
 
 	size_value = (size_value & 0x0000000e) >> 1;
 	size_value = (32 * 1024 * 1024) << size_value;
 
 	return size_value;
 }
 
 static int
 agp_ati_set_aperture(device_t dev, u_int32_t aperture)
 {
 	struct agp_ati_softc *sc = device_get_softc(dev);
 	int size_value;
 	u_int32_t apsize_reg;
 
 	if (sc->is_rs300)
 		apsize_reg = ATI_RS300_APSIZE;
 	else
 		apsize_reg = ATI_RS100_APSIZE;
 
 	size_value = pci_read_config(dev, apsize_reg, 4);
 
 	size_value &= ~0x0000000e;
 	size_value |= (ffs(aperture / (32 * 1024 * 1024)) - 1) << 1;
 
 	pci_write_config(dev, apsize_reg, size_value, 4);
 
 	return 0;
 }
 
 static int
 agp_ati_bind_page(device_t dev, vm_offset_t offset, vm_offset_t physical)
 {
 	struct agp_ati_softc *sc = device_get_softc(dev);
 
 	if (offset >= (sc->ag_entries << AGP_PAGE_SHIFT))
 		return EINVAL;
 
 	sc->ag_virtual[offset >> AGP_PAGE_SHIFT] = physical | 1;
 
 	return 0;
 }
 
 static int
 agp_ati_unbind_page(device_t dev, vm_offset_t offset)
 {
 	struct agp_ati_softc *sc = device_get_softc(dev);
 
 	if (offset >= (sc->ag_entries << AGP_PAGE_SHIFT))
 		return EINVAL;
 
 	sc->ag_virtual[offset >> AGP_PAGE_SHIFT] = 0;
 	return 0;
 }
 
 static void
 agp_ati_flush_tlb(device_t dev)
 {
 	struct agp_ati_softc *sc = device_get_softc(dev);
 
 	/* Set the cache invalidate bit and wait for the chipset to clear */
 	WRITE4(ATI_GART_CACHE_CNTRL, 1);
 	(void)READ4(ATI_GART_CACHE_CNTRL);
 }
 
 static device_method_t agp_ati_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		agp_ati_probe),
 	DEVMETHOD(device_attach,	agp_ati_attach),
 	DEVMETHOD(device_detach,	agp_ati_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	DEVMETHOD(device_suspend,	bus_generic_suspend),
 	DEVMETHOD(device_resume,	bus_generic_resume),
 
 	/* AGP interface */
 	DEVMETHOD(agp_get_aperture,	agp_ati_get_aperture),
 	DEVMETHOD(agp_set_aperture,	agp_ati_set_aperture),
 	DEVMETHOD(agp_bind_page,	agp_ati_bind_page),
 	DEVMETHOD(agp_unbind_page,	agp_ati_unbind_page),
 	DEVMETHOD(agp_flush_tlb,	agp_ati_flush_tlb),
 	DEVMETHOD(agp_enable,		agp_generic_enable),
 	DEVMETHOD(agp_alloc_memory,	agp_generic_alloc_memory),
 	DEVMETHOD(agp_free_memory,	agp_generic_free_memory),
 	DEVMETHOD(agp_bind_memory,	agp_generic_bind_memory),
 	DEVMETHOD(agp_unbind_memory,	agp_generic_unbind_memory),
 
 	{ 0, 0 }
 };
 
 static driver_t agp_ati_driver = {
 	"agp",
 	agp_ati_methods,
 	sizeof(struct agp_ati_softc),
 };
 
 static devclass_t agp_devclass;
 
 DRIVER_MODULE(agp_ati, hostb, agp_ati_driver, agp_devclass, 0, 0);
 MODULE_DEPEND(agp_ati, agp, 1, 1, 1);
 MODULE_DEPEND(agp_ati, pci, 1, 1, 1);
Index: head/sys/dev/agp/agp_i810.c
===================================================================
--- head/sys/dev/agp/agp_i810.c	(revision 338317)
+++ head/sys/dev/agp/agp_i810.c	(revision 338318)
@@ -1,2375 +1,2375 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2000 Doug Rabson
  * Copyright (c) 2000 Ruslan Ermilov
  * Copyright (c) 2011 The FreeBSD Foundation
  * All rights reserved.
  *
  * Portions of this software were developed by Konstantin Belousov
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Fixes for 830/845G support: David Dawes <dawes@xfree86.org>
  * 852GM/855GM/865G support added by David Dawes <dawes@xfree86.org>
  *
  * This is generic Intel GTT handling code, morphed from the AGP
  * bridge code.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #if 0
 #define	KTR_AGP_I810	KTR_DEV
 #else
 #define	KTR_AGP_I810	0
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 
 #include <dev/agp/agppriv.h>
 #include <dev/agp/agpreg.h>
 #include <dev/agp/agp_i810.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pci_private.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/pmap.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <machine/md_var.h>
 #include <sys/rman.h>
 
 MALLOC_DECLARE(M_AGP);
 
 struct agp_i810_match;
 
 static int agp_i810_check_active(device_t bridge_dev);
 static int agp_i830_check_active(device_t bridge_dev);
 static int agp_i915_check_active(device_t bridge_dev);
 
 static void agp_82852_set_desc(device_t dev,
     const struct agp_i810_match *match);
 static void agp_i810_set_desc(device_t dev, const struct agp_i810_match *match);
 
 static void agp_i810_dump_regs(device_t dev);
 static void agp_i830_dump_regs(device_t dev);
 static void agp_i855_dump_regs(device_t dev);
 static void agp_i915_dump_regs(device_t dev);
 static void agp_i965_dump_regs(device_t dev);
 
 static int agp_i810_get_stolen_size(device_t dev);
 static int agp_i830_get_stolen_size(device_t dev);
 static int agp_i915_get_stolen_size(device_t dev);
 
 static int agp_i810_get_gtt_mappable_entries(device_t dev);
 static int agp_i830_get_gtt_mappable_entries(device_t dev);
 static int agp_i915_get_gtt_mappable_entries(device_t dev);
 
 static int agp_i810_get_gtt_total_entries(device_t dev);
 static int agp_i965_get_gtt_total_entries(device_t dev);
 static int agp_gen5_get_gtt_total_entries(device_t dev);
 
 static int agp_i810_install_gatt(device_t dev);
 static int agp_i830_install_gatt(device_t dev);
 static int agp_i965_install_gatt(device_t dev);
 static int agp_g4x_install_gatt(device_t dev);
 
 static void agp_i810_deinstall_gatt(device_t dev);
 static void agp_i830_deinstall_gatt(device_t dev);
 
 static void agp_i810_install_gtt_pte(device_t dev, u_int index,
     vm_offset_t physical, int flags);
 static void agp_i830_install_gtt_pte(device_t dev, u_int index,
     vm_offset_t physical, int flags);
 static void agp_i915_install_gtt_pte(device_t dev, u_int index,
     vm_offset_t physical, int flags);
 static void agp_i965_install_gtt_pte(device_t dev, u_int index,
     vm_offset_t physical, int flags);
 static void agp_g4x_install_gtt_pte(device_t dev, u_int index,
     vm_offset_t physical, int flags);
 
 static void agp_i810_write_gtt(device_t dev, u_int index, uint32_t pte);
 static void agp_i915_write_gtt(device_t dev, u_int index, uint32_t pte);
 static void agp_i965_write_gtt(device_t dev, u_int index, uint32_t pte);
 static void agp_g4x_write_gtt(device_t dev, u_int index, uint32_t pte);
 
 static u_int32_t agp_i810_read_gtt_pte(device_t dev, u_int index);
 static u_int32_t agp_i915_read_gtt_pte(device_t dev, u_int index);
 static u_int32_t agp_i965_read_gtt_pte(device_t dev, u_int index);
 static u_int32_t agp_g4x_read_gtt_pte(device_t dev, u_int index);
 
 static vm_paddr_t agp_i810_read_gtt_pte_paddr(device_t dev, u_int index);
 static vm_paddr_t agp_i915_read_gtt_pte_paddr(device_t dev, u_int index);
 
 static int agp_i810_set_aperture(device_t dev, u_int32_t aperture);
 static int agp_i830_set_aperture(device_t dev, u_int32_t aperture);
 static int agp_i915_set_aperture(device_t dev, u_int32_t aperture);
 
 static int agp_i810_chipset_flush_setup(device_t dev);
 static int agp_i915_chipset_flush_setup(device_t dev);
 static int agp_i965_chipset_flush_setup(device_t dev);
 
 static void agp_i810_chipset_flush_teardown(device_t dev);
 static void agp_i915_chipset_flush_teardown(device_t dev);
 static void agp_i965_chipset_flush_teardown(device_t dev);
 
 static void agp_i810_chipset_flush(device_t dev);
 static void agp_i830_chipset_flush(device_t dev);
 static void agp_i915_chipset_flush(device_t dev);
 
 enum {
 	CHIP_I810,	/* i810/i815 */
 	CHIP_I830,	/* 830M/845G */
 	CHIP_I855,	/* 852GM/855GM/865G */
 	CHIP_I915,	/* 915G/915GM */
 	CHIP_I965,	/* G965 */
 	CHIP_G33,	/* G33/Q33/Q35 */
 	CHIP_IGD,	/* Pineview */
 	CHIP_G4X,	/* G45/Q45 */
 };
 
 /* The i810 through i855 have the registers at BAR 1, and the GATT gets
  * allocated by us.  The i915 has registers in BAR 0 and the GATT is at the
  * start of the stolen memory, and should only be accessed by the OS through
  * BAR 3.  The G965 has registers and GATT in the same BAR (0) -- first 512KB
  * is registers, second 512KB is GATT.
  */
 static struct resource_spec agp_i810_res_spec[] = {
 	{ SYS_RES_MEMORY, AGP_I810_MMADR, RF_ACTIVE | RF_SHAREABLE },
 	{ -1, 0 }
 };
 
 static struct resource_spec agp_i915_res_spec[] = {
 	{ SYS_RES_MEMORY, AGP_I915_MMADR, RF_ACTIVE | RF_SHAREABLE },
 	{ SYS_RES_MEMORY, AGP_I915_GTTADR, RF_ACTIVE | RF_SHAREABLE },
 	{ -1, 0 }
 };
 
 static struct resource_spec agp_i965_res_spec[] = {
 	{ SYS_RES_MEMORY, AGP_I965_GTTMMADR, RF_ACTIVE | RF_SHAREABLE },
 	{ SYS_RES_MEMORY, AGP_I965_APBASE, RF_ACTIVE | RF_SHAREABLE },
 	{ -1, 0 }
 };
 
 struct agp_i810_softc {
 	struct agp_softc agp;
 	u_int32_t initial_aperture;	/* aperture size at startup */
 	struct agp_gatt *gatt;
 	u_int32_t dcache_size;		/* i810 only */
 	u_int32_t stolen;		/* number of i830/845 gtt
 					   entries for stolen memory */
 	u_int stolen_size;		/* BIOS-reserved graphics memory */
 	u_int gtt_total_entries;	/* Total number of gtt ptes */
 	u_int gtt_mappable_entries;	/* Number of gtt ptes mappable by CPU */
 	device_t bdev;			/* bridge device */
 	void *argb_cursor;		/* contigmalloc area for ARGB cursor */
 	struct resource *sc_res[2];
 	const struct agp_i810_match *match;
 	int sc_flush_page_rid;
 	struct resource *sc_flush_page_res;
 	void *sc_flush_page_vaddr;
 	int sc_bios_allocated_flush_page;
 };
 
 static device_t intel_agp;
 
 struct agp_i810_driver {
 	int chiptype;
 	int gen;
 	int busdma_addr_mask_sz;
 	struct resource_spec *res_spec;
 	int (*check_active)(device_t);
 	void (*set_desc)(device_t, const struct agp_i810_match *);
 	void (*dump_regs)(device_t);
 	int (*get_stolen_size)(device_t);
 	int (*get_gtt_total_entries)(device_t);
 	int (*get_gtt_mappable_entries)(device_t);
 	int (*install_gatt)(device_t);
 	void (*deinstall_gatt)(device_t);
 	void (*write_gtt)(device_t, u_int, uint32_t);
 	void (*install_gtt_pte)(device_t, u_int, vm_offset_t, int);
 	u_int32_t (*read_gtt_pte)(device_t, u_int);
 	vm_paddr_t (*read_gtt_pte_paddr)(device_t , u_int);
 	int (*set_aperture)(device_t, u_int32_t);
 	int (*chipset_flush_setup)(device_t);
 	void (*chipset_flush_teardown)(device_t);
 	void (*chipset_flush)(device_t);
 };
 
 static struct {
 	struct intel_gtt base;
 } intel_private;
 
 static const struct agp_i810_driver agp_i810_i810_driver = {
 	.chiptype = CHIP_I810,
 	.gen = 1,
 	.busdma_addr_mask_sz = 32,
 	.res_spec = agp_i810_res_spec,
 	.check_active = agp_i810_check_active,
 	.set_desc = agp_i810_set_desc,
 	.dump_regs = agp_i810_dump_regs,
 	.get_stolen_size = agp_i810_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i810_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
 	.install_gatt = agp_i810_install_gatt,
 	.deinstall_gatt = agp_i810_deinstall_gatt,
 	.write_gtt = agp_i810_write_gtt,
 	.install_gtt_pte = agp_i810_install_gtt_pte,
 	.read_gtt_pte = agp_i810_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
 	.set_aperture = agp_i810_set_aperture,
 	.chipset_flush_setup = agp_i810_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
 	.chipset_flush = agp_i810_chipset_flush,
 };
 
 static const struct agp_i810_driver agp_i810_i815_driver = {
 	.chiptype = CHIP_I810,
 	.gen = 2,
 	.busdma_addr_mask_sz = 32,
 	.res_spec = agp_i810_res_spec,
 	.check_active = agp_i810_check_active,
 	.set_desc = agp_i810_set_desc,
 	.dump_regs = agp_i810_dump_regs,
 	.get_stolen_size = agp_i810_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i830_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
 	.install_gatt = agp_i810_install_gatt,
 	.deinstall_gatt = agp_i810_deinstall_gatt,
 	.write_gtt = agp_i810_write_gtt,
 	.install_gtt_pte = agp_i810_install_gtt_pte,
 	.read_gtt_pte = agp_i810_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
 	.set_aperture = agp_i810_set_aperture,
 	.chipset_flush_setup = agp_i810_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
 	.chipset_flush = agp_i830_chipset_flush,
 };
 
 static const struct agp_i810_driver agp_i810_i830_driver = {
 	.chiptype = CHIP_I830,
 	.gen = 2,
 	.busdma_addr_mask_sz = 32,
 	.res_spec = agp_i810_res_spec,
 	.check_active = agp_i830_check_active,
 	.set_desc = agp_i810_set_desc,
 	.dump_regs = agp_i830_dump_regs,
 	.get_stolen_size = agp_i830_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i830_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
 	.install_gatt = agp_i830_install_gatt,
 	.deinstall_gatt = agp_i830_deinstall_gatt,
 	.write_gtt = agp_i810_write_gtt,
 	.install_gtt_pte = agp_i830_install_gtt_pte,
 	.read_gtt_pte = agp_i810_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
 	.set_aperture = agp_i830_set_aperture,
 	.chipset_flush_setup = agp_i810_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
 	.chipset_flush = agp_i830_chipset_flush,
 };
 
 static const struct agp_i810_driver agp_i810_i855_driver = {
 	.chiptype = CHIP_I855,
 	.gen = 2,
 	.busdma_addr_mask_sz = 32,
 	.res_spec = agp_i810_res_spec,
 	.check_active = agp_i830_check_active,
 	.set_desc = agp_82852_set_desc,
 	.dump_regs = agp_i855_dump_regs,
 	.get_stolen_size = agp_i915_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
 	.install_gatt = agp_i830_install_gatt,
 	.deinstall_gatt = agp_i830_deinstall_gatt,
 	.write_gtt = agp_i810_write_gtt,
 	.install_gtt_pte = agp_i830_install_gtt_pte,
 	.read_gtt_pte = agp_i810_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
 	.set_aperture = agp_i830_set_aperture,
 	.chipset_flush_setup = agp_i810_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
 	.chipset_flush = agp_i830_chipset_flush,
 };
 
 static const struct agp_i810_driver agp_i810_i865_driver = {
 	.chiptype = CHIP_I855,
 	.gen = 2,
 	.busdma_addr_mask_sz = 32,
 	.res_spec = agp_i810_res_spec,
 	.check_active = agp_i830_check_active,
 	.set_desc = agp_i810_set_desc,
 	.dump_regs = agp_i855_dump_regs,
 	.get_stolen_size = agp_i915_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
 	.install_gatt = agp_i830_install_gatt,
 	.deinstall_gatt = agp_i830_deinstall_gatt,
 	.write_gtt = agp_i810_write_gtt,
 	.install_gtt_pte = agp_i830_install_gtt_pte,
 	.read_gtt_pte = agp_i810_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr,
 	.set_aperture = agp_i915_set_aperture,
 	.chipset_flush_setup = agp_i810_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i810_chipset_flush_teardown,
 	.chipset_flush = agp_i830_chipset_flush,
 };
 
 static const struct agp_i810_driver agp_i810_i915_driver = {
 	.chiptype = CHIP_I915,
 	.gen = 3,
 	.busdma_addr_mask_sz = 32,
 	.res_spec = agp_i915_res_spec,
 	.check_active = agp_i915_check_active,
 	.set_desc = agp_i810_set_desc,
 	.dump_regs = agp_i915_dump_regs,
 	.get_stolen_size = agp_i915_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_i810_get_gtt_total_entries,
 	.install_gatt = agp_i830_install_gatt,
 	.deinstall_gatt = agp_i830_deinstall_gatt,
 	.write_gtt = agp_i915_write_gtt,
 	.install_gtt_pte = agp_i915_install_gtt_pte,
 	.read_gtt_pte = agp_i915_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
 	.set_aperture = agp_i915_set_aperture,
 	.chipset_flush_setup = agp_i915_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i915_chipset_flush_teardown,
 	.chipset_flush = agp_i915_chipset_flush,
 };
 
 static const struct agp_i810_driver agp_i810_g33_driver = {
 	.chiptype = CHIP_G33,
 	.gen = 3,
 	.busdma_addr_mask_sz = 36,
 	.res_spec = agp_i915_res_spec,
 	.check_active = agp_i915_check_active,
 	.set_desc = agp_i810_set_desc,
 	.dump_regs = agp_i965_dump_regs,
 	.get_stolen_size = agp_i915_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_i965_get_gtt_total_entries,
 	.install_gatt = agp_i830_install_gatt,
 	.deinstall_gatt = agp_i830_deinstall_gatt,
 	.write_gtt = agp_i915_write_gtt,
 	.install_gtt_pte = agp_i915_install_gtt_pte,
 	.read_gtt_pte = agp_i915_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
 	.set_aperture = agp_i915_set_aperture,
 	.chipset_flush_setup = agp_i965_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i965_chipset_flush_teardown,
 	.chipset_flush = agp_i915_chipset_flush,
 };
 
 static const struct agp_i810_driver agp_i810_igd_driver = {
 	.chiptype = CHIP_IGD,
 	.gen = 3,
 	.busdma_addr_mask_sz = 36,
 	.res_spec = agp_i915_res_spec,
 	.check_active = agp_i915_check_active,
 	.set_desc = agp_i810_set_desc,
 	.dump_regs = agp_i915_dump_regs,
 	.get_stolen_size = agp_i915_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_i965_get_gtt_total_entries,
 	.install_gatt = agp_i830_install_gatt,
 	.deinstall_gatt = agp_i830_deinstall_gatt,
 	.write_gtt = agp_i915_write_gtt,
 	.install_gtt_pte = agp_i915_install_gtt_pte,
 	.read_gtt_pte = agp_i915_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
 	.set_aperture = agp_i915_set_aperture,
 	.chipset_flush_setup = agp_i965_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i965_chipset_flush_teardown,
 	.chipset_flush = agp_i915_chipset_flush,
 };
 
 static const struct agp_i810_driver agp_i810_g965_driver = {
 	.chiptype = CHIP_I965,
 	.gen = 4,
 	.busdma_addr_mask_sz = 36,
 	.res_spec = agp_i965_res_spec,
 	.check_active = agp_i915_check_active,
 	.set_desc = agp_i810_set_desc,
 	.dump_regs = agp_i965_dump_regs,
 	.get_stolen_size = agp_i915_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_i965_get_gtt_total_entries,
 	.install_gatt = agp_i965_install_gatt,
 	.deinstall_gatt = agp_i830_deinstall_gatt,
 	.write_gtt = agp_i965_write_gtt,
 	.install_gtt_pte = agp_i965_install_gtt_pte,
 	.read_gtt_pte = agp_i965_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
 	.set_aperture = agp_i915_set_aperture,
 	.chipset_flush_setup = agp_i965_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i965_chipset_flush_teardown,
 	.chipset_flush = agp_i915_chipset_flush,
 };
 
 static const struct agp_i810_driver agp_i810_g4x_driver = {
 	.chiptype = CHIP_G4X,
 	.gen = 5,
 	.busdma_addr_mask_sz = 36,
 	.res_spec = agp_i965_res_spec,
 	.check_active = agp_i915_check_active,
 	.set_desc = agp_i810_set_desc,
 	.dump_regs = agp_i965_dump_regs,
 	.get_stolen_size = agp_i915_get_stolen_size,
 	.get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries,
 	.get_gtt_total_entries = agp_gen5_get_gtt_total_entries,
 	.install_gatt = agp_g4x_install_gatt,
 	.deinstall_gatt = agp_i830_deinstall_gatt,
 	.write_gtt = agp_g4x_write_gtt,
 	.install_gtt_pte = agp_g4x_install_gtt_pte,
 	.read_gtt_pte = agp_g4x_read_gtt_pte,
 	.read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr,
 	.set_aperture = agp_i915_set_aperture,
 	.chipset_flush_setup = agp_i965_chipset_flush_setup,
 	.chipset_flush_teardown = agp_i965_chipset_flush_teardown,
 	.chipset_flush = agp_i915_chipset_flush,
 };
 
 /* For adding new devices, devid is the id of the graphics controller
  * (pci:0:2:0, for example).  The placeholder (usually at pci:0:2:1) for the
  * second head should never be added.  The bridge_offset is the offset to
  * subtract from devid to get the id of the hostb that the device is on.
  */
 static const struct agp_i810_match {
 	int devid;
 	char *name;
 	const struct agp_i810_driver *driver;
 } agp_i810_matches[] = {
 	{
 		.devid = 0x71218086,
 		.name = "Intel 82810 (i810 GMCH) SVGA controller",
 		.driver = &agp_i810_i810_driver
 	},
 	{
 		.devid = 0x71238086,
 		.name = "Intel 82810-DC100 (i810-DC100 GMCH) SVGA controller",
 		.driver = &agp_i810_i810_driver
 	},
 	{
 		.devid = 0x71258086,
 		.name = "Intel 82810E (i810E GMCH) SVGA controller",
 		.driver = &agp_i810_i810_driver
 	},
 	{
 		.devid = 0x11328086,
 		.name = "Intel 82815 (i815 GMCH) SVGA controller",
 		.driver = &agp_i810_i815_driver
 	},
 	{
 		.devid = 0x35778086,
 		.name = "Intel 82830M (830M GMCH) SVGA controller",
 		.driver = &agp_i810_i830_driver
 	},
 	{
 		.devid = 0x25628086,
 		.name = "Intel 82845M (845M GMCH) SVGA controller",
 		.driver = &agp_i810_i830_driver
 	},
 	{
 		.devid = 0x35828086,
 		.name = "Intel 82852/855GM SVGA controller",
 		.driver = &agp_i810_i855_driver
 	},
 	{
 		.devid = 0x25728086,
 		.name = "Intel 82865G (865G GMCH) SVGA controller",
 		.driver = &agp_i810_i865_driver
 	},
 	{
 		.devid = 0x25828086,
 		.name = "Intel 82915G (915G GMCH) SVGA controller",
 		.driver = &agp_i810_i915_driver
 	},
 	{
 		.devid = 0x258A8086,
 		.name = "Intel E7221 SVGA controller",
 		.driver = &agp_i810_i915_driver
 	},
 	{
 		.devid = 0x25928086,
 		.name = "Intel 82915GM (915GM GMCH) SVGA controller",
 		.driver = &agp_i810_i915_driver
 	},
 	{
 		.devid = 0x27728086,
 		.name = "Intel 82945G (945G GMCH) SVGA controller",
 		.driver = &agp_i810_i915_driver
 	},
 	{
 		.devid = 0x27A28086,
 		.name = "Intel 82945GM (945GM GMCH) SVGA controller",
 		.driver = &agp_i810_i915_driver
 	},
 	{
 		.devid = 0x27AE8086,
 		.name = "Intel 945GME SVGA controller",
 		.driver = &agp_i810_i915_driver
 	},
 	{
 		.devid = 0x29728086,
 		.name = "Intel 946GZ SVGA controller",
 		.driver = &agp_i810_g965_driver
 	},
 	{
 		.devid = 0x29828086,
 		.name = "Intel G965 SVGA controller",
 		.driver = &agp_i810_g965_driver
 	},
 	{
 		.devid = 0x29928086,
 		.name = "Intel Q965 SVGA controller",
 		.driver = &agp_i810_g965_driver
 	},
 	{
 		.devid = 0x29A28086,
 		.name = "Intel G965 SVGA controller",
 		.driver = &agp_i810_g965_driver
 	},
 	{
 		.devid = 0x29B28086,
 		.name = "Intel Q35 SVGA controller",
 		.driver = &agp_i810_g33_driver
 	},
 	{
 		.devid = 0x29C28086,
 		.name = "Intel G33 SVGA controller",
 		.driver = &agp_i810_g33_driver
 	},
 	{
 		.devid = 0x29D28086,
 		.name = "Intel Q33 SVGA controller",
 		.driver = &agp_i810_g33_driver
 	},
 	{
 		.devid = 0xA0018086,
 		.name = "Intel Pineview SVGA controller",
 		.driver = &agp_i810_igd_driver
 	},
 	{
 		.devid = 0xA0118086,
 		.name = "Intel Pineview (M) SVGA controller",
 		.driver = &agp_i810_igd_driver
 	},
 	{
 		.devid = 0x2A028086,
 		.name = "Intel GM965 SVGA controller",
 		.driver = &agp_i810_g965_driver
 	},
 	{
 		.devid = 0x2A128086,
 		.name = "Intel GME965 SVGA controller",
 		.driver = &agp_i810_g965_driver
 	},
 	{
 		.devid = 0x2A428086,
 		.name = "Intel GM45 SVGA controller",
 		.driver = &agp_i810_g4x_driver
 	},
 	{
 		.devid = 0x2E028086,
 		.name = "Intel Eaglelake SVGA controller",
 		.driver = &agp_i810_g4x_driver
 	},
 	{
 		.devid = 0x2E128086,
 		.name = "Intel Q45 SVGA controller",
 		.driver = &agp_i810_g4x_driver
 	},
 	{
 		.devid = 0x2E228086,
 		.name = "Intel G45 SVGA controller",
 		.driver = &agp_i810_g4x_driver
 	},
 	{
 		.devid = 0x2E328086,
 		.name = "Intel G41 SVGA controller",
 		.driver = &agp_i810_g4x_driver
 	},
 	{
 		.devid = 0x00428086,
 		.name = "Intel Ironlake (D) SVGA controller",
 		.driver = &agp_i810_g4x_driver
 	},
 	{
 		.devid = 0x00468086,
 		.name = "Intel Ironlake (M) SVGA controller",
 		.driver = &agp_i810_g4x_driver
 	},
 	{
 		.devid = 0,
 	}
 };
 
 static const struct agp_i810_match*
 agp_i810_match(device_t dev)
 {
 	int i, devid;
 
 	if (pci_get_class(dev) != PCIC_DISPLAY
 	    || (pci_get_subclass(dev) != PCIS_DISPLAY_VGA &&
 	    pci_get_subclass(dev) != PCIS_DISPLAY_OTHER))
 		return (NULL);
 
 	devid = pci_get_devid(dev);
 	for (i = 0; agp_i810_matches[i].devid != 0; i++) {
 		if (agp_i810_matches[i].devid == devid)
 			break;
 	}
 	if (agp_i810_matches[i].devid == 0)
 		return (NULL);
 	else
 		return (&agp_i810_matches[i]);
 }
 
 /*
  * Find bridge device.
  */
 static device_t
 agp_i810_find_bridge(device_t dev)
 {
 
 	return (pci_find_dbsf(0, 0, 0, 0));
 }
 
 static void
 agp_i810_identify(driver_t *driver, device_t parent)
 {
 
 	if (device_find_child(parent, "agp", -1) == NULL &&
 	    agp_i810_match(parent))
 		device_add_child(parent, "agp", -1);
 }
 
 static int
 agp_i810_check_active(device_t bridge_dev)
 {
 	u_int8_t smram;
 
 	smram = pci_read_config(bridge_dev, AGP_I810_SMRAM, 1);
 	if ((smram & AGP_I810_SMRAM_GMS) == AGP_I810_SMRAM_GMS_DISABLED)
 		return (ENXIO);
 	return (0);
 }
 
 static int
 agp_i830_check_active(device_t bridge_dev)
 {
 	int gcc1;
 
 	gcc1 = pci_read_config(bridge_dev, AGP_I830_GCC1, 1);
 	if ((gcc1 & AGP_I830_GCC1_DEV2) == AGP_I830_GCC1_DEV2_DISABLED)
 		return (ENXIO);
 	return (0);
 }
 
 static int
 agp_i915_check_active(device_t bridge_dev)
 {
 	int deven;
 
 	deven = pci_read_config(bridge_dev, AGP_I915_DEVEN, 4);
 	if ((deven & AGP_I915_DEVEN_D2F0) == AGP_I915_DEVEN_D2F0_DISABLED)
 		return (ENXIO);
 	return (0);
 }
 
 static void
 agp_82852_set_desc(device_t dev, const struct agp_i810_match *match)
 {
 
 	switch (pci_read_config(dev, AGP_I85X_CAPID, 1)) {
 	case AGP_I855_GME:
 		device_set_desc(dev,
 		    "Intel 82855GME (855GME GMCH) SVGA controller");
 		break;
 	case AGP_I855_GM:
 		device_set_desc(dev,
 		    "Intel 82855GM (855GM GMCH) SVGA controller");
 		break;
 	case AGP_I852_GME:
 		device_set_desc(dev,
 		    "Intel 82852GME (852GME GMCH) SVGA controller");
 		break;
 	case AGP_I852_GM:
 		device_set_desc(dev,
 		    "Intel 82852GM (852GM GMCH) SVGA controller");
 		break;
 	default:
 		device_set_desc(dev,
 		    "Intel 8285xM (85xGM GMCH) SVGA controller");
 		break;
 	}
 }
 
 static void
 agp_i810_set_desc(device_t dev, const struct agp_i810_match *match)
 {
 
 	device_set_desc(dev, match->name);
 }
 
 static int
 agp_i810_probe(device_t dev)
 {
 	device_t bdev;
 	const struct agp_i810_match *match;
 	int err;
 
 	if (resource_disabled("agp", device_get_unit(dev)))
 		return (ENXIO);
 	match = agp_i810_match(dev);
 	if (match == NULL)
 		return (ENXIO);
 
 	bdev = agp_i810_find_bridge(dev);
 	if (bdev == NULL) {
 		if (bootverbose)
 			printf("I810: can't find bridge device\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * Check whether the internal graphics device has been activated.
 	 */
 	err = match->driver->check_active(bdev);
 	if (err != 0) {
 		if (bootverbose)
 			printf("i810: disabled, not probing\n");
 		return (err);
 	}
 
 	match->driver->set_desc(dev, match);
 	return (BUS_PROBE_DEFAULT);
 }
 
 static void
 agp_i810_dump_regs(device_t dev)
 {
 	struct agp_i810_softc *sc = device_get_softc(dev);
 
 	device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n",
 	    bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL));
 	device_printf(dev, "AGP_I810_MISCC: 0x%04x\n",
 	    pci_read_config(sc->bdev, AGP_I810_MISCC, 2));
 }
 
 static void
 agp_i830_dump_regs(device_t dev)
 {
 	struct agp_i810_softc *sc = device_get_softc(dev);
 
 	device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n",
 	    bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL));
 	device_printf(dev, "AGP_I830_GCC1: 0x%02x\n",
 	    pci_read_config(sc->bdev, AGP_I830_GCC1, 1));
 }
 
 static void
 agp_i855_dump_regs(device_t dev)
 {
 	struct agp_i810_softc *sc = device_get_softc(dev);
 
 	device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n",
 	    bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL));
 	device_printf(dev, "AGP_I855_GCC1: 0x%02x\n",
 	    pci_read_config(sc->bdev, AGP_I855_GCC1, 1));
 }
 
 static void
 agp_i915_dump_regs(device_t dev)
 {
 	struct agp_i810_softc *sc = device_get_softc(dev);
 
 	device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n",
 	    bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL));
 	device_printf(dev, "AGP_I855_GCC1: 0x%02x\n",
 	    pci_read_config(sc->bdev, AGP_I855_GCC1, 1));
 	device_printf(dev, "AGP_I915_MSAC: 0x%02x\n",
 	    pci_read_config(sc->bdev, AGP_I915_MSAC, 1));
 }
 
 static void
 agp_i965_dump_regs(device_t dev)
 {
 	struct agp_i810_softc *sc = device_get_softc(dev);
 
 	device_printf(dev, "AGP_I965_PGTBL_CTL2: %08x\n",
 	    bus_read_4(sc->sc_res[0], AGP_I965_PGTBL_CTL2));
 	device_printf(dev, "AGP_I855_GCC1: 0x%02x\n",
 	    pci_read_config(sc->bdev, AGP_I855_GCC1, 1));
 	device_printf(dev, "AGP_I965_MSAC: 0x%02x\n",
 	    pci_read_config(sc->bdev, AGP_I965_MSAC, 1));
 }
 
 static int
 agp_i810_get_stolen_size(device_t dev)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	sc->stolen = 0;
 	sc->stolen_size = 0;
 	return (0);
 }
 
 static int
 agp_i830_get_stolen_size(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	unsigned int gcc1;
 
 	sc = device_get_softc(dev);
 
 	gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 1);
 	switch (gcc1 & AGP_I830_GCC1_GMS) {
 	case AGP_I830_GCC1_GMS_STOLEN_512:
 		sc->stolen = (512 - 132) * 1024 / 4096;
 		sc->stolen_size = 512 * 1024;
 		break;
 	case AGP_I830_GCC1_GMS_STOLEN_1024:
 		sc->stolen = (1024 - 132) * 1024 / 4096;
 		sc->stolen_size = 1024 * 1024;
 		break;
 	case AGP_I830_GCC1_GMS_STOLEN_8192:
 		sc->stolen = (8192 - 132) * 1024 / 4096;
 		sc->stolen_size = 8192 * 1024;
 		break;
 	default:
 		sc->stolen = 0;
 		device_printf(dev,
 		    "unknown memory configuration, disabling (GCC1 %x)\n",
 		    gcc1);
 		return (EINVAL);
 	}
 	return (0);
 }
 
 static int
 agp_i915_get_stolen_size(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	unsigned int gcc1, stolen, gtt_size;
 
 	sc = device_get_softc(dev);
 
 	/*
 	 * Stolen memory is set up at the beginning of the aperture by
 	 * the BIOS, consisting of the GATT followed by 4kb for the
 	 * BIOS display.
 	 */
 	switch (sc->match->driver->chiptype) {
 	case CHIP_I855:
 		gtt_size = 128;
 		break;
 	case CHIP_I915:
 		gtt_size = 256;
 		break;
 	case CHIP_I965:
 		switch (bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL) &
 			AGP_I810_PGTBL_SIZE_MASK) {
 		case AGP_I810_PGTBL_SIZE_128KB:
 			gtt_size = 128;
 			break;
 		case AGP_I810_PGTBL_SIZE_256KB:
 			gtt_size = 256;
 			break;
 		case AGP_I810_PGTBL_SIZE_512KB:
 			gtt_size = 512;
 			break;
 		case AGP_I965_PGTBL_SIZE_1MB:
 			gtt_size = 1024;
 			break;
 		case AGP_I965_PGTBL_SIZE_2MB:
 			gtt_size = 2048;
 			break;
 		case AGP_I965_PGTBL_SIZE_1_5MB:
 			gtt_size = 1024 + 512;
 			break;
 		default:
 			device_printf(dev, "Bad PGTBL size\n");
 			return (EINVAL);
 		}
 		break;
 	case CHIP_G33:
 		gcc1 = pci_read_config(sc->bdev, AGP_I855_GCC1, 2);
 		switch (gcc1 & AGP_G33_MGGC_GGMS_MASK) {
 		case AGP_G33_MGGC_GGMS_SIZE_1M:
 			gtt_size = 1024;
 			break;
 		case AGP_G33_MGGC_GGMS_SIZE_2M:
 			gtt_size = 2048;
 			break;
 		default:
 			device_printf(dev, "Bad PGTBL size\n");
 			return (EINVAL);
 		}
 		break;
 	case CHIP_IGD:
 	case CHIP_G4X:
 		gtt_size = 0;
 		break;
 	default:
 		device_printf(dev, "Bad chiptype\n");
 		return (EINVAL);
 	}
 
 	/* GCC1 is called MGGC on i915+ */
 	gcc1 = pci_read_config(sc->bdev, AGP_I855_GCC1, 1);
 	switch (gcc1 & AGP_I855_GCC1_GMS) {
 	case AGP_I855_GCC1_GMS_STOLEN_1M:
 		stolen = 1024;
 		break;
 	case AGP_I855_GCC1_GMS_STOLEN_4M:
 		stolen = 4 * 1024;
 		break;
 	case AGP_I855_GCC1_GMS_STOLEN_8M:
 		stolen = 8 * 1024;
 		break;
 	case AGP_I855_GCC1_GMS_STOLEN_16M:
 		stolen = 16 * 1024;
 		break;
 	case AGP_I855_GCC1_GMS_STOLEN_32M:
 		stolen = 32 * 1024;
 		break;
 	case AGP_I915_GCC1_GMS_STOLEN_48M:
 		stolen = sc->match->driver->gen > 2 ? 48 * 1024 : 0;
 		break;
 	case AGP_I915_GCC1_GMS_STOLEN_64M:
 		stolen = sc->match->driver->gen > 2 ? 64 * 1024 : 0;
 		break;
 	case AGP_G33_GCC1_GMS_STOLEN_128M:
 		stolen = sc->match->driver->gen > 2 ? 128 * 1024 : 0;
 		break;
 	case AGP_G33_GCC1_GMS_STOLEN_256M:
 		stolen = sc->match->driver->gen > 2 ? 256 * 1024 : 0;
 		break;
 	case AGP_G4X_GCC1_GMS_STOLEN_96M:
 		if (sc->match->driver->chiptype == CHIP_I965 ||
 		    sc->match->driver->chiptype == CHIP_G4X)
 			stolen = 96 * 1024;
 		else
 			stolen = 0;
 		break;
 	case AGP_G4X_GCC1_GMS_STOLEN_160M:
 		if (sc->match->driver->chiptype == CHIP_I965 ||
 		    sc->match->driver->chiptype == CHIP_G4X)
 			stolen = 160 * 1024;
 		else
 			stolen = 0;
 		break;
 	case AGP_G4X_GCC1_GMS_STOLEN_224M:
 		if (sc->match->driver->chiptype == CHIP_I965 ||
 		    sc->match->driver->chiptype == CHIP_G4X)
 			stolen = 224 * 1024;
 		else
 			stolen = 0;
 		break;
 	case AGP_G4X_GCC1_GMS_STOLEN_352M:
 		if (sc->match->driver->chiptype == CHIP_I965 ||
 		    sc->match->driver->chiptype == CHIP_G4X)
 			stolen = 352 * 1024;
 		else
 			stolen = 0;
 		break;
 	default:
 		device_printf(dev,
 		    "unknown memory configuration, disabling (GCC1 %x)\n",
 		    gcc1);
 		return (EINVAL);
 	}
 
 	gtt_size += 4;
 	sc->stolen_size = stolen * 1024;
 	sc->stolen = (stolen - gtt_size) * 1024 / 4096;
 
 	return (0);
 }
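 /*
  * Editorial sketch, not part of the driver or of this commit: how the
  * figures computed in agp_i915_get_stolen_size() above combine.  Both
  * "stolen" and "gtt_size" are in KB; the GTT plus the 4KB page reserved
  * for the BIOS sit at the start of stolen memory, and whatever remains
  * is exposed as 4KB GTT entries.  The helper name is an illustrative
  * assumption.
  */
 static unsigned
 model_stolen_entries(unsigned stolen_kb, unsigned gtt_kb)
 {
 	return ((stolen_kb - (gtt_kb + 4)) * 1024 / 4096);
 }
 
 /* 8MB stolen with a 256KB GTT leaves (8192 - 260)KB == 1983 entries. */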
 
 static int
 agp_i810_get_gtt_mappable_entries(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint32_t ap;
 	uint16_t miscc;
 
 	sc = device_get_softc(dev);
 	miscc = pci_read_config(sc->bdev, AGP_I810_MISCC, 2);
 	if ((miscc & AGP_I810_MISCC_WINSIZE) == AGP_I810_MISCC_WINSIZE_32)
 		ap = 32;
 	else
 		ap = 64;
 	sc->gtt_mappable_entries = (ap * 1024 * 1024) >> AGP_PAGE_SHIFT;
 	return (0);
 }
 
 static int
 agp_i830_get_gtt_mappable_entries(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint32_t ap;
 	uint16_t gmch_ctl;
 
 	sc = device_get_softc(dev);
 	gmch_ctl = pci_read_config(sc->bdev, AGP_I830_GCC1, 2);
 	if ((gmch_ctl & AGP_I830_GCC1_GMASIZE) == AGP_I830_GCC1_GMASIZE_64)
 		ap = 64;
 	else
 		ap = 128;
 	sc->gtt_mappable_entries = (ap * 1024 * 1024) >> AGP_PAGE_SHIFT;
 	return (0);
 }
 
 static int
 agp_i915_get_gtt_mappable_entries(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint32_t ap;
 
 	sc = device_get_softc(dev);
 	ap = AGP_GET_APERTURE(dev);
 	sc->gtt_mappable_entries = ap >> AGP_PAGE_SHIFT;
 	return (0);
 }
 
 static int
 agp_i810_get_gtt_total_entries(device_t dev)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	sc->gtt_total_entries = sc->gtt_mappable_entries;
 	return (0);
 }
 
 static int
 agp_i965_get_gtt_total_entries(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint32_t pgetbl_ctl;
 	int error;
 
 	sc = device_get_softc(dev);
 	error = 0;
 	pgetbl_ctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
 	switch (pgetbl_ctl & AGP_I810_PGTBL_SIZE_MASK) {
 	case AGP_I810_PGTBL_SIZE_128KB:
 		sc->gtt_total_entries = 128 * 1024 / 4;
 		break;
 	case AGP_I810_PGTBL_SIZE_256KB:
 		sc->gtt_total_entries = 256 * 1024 / 4;
 		break;
 	case AGP_I810_PGTBL_SIZE_512KB:
 		sc->gtt_total_entries = 512 * 1024 / 4;
 		break;
 	/* GTT pagetable sizes bigger than 512KB are not possible on G33! */
 	case AGP_I810_PGTBL_SIZE_1MB:
 		sc->gtt_total_entries = 1024 * 1024 / 4;
 		break;
 	case AGP_I810_PGTBL_SIZE_2MB:
 		sc->gtt_total_entries = 2 * 1024 * 1024 / 4;
 		break;
 	case AGP_I810_PGTBL_SIZE_1_5MB:
 		sc->gtt_total_entries = (1024 + 512) * 1024 / 4;
 		break;
 	default:
 		device_printf(dev, "Unknown page table size\n");
 		error = ENXIO;
 	}
 	return (error);
 }
 
 static void
 agp_gen5_adjust_pgtbl_size(device_t dev, uint32_t sz)
 {
 	struct agp_i810_softc *sc;
 	uint32_t pgetbl_ctl, pgetbl_ctl2;
 
 	sc = device_get_softc(dev);
 
 	/* Disable per-process page table. */
 	pgetbl_ctl2 = bus_read_4(sc->sc_res[0], AGP_I965_PGTBL_CTL2);
 	pgetbl_ctl2 &= ~AGP_I810_PGTBL_ENABLED;
 	bus_write_4(sc->sc_res[0], AGP_I965_PGTBL_CTL2, pgetbl_ctl2);
 
 	/* Write the new ggtt size. */
 	pgetbl_ctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
 	pgetbl_ctl &= ~AGP_I810_PGTBL_SIZE_MASK;
 	pgetbl_ctl |= sz;
 	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgetbl_ctl);
 }
 
 static int
 agp_gen5_get_gtt_total_entries(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint16_t gcc1;
 
 	sc = device_get_softc(dev);
 
 	gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 2);
 	switch (gcc1 & AGP_G4x_GCC1_SIZE_MASK) {
 	case AGP_G4x_GCC1_SIZE_1M:
 	case AGP_G4x_GCC1_SIZE_VT_1M:
 		agp_gen5_adjust_pgtbl_size(dev, AGP_I810_PGTBL_SIZE_1MB);
 		break;
 	case AGP_G4x_GCC1_SIZE_VT_1_5M:
 		agp_gen5_adjust_pgtbl_size(dev, AGP_I810_PGTBL_SIZE_1_5MB);
 		break;
 	case AGP_G4x_GCC1_SIZE_2M:
 	case AGP_G4x_GCC1_SIZE_VT_2M:
 		agp_gen5_adjust_pgtbl_size(dev, AGP_I810_PGTBL_SIZE_2MB);
 		break;
 	default:
 		device_printf(dev, "Unknown page table size\n");
 		return (ENXIO);
 	}
 
 	return (agp_i965_get_gtt_total_entries(dev));
 }
 
 static int
 agp_i810_install_gatt(device_t dev)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	/* Some i810s have on-chip memory called dcache. */
 	if ((bus_read_1(sc->sc_res[0], AGP_I810_DRT) & AGP_I810_DRT_POPULATED)
 	    != 0)
 		sc->dcache_size = 4 * 1024 * 1024;
 	else
 		sc->dcache_size = 0;
 
 	/* According to the specs the gatt on the i810 must be 64k. */
 	sc->gatt->ag_virtual = (void *)kmem_alloc_contig(64 * 1024, M_NOWAIT |
 	    M_ZERO, 0, ~0, PAGE_SIZE, 0, VM_MEMATTR_WRITE_COMBINING);
 	if (sc->gatt->ag_virtual == NULL) {
 		if (bootverbose)
 			device_printf(dev, "contiguous allocation failed\n");
 		return (ENOMEM);
 	}
 
 	sc->gatt->ag_physical = vtophys((vm_offset_t)sc->gatt->ag_virtual);
 	/* Install the GATT. */
 	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL,
 	    sc->gatt->ag_physical | 1);
 	return (0);
 }
 
 static void
 agp_i830_install_gatt_init(struct agp_i810_softc *sc)
 {
 	uint32_t pgtblctl;
 
 	/*
 	 * The i830 automatically initializes the 128k gatt on boot.
 	 * The GATT address is already in there; make sure it's enabled.
 	 */
 	pgtblctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
 	pgtblctl |= 1;
 	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl);
 	
 	sc->gatt->ag_physical = pgtblctl & ~1;
 }
 
 static int
 agp_i830_install_gatt(device_t dev)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	agp_i830_install_gatt_init(sc);
 	return (0);
 }
 
 static int
 agp_gen4_install_gatt(device_t dev, const vm_size_t gtt_offset)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	pmap_change_attr((vm_offset_t)rman_get_virtual(sc->sc_res[0]) +
 	    gtt_offset, rman_get_size(sc->sc_res[0]) - gtt_offset,
 	    VM_MEMATTR_WRITE_COMBINING);
 	agp_i830_install_gatt_init(sc);
 	return (0);
 }
 
 static int
 agp_i965_install_gatt(device_t dev)
 {
 
 	return (agp_gen4_install_gatt(dev, 512 * 1024));
 }
 
 static int
 agp_g4x_install_gatt(device_t dev)
 {
 
 	return (agp_gen4_install_gatt(dev, 2 * 1024 * 1024));
 }
 
 static int
 agp_i810_attach(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	int error;
 
 	sc = device_get_softc(dev);
 	sc->bdev = agp_i810_find_bridge(dev);
 	if (sc->bdev == NULL)
 		return (ENOENT);
 
 	sc->match = agp_i810_match(dev);
 
 	agp_set_aperture_resource(dev, sc->match->driver->gen <= 2 ?
 	    AGP_APBASE : AGP_I915_GMADR);
 	error = agp_generic_attach(dev);
 	if (error)
 		return (error);
 
 	if (ptoa((vm_paddr_t)Maxmem) >
 	    (1ULL << sc->match->driver->busdma_addr_mask_sz) - 1) {
 		device_printf(dev, "agp_i810 does not support physical "
 		    "memory above %ju.\n", (uintmax_t)(1ULL <<
 		    sc->match->driver->busdma_addr_mask_sz) - 1);
 		return (ENOENT);
 	}
 
 	if (bus_alloc_resources(dev, sc->match->driver->res_spec, sc->sc_res)) {
 		agp_generic_detach(dev);
 		return (ENODEV);
 	}
 
 	sc->initial_aperture = AGP_GET_APERTURE(dev);
 	sc->gatt = malloc(sizeof(struct agp_gatt), M_AGP, M_WAITOK);
 	sc->gatt->ag_entries = AGP_GET_APERTURE(dev) >> AGP_PAGE_SHIFT;
 
 	if ((error = sc->match->driver->get_stolen_size(dev)) != 0 ||
 	    (error = sc->match->driver->install_gatt(dev)) != 0 ||
 	    (error = sc->match->driver->get_gtt_mappable_entries(dev)) != 0 ||
 	    (error = sc->match->driver->get_gtt_total_entries(dev)) != 0 ||
 	    (error = sc->match->driver->chipset_flush_setup(dev)) != 0) {
 		bus_release_resources(dev, sc->match->driver->res_spec,
 		    sc->sc_res);
 		free(sc->gatt, M_AGP);
 		agp_generic_detach(dev);
 		return (error);
 	}
 
 	intel_agp = dev;
 	device_printf(dev, "aperture size is %dM",
 	    sc->initial_aperture / 1024 / 1024);
 	if (sc->stolen > 0)
 		printf(", detected %dk stolen memory\n", sc->stolen * 4);
 	else
 		printf("\n");
 	if (bootverbose) {
 		sc->match->driver->dump_regs(dev);
 		device_printf(dev, "Mappable GTT entries: %d\n",
 		    sc->gtt_mappable_entries);
 		device_printf(dev, "Total GTT entries: %d\n",
 		    sc->gtt_total_entries);
 	}
 	return (0);
 }
 
 static void
 agp_i810_deinstall_gatt(device_t dev)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, 0);
-	kmem_free(kernel_arena, (vm_offset_t)sc->gatt->ag_virtual, 64 * 1024);
+	kmem_free((vm_offset_t)sc->gatt->ag_virtual, 64 * 1024);
 }
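 /*
  * Note on the kmem(9) usage above: with the updated KPI, kmem_free()
  * takes only the address and the size, and the kernel determines the
  * backing arena itself.  Illustrative pairing (a sketch mirroring the
  * calls in this file, not additional driver code):
  *
  *	va = kmem_alloc_contig(64 * 1024, M_NOWAIT | M_ZERO, 0, ~0,
  *	    PAGE_SIZE, 0, VM_MEMATTR_WRITE_COMBINING);
  *	...
  *	kmem_free(va, 64 * 1024);
  */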
 
 static void
 agp_i830_deinstall_gatt(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	unsigned int pgtblctl;
 
 	sc = device_get_softc(dev);
 	pgtblctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL);
 	pgtblctl &= ~1;
 	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl);
 }
 
 static int
 agp_i810_detach(device_t dev)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	agp_free_cdev(dev);
 
 	/* Clear the GATT base. */
 	sc->match->driver->deinstall_gatt(dev);
 
 	sc->match->driver->chipset_flush_teardown(dev);
 
 	/* Put the aperture back the way it started. */
 	AGP_SET_APERTURE(dev, sc->initial_aperture);
 
 	free(sc->gatt, M_AGP);
 	bus_release_resources(dev, sc->match->driver->res_spec, sc->sc_res);
 	agp_free_res(dev);
 
 	return (0);
 }
 
 static int
 agp_i810_resume(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	sc = device_get_softc(dev);
 
 	AGP_SET_APERTURE(dev, sc->initial_aperture);
 
 	/* Install the GATT. */
 	bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL,
 	    sc->gatt->ag_physical | 1);
 
 	return (bus_generic_resume(dev));
 }
 
 /**
  * Sets the PCI resource size of the aperture on i830-class and below chipsets,
  * while returning failure on later chipsets when an actual change is
  * requested.
  *
  * This whole function is likely bogus, as the kernel would probably need to
  * reconfigure the placement of the AGP aperture if a larger size is requested,
  * which doesn't happen currently.
  */
 static int
 agp_i810_set_aperture(device_t dev, u_int32_t aperture)
 {
 	struct agp_i810_softc *sc;
 	u_int16_t miscc;
 
 	sc = device_get_softc(dev);
 	/*
 	 * Double check for sanity.
 	 */
 	if (aperture != 32 * 1024 * 1024 && aperture != 64 * 1024 * 1024) {
 		device_printf(dev, "bad aperture size %d\n", aperture);
 		return (EINVAL);
 	}
 
 	miscc = pci_read_config(sc->bdev, AGP_I810_MISCC, 2);
 	miscc &= ~AGP_I810_MISCC_WINSIZE;
 	if (aperture == 32 * 1024 * 1024)
 		miscc |= AGP_I810_MISCC_WINSIZE_32;
 	else
 		miscc |= AGP_I810_MISCC_WINSIZE_64;
 	
 	pci_write_config(sc->bdev, AGP_I810_MISCC, miscc, 2);
 	return (0);
 }
 
 static int
 agp_i830_set_aperture(device_t dev, u_int32_t aperture)
 {
 	struct agp_i810_softc *sc;
 	u_int16_t gcc1;
 
 	sc = device_get_softc(dev);
 
 	if (aperture != 64 * 1024 * 1024 &&
 	    aperture != 128 * 1024 * 1024) {
 		device_printf(dev, "bad aperture size %d\n", aperture);
 		return (EINVAL);
 	}
 	gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 2);
 	gcc1 &= ~AGP_I830_GCC1_GMASIZE;
 	if (aperture == 64 * 1024 * 1024)
 		gcc1 |= AGP_I830_GCC1_GMASIZE_64;
 	else
 		gcc1 |= AGP_I830_GCC1_GMASIZE_128;
 
 	pci_write_config(sc->bdev, AGP_I830_GCC1, gcc1, 2);
 	return (0);
 }
 
 static int
 agp_i915_set_aperture(device_t dev, u_int32_t aperture)
 {
 
 	return (agp_generic_set_aperture(dev, aperture));
 }
 
 static int
 agp_i810_method_set_aperture(device_t dev, u_int32_t aperture)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	return (sc->match->driver->set_aperture(dev, aperture));
 }
 
 /**
  * Writes a GTT entry mapping the page at the given offset from the
  * beginning of the aperture to the given physical address.  Set up the
  * caching mode according to flags.
  *
  * For gen 1, 2 and 3, the GTT starts at offset AGP_I810_GTT from the
  * corresponding BAR start.  For gen 4, the offset is 512KB +
  * AGP_I810_GTT; for gen 5 and 6 it is 2MB + AGP_I810_GTT.
  *
  * Also, the bits of the physical page address above 4GB need to be
  * folded into the low-order bits of the 32-bit PTE.
  */
 static void
 agp_i810_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
     int flags)
 {
 	uint32_t pte;
 
 	pte = (u_int32_t)physical | I810_PTE_VALID;
 	if (flags == AGP_DCACHE_MEMORY)
 		pte |= I810_PTE_LOCAL;
 	else if (flags == AGP_USER_CACHED_MEMORY)
 		pte |= I830_PTE_SYSTEM_CACHED;
 	agp_i810_write_gtt(dev, index, pte);
 }
 
 static void
 agp_i810_write_gtt(device_t dev, u_int index, uint32_t pte)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	bus_write_4(sc->sc_res[0], AGP_I810_GTT + index * 4, pte);
 	CTR2(KTR_AGP_I810, "810_pte %x %x", index, pte);
 }
 
 static void
 agp_i830_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
     int flags)
 {
 	uint32_t pte;
 
 	pte = (u_int32_t)physical | I810_PTE_VALID;
 	if (flags == AGP_USER_CACHED_MEMORY)
 		pte |= I830_PTE_SYSTEM_CACHED;
 	agp_i810_write_gtt(dev, index, pte);
 }
 
 static void
 agp_i915_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
     int flags)
 {
 	uint32_t pte;
 
 	pte = (u_int32_t)physical | I810_PTE_VALID;
 	if (flags == AGP_USER_CACHED_MEMORY)
 		pte |= I830_PTE_SYSTEM_CACHED;
 	pte |= (physical & 0x0000000f00000000ull) >> 28;
 	agp_i915_write_gtt(dev, index, pte);
 }
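 /*
  * Worked example for the fold above (gen 3+): for physical address
  * 0x2_1234_5000 the 32-bit cast keeps 0x12345000 and the masked bits
  * 35:32 (0x2) are shifted down 28 to PTE bits 7:4 (0x20), so the entry
  * becomes 0x12345020 | I810_PTE_VALID.  agp_i915_read_gtt_pte_paddr()
  * below reverses this with (pte & 0xf0) << 28.
  */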
 
 static void
 agp_i915_write_gtt(device_t dev, u_int index, uint32_t pte)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	bus_write_4(sc->sc_res[1], index * 4, pte);
 	CTR2(KTR_AGP_I810, "915_pte %x %x", index, pte);
 }
 
 static void
 agp_i965_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
     int flags)
 {
 	uint32_t pte;
 
 	pte = (u_int32_t)physical | I810_PTE_VALID;
 	if (flags == AGP_USER_CACHED_MEMORY)
 		pte |= I830_PTE_SYSTEM_CACHED;
 	pte |= (physical & 0x0000000f00000000ull) >> 28;
 	agp_i965_write_gtt(dev, index, pte);
 }
 
 static void
 agp_i965_write_gtt(device_t dev, u_int index, uint32_t pte)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	bus_write_4(sc->sc_res[0], index * 4 + (512 * 1024), pte);
 	CTR2(KTR_AGP_I810, "965_pte %x %x", index, pte);
 }
 
 static void
 agp_g4x_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical,
     int flags)
 {
 	uint32_t pte;
 
 	pte = (u_int32_t)physical | I810_PTE_VALID;
 	if (flags == AGP_USER_CACHED_MEMORY)
 		pte |= I830_PTE_SYSTEM_CACHED;
 	pte |= (physical & 0x0000000f00000000ull) >> 28;
 	agp_g4x_write_gtt(dev, index, pte);
 }
 
 static void
 agp_g4x_write_gtt(device_t dev, u_int index, uint32_t pte)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	bus_write_4(sc->sc_res[0], index * 4 + (2 * 1024 * 1024), pte);
 	CTR2(KTR_AGP_I810, "g4x_pte %x %x", index, pte);
 }
 
 static int
 agp_i810_bind_page(device_t dev, vm_offset_t offset, vm_offset_t physical)
 {
 	struct agp_i810_softc *sc = device_get_softc(dev);
 	u_int index;
 
 	if (offset >= (sc->gatt->ag_entries << AGP_PAGE_SHIFT)) {
 		device_printf(dev, "failed: offset is 0x%08jx, "
 		    "shift is %d, entries is %d\n", (intmax_t)offset,
 		    AGP_PAGE_SHIFT, sc->gatt->ag_entries);
 		return (EINVAL);
 	}
 	index = offset >> AGP_PAGE_SHIFT;
 	if (sc->stolen != 0 && index < sc->stolen) {
 		device_printf(dev, "trying to bind into stolen memory\n");
 		return (EINVAL);
 	}
 	sc->match->driver->install_gtt_pte(dev, index, physical, 0);
 	return (0);
 }
 
 static int
 agp_i810_unbind_page(device_t dev, vm_offset_t offset)
 {
 	struct agp_i810_softc *sc;
 	u_int index;
 
 	sc = device_get_softc(dev);
 	if (offset >= (sc->gatt->ag_entries << AGP_PAGE_SHIFT))
 		return (EINVAL);
 	index = offset >> AGP_PAGE_SHIFT;
 	if (sc->stolen != 0 && index < sc->stolen) {
 		device_printf(dev, "trying to unbind from stolen memory\n");
 		return (EINVAL);
 	}
 	sc->match->driver->install_gtt_pte(dev, index, 0, 0);
 	return (0);
 }
 
 static u_int32_t
 agp_i810_read_gtt_pte(device_t dev, u_int index)
 {
 	struct agp_i810_softc *sc;
 	u_int32_t pte;
 
 	sc = device_get_softc(dev);
 	pte = bus_read_4(sc->sc_res[0], AGP_I810_GTT + index * 4);
 	return (pte);
 }
 
 static u_int32_t
 agp_i915_read_gtt_pte(device_t dev, u_int index)
 {
 	struct agp_i810_softc *sc;
 	u_int32_t pte;
 
 	sc = device_get_softc(dev);
 	pte = bus_read_4(sc->sc_res[1], index * 4);
 	return (pte);
 }
 
 static u_int32_t
 agp_i965_read_gtt_pte(device_t dev, u_int index)
 {
 	struct agp_i810_softc *sc;
 	u_int32_t pte;
 
 	sc = device_get_softc(dev);
 	pte = bus_read_4(sc->sc_res[0], index * 4 + (512 * 1024));
 	return (pte);
 }
 
 static u_int32_t
 agp_g4x_read_gtt_pte(device_t dev, u_int index)
 {
 	struct agp_i810_softc *sc;
 	u_int32_t pte;
 
 	sc = device_get_softc(dev);
 	pte = bus_read_4(sc->sc_res[0], index * 4 + (2 * 1024 * 1024));
 	return (pte);
 }
 
 static vm_paddr_t
 agp_i810_read_gtt_pte_paddr(device_t dev, u_int index)
 {
 	struct agp_i810_softc *sc;
 	u_int32_t pte;
 	vm_paddr_t res;
 
 	sc = device_get_softc(dev);
 	pte = sc->match->driver->read_gtt_pte(dev, index);
 	res = pte & ~PAGE_MASK;
 	return (res);
 }
 
 static vm_paddr_t
 agp_i915_read_gtt_pte_paddr(device_t dev, u_int index)
 {
 	struct agp_i810_softc *sc;
 	u_int32_t pte;
 	vm_paddr_t res;
 
 	sc = device_get_softc(dev);
 	pte = sc->match->driver->read_gtt_pte(dev, index);
 	res = (pte & ~PAGE_MASK) | ((pte & 0xf0) << 28);
 	return (res);
 }
 
 /*
  * Writing via memory mapped registers already flushes all TLBs.
  */
 static void
 agp_i810_flush_tlb(device_t dev)
 {
 }
 
 static int
 agp_i810_enable(device_t dev, u_int32_t mode)
 {
 
 	return (0);
 }
 
 static struct agp_memory *
 agp_i810_alloc_memory(device_t dev, int type, vm_size_t size)
 {
 	struct agp_i810_softc *sc;
 	struct agp_memory *mem;
 	vm_page_t m;
 
 	sc = device_get_softc(dev);
 
 	if ((size & (AGP_PAGE_SIZE - 1)) != 0 ||
 	    sc->agp.as_allocated + size > sc->agp.as_maxmem)
 		return (0);
 
 	if (type == 1) {
 		/*
 		 * Mapping local DRAM into GATT.
 		 */
 		if (sc->match->driver->chiptype != CHIP_I810)
 			return (0);
 		if (size != sc->dcache_size)
 			return (0);
 	} else if (type == 2) {
 		/*
 		 * Type 2 is the contiguous physical memory type, which hands
 		 * back a physical address.  This is used for cursors on i810.
 		 * Hand back as many single pages with physical addresses as
 		 * the user wants, but only allow one larger allocation (ARGB
 		 * cursor) for simplicity.
 		 */
 		if (size != AGP_PAGE_SIZE) {
 			if (sc->argb_cursor != NULL)
 				return (0);
 
 			/* Allocate memory for ARGB cursor, if we can. */
 			sc->argb_cursor = contigmalloc(size, M_AGP,
 			   0, 0, ~0, PAGE_SIZE, 0);
 			if (sc->argb_cursor == NULL)
 				return (0);
 		}
 	}
 
 	mem = malloc(sizeof *mem, M_AGP, M_WAITOK);
 	mem->am_id = sc->agp.as_nextid++;
 	mem->am_size = size;
 	mem->am_type = type;
 	if (type != 1 && (type != 2 || size == AGP_PAGE_SIZE))
 		mem->am_obj = vm_object_allocate(OBJT_DEFAULT,
 		    atop(round_page(size)));
 	else
 		mem->am_obj = 0;
 
 	if (type == 2) {
 		if (size == AGP_PAGE_SIZE) {
 			/*
 			 * Allocate and wire down the page now so that we can
 			 * get its physical address.
 			 */
 			VM_OBJECT_WLOCK(mem->am_obj);
 			m = vm_page_grab(mem->am_obj, 0, VM_ALLOC_NOBUSY |
 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 			VM_OBJECT_WUNLOCK(mem->am_obj);
 			mem->am_physical = VM_PAGE_TO_PHYS(m);
 		} else {
 			/*
 			 * Our allocation is already nicely wired down for us.
 			 * Just grab the physical address.
 			 */
 			mem->am_physical = vtophys(sc->argb_cursor);
 		}
 	} else
 		mem->am_physical = 0;
 
 	mem->am_offset = 0;
 	mem->am_is_bound = 0;
 	TAILQ_INSERT_TAIL(&sc->agp.as_memory, mem, am_link);
 	sc->agp.as_allocated += size;
 
 	return (mem);
 }
 
 static int
 agp_i810_free_memory(device_t dev, struct agp_memory *mem)
 {
 	struct agp_i810_softc *sc;
 	vm_page_t m;
 
 	if (mem->am_is_bound)
 		return (EBUSY);
 
 	sc = device_get_softc(dev);
 
 	if (mem->am_type == 2) {
 		if (mem->am_size == AGP_PAGE_SIZE) {
 			/*
 			 * Unwire the page which we wired in alloc_memory.
 			 */
 			VM_OBJECT_WLOCK(mem->am_obj);
 			m = vm_page_lookup(mem->am_obj, 0);
 			vm_page_lock(m);
 			vm_page_unwire(m, PQ_INACTIVE);
 			vm_page_unlock(m);
 			VM_OBJECT_WUNLOCK(mem->am_obj);
 		} else {
 			contigfree(sc->argb_cursor, mem->am_size, M_AGP);
 			sc->argb_cursor = NULL;
 		}
 	}
 
 	sc->agp.as_allocated -= mem->am_size;
 	TAILQ_REMOVE(&sc->agp.as_memory, mem, am_link);
 	if (mem->am_obj)
 		vm_object_deallocate(mem->am_obj);
 	free(mem, M_AGP);
 	return (0);
 }
 
 static int
 agp_i810_bind_memory(device_t dev, struct agp_memory *mem, vm_offset_t offset)
 {
 	struct agp_i810_softc *sc;
 	vm_offset_t i;
 
 	/* Do some sanity checks first. */
 	if ((offset & (AGP_PAGE_SIZE - 1)) != 0 ||
 	    offset + mem->am_size > AGP_GET_APERTURE(dev)) {
 		device_printf(dev, "binding memory at bad offset %#x\n",
 		    (int)offset);
 		return (EINVAL);
 	}
 
 	sc = device_get_softc(dev);
 	if (mem->am_type == 2 && mem->am_size != AGP_PAGE_SIZE) {
 		mtx_lock(&sc->agp.as_lock);
 		if (mem->am_is_bound) {
 			mtx_unlock(&sc->agp.as_lock);
 			return (EINVAL);
 		}
 		/* The memory is already wired; just stick it in the GTT. */
 		for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) {
 			sc->match->driver->install_gtt_pte(dev, (offset + i) >>
 			    AGP_PAGE_SHIFT, mem->am_physical + i, 0);
 		}
 		mem->am_offset = offset;
 		mem->am_is_bound = 1;
 		mtx_unlock(&sc->agp.as_lock);
 		return (0);
 	}
 
 	if (mem->am_type != 1)
 		return (agp_generic_bind_memory(dev, mem, offset));
 
 	/*
 	 * Mapping local DRAM into GATT.
 	 */
 	if (sc->match->driver->chiptype != CHIP_I810)
 		return (EINVAL);
 	for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE)
 		bus_write_4(sc->sc_res[0],
 		    AGP_I810_GTT + (i >> AGP_PAGE_SHIFT) * 4, i | 3);
 
 	return (0);
 }
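 /*
  * Note: for the i810 dcache case above, the raw GTT write uses "i | 3";
  * the low two bits presumably correspond to I810_PTE_VALID and
  * I810_PTE_LOCAL, i.e. a valid entry pointing at on-chip dcache rather
  * than system memory (an assumption based on the flag usage in
  * agp_i810_install_gtt_pte()).
  */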
 
 static int
 agp_i810_unbind_memory(device_t dev, struct agp_memory *mem)
 {
 	struct agp_i810_softc *sc;
 	vm_offset_t i;
 
 	sc = device_get_softc(dev);
 
 	if (mem->am_type == 2 && mem->am_size != AGP_PAGE_SIZE) {
 		mtx_lock(&sc->agp.as_lock);
 		if (!mem->am_is_bound) {
 			mtx_unlock(&sc->agp.as_lock);
 			return (EINVAL);
 		}
 
 		for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) {
 			sc->match->driver->install_gtt_pte(dev,
 			    (mem->am_offset + i) >> AGP_PAGE_SHIFT, 0, 0);
 		}
 		mem->am_is_bound = 0;
 		mtx_unlock(&sc->agp.as_lock);
 		return (0);
 	}
 
 	if (mem->am_type != 1)
 		return (agp_generic_unbind_memory(dev, mem));
 
 	if (sc->match->driver->chiptype != CHIP_I810)
 		return (EINVAL);
 	for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) {
 		sc->match->driver->install_gtt_pte(dev, i >> AGP_PAGE_SHIFT,
 		    0, 0);
 	}
 	return (0);
 }
 
 static device_method_t agp_i810_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	agp_i810_identify),
 	DEVMETHOD(device_probe,		agp_i810_probe),
 	DEVMETHOD(device_attach,	agp_i810_attach),
 	DEVMETHOD(device_detach,	agp_i810_detach),
 	DEVMETHOD(device_suspend,	bus_generic_suspend),
 	DEVMETHOD(device_resume,	agp_i810_resume),
 
 	/* AGP interface */
 	DEVMETHOD(agp_get_aperture,	agp_generic_get_aperture),
 	DEVMETHOD(agp_set_aperture,	agp_i810_method_set_aperture),
 	DEVMETHOD(agp_bind_page,	agp_i810_bind_page),
 	DEVMETHOD(agp_unbind_page,	agp_i810_unbind_page),
 	DEVMETHOD(agp_flush_tlb,	agp_i810_flush_tlb),
 	DEVMETHOD(agp_enable,		agp_i810_enable),
 	DEVMETHOD(agp_alloc_memory,	agp_i810_alloc_memory),
 	DEVMETHOD(agp_free_memory,	agp_i810_free_memory),
 	DEVMETHOD(agp_bind_memory,	agp_i810_bind_memory),
 	DEVMETHOD(agp_unbind_memory,	agp_i810_unbind_memory),
 	DEVMETHOD(agp_chipset_flush,	agp_intel_gtt_chipset_flush),
 
 	{ 0, 0 }
 };
 
 static driver_t agp_i810_driver = {
 	"agp",
 	agp_i810_methods,
 	sizeof(struct agp_i810_softc),
 };
 
 static devclass_t agp_devclass;
 
 DRIVER_MODULE(agp_i810, vgapci, agp_i810_driver, agp_devclass, 0, 0);
 MODULE_DEPEND(agp_i810, agp, 1, 1, 1);
 MODULE_DEPEND(agp_i810, pci, 1, 1, 1);
 
 void
 agp_intel_gtt_clear_range(device_t dev, u_int first_entry, u_int num_entries)
 {
 	struct agp_i810_softc *sc;
 	u_int i;
 
 	sc = device_get_softc(dev);
 	for (i = 0; i < num_entries; i++)
 		sc->match->driver->install_gtt_pte(dev, first_entry + i,
 		    VM_PAGE_TO_PHYS(bogus_page), 0);
 	sc->match->driver->read_gtt_pte(dev, first_entry + num_entries - 1);
 }
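 /*
  * Note: cleared entries are redirected to the scratch bogus_page rather
  * than zeroed, so stray accesses land on a harmless page; the trailing
  * read_gtt_pte() of the last entry appears to serve as a read-back that
  * flushes the posted GTT writes.
  */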
 
 void
 agp_intel_gtt_insert_pages(device_t dev, u_int first_entry, u_int num_entries,
     vm_page_t *pages, u_int flags)
 {
 	struct agp_i810_softc *sc;
 	u_int i;
 
 	sc = device_get_softc(dev);
 	for (i = 0; i < num_entries; i++) {
 		MPASS(pages[i]->valid == VM_PAGE_BITS_ALL);
 		MPASS(pages[i]->wire_count > 0);
 		sc->match->driver->install_gtt_pte(dev, first_entry + i,
 		    VM_PAGE_TO_PHYS(pages[i]), flags);
 	}
 	sc->match->driver->read_gtt_pte(dev, first_entry + num_entries - 1);
 }
 
 struct intel_gtt
 agp_intel_gtt_get(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	struct intel_gtt res;
 
 	sc = device_get_softc(dev);
 	res.stolen_size = sc->stolen_size;
 	res.gtt_total_entries = sc->gtt_total_entries;
 	res.gtt_mappable_entries = sc->gtt_mappable_entries;
 	res.do_idle_maps = 0;
 	res.scratch_page_dma = VM_PAGE_TO_PHYS(bogus_page);
 	if (sc->agp.as_aperture != NULL)
 		res.gma_bus_addr = rman_get_start(sc->agp.as_aperture);
 	else
 		res.gma_bus_addr = 0;
 	return (res);
 }
 
 static int
 agp_i810_chipset_flush_setup(device_t dev)
 {
 
 	return (0);
 }
 
 static void
 agp_i810_chipset_flush_teardown(device_t dev)
 {
 
 	/* Nothing to do. */
 }
 
 static void
 agp_i810_chipset_flush(device_t dev)
 {
 
 	/* Nothing to do. */
 }
 
 static void
 agp_i830_chipset_flush(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint32_t hic;
 	int i;
 
 	sc = device_get_softc(dev);
 	pmap_invalidate_cache();
 	hic = bus_read_4(sc->sc_res[0], AGP_I830_HIC);
 	bus_write_4(sc->sc_res[0], AGP_I830_HIC, hic | (1U << 31));
 	for (i = 0; i < 20000 /* 1 sec */; i++) {
 		hic = bus_read_4(sc->sc_res[0], AGP_I830_HIC);
 		if ((hic & (1U << 31)) == 0)
 			break;
 		DELAY(50);
 	}
 }
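 /*
  * Note: pmap_invalidate_cache() first flushes the CPU caches, then
  * setting bit 31 of HIC starts the chipset flush; the loop polls until
  * the hardware clears that bit again, giving up after 20000 iterations
  * of DELAY(50), i.e. roughly one second.
  */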
 
 static int
 agp_i915_chipset_flush_alloc_page(device_t dev, uint64_t start, uint64_t end)
 {
 	struct agp_i810_softc *sc;
 	device_t vga;
 
 	sc = device_get_softc(dev);
 	vga = device_get_parent(dev);
 	sc->sc_flush_page_rid = 100;
 	sc->sc_flush_page_res = BUS_ALLOC_RESOURCE(device_get_parent(vga), dev,
 	    SYS_RES_MEMORY, &sc->sc_flush_page_rid, start, end, PAGE_SIZE,
 	    RF_ACTIVE);
 	if (sc->sc_flush_page_res == NULL) {
 		device_printf(dev, "Failed to allocate flush page at 0x%jx\n",
 		    (uintmax_t)start);
 		return (EINVAL);
 	}
 	sc->sc_flush_page_vaddr = rman_get_virtual(sc->sc_flush_page_res);
 	if (bootverbose) {
 		device_printf(dev, "Allocated flush page phys 0x%jx virt %p\n",
 		    (uintmax_t)rman_get_start(sc->sc_flush_page_res),
 		    sc->sc_flush_page_vaddr);
 	}
 	return (0);
 }
 
 static void
 agp_i915_chipset_flush_free_page(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	device_t vga;
 
 	sc = device_get_softc(dev);
 	vga = device_get_parent(dev);
 	if (sc->sc_flush_page_res == NULL)
 		return;
 	BUS_DEACTIVATE_RESOURCE(device_get_parent(vga), dev, SYS_RES_MEMORY,
 	    sc->sc_flush_page_rid, sc->sc_flush_page_res);
 	BUS_RELEASE_RESOURCE(device_get_parent(vga), dev, SYS_RES_MEMORY,
 	    sc->sc_flush_page_rid, sc->sc_flush_page_res);
 }
 
 static int
 agp_i915_chipset_flush_setup(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint32_t temp;
 	int error;
 
 	sc = device_get_softc(dev);
 	temp = pci_read_config(sc->bdev, AGP_I915_IFPADDR, 4);
 	if ((temp & 1) != 0) {
 		temp &= ~1;
 		if (bootverbose)
 			device_printf(dev,
 			    "Found already configured flush page at 0x%jx\n",
 			    (uintmax_t)temp);
 		sc->sc_bios_allocated_flush_page = 1;
 		/*
 		 * In case the BIOS initialized the flush pointer
 		 * register, expect that the BIOS also set up the
 		 * resource for the page.
 		 */
 		error = agp_i915_chipset_flush_alloc_page(dev, temp,
 		    temp + PAGE_SIZE - 1);
 		if (error != 0)
 			return (error);
 	} else {
 		sc->sc_bios_allocated_flush_page = 0;
 		error = agp_i915_chipset_flush_alloc_page(dev, 0, 0xffffffff);
 		if (error != 0)
 			return (error);
 		temp = rman_get_start(sc->sc_flush_page_res);
 		pci_write_config(sc->bdev, AGP_I915_IFPADDR, temp | 1, 4);
 	}
 	return (0);
 }
 
 static void
 agp_i915_chipset_flush_teardown(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint32_t temp;
 
 	sc = device_get_softc(dev);
 	if (sc->sc_flush_page_res == NULL)
 		return;
 	if (!sc->sc_bios_allocated_flush_page) {
 		temp = pci_read_config(sc->bdev, AGP_I915_IFPADDR, 4);
 		temp &= ~1;
 		pci_write_config(sc->bdev, AGP_I915_IFPADDR, temp, 4);
 	}		
 	agp_i915_chipset_flush_free_page(dev);
 }
 
 static int
 agp_i965_chipset_flush_setup(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint64_t temp;
 	uint32_t temp_hi, temp_lo;
 	int error;
 
 	sc = device_get_softc(dev);
 
 	temp_hi = pci_read_config(sc->bdev, AGP_I965_IFPADDR + 4, 4);
 	temp_lo = pci_read_config(sc->bdev, AGP_I965_IFPADDR, 4);
 
 	if ((temp_lo & 1) != 0) {
 		temp = ((uint64_t)temp_hi << 32) | (temp_lo & ~1);
 		if (bootverbose)
 			device_printf(dev,
 			    "Found already configured flush page at 0x%jx\n",
 			    (uintmax_t)temp);
 		sc->sc_bios_allocated_flush_page = 1;
 		/*
 		 * In case the BIOS initialized the flush pointer
 		 * register, expect that the BIOS also set up the
 		 * resource for the page.
 		 */
 		error = agp_i915_chipset_flush_alloc_page(dev, temp,
 		    temp + PAGE_SIZE - 1);
 		if (error != 0)
 			return (error);
 	} else {
 		sc->sc_bios_allocated_flush_page = 0;
 		error = agp_i915_chipset_flush_alloc_page(dev, 0, ~0);
 		if (error != 0)
 			return (error);
 		temp = rman_get_start(sc->sc_flush_page_res);
 		pci_write_config(sc->bdev, AGP_I965_IFPADDR + 4,
 		    (temp >> 32) & UINT32_MAX, 4);
 		pci_write_config(sc->bdev, AGP_I965_IFPADDR,
 		    (temp & UINT32_MAX) | 1, 4);
 	}
 	return (0);
 }
 
 static void
 agp_i965_chipset_flush_teardown(device_t dev)
 {
 	struct agp_i810_softc *sc;
 	uint32_t temp_lo;
 
 	sc = device_get_softc(dev);
 	if (sc->sc_flush_page_res == NULL)
 		return;
 	if (!sc->sc_bios_allocated_flush_page) {
 		temp_lo = pci_read_config(sc->bdev, AGP_I965_IFPADDR, 4);
 		temp_lo &= ~1;
 		pci_write_config(sc->bdev, AGP_I965_IFPADDR, temp_lo, 4);
 	}
 	agp_i915_chipset_flush_free_page(dev);
 }
 
 static void
 agp_i915_chipset_flush(device_t dev)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	*(uint32_t *)sc->sc_flush_page_vaddr = 1;
 }
 
 int
 agp_intel_gtt_chipset_flush(device_t dev)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	sc->match->driver->chipset_flush(dev);
 	return (0);
 }
 
 void
 agp_intel_gtt_unmap_memory(device_t dev, struct sglist *sg_list)
 {
 }
 
 int
 agp_intel_gtt_map_memory(device_t dev, vm_page_t *pages, u_int num_entries,
     struct sglist **sg_list)
 {
 	struct agp_i810_softc *sc;
 	struct sglist *sg;
 	int i;
 #if 0
 	int error;
 	bus_dma_tag_t dmat;
 #endif
 
 	if (*sg_list != NULL)
 		return (0);
 	sc = device_get_softc(dev);
 	sg = sglist_alloc(num_entries, M_WAITOK /* XXXKIB */);
 	for (i = 0; i < num_entries; i++) {
 		sg->sg_segs[i].ss_paddr = VM_PAGE_TO_PHYS(pages[i]);
 		sg->sg_segs[i].ss_len = PAGE_SIZE;
 	}
 
 #if 0
 	error = bus_dma_tag_create(bus_get_dma_tag(dev),
 	    1 /* alignment */, 0 /* boundary */,
 	    1ULL << sc->match->busdma_addr_mask_sz /* lowaddr */,
 	    BUS_SPACE_MAXADDR /* highaddr */,
             NULL /* filtfunc */, NULL /* filtfuncarg */,
 	    BUS_SPACE_MAXADDR /* maxsize */,
 	    BUS_SPACE_UNRESTRICTED /* nsegments */,
 	    BUS_SPACE_MAXADDR /* maxsegsz */,
 	    0 /* flags */, NULL /* lockfunc */, NULL /* lockfuncarg */,
 	    &dmat);
 	if (error != 0) {
 		sglist_free(sg);
 		return (error);
 	}
 	/* XXXKIB */
 #endif
 	*sg_list = sg;
 	return (0);
 }
 
 static void
 agp_intel_gtt_install_pte(device_t dev, u_int index, vm_paddr_t addr,
     u_int flags)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(dev);
 	sc->match->driver->install_gtt_pte(dev, index, addr, flags);
 }
 
 void
 agp_intel_gtt_insert_sg_entries(device_t dev, struct sglist *sg_list,
     u_int first_entry, u_int flags)
 {
 	struct agp_i810_softc *sc;
 	vm_paddr_t spaddr;
 	size_t slen;
 	u_int i, j;
 
 	sc = device_get_softc(dev);
 	for (i = j = 0; j < sg_list->sg_nseg; j++) {
 		spaddr = sg_list->sg_segs[i].ss_paddr;
 		slen = sg_list->sg_segs[i].ss_len;
 		for (; slen > 0; i++) {
 			sc->match->driver->install_gtt_pte(dev, first_entry + i,
 			    spaddr, flags);
 			spaddr += AGP_PAGE_SIZE;
 			slen -= AGP_PAGE_SIZE;
 		}
 	}
 	sc->match->driver->read_gtt_pte(dev, first_entry + i - 1);
 }
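 /*
  * Note: the segment array above is indexed with the PTE counter 'i'
  * rather than the segment counter 'j'.  This works because
  * agp_intel_gtt_map_memory() builds one PAGE_SIZE segment per page, so
  * 'i' and 'j' advance in lock-step; sg lists with multi-page segments
  * would apparently not be walked correctly here.
  */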
 
 void
 intel_gtt_clear_range(u_int first_entry, u_int num_entries)
 {
 
 	agp_intel_gtt_clear_range(intel_agp, first_entry, num_entries);
 }
 
 void
 intel_gtt_insert_pages(u_int first_entry, u_int num_entries, vm_page_t *pages,
     u_int flags)
 {
 
 	agp_intel_gtt_insert_pages(intel_agp, first_entry, num_entries,
 	    pages, flags);
 }
 
 struct intel_gtt *
 intel_gtt_get(void)
 {
 
 	intel_private.base = agp_intel_gtt_get(intel_agp);
 	return (&intel_private.base);
 }
 
 int
 intel_gtt_chipset_flush(void)
 {
 
 	return (agp_intel_gtt_chipset_flush(intel_agp));
 }
 
 void
 intel_gtt_unmap_memory(struct sglist *sg_list)
 {
 
 	agp_intel_gtt_unmap_memory(intel_agp, sg_list);
 }
 
 int
 intel_gtt_map_memory(vm_page_t *pages, u_int num_entries,
     struct sglist **sg_list)
 {
 
 	return (agp_intel_gtt_map_memory(intel_agp, pages, num_entries,
 	    sg_list));
 }
 
 void
 intel_gtt_insert_sg_entries(struct sglist *sg_list, u_int first_entry,
     u_int flags)
 {
 
 	agp_intel_gtt_insert_sg_entries(intel_agp, sg_list, first_entry, flags);
 }
 
 void
 intel_gtt_install_pte(u_int index, vm_paddr_t addr, u_int flags)
 {
 
 	agp_intel_gtt_install_pte(intel_agp, index, addr, flags);
 }
 
 device_t
 intel_gtt_get_bridge_device(void)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(intel_agp);
 	return (sc->bdev);
 }
 
 vm_paddr_t
 intel_gtt_read_pte_paddr(u_int entry)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(intel_agp);
 	return (sc->match->driver->read_gtt_pte_paddr(intel_agp, entry));
 }
 
 u_int32_t
 intel_gtt_read_pte(u_int entry)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(intel_agp);
 	return (sc->match->driver->read_gtt_pte(intel_agp, entry));
 }
 
 void
 intel_gtt_write(u_int entry, uint32_t val)
 {
 	struct agp_i810_softc *sc;
 
 	sc = device_get_softc(intel_agp);
 	return (sc->match->driver->write_gtt(intel_agp, entry, val));
 }
Index: head/sys/dev/amd_ecc_inject/ecc_inject.c
===================================================================
--- head/sys/dev/amd_ecc_inject/ecc_inject.c	(revision 338317)
+++ head/sys/dev/amd_ecc_inject/ecc_inject.c	(revision 338318)
@@ -1,243 +1,243 @@
 /*-
  * Copyright (c) 2017 Andriy Gapon
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/sysctl.h>
 #include <sys/types.h>
 
 #include <dev/pci/pcivar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 
 
 /*
  * See BKDG for AMD Family 15h Models 00h-0Fh Processors
  * (publication 42301 Rev 3.08 - March 12, 2012):
  * - 2.13.3.1 DRAM Error Injection
  * - D18F3xB8 NB Array Address
  * - D18F3xBC NB Array Data Port
  * - D18F3xBC_x8 DRAM ECC
  */
 #define	NB_MCA_CFG		0x44
 #define		DRAM_ECC_EN	(1 << 22)
 #define	NB_MCA_EXTCFG		0x180
 #define		ECC_SYMB_SZ	(1 << 25)
 #define	NB_ARRAY_ADDR		0xb8
 #define		DRAM_ECC_SEL	(0x8 << 28)
 #define		QUADRANT_SHIFT	1
 #define		QUADRANT_MASK	0x3
 #define	NB_ARRAY_PORT		0xbc
 #define		INJ_WORD_SHIFT	20
 #define		INJ_WORD_MASK	0x1ff
 #define		DRAM_ERR_EN	(1 << 18)
 #define		DRAM_WR_REQ	(1 << 17)
 #define		DRAM_RD_REQ	(1 << 16)
 #define		INJ_VECTOR_MASK	0xffff
 
 static void ecc_ei_inject(int);
 
 static device_t nbdev;
 static int delay_ms = 0;
 static int quadrant = 0;	/* 0 - 3 */
 static int word_mask = 0x001;	/* 9 bits: 8 + 1 for ECC */
 static int bit_mask = 0x0001;	/* 16 bits */
 
 static int
 sysctl_int_with_max(SYSCTL_HANDLER_ARGS)
 {
 	u_int value;
 	int error;
 
 	value = *(u_int *)arg1;
 	error = sysctl_handle_int(oidp, &value, 0, req);
 	if (error || req->newptr == NULL)
 		return (error);
 	if (value > arg2)
 		return (EINVAL);
 	*(u_int *)arg1 = value;
 	return (0);
 }
 
 static int
 sysctl_nonzero_int_with_max(SYSCTL_HANDLER_ARGS)
 {
 	u_int value;
 	int error;
 
 	value = *(u_int *)arg1;
 	error = sysctl_int_with_max(oidp, &value, arg2, req);
 	if (error || req->newptr == NULL)
 		return (error);
 	if (value == 0)
 		return (EINVAL);
 	*(u_int *)arg1 = value;
 	return (0);
 }
 
 static int
 sysctl_proc_inject(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int i;
 
 	i = 0;
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error)
 		return (error);
 	if (i != 0)
 		ecc_ei_inject(i);
 	return (0);
 }
 
 static SYSCTL_NODE(_hw, OID_AUTO, error_injection, CTLFLAG_RD, NULL,
     "Hardware error injection");
 static SYSCTL_NODE(_hw_error_injection, OID_AUTO, dram_ecc, CTLFLAG_RD, NULL,
     "DRAM ECC error injection");
 SYSCTL_UINT(_hw_error_injection_dram_ecc, OID_AUTO, delay,
     CTLTYPE_UINT | CTLFLAG_RW, &delay_ms, 0,
     "Delay in milliseconds between error injections");
 SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, quadrant,
     CTLTYPE_UINT | CTLFLAG_RW, &quadrant, QUADRANT_MASK,
     sysctl_int_with_max, "IU",
     "Index of 16-byte quadrant within 64-byte line where errors "
     "should be injected");
 SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, word_mask,
     CTLTYPE_UINT | CTLFLAG_RW, &word_mask, INJ_WORD_MASK,
     sysctl_nonzero_int_with_max, "IU",
     "9-bit mask of words where errors should be injected (8 data + 1 ECC)");
 SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, bit_mask,
     CTLTYPE_UINT | CTLFLAG_RW, &bit_mask, INJ_VECTOR_MASK,
     sysctl_nonzero_int_with_max, "IU",
     "16-bit mask of bits within each selected word where errors "
     "should be injected");
 SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, inject,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_proc_inject, "I",
     "Inject a number of errors according to configured parameters");
 
 static void
 ecc_ei_inject_one(void *arg, size_t size)
 {
 	volatile uint64_t *memory = arg;
 	uint32_t val;
 	int i;
 
 	val = DRAM_ECC_SEL | (quadrant << QUADRANT_SHIFT);
 	pci_write_config(nbdev, NB_ARRAY_ADDR, val, 4);
 
 	val = (word_mask << INJ_WORD_SHIFT) | DRAM_WR_REQ | bit_mask;
 	pci_write_config(nbdev, NB_ARRAY_PORT, val, 4);
 
 	for (i = 0; i < size / sizeof(uint64_t); i++) {
 		memory[i] = 0;
 		val = pci_read_config(nbdev, NB_ARRAY_PORT, 4);
 		if ((val & DRAM_WR_REQ) == 0)
 			break;
 	}
 	for (i = 0; i < size / sizeof(uint64_t); i++)
 		memory[0] = memory[i];
 }
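 /*
  * Note: the sequence above follows the BKDG reference at the top of the
  * file: NB_ARRAY_ADDR selects the DRAM ECC injection array and quadrant,
  * NB_ARRAY_PORT arms a write request with the word/bit masks, the first
  * loop stores into the target buffer until the hardware clears
  * DRAM_WR_REQ (injection consumed), and the second loop reads every word
  * back so the corrupted ECC is actually observed.
  */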
 
 static void
 ecc_ei_inject(int count)
 {
 	vm_offset_t memory;
 	int injected;
 
 	KASSERT((quadrant & ~QUADRANT_MASK) == 0,
 	    ("quadrant value is outside of range: %u", quadrant));
 	KASSERT(word_mask != 0 && (word_mask & ~INJ_WORD_MASK) == 0,
 	    ("word mask value is outside of range: 0x%x", word_mask));
 	KASSERT(bit_mask != 0 && (bit_mask & ~INJ_VECTOR_MASK) == 0,
 	    ("bit mask value is outside of range: 0x%x", bit_mask));
 
 	memory = kmem_alloc_attr(PAGE_SIZE, M_WAITOK, 0, ~0,
 	    VM_MEMATTR_UNCACHEABLE);
 
 	for (injected = 0; injected < count; injected++) {
 		ecc_ei_inject_one((void*)memory, PAGE_SIZE);
 		if (delay_ms != 0 && injected != count - 1)
 			pause_sbt("ecc_ei_inject", delay_ms * SBT_1MS, 0, 0);
 	}
 
-	kmem_free(kernel_arena, memory, PAGE_SIZE);
+	kmem_free(memory, PAGE_SIZE);
 }
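 /*
  * Note: the buffer is mapped VM_MEMATTR_UNCACHEABLE so that the stores
  * and loads in ecc_ei_inject_one() reach DRAM instead of being absorbed
  * by the CPU caches.
  */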
 
 static int
 ecc_ei_load(void)
 {
 	uint32_t val;
 
 	if (cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) {
 		printf("DRAM ECC error injection is not supported\n");
 		return (ENXIO);
 	}
 	nbdev = pci_find_bsf(0, 24, 3);
 	if (nbdev == NULL) {
 		printf("Couldn't find NB PCI device\n");
 		return (ENXIO);
 	}
 	val = pci_read_config(nbdev, NB_MCA_CFG, 4);
 	if ((val & DRAM_ECC_EN) == 0) {
 		printf("DRAM ECC is not supported or disabled\n");
 		return (ENXIO);
 	}
 	printf("DRAM ECC error injection support loaded\n");
 	return (0);
 }
 
 static int
 tsc_modevent(module_t mod __unused, int type, void *data __unused)
 {
 	int error;
 
 	error = 0;
 	switch (type) {
 	case MOD_LOAD:
 		error = ecc_ei_load();
 		break;
 	case MOD_UNLOAD:
 	case MOD_SHUTDOWN:
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (error);
 }
 
 DEV_MODULE(tsc, tsc_modevent, NULL);
Index: head/sys/dev/drm/drm_scatter.c
===================================================================
--- head/sys/dev/drm/drm_scatter.c	(revision 338317)
+++ head/sys/dev/drm/drm_scatter.c	(revision 338318)
@@ -1,129 +1,129 @@
 /*-
  * Copyright (c) 2009 Robert C. Noland III <rnoland@FreeBSD.org>
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /** @file drm_scatter.c
  * Allocation of memory for scatter-gather mappings by the graphics chip.
  * The memory allocated here is then made into an aperture in the card
  * by mapping the pages into the GART.
  */
 
 #include "dev/drm/drmP.h"
 
 int
 drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather *request)
 {
 	struct drm_sg_mem *entry;
 	vm_size_t size;
 	vm_pindex_t pindex;
 
 	if (dev->sg)
 		return EINVAL;
 
 	DRM_DEBUG("request size=%ld\n", request->size);
 
 	entry = malloc(sizeof(*entry), DRM_MEM_DRIVER, M_WAITOK | M_ZERO);
 
 	size = round_page(request->size);
 	entry->pages = atop(size);
 	entry->busaddr = malloc(entry->pages * sizeof(*entry->busaddr),
 	    DRM_MEM_SGLISTS, M_WAITOK | M_ZERO);
 
 	entry->vaddr = kmem_alloc_attr(size, M_WAITOK | M_ZERO, 0,
 	    BUS_SPACE_MAXADDR_32BIT, VM_MEMATTR_WRITE_COMBINING);
 	if (entry->vaddr == 0) {
 		drm_sg_cleanup(entry);
 		return (ENOMEM);
 	}
 
 	for (pindex = 0; pindex < entry->pages; pindex++) {
 		entry->busaddr[pindex] =
 		    vtophys(entry->vaddr + IDX_TO_OFF(pindex));
 	}
 
 	DRM_LOCK();
 	if (dev->sg) {
 		DRM_UNLOCK();
 		drm_sg_cleanup(entry);
 		return (EINVAL);
 	}
 	dev->sg = entry;
 	DRM_UNLOCK();
 
 	request->handle = entry->vaddr;
 
 	DRM_DEBUG("allocated %ju pages @ 0x%08zx, contents=%08lx\n",
 	    entry->pages, entry->vaddr, *(unsigned long *)entry->vaddr);
 
 	return (0);
 }
 
 int
 drm_sg_alloc_ioctl(struct drm_device *dev, void *data,
 		   struct drm_file *file_priv)
 {
 	struct drm_scatter_gather *request = data;
 
 	DRM_DEBUG("\n");
 
 	return (drm_sg_alloc(dev, request));
 }
 
 void
 drm_sg_cleanup(struct drm_sg_mem *entry)
 {
 	if (entry == NULL)
 		return;
 
 	if (entry->vaddr != 0)
-		kmem_free(kernel_arena, entry->vaddr, IDX_TO_OFF(entry->pages));
+		kmem_free(entry->vaddr, IDX_TO_OFF(entry->pages));
 
 	free(entry->busaddr, DRM_MEM_SGLISTS);
 	free(entry, DRM_MEM_DRIVER);
 
 	return;
 }
 
 int
 drm_sg_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	struct drm_scatter_gather *request = data;
 	struct drm_sg_mem *entry;
 
 	DRM_LOCK();
 	entry = dev->sg;
 	dev->sg = NULL;
 	DRM_UNLOCK();
 
 	if (!entry || entry->vaddr != request->handle)
 		return (EINVAL);
 
 	DRM_DEBUG("free 0x%zx\n", entry->vaddr);
 
 	drm_sg_cleanup(entry);
 
 	return (0);
 }
Index: head/sys/dev/drm2/drm_scatter.c
===================================================================
--- head/sys/dev/drm2/drm_scatter.c	(revision 338317)
+++ head/sys/dev/drm2/drm_scatter.c	(revision 338318)
@@ -1,136 +1,136 @@
 /*-
  * Copyright (c) 2009 Robert C. Noland III <rnoland@FreeBSD.org>
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /** @file drm_scatter.c
  * Allocation of memory for scatter-gather mappings by the graphics chip.
  * The memory allocated here is then made into an aperture in the card
  * by mapping the pages into the GART.
  */
 
 #include <dev/drm2/drmP.h>
 
 #define DEBUG_SCATTER 0
 
 static inline vm_offset_t drm_vmalloc_dma(vm_size_t size)
 {
 	return kmem_alloc_attr(size, M_NOWAIT | M_ZERO, 0,
 	    BUS_SPACE_MAXADDR_32BIT, VM_MEMATTR_WRITE_COMBINING);
 }
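 /*
  * Note: kmem_alloc_attr() returns wired kernel virtual memory whose
  * backing pages satisfy the address (below 4GB here) and memory-attribute
  * constraints but need not be physically contiguous, which is why
  * drm_sg_alloc() below records a bus address per page via vtophys().
  */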
 
 void drm_sg_cleanup(struct drm_sg_mem * entry)
 {
 	if (entry == NULL)
 		return;
 
 	if (entry->vaddr != 0)
-		kmem_free(kernel_arena, entry->vaddr, IDX_TO_OFF(entry->pages));
+		kmem_free(entry->vaddr, IDX_TO_OFF(entry->pages));
 
 	free(entry->busaddr, DRM_MEM_SGLISTS);
 	free(entry, DRM_MEM_DRIVER);
 }
 
 int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request)
 {
 	struct drm_sg_mem *entry;
 	vm_size_t size;
 	vm_pindex_t pindex;
 
 	DRM_DEBUG("\n");
 
 	if (!drm_core_check_feature(dev, DRIVER_SG))
 		return -EINVAL;
 
 	if (dev->sg)
 		return -EINVAL;
 
 	entry = malloc(sizeof(*entry), DRM_MEM_DRIVER, M_NOWAIT | M_ZERO);
 	if (!entry)
 		return -ENOMEM;
 
 	DRM_DEBUG("request size=%ld\n", request->size);
 
 	size = round_page(request->size);
 	entry->pages = atop(size);
 	entry->busaddr = malloc(entry->pages * sizeof(*entry->busaddr),
 	    DRM_MEM_SGLISTS, M_NOWAIT | M_ZERO);
 	if (!entry->busaddr) {
 		free(entry, DRM_MEM_DRIVER);
 		return -ENOMEM;
 	}
 
 	entry->vaddr = drm_vmalloc_dma(size);
 	if (entry->vaddr == 0) {
 		free(entry->busaddr, DRM_MEM_DRIVER);
 		free(entry, DRM_MEM_DRIVER);
 		return -ENOMEM;
 	}
 
 	for (pindex = 0; pindex < entry->pages; pindex++) {
 		entry->busaddr[pindex] =
 		    vtophys(entry->vaddr + IDX_TO_OFF(pindex));
 	}
 
 	request->handle = entry->vaddr;
 
 	dev->sg = entry;
 
 	DRM_DEBUG("allocated %ju pages @ 0x%08zx, contents=%08lx\n",
 	    entry->pages, entry->vaddr, *(unsigned long *)entry->vaddr);
 
 	return 0;
 }
 
 int drm_sg_alloc_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
 {
 	struct drm_scatter_gather *request = data;
 
 	return drm_sg_alloc(dev, request);
 
 }
 
 int drm_sg_free(struct drm_device *dev, void *data,
 		struct drm_file *file_priv)
 {
 	struct drm_scatter_gather *request = data;
 	struct drm_sg_mem *entry;
 
 	if (!drm_core_check_feature(dev, DRIVER_SG))
 		return -EINVAL;
 
 	entry = dev->sg;
 	dev->sg = NULL;
 
 	if (!entry || entry->vaddr != request->handle)
 		return -EINVAL;
 
 	DRM_DEBUG("free 0x%zx\n", entry->vaddr);
 
 	drm_sg_cleanup(entry);
 
 	return 0;
 }
Index: head/sys/dev/hyperv/vmbus/hyperv.c
===================================================================
--- head/sys/dev/hyperv/vmbus/hyperv.c	(revision 338317)
+++ head/sys/dev/hyperv/vmbus/hyperv.c	(revision 338318)
@@ -1,337 +1,336 @@
 /*-
  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /**
  * Implements low-level interactions with Hyper-V/Azure
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/systm.h>
 #include <sys/timetc.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/pmap.h>
 
 #include <dev/hyperv/include/hyperv.h>
 #include <dev/hyperv/include/hyperv_busdma.h>
 #include <dev/hyperv/vmbus/hyperv_machdep.h>
 #include <dev/hyperv/vmbus/hyperv_reg.h>
 #include <dev/hyperv/vmbus/hyperv_var.h>
 
 #define HYPERV_FREEBSD_BUILD		0ULL
 #define HYPERV_FREEBSD_VERSION		((uint64_t)__FreeBSD_version)
 #define HYPERV_FREEBSD_OSID		0ULL
 
 #define MSR_HV_GUESTID_BUILD_FREEBSD	\
 	(HYPERV_FREEBSD_BUILD & MSR_HV_GUESTID_BUILD_MASK)
 #define MSR_HV_GUESTID_VERSION_FREEBSD	\
 	((HYPERV_FREEBSD_VERSION << MSR_HV_GUESTID_VERSION_SHIFT) & \
 	 MSR_HV_GUESTID_VERSION_MASK)
 #define MSR_HV_GUESTID_OSID_FREEBSD	\
 	((HYPERV_FREEBSD_OSID << MSR_HV_GUESTID_OSID_SHIFT) & \
 	 MSR_HV_GUESTID_OSID_MASK)
 
 #define MSR_HV_GUESTID_FREEBSD		\
 	(MSR_HV_GUESTID_BUILD_FREEBSD |	\
 	 MSR_HV_GUESTID_VERSION_FREEBSD | \
 	 MSR_HV_GUESTID_OSID_FREEBSD |	\
 	 MSR_HV_GUESTID_OSTYPE_FREEBSD)
 
 struct hypercall_ctx {
 	void			*hc_addr;
 	vm_paddr_t		hc_paddr;
 };
 
 static u_int			hyperv_get_timecount(struct timecounter *);
 static bool			hyperv_identify(void);
 static void			hypercall_memfree(void);
 
 u_int				hyperv_ver_major;
 
 u_int				hyperv_features;
 u_int				hyperv_recommends;
 
 static u_int			hyperv_pm_features;
 static u_int			hyperv_features3;
 
 hyperv_tc64_t			hyperv_tc64;
 
 static struct timecounter	hyperv_timecounter = {
 	.tc_get_timecount	= hyperv_get_timecount,
 	.tc_poll_pps		= NULL,
 	.tc_counter_mask	= 0xffffffff,
 	.tc_frequency		= HYPERV_TIMER_FREQ,
 	.tc_name		= "Hyper-V",
 	.tc_quality		= 2000,
 	.tc_flags		= 0,
 	.tc_priv		= NULL
 };
 
 static struct hypercall_ctx	hypercall_context;
 
 static u_int
 hyperv_get_timecount(struct timecounter *tc __unused)
 {
 	return rdmsr(MSR_HV_TIME_REF_COUNT);
 }
 
 static uint64_t
 hyperv_tc64_rdmsr(void)
 {
 
 	return (rdmsr(MSR_HV_TIME_REF_COUNT));
 }
 
 uint64_t
 hypercall_post_message(bus_addr_t msg_paddr)
 {
 	return hypercall_md(hypercall_context.hc_addr,
 	    HYPERCALL_POST_MESSAGE, msg_paddr, 0);
 }
 
 uint64_t
 hypercall_signal_event(bus_addr_t monprm_paddr)
 {
 	return hypercall_md(hypercall_context.hc_addr,
 	    HYPERCALL_SIGNAL_EVENT, monprm_paddr, 0);
 }
 
 int
 hyperv_guid2str(const struct hyperv_guid *guid, char *buf, size_t sz)
 {
 	const uint8_t *d = guid->hv_guid;
 
 	return snprintf(buf, sz, "%02x%02x%02x%02x-"
 	    "%02x%02x-%02x%02x-%02x%02x-"
 	    "%02x%02x%02x%02x%02x%02x",
 	    d[3], d[2], d[1], d[0],
 	    d[5], d[4], d[7], d[6], d[8], d[9],
 	    d[10], d[11], d[12], d[13], d[14], d[15]);
 }
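 /*
  * Note: the first three GUID fields are stored little-endian and the
  * rest big-endian, so e.g. raw bytes 00 01 02 ... 0f format as
  * "03020100-0504-0706-0809-0a0b0c0d0e0f".
  */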
 
 static bool
 hyperv_identify(void)
 {
 	u_int regs[4];
 	unsigned int maxleaf;
 
 	if (vm_guest != VM_GUEST_HV)
 		return (false);
 
 	do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
 	maxleaf = regs[0];
 	if (maxleaf < CPUID_LEAF_HV_LIMITS)
 		return (false);
 
 	do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
 	if (regs[0] != CPUID_HV_IFACE_HYPERV)
 		return (false);
 
 	do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
 	if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) {
 		/*
 		 * Hyper-V w/o Hypercall is impossible; someone
 		 * is faking Hyper-V.
 		 */
 		return (false);
 	}
 	hyperv_features = regs[0];
 	hyperv_pm_features = regs[2];
 	hyperv_features3 = regs[3];
 
 	do_cpuid(CPUID_LEAF_HV_IDENTITY, regs);
 	hyperv_ver_major = regs[1] >> 16;
 	printf("Hyper-V Version: %d.%d.%d [SP%d]\n",
 	    hyperv_ver_major, regs[1] & 0xffff, regs[0], regs[2]);
 
 	printf("  Features=0x%b\n", hyperv_features,
 	    "\020"
 	    "\001VPRUNTIME"	/* MSR_HV_VP_RUNTIME */
 	    "\002TMREFCNT"	/* MSR_HV_TIME_REF_COUNT */
 	    "\003SYNIC"		/* MSRs for SynIC */
 	    "\004SYNTM"		/* MSRs for SynTimer */
 	    "\005APIC"		/* MSR_HV_{EOI,ICR,TPR} */
 	    "\006HYPERCALL"	/* MSR_HV_{GUEST_OS_ID,HYPERCALL} */
 	    "\007VPINDEX"	/* MSR_HV_VP_INDEX */
 	    "\010RESET"		/* MSR_HV_RESET */
 	    "\011STATS"		/* MSR_HV_STATS_ */
 	    "\012REFTSC"	/* MSR_HV_REFERENCE_TSC */
 	    "\013IDLE"		/* MSR_HV_GUEST_IDLE */
 	    "\014TMFREQ"	/* MSR_HV_{TSC,APIC}_FREQUENCY */
 	    "\015DEBUG");	/* MSR_HV_SYNTH_DEBUG_ */
 	printf("  PM Features=0x%b [C%u]\n",
 	    (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK),
 	    "\020"
 	    "\005C3HPET",	/* HPET is required for C3 state */
 	    CPUPM_HV_CSTATE(hyperv_pm_features));
 	printf("  Features3=0x%b\n", hyperv_features3,
 	    "\020"
 	    "\001MWAIT"		/* MWAIT */
 	    "\002DEBUG"		/* guest debug support */
 	    "\003PERFMON"	/* performance monitor */
 	    "\004PCPUDPE"	/* physical CPU dynamic partition event */
 	    "\005XMMHC"		/* hypercall input through XMM regs */
 	    "\006IDLE"		/* guest idle support */
 	    "\007SLEEP"		/* hypervisor sleep support */
 	    "\010NUMA"		/* NUMA distance query support */
 	    "\011TMFREQ"	/* timer frequency query (TSC, LAPIC) */
 	    "\012SYNCMC"	/* inject synthetic machine checks */
 	    "\013CRASH"		/* MSRs for guest crash */
 	    "\014DEBUGMSR"	/* MSRs for guest debug */
 	    "\015NPIEP"		/* NPIEP */
 	    "\016HVDIS");	/* disabling hypervisor */
 
 	do_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs);
 	hyperv_recommends = regs[0];
 	if (bootverbose)
 		printf("  Recommends: %08x %08x\n", regs[0], regs[1]);
 
 	do_cpuid(CPUID_LEAF_HV_LIMITS, regs);
 	if (bootverbose) {
 		printf("  Limits: Vcpu:%d Lcpu:%d Int:%d\n",
 		    regs[0], regs[1], regs[2]);
 	}
 
 	if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) {
 		do_cpuid(CPUID_LEAF_HV_HWFEATURES, regs);
 		if (bootverbose) {
 			printf("  HW Features: %08x, AMD: %08x\n",
 			    regs[0], regs[3]);
 		}
 	}
 
 	return (true);
 }
 
 static void
 hyperv_init(void *dummy __unused)
 {
 	if (!hyperv_identify()) {
 		/* Not Hyper-V; reset guest id to the generic one. */
 		if (vm_guest == VM_GUEST_HV)
 			vm_guest = VM_GUEST_VM;
 		return;
 	}
 
 	/* Set guest id */
 	wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_FREEBSD);
 
 	if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) {
 		/* Register Hyper-V timecounter */
 		tc_init(&hyperv_timecounter);
 
 		/*
 		 * Install the 64-bit timecounter method for other
 		 * modules to use.
 		 */
 		hyperv_tc64 = hyperv_tc64_rdmsr;
 	}
 }
 SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init,
     NULL);
 
 static void
 hypercall_memfree(void)
 {
-	kmem_free(kernel_arena, (vm_offset_t)hypercall_context.hc_addr,
-	    PAGE_SIZE);
+	kmem_free((vm_offset_t)hypercall_context.hc_addr, PAGE_SIZE);
 	hypercall_context.hc_addr = NULL;
 }
 
 static void
 hypercall_create(void *arg __unused)
 {
 	uint64_t hc, hc_orig;
 
 	if (vm_guest != VM_GUEST_HV)
 		return;
 
 	/*
 	 * NOTE:
 	 * - busdma(9), i.e. hyperv_dmamem APIs, can _not_ be used due to
 	 *   the NX bit.
 	 * - Assume kmem_malloc() returns properly aligned memory.
 	 */
 	hypercall_context.hc_addr = (void *)kmem_malloc(PAGE_SIZE, M_EXEC |
 	    M_WAITOK);
 	hypercall_context.hc_paddr = vtophys(hypercall_context.hc_addr);
 
 	/* Get the 'reserved' bits, which require preservation. */
 	hc_orig = rdmsr(MSR_HV_HYPERCALL);
 
 	/*
 	 * Set up the Hypercall page.
 	 *
 	 * NOTE: 'reserved' bits MUST be preserved.
 	 */
 	hc = ((hypercall_context.hc_paddr >> PAGE_SHIFT) <<
 	    MSR_HV_HYPERCALL_PGSHIFT) |
 	    (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) |
 	    MSR_HV_HYPERCALL_ENABLE;
 	wrmsr(MSR_HV_HYPERCALL, hc);
 
 	/*
 	 * Confirm that the Hypercall page did get set up.
 	 */
 	hc = rdmsr(MSR_HV_HYPERCALL);
 	if ((hc & MSR_HV_HYPERCALL_ENABLE) == 0) {
 		printf("hyperv: Hypercall setup failed\n");
 		hypercall_memfree();
 		/* Can't perform any Hyper-V specific actions */
 		vm_guest = VM_GUEST_VM;
 		return;
 	}
 	if (bootverbose)
 		printf("hyperv: Hypercall created\n");
 }
 SYSINIT(hypercall_ctor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_create, NULL);
 
 static void
 hypercall_destroy(void *arg __unused)
 {
 	uint64_t hc;
 
 	if (hypercall_context.hc_addr == NULL)
 		return;
 
 	/* Disable Hypercall */
 	hc = rdmsr(MSR_HV_HYPERCALL);
 	wrmsr(MSR_HV_HYPERCALL, (hc & MSR_HV_HYPERCALL_RSVD_MASK));
 	hypercall_memfree();
 
 	if (bootverbose)
 		printf("hyperv: Hypercall destroyed\n");
 }
 SYSUNINIT(hypercall_dtor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_destroy,
     NULL);
Index: head/sys/dev/liquidio/lio_network.h
===================================================================
--- head/sys/dev/liquidio/lio_network.h	(revision 338317)
+++ head/sys/dev/liquidio/lio_network.h	(revision 338318)
@@ -1,293 +1,293 @@
 /*
  *   BSD LICENSE
  *
  *   Copyright(c) 2017 Cavium, Inc.. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
  *   are met:
  *
  *     * Redistributions of source code must retain the above copyright
  *       notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright
  *       notice, this list of conditions and the following disclaimer in
  *       the documentation and/or other materials provided with the
  *       distribution.
  *     * Neither the name of Cavium, Inc. nor the names of its
  *       contributors may be used to endorse or promote products derived
  *       from this software without specific prior written permission.
  *
  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  *   OWNER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 /*$FreeBSD$*/
 
 /* \file  lio_network.h
  * \brief Host NIC Driver: Structure and Macro definitions used by NIC Module.
  */
 
 #ifndef __LIO_NETWORK_H__
 #define __LIO_NETWORK_H__
 
 #include "lio_rss.h"
 
 #define LIO_MIN_MTU_SIZE	72
 #define LIO_MAX_MTU_SIZE	(LIO_MAX_FRM_SIZE - LIO_FRM_HEADER_SIZE)
 
 #define LIO_MAX_SG		64
 #define LIO_MAX_FRAME_SIZE	60000
 
 struct lio_fw_stats_resp {
 	uint64_t	rh;
 	struct octeon_link_stats stats;
 	uint64_t	status;
 };
 
 /* LiquidIO per-interface network private data */
 struct lio {
 	/* State of the interface. Rx/Tx happens only in the RUNNING state.  */
 	int	ifstate;
 
 	/*
 	 * Octeon Interface index number. This device will be represented as
 	 * oct<ifidx> in the system.
 	 */
 	int	ifidx;
 
 	/* Octeon Input queue to use to transmit for this network interface. */
 	int	txq;
 
 	/*
 	 * Octeon Output queue from which pkts arrive
 	 * for this network interface.
 	 */
 	int	rxq;
 
 	/* Guards each glist */
 	struct mtx	*glist_lock;
 
 #define LIO_DEFAULT_STATS_INTERVAL 10000
 	/* callout timer for stats */
 	struct callout	stats_timer;
 
 	/* Stats update interval in milliseconds */
 	uint16_t	stats_interval;
 
 	/* IRQ coalescing driver stats */
 	struct octeon_intrmod_cfg intrmod_cfg;
 
 	/* Array of gather component linked lists */
 	struct lio_stailq_head	*ghead;
 	void	**glists_virt_base;
 	vm_paddr_t	*glists_dma_base;
 	uint32_t	glist_entry_size;
 
 	/* Pointer to the octeon device structure. */
 	struct octeon_device	*oct_dev;
 
 	struct ifnet	*ifp;
 	struct ifmedia	ifmedia;
 	int		if_flags;
 
 	/* Link information sent by the core application for this interface. */
 	struct octeon_link_info	linfo;
 
 	/* counter of link changes */
 	uint64_t	link_changes;
 
 	/* Size of Tx queue for this octeon device. */
 	uint32_t	tx_qsize;
 
 	/* Size of Rx queue for this octeon device. */
 	uint32_t	rx_qsize;
 
 	/* MTU size of this octeon device. */
 	uint32_t	mtu;
 
 	/* msg level flag per interface. */
 	uint32_t	msg_enable;
 
 	/* Interface info */
 	uint32_t	intf_open;
 
 	/* task queue for rx oom status */
 	struct lio_tq	rx_status_tq;
 
 	/* VLAN Filtering related */
 	eventhandler_tag	vlan_attach;
 	eventhandler_tag	vlan_detach;
 #ifdef RSS
 	struct lio_rss_params_set rss_set;
 #endif	/* RSS */
 };
 
 #define LIO_MAX_CORES	12
 
 /*
  * \brief Enable or disable feature
  * @param ifp       pointer to network device
  * @param cmd       Command that just requires acknowledgment
  * @param param1    Parameter to command
  */
 int	lio_set_feature(struct ifnet *ifp, int cmd, uint16_t param1);
 
 /*
  * \brief Link control command completion callback
  * @param nctrl_ptr pointer to control packet structure
  *
  * This routine is called by the callback function when a ctrl pkt sent to
  * core app completes. The nctrl_ptr contains a copy of the command type
  * and data sent to the core app. This routine is only called if the ctrl
  * pkt was sent successfully to the core app.
  */
 void	lio_ctrl_cmd_completion(void *nctrl_ptr);
 
 int	lio_setup_io_queues(struct octeon_device *octeon_dev, int ifidx,
 			    uint32_t num_iqs, uint32_t num_oqs);
 
 int	lio_setup_interrupt(struct octeon_device *oct, uint32_t num_ioqs);
 
 static inline void *
 lio_recv_buffer_alloc(uint32_t size)
 {
 	struct mbuf	*mb = NULL;
 
 	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
 	if (mb != NULL)
 		mb->m_pkthdr.len = mb->m_len = size;
 
 	return ((void *)mb);
 }
 
 static inline void
 lio_recv_buffer_free(void *buffer)
 {
 
 	m_freem((struct mbuf *)buffer);
 }
 
 static inline int
 lio_get_order(unsigned long size)
 {
 	int	order;
 
 	size = (size - 1) >> PAGE_SHIFT;
 	order = 0;
 	while (size) {
 		order++;
 		size >>= 1;
 	}
 
 	return (order);
 }
 
 static inline void *
 lio_dma_alloc(size_t size, vm_paddr_t *dma_handle)
 {
 	size_t	align;
 	void	*mem;
 
 	align = PAGE_SIZE << lio_get_order(size);
 	mem = (void *)kmem_alloc_contig(size, M_WAITOK, 0, ~0ul, align, 0,
 	    VM_MEMATTR_DEFAULT);
 	if (mem != NULL)
 		*dma_handle = vtophys(mem);
 	else
 		*dma_handle = 0;
 
 	return (mem);
 }
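 
 /*
  * Worked example (illustrative, not part of the original header): for a
  * 12 KB request with 4 KB pages, lio_get_order() computes
  * (12288 - 1) >> 12 = 2, so lio_dma_alloc() passes an alignment of
  * PAGE_SIZE << 2 = 16 KB to kmem_alloc_contig(), i.e. the buffer is aligned
  * to the smallest power-of-two number of pages that covers the request.
  */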
 
 static inline void
 lio_dma_free(size_t size, void *cpu_addr)
 {
 
-	kmem_free(kmem_arena, (vm_offset_t)cpu_addr, size);
+	kmem_free((vm_offset_t)cpu_addr, size);
 }
 
 static inline uint64_t
 lio_map_ring(device_t dev, void *buf, uint32_t size)
 {
 	vm_paddr_t	dma_addr;
 
 	dma_addr = vtophys(((struct mbuf *)buf)->m_data);
 	return ((uint64_t)dma_addr);
 }
 
 /*
  * \brief check interface state
  * @param lio per-network private data
  * @param state_flag flag state to check
  */
 static inline int
 lio_ifstate_check(struct lio *lio, int state_flag)
 {
 
 	return (atomic_load_acq_int(&lio->ifstate) & state_flag);
 }
 
 /*
  * \brief set interface state
  * @param lio per-network private data
  * @param state_flag flag state to set
  */
 static inline void
 lio_ifstate_set(struct lio *lio, int state_flag)
 {
 
 	atomic_store_rel_int(&lio->ifstate,
 			     (atomic_load_acq_int(&lio->ifstate) | state_flag));
 }
 
 /*
  * \brief clear interface state
  * @param lio per-network private data
  * @param state_flag flag state to clear
  */
 static inline void
 lio_ifstate_reset(struct lio *lio, int state_flag)
 {
 
 	atomic_store_rel_int(&lio->ifstate,
 			     (atomic_load_acq_int(&lio->ifstate) &
 			      ~(state_flag)));
 }
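 
 /*
  * Usage sketch (illustrative; LIO_IFSTATE_RUNNING stands in for whichever
  * ifstate flag the caller uses):
  *
  *	lio_ifstate_set(lio, LIO_IFSTATE_RUNNING);
  *	if (lio_ifstate_check(lio, LIO_IFSTATE_RUNNING))
  *		... Rx/Tx processing is allowed ...
  *	lio_ifstate_reset(lio, LIO_IFSTATE_RUNNING);
  */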
 
 /*
  * \brief wait for all pending requests to complete
  * @param oct Pointer to Octeon device
  *
  * Called during shutdown sequence
  */
 static inline int
 lio_wait_for_pending_requests(struct octeon_device *oct)
 {
 	int	i, pcount = 0;
 
 	for (i = 0; i < 100; i++) {
 		pcount = atomic_load_acq_int(
 				     &oct->response_list[LIO_ORDERED_SC_LIST].
 					     pending_req_count);
 		if (pcount)
 			lio_sleep_timeout(100);
 		else
 			break;
 	}
 
 	if (pcount)
 		return (1);
 
 	return (0);
 }
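 
 /*
  * Note (added for clarity): the loop above polls at most 100 times with a
  * 100 ms sleep between checks, so shutdown waits up to roughly 10 seconds
  * for ordered requests to drain before giving up and returning 1.
  */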
 
 #endif	/* __LIO_NETWORK_H__ */
Index: head/sys/kern/kern_malloc.c
===================================================================
--- head/sys/kern/kern_malloc.c	(revision 338317)
+++ head/sys/kern/kern_malloc.c	(revision 338318)
@@ -1,1278 +1,1278 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1987, 1991, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2005-2009 Robert N. M. Watson
  * Copyright (c) 2008 Otto Moerbeek <otto@drijf.net> (mallocarray)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_malloc.c	8.3 (Berkeley) 1/4/94
  */
 
 /*
  * Kernel malloc(9) implementation -- general purpose kernel memory allocator
  * based on memory types.  Back end is implemented using the UMA(9) zone
  * allocator.  A set of fixed-size buckets are used for smaller allocations,
  * and a special UMA allocation interface is used for larger allocations.
  * Callers declare memory types, and statistics are maintained independently
  * for each memory type.  Statistics are maintained per-CPU for performance
  * reasons.  See malloc(9) and comments in malloc.h for a detailed
  * description.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/vmmeter.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/vmem.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 #include <vm/uma_dbg.h>
 
 #ifdef DEBUG_MEMGUARD
 #include <vm/memguard.h>
 #endif
 #ifdef DEBUG_REDZONE
 #include <vm/redzone.h>
 #endif
 
 #if defined(INVARIANTS) && defined(__i386__)
 #include <machine/cpu.h>
 #endif
 
 #include <ddb/ddb.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 
 bool	__read_frequently			dtrace_malloc_enabled;
 dtrace_malloc_probe_func_t __read_mostly	dtrace_malloc_probe;
 #endif
 
 #if defined(INVARIANTS) || defined(MALLOC_MAKE_FAILURES) ||		\
     defined(DEBUG_MEMGUARD) || defined(DEBUG_REDZONE)
 #define	MALLOC_DEBUG	1
 #endif
 
 /*
  * When realloc() is called, if the new size is sufficiently smaller than
  * the old size, realloc() will allocate a new, smaller block to avoid
  * wasting memory. 'Sufficiently smaller' is defined as: newsize <=
  * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
  */
 #ifndef REALLOC_FRACTION
 #define	REALLOC_FRACTION	1	/* new block if <= half the size */
 #endif
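 
 /*
  * Worked example (illustrative): with the default REALLOC_FRACTION of 1,
  * shrinking a 1024-byte allocation keeps the original block as long as the
  * new size stays above 512 bytes; a request for 512 bytes or less makes
  * realloc() allocate a fresh, smaller block and copy the contents over
  * (unless the old block is already MINALLOCSIZE).
  */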
 
 /*
  * Centrally define some common malloc types.
  */
 MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
 MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
 MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");
 
 static struct malloc_type *kmemstatistics;
 static int kmemcount;
 
 #define KMEM_ZSHIFT	4
 #define KMEM_ZBASE	16
 #define KMEM_ZMASK	(KMEM_ZBASE - 1)
 
 #define KMEM_ZMAX	65536
 #define KMEM_ZSIZE	(KMEM_ZMAX >> KMEM_ZSHIFT)
 static uint8_t kmemsize[KMEM_ZSIZE + 1];
 
 #ifndef MALLOC_DEBUG_MAXZONES
 #define	MALLOC_DEBUG_MAXZONES	1
 #endif
 static int numzones = MALLOC_DEBUG_MAXZONES;
 
 /*
  * Small malloc(9) memory allocations are allocated from a set of UMA buckets
  * of various sizes.
  *
  * XXX: The comment here used to read "These won't be powers of two for
  * long."  It's possible that a significant amount of wasted memory could be
  * recovered by tuning the sizes of these buckets.
  */
 struct {
 	int kz_size;
 	char *kz_name;
 	uma_zone_t kz_zone[MALLOC_DEBUG_MAXZONES];
 } kmemzones[] = {
 	{16, "16", },
 	{32, "32", },
 	{64, "64", },
 	{128, "128", },
 	{256, "256", },
 	{512, "512", },
 	{1024, "1024", },
 	{2048, "2048", },
 	{4096, "4096", },
 	{8192, "8192", },
 	{16384, "16384", },
 	{32768, "32768", },
 	{65536, "65536", },
 	{0, NULL},
 };
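 
 /*
  * Worked example (illustrative): a 100-byte request is rounded up to the
  * next KMEM_ZBASE (16-byte) multiple, 112, and kmemsize[112 >> KMEM_ZSHIFT]
  * selects the 128-byte zone, so the caller receives a 128-byte bucket and
  * the malloc type statistics record 128 bytes allocated.
  */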
 
 /*
  * Zone to allocate malloc type descriptions from.  For ABI reasons, memory
  * types are described by a data structure passed by the declaring code, but
  * the malloc(9) implementation has its own data structure describing the
  * type and statistics.  This permits the malloc(9)-internal data structures
  * to be modified without breaking binary-compiled kernel modules that
  * declare malloc types.
  */
 static uma_zone_t mt_zone;
 
 u_long vm_kmem_size;
 SYSCTL_ULONG(_vm, OID_AUTO, kmem_size, CTLFLAG_RDTUN, &vm_kmem_size, 0,
     "Size of kernel memory");
 
 static u_long kmem_zmax = KMEM_ZMAX;
 SYSCTL_ULONG(_vm, OID_AUTO, kmem_zmax, CTLFLAG_RDTUN, &kmem_zmax, 0,
     "Maximum allocation size that malloc(9) would use UMA as backend");
 
 static u_long vm_kmem_size_min;
 SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_min, CTLFLAG_RDTUN, &vm_kmem_size_min, 0,
     "Minimum size of kernel memory");
 
 static u_long vm_kmem_size_max;
 SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_max, CTLFLAG_RDTUN, &vm_kmem_size_max, 0,
     "Maximum size of kernel memory");
 
 static u_int vm_kmem_size_scale;
 SYSCTL_UINT(_vm, OID_AUTO, kmem_size_scale, CTLFLAG_RDTUN, &vm_kmem_size_scale, 0,
     "Scale factor for kernel memory size");
 
 static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, kmem_map_size,
     CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_kmem_map_size, "LU", "Current kmem allocation size");
 
 static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, kmem_map_free,
     CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
     sysctl_kmem_map_free, "LU", "Free space in kmem");
 
 /*
  * The malloc_mtx protects the kmemstatistics linked list.
  */
 struct mtx malloc_mtx;
 
 #ifdef MALLOC_PROFILE
 uint64_t krequests[KMEM_ZSIZE + 1];
 
 static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS);
 #endif
 
 static int sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS);
 
 /*
  * time_uptime of the last malloc(9) failure (induced or real).
  */
 static time_t t_malloc_fail;
 
 #if defined(MALLOC_MAKE_FAILURES) || (MALLOC_DEBUG_MAXZONES > 1)
 static SYSCTL_NODE(_debug, OID_AUTO, malloc, CTLFLAG_RD, 0,
     "Kernel malloc debugging options");
 #endif
 
 /*
  * malloc(9) fault injection -- cause malloc failures every (n) mallocs when
  * the caller specifies M_NOWAIT.  If set to 0, no failures are caused.
  */
 #ifdef MALLOC_MAKE_FAILURES
 static int malloc_failure_rate;
 static int malloc_nowait_count;
 static int malloc_failure_count;
 SYSCTL_INT(_debug_malloc, OID_AUTO, failure_rate, CTLFLAG_RWTUN,
     &malloc_failure_rate, 0, "Every (n) mallocs with M_NOWAIT will fail");
 SYSCTL_INT(_debug_malloc, OID_AUTO, failure_count, CTLFLAG_RD,
     &malloc_failure_count, 0, "Number of imposed M_NOWAIT malloc failures");
 #endif
 
 static int
 sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS)
 {
 	u_long size;
 
 	size = uma_size();
 	return (sysctl_handle_long(oidp, &size, 0, req));
 }
 
 static int
 sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS)
 {
 	u_long size, limit;
 
 	/* The sysctl is unsigned, implement as a saturation value. */
 	size = uma_size();
 	limit = uma_limit();
 	if (size > limit)
 		size = 0;
 	else
 		size = limit - size;
 	return (sysctl_handle_long(oidp, &size, 0, req));
 }
 
 /*
  * malloc(9) uma zone separation -- sub-page buffer overruns in one
  * malloc type will affect only a subset of other malloc types.
  */
 #if MALLOC_DEBUG_MAXZONES > 1
 static void
 tunable_set_numzones(void)
 {
 
 	TUNABLE_INT_FETCH("debug.malloc.numzones",
 	    &numzones);
 
 	/* Sanity check the number of malloc uma zones. */
 	if (numzones <= 0)
 		numzones = 1;
 	if (numzones > MALLOC_DEBUG_MAXZONES)
 		numzones = MALLOC_DEBUG_MAXZONES;
 }
 SYSINIT(numzones, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_set_numzones, NULL);
 SYSCTL_INT(_debug_malloc, OID_AUTO, numzones, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &numzones, 0, "Number of malloc uma subzones");
 
 /*
  * Any number that changes regularly is an okay choice for the
  * offset.  Build numbers are pretty good if you have them.
  */
 static u_int zone_offset = __FreeBSD_version;
 TUNABLE_INT("debug.malloc.zone_offset", &zone_offset);
 SYSCTL_UINT(_debug_malloc, OID_AUTO, zone_offset, CTLFLAG_RDTUN,
     &zone_offset, 0, "Separate malloc types by examining the "
     "Nth character in the malloc type short description.");
 
 static void
 mtp_set_subzone(struct malloc_type *mtp)
 {
 	struct malloc_type_internal *mtip;
 	const char *desc;
 	size_t len;
 	u_int val;
 
 	mtip = mtp->ks_handle;
 	desc = mtp->ks_shortdesc;
 	if (desc == NULL || (len = strlen(desc)) == 0)
 		val = 0;
 	else
 		val = desc[zone_offset % len];
 	mtip->mti_zone = (val % numzones);
 }
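 
 /*
  * Worked example (illustrative tunable values): with debug.malloc.numzones
  * set to 4 and debug.malloc.zone_offset set to 2, the type "devbuf"
  * (length 6) hashes on desc[2 % 6] = 'v' (ASCII 118), and 118 % 4 = 2, so
  * M_DEVBUF allocations land in subzone 2.
  */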
 
 static inline u_int
 mtp_get_subzone(struct malloc_type *mtp)
 {
 	struct malloc_type_internal *mtip;
 
 	mtip = mtp->ks_handle;
 
 	KASSERT(mtip->mti_zone < numzones,
 	    ("mti_zone %u out of range %d",
 	    mtip->mti_zone, numzones));
 	return (mtip->mti_zone);
 }
 #elif MALLOC_DEBUG_MAXZONES == 0
 #error "MALLOC_DEBUG_MAXZONES must be positive."
 #else
 static void
 mtp_set_subzone(struct malloc_type *mtp)
 {
 	struct malloc_type_internal *mtip;
 
 	mtip = mtp->ks_handle;
 	mtip->mti_zone = 0;
 }
 
 static inline u_int
 mtp_get_subzone(struct malloc_type *mtp)
 {
 
 	return (0);
 }
 #endif /* MALLOC_DEBUG_MAXZONES > 1 */
 
 int
 malloc_last_fail(void)
 {
 
 	return (time_uptime - t_malloc_fail);
 }
 
 /*
  * An allocation has succeeded -- update malloc type statistics for the
  * amount of the bucket size.  Occurs within a critical section so that the
  * thread isn't preempted and doesn't migrate while updating per-CPU
  * statistics.
  */
 static void
 malloc_type_zone_allocated(struct malloc_type *mtp, unsigned long size,
     int zindx)
 {
 	struct malloc_type_internal *mtip;
 	struct malloc_type_stats *mtsp;
 
 	critical_enter();
 	mtip = mtp->ks_handle;
 	mtsp = &mtip->mti_stats[curcpu];
 	if (size > 0) {
 		mtsp->mts_memalloced += size;
 		mtsp->mts_numallocs++;
 	}
 	if (zindx != -1)
 		mtsp->mts_size |= 1 << zindx;
 
 #ifdef KDTRACE_HOOKS
 	if (__predict_false(dtrace_malloc_enabled)) {
 		uint32_t probe_id = mtip->mti_probes[DTMALLOC_PROBE_MALLOC];
 		if (probe_id != 0)
 			(dtrace_malloc_probe)(probe_id,
 			    (uintptr_t) mtp, (uintptr_t) mtip,
 			    (uintptr_t) mtsp, size, zindx);
 	}
 #endif
 
 	critical_exit();
 }
 
 void
 malloc_type_allocated(struct malloc_type *mtp, unsigned long size)
 {
 
 	if (size > 0)
 		malloc_type_zone_allocated(mtp, size, -1);
 }
 
 /*
  * A free operation has occurred -- update malloc type statistics for the
  * amount of the bucket size.  Occurs within a critical section so that the
  * thread isn't preempted and doesn't migrate while updating per-CPU
  * statistics.
  */
 void
 malloc_type_freed(struct malloc_type *mtp, unsigned long size)
 {
 	struct malloc_type_internal *mtip;
 	struct malloc_type_stats *mtsp;
 
 	critical_enter();
 	mtip = mtp->ks_handle;
 	mtsp = &mtip->mti_stats[curcpu];
 	mtsp->mts_memfreed += size;
 	mtsp->mts_numfrees++;
 
 #ifdef KDTRACE_HOOKS
 	if (__predict_false(dtrace_malloc_enabled)) {
 		uint32_t probe_id = mtip->mti_probes[DTMALLOC_PROBE_FREE];
 		if (probe_id != 0)
 			(dtrace_malloc_probe)(probe_id,
 			    (uintptr_t) mtp, (uintptr_t) mtip,
 			    (uintptr_t) mtsp, size, 0);
 	}
 #endif
 
 	critical_exit();
 }
 
 /*
  *	contigmalloc:
  *
  *	Allocate a block of physically contiguous memory.
  *
  *	If M_NOWAIT is set, this routine will not block and return NULL if
  *	the allocation fails.
  */
 void *
 contigmalloc(unsigned long size, struct malloc_type *type, int flags,
     vm_paddr_t low, vm_paddr_t high, unsigned long alignment,
     vm_paddr_t boundary)
 {
 	void *ret;
 
 	ret = (void *)kmem_alloc_contig(size, flags, low, high, alignment,
 	    boundary, VM_MEMATTR_DEFAULT);
 	if (ret != NULL)
 		malloc_type_allocated(type, round_page(size));
 	return (ret);
 }
 
 void *
 contigmalloc_domain(unsigned long size, struct malloc_type *type,
     int domain, int flags, vm_paddr_t low, vm_paddr_t high,
     unsigned long alignment, vm_paddr_t boundary)
 {
 	void *ret;
 
 	ret = (void *)kmem_alloc_contig_domain(domain, size, flags, low, high,
 	    alignment, boundary, VM_MEMATTR_DEFAULT);
 	if (ret != NULL)
 		malloc_type_allocated(type, round_page(size));
 	return (ret);
 }
 
 /*
  *	contigfree:
  *
  *	Free a block of memory allocated by contigmalloc.
  *
  *	This routine may not block.
  */
 void
 contigfree(void *addr, unsigned long size, struct malloc_type *type)
 {
 
-	kmem_free(kernel_arena, (vm_offset_t)addr, size);
+	kmem_free((vm_offset_t)addr, size);
 	malloc_type_freed(type, round_page(size));
 }
 
 #ifdef MALLOC_DEBUG
 static int
 malloc_dbg(caddr_t *vap, size_t *sizep, struct malloc_type *mtp,
     int flags)
 {
 #ifdef INVARIANTS
 	int indx;
 
 	KASSERT(mtp->ks_magic == M_MAGIC, ("malloc: bad malloc type magic"));
 	/*
 	 * Check that exactly one of M_WAITOK or M_NOWAIT is specified.
 	 */
 	indx = flags & (M_WAITOK | M_NOWAIT);
 	if (indx != M_NOWAIT && indx != M_WAITOK) {
 		static	struct timeval lasterr;
 		static	int curerr, once;
 		if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) {
 			printf("Bad malloc flags: %x\n", indx);
 			kdb_backtrace();
 			flags |= M_WAITOK;
 			once++;
 		}
 	}
 #endif
 #ifdef MALLOC_MAKE_FAILURES
 	if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) {
 		atomic_add_int(&malloc_nowait_count, 1);
 		if ((malloc_nowait_count % malloc_failure_rate) == 0) {
 			atomic_add_int(&malloc_failure_count, 1);
 			t_malloc_fail = time_uptime;
 			*vap = NULL;
 			return (EJUSTRETURN);
 		}
 	}
 #endif
 	if (flags & M_WAITOK) {
 		KASSERT(curthread->td_intr_nesting_level == 0,
 		   ("malloc(M_WAITOK) in interrupt context"));
 		KASSERT(curthread->td_epochnest == 0,
 			("malloc(M_WAITOK) in epoch context"));		
 	}
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("malloc: called with spinlock or critical section held"));
 
 #ifdef DEBUG_MEMGUARD
 	if (memguard_cmp_mtp(mtp, *sizep)) {
 		*vap = memguard_alloc(*sizep, flags);
 		if (*vap != NULL)
 			return (EJUSTRETURN);
 		/* This is unfortunate but should not be fatal. */
 	}
 #endif
 
 #ifdef DEBUG_REDZONE
 	*sizep = redzone_size_ntor(*sizep);
 #endif
 
 	return (0);
 }
 #endif
 
 /*
  *	malloc:
  *
  *	Allocate a block of memory.
  *
  *	If M_NOWAIT is set, this routine will not block and return NULL if
  *	the allocation fails.
  */
 void *
 (malloc)(size_t size, struct malloc_type *mtp, int flags)
 {
 	int indx;
 	caddr_t va;
 	uma_zone_t zone;
 #if defined(DEBUG_REDZONE)
 	unsigned long osize = size;
 #endif
 
 #ifdef MALLOC_DEBUG
 	va = NULL;
 	if (malloc_dbg(&va, &size, mtp, flags) != 0)
 		return (va);
 #endif
 
 	if (size <= kmem_zmax && (flags & M_EXEC) == 0) {
 		if (size & KMEM_ZMASK)
 			size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
 		indx = kmemsize[size >> KMEM_ZSHIFT];
 		zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)];
 #ifdef MALLOC_PROFILE
 		krequests[size >> KMEM_ZSHIFT]++;
 #endif
 		va = uma_zalloc(zone, flags);
 		if (va != NULL)
 			size = zone->uz_size;
 		malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
 	} else {
 		size = roundup(size, PAGE_SIZE);
 		zone = NULL;
 		va = uma_large_malloc(size, flags);
 		malloc_type_allocated(mtp, va == NULL ? 0 : size);
 	}
 	if (flags & M_WAITOK)
 		KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL"));
 	else if (va == NULL)
 		t_malloc_fail = time_uptime;
 #ifdef DEBUG_REDZONE
 	if (va != NULL)
 		va = redzone_setup(va, osize);
 #endif
 	return ((void *) va);
 }
 
 void *
 malloc_domain(size_t size, struct malloc_type *mtp, int domain,
     int flags)
 {
 	int indx;
 	caddr_t va;
 	uma_zone_t zone;
 #if defined(DEBUG_REDZONE)
 	unsigned long osize = size;
 #endif
 
 #ifdef MALLOC_DEBUG
 	va = NULL;
 	if (malloc_dbg(&va, &size, mtp, flags) != 0)
 		return (va);
 #endif
 	if (size <= kmem_zmax && (flags & M_EXEC) == 0) {
 		if (size & KMEM_ZMASK)
 			size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
 		indx = kmemsize[size >> KMEM_ZSHIFT];
 		zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)];
 #ifdef MALLOC_PROFILE
 		krequests[size >> KMEM_ZSHIFT]++;
 #endif
 		va = uma_zalloc_domain(zone, NULL, domain, flags);
 		if (va != NULL)
 			size = zone->uz_size;
 		malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
 	} else {
 		size = roundup(size, PAGE_SIZE);
 		zone = NULL;
 		va = uma_large_malloc_domain(size, domain, flags);
 		malloc_type_allocated(mtp, va == NULL ? 0 : size);
 	}
 	if (flags & M_WAITOK)
 		KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL"));
 	else if (va == NULL)
 		t_malloc_fail = time_uptime;
 #ifdef DEBUG_REDZONE
 	if (va != NULL)
 		va = redzone_setup(va, osize);
 #endif
 	return ((void *) va);
 }
 
 void *
 mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags)
 {
 
 	if (WOULD_OVERFLOW(nmemb, size))
 		panic("mallocarray: %zu * %zu overflowed", nmemb, size);
 
 	return (malloc(size * nmemb, type, flags));
 }
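 
 /*
  * Usage sketch (illustrative; "entries" and "nentries" are placeholder
  * names):
  *
  *	entries = mallocarray(nentries, sizeof(*entries), M_TEMP,
  *	    M_WAITOK | M_ZERO);
  *
  * The call panics instead of silently wrapping if the nentries * size
  * multiplication would overflow.
  */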
 
 #ifdef INVARIANTS
 static void
 free_save_type(void *addr, struct malloc_type *mtp, u_long size)
 {
 	struct malloc_type **mtpp = addr;
 
 	/*
 	 * Cache a pointer to the malloc_type that most recently freed
 	 * this memory here.  This way we know who is most likely to
 	 * have stepped on it later.
 	 *
 	 * This code assumes that size is a multiple of 8 bytes for
 	 * 64-bit machines.
 	 */
 	mtpp = (struct malloc_type **) ((unsigned long)mtpp & ~UMA_ALIGN_PTR);
 	mtpp += (size - sizeof(struct malloc_type *)) /
 	    sizeof(struct malloc_type *);
 	*mtpp = mtp;
 }
 #endif
 
 #ifdef MALLOC_DEBUG
 static int
 free_dbg(void **addrp, struct malloc_type *mtp)
 {
 	void *addr;
 
 	addr = *addrp;
 	KASSERT(mtp->ks_magic == M_MAGIC, ("free: bad malloc type magic"));
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("free: called with spinlock or critical section held"));
 
 	/* free(NULL, ...) does nothing */
 	if (addr == NULL)
 		return (EJUSTRETURN);
 
 #ifdef DEBUG_MEMGUARD
 	if (is_memguard_addr(addr)) {
 		memguard_free(addr);
 		return (EJUSTRETURN);
 	}
 #endif
 
 #ifdef DEBUG_REDZONE
 	redzone_check(addr);
 	*addrp = redzone_addr_ntor(addr);
 #endif
 
 	return (0);
 }
 #endif
 
 /*
  *	free:
  *
  *	Free a block of memory allocated by malloc.
  *
  *	This routine may not block.
  */
 void
 free(void *addr, struct malloc_type *mtp)
 {
 	uma_slab_t slab;
 	u_long size;
 
 #ifdef MALLOC_DEBUG
 	if (free_dbg(&addr, mtp) != 0)
 		return;
 #endif
 	/* free(NULL, ...) does nothing */
 	if (addr == NULL)
 		return;
 
 	slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK));
 	if (slab == NULL)
 		panic("free: address %p(%p) has not been allocated.\n",
 		    addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
 
 	if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
 		size = slab->us_keg->uk_size;
 #ifdef INVARIANTS
 		free_save_type(addr, mtp, size);
 #endif
 		uma_zfree_arg(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab);
 	} else {
 		size = slab->us_size;
 		uma_large_free(slab);
 	}
 	malloc_type_freed(mtp, size);
 }
 
 void
 free_domain(void *addr, struct malloc_type *mtp)
 {
 	uma_slab_t slab;
 	u_long size;
 
 #ifdef MALLOC_DEBUG
 	if (free_dbg(&addr, mtp) != 0)
 		return;
 #endif
 
 	/* free(NULL, ...) does nothing */
 	if (addr == NULL)
 		return;
 
 	slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK));
 	if (slab == NULL)
 		panic("free_domain: address %p(%p) has not been allocated.\n",
 		    addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
 
 	if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
 		size = slab->us_keg->uk_size;
 #ifdef INVARIANTS
 		free_save_type(addr, mtp, size);
 #endif
 		uma_zfree_domain(LIST_FIRST(&slab->us_keg->uk_zones),
 		    addr, slab);
 	} else {
 		size = slab->us_size;
 		uma_large_free(slab);
 	}
 	malloc_type_freed(mtp, size);
 }
 
 /*
  *	realloc: change the size of a memory block
  */
 void *
 realloc(void *addr, size_t size, struct malloc_type *mtp, int flags)
 {
 	uma_slab_t slab;
 	unsigned long alloc;
 	void *newaddr;
 
 	KASSERT(mtp->ks_magic == M_MAGIC,
 	    ("realloc: bad malloc type magic"));
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("realloc: called with spinlock or critical section held"));
 
 	/* realloc(NULL, ...) is equivalent to malloc(...) */
 	if (addr == NULL)
 		return (malloc(size, mtp, flags));
 
 	/*
 	 * XXX: Should report free of old memory and alloc of new memory to
 	 * per-CPU stats.
 	 */
 
 #ifdef DEBUG_MEMGUARD
 	if (is_memguard_addr(addr))
 		return (memguard_realloc(addr, size, mtp, flags));
 #endif
 
 #ifdef DEBUG_REDZONE
 	slab = NULL;
 	alloc = redzone_get_size(addr);
 #else
 	slab = vtoslab((vm_offset_t)addr & ~(UMA_SLAB_MASK));
 
 	/* Sanity check */
 	KASSERT(slab != NULL,
 	    ("realloc: address %p out of range", (void *)addr));
 
 	/* Get the size of the original block */
 	if (!(slab->us_flags & UMA_SLAB_MALLOC))
 		alloc = slab->us_keg->uk_size;
 	else
 		alloc = slab->us_size;
 
 	/* Reuse the original block if appropriate */
 	if (size <= alloc
 	    && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
 		return (addr);
 #endif /* !DEBUG_REDZONE */
 
 	/* Allocate a new, bigger (or smaller) block */
 	if ((newaddr = malloc(size, mtp, flags)) == NULL)
 		return (NULL);
 
 	/* Copy over original contents */
 	bcopy(addr, newaddr, min(size, alloc));
 	free(addr, mtp);
 	return (newaddr);
 }
 
 /*
  *	reallocf: same as realloc() but free memory on failure.
  */
 void *
 reallocf(void *addr, size_t size, struct malloc_type *mtp, int flags)
 {
 	void *mem;
 
 	if ((mem = realloc(addr, size, mtp, flags)) == NULL)
 		free(addr, mtp);
 	return (mem);
 }
 
 #ifndef __sparc64__
 CTASSERT(VM_KMEM_SIZE_SCALE >= 1);
 #endif
 
 /*
  * Initialize the kernel memory (kmem) arena.
  */
 void
 kmeminit(void)
 {
 	u_long mem_size;
 	u_long tmp;
 
 #ifdef VM_KMEM_SIZE
 	if (vm_kmem_size == 0)
 		vm_kmem_size = VM_KMEM_SIZE;
 #endif
 #ifdef VM_KMEM_SIZE_MIN
 	if (vm_kmem_size_min == 0)
 		vm_kmem_size_min = VM_KMEM_SIZE_MIN;
 #endif
 #ifdef VM_KMEM_SIZE_MAX
 	if (vm_kmem_size_max == 0)
 		vm_kmem_size_max = VM_KMEM_SIZE_MAX;
 #endif
 	/*
 	 * Calculate the amount of kernel virtual address (KVA) space that is
 	 * preallocated to the kmem arena.  In order to support a wide range
 	 * of machines, it is a function of the physical memory size,
 	 * specifically,
 	 *
 	 *	min(max(physical memory size / VM_KMEM_SIZE_SCALE,
 	 *	    VM_KMEM_SIZE_MIN), VM_KMEM_SIZE_MAX)
 	 *
 	 * Every architecture must define an integral value for
 	 * VM_KMEM_SIZE_SCALE.  However, the definitions of VM_KMEM_SIZE_MIN
 	 * and VM_KMEM_SIZE_MAX, which represent respectively the floor and
 	 * ceiling on this preallocation, are optional.  Typically,
 	 * VM_KMEM_SIZE_MAX is itself a function of the available KVA space on
 	 * a given architecture.
 	 */
 	mem_size = vm_cnt.v_page_count;
 	if (mem_size <= 32768) /* delphij XXX 128MB */
 		kmem_zmax = PAGE_SIZE;
 
 	if (vm_kmem_size_scale < 1)
 		vm_kmem_size_scale = VM_KMEM_SIZE_SCALE;
 
 	/*
 	 * Check if we should use defaults for the "vm_kmem_size"
 	 * variable:
 	 */
 	if (vm_kmem_size == 0) {
 		vm_kmem_size = (mem_size / vm_kmem_size_scale) * PAGE_SIZE;
 
 		if (vm_kmem_size_min > 0 && vm_kmem_size < vm_kmem_size_min)
 			vm_kmem_size = vm_kmem_size_min;
 		if (vm_kmem_size_max > 0 && vm_kmem_size >= vm_kmem_size_max)
 			vm_kmem_size = vm_kmem_size_max;
 	}
 
 	/*
 	 * The amount of KVA space that is preallocated to the
 	 * kmem arena can be set statically at compile-time or manually
 	 * through the kernel environment.  However, it is still limited to
 	 * twice the physical memory size, which has been sufficient to handle
 	 * the most severe cases of external fragmentation in the kmem arena. 
 	 */
 	if (vm_kmem_size / 2 / PAGE_SIZE > mem_size)
 		vm_kmem_size = 2 * mem_size * PAGE_SIZE;
 
 	vm_kmem_size = round_page(vm_kmem_size);
 #ifdef DEBUG_MEMGUARD
 	tmp = memguard_fudge(vm_kmem_size, kernel_map);
 #else
 	tmp = vm_kmem_size;
 #endif
 	uma_set_limit(tmp);
 
 #ifdef DEBUG_MEMGUARD
 	/*
 	 * Initialize MemGuard if support compiled in.  MemGuard is a
 	 * replacement allocator used for detecting tamper-after-free
 	 * scenarios as they occur.  It is only used for debugging.
 	 */
 	memguard_init(kernel_arena);
 #endif
 }
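 
 /*
  * Worked example of the kmeminit() sizing formula (illustrative values): on
  * a machine with 8 GB of RAM (2097152 4 KB pages) and a hypothetical
  * VM_KMEM_SIZE_SCALE of 3, the default becomes (2097152 / 3) * PAGE_SIZE,
  * roughly 2.7 GB of KVA, which is then clamped by VM_KMEM_SIZE_MIN and
  * VM_KMEM_SIZE_MAX when defined and finally limited to twice the physical
  * memory size.
  */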
 
 /*
  * Initialize the kernel memory allocator
  */
 /* ARGSUSED*/
 static void
 mallocinit(void *dummy)
 {
 	int i;
 	uint8_t indx;
 
 	mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
 
 	kmeminit();
 
 	if (kmem_zmax < PAGE_SIZE || kmem_zmax > KMEM_ZMAX)
 		kmem_zmax = KMEM_ZMAX;
 
 	mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal),
 #ifdef INVARIANTS
 	    mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
 #else
 	    NULL, NULL, NULL, NULL,
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
 	for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) {
 		int size = kmemzones[indx].kz_size;
 		char *name = kmemzones[indx].kz_name;
 		int subzone;
 
 		for (subzone = 0; subzone < numzones; subzone++) {
 			kmemzones[indx].kz_zone[subzone] =
 			    uma_zcreate(name, size,
 #ifdef INVARIANTS
 			    mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
 #else
 			    NULL, NULL, NULL, NULL,
 #endif
 			    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
 		}		    
 		for (;i <= size; i+= KMEM_ZBASE)
 			kmemsize[i >> KMEM_ZSHIFT] = indx;
 
 	}
 }
 SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_SECOND, mallocinit, NULL);
 
 void
 malloc_init(void *data)
 {
 	struct malloc_type_internal *mtip;
 	struct malloc_type *mtp;
 
 	KASSERT(vm_cnt.v_page_count != 0, ("malloc_register before vm_init"));
 
 	mtp = data;
 	if (mtp->ks_magic != M_MAGIC)
 		panic("malloc_init: bad malloc type magic");
 
 	mtip = uma_zalloc(mt_zone, M_WAITOK | M_ZERO);
 	mtp->ks_handle = mtip;
 	mtp_set_subzone(mtp);
 
 	mtx_lock(&malloc_mtx);
 	mtp->ks_next = kmemstatistics;
 	kmemstatistics = mtp;
 	kmemcount++;
 	mtx_unlock(&malloc_mtx);
 }
 
 void
 malloc_uninit(void *data)
 {
 	struct malloc_type_internal *mtip;
 	struct malloc_type_stats *mtsp;
 	struct malloc_type *mtp, *temp;
 	uma_slab_t slab;
 	long temp_allocs, temp_bytes;
 	int i;
 
 	mtp = data;
 	KASSERT(mtp->ks_magic == M_MAGIC,
 	    ("malloc_uninit: bad malloc type magic"));
 	KASSERT(mtp->ks_handle != NULL, ("malloc_deregister: cookie NULL"));
 
 	mtx_lock(&malloc_mtx);
 	mtip = mtp->ks_handle;
 	mtp->ks_handle = NULL;
 	if (mtp != kmemstatistics) {
 		for (temp = kmemstatistics; temp != NULL;
 		    temp = temp->ks_next) {
 			if (temp->ks_next == mtp) {
 				temp->ks_next = mtp->ks_next;
 				break;
 			}
 		}
 		KASSERT(temp,
 		    ("malloc_uninit: type '%s' not found", mtp->ks_shortdesc));
 	} else
 		kmemstatistics = mtp->ks_next;
 	kmemcount--;
 	mtx_unlock(&malloc_mtx);
 
 	/*
 	 * Look for memory leaks.
 	 */
 	temp_allocs = temp_bytes = 0;
 	for (i = 0; i < MAXCPU; i++) {
 		mtsp = &mtip->mti_stats[i];
 		temp_allocs += mtsp->mts_numallocs;
 		temp_allocs -= mtsp->mts_numfrees;
 		temp_bytes += mtsp->mts_memalloced;
 		temp_bytes -= mtsp->mts_memfreed;
 	}
 	if (temp_allocs > 0 || temp_bytes > 0) {
 		printf("Warning: memory type %s leaked memory on destroy "
 		    "(%ld allocations, %ld bytes leaked).\n", mtp->ks_shortdesc,
 		    temp_allocs, temp_bytes);
 	}
 
 	slab = vtoslab((vm_offset_t) mtip & (~UMA_SLAB_MASK));
 	uma_zfree_arg(mt_zone, mtip, slab);
 }
 
 struct malloc_type *
 malloc_desc2type(const char *desc)
 {
 	struct malloc_type *mtp;
 
 	mtx_assert(&malloc_mtx, MA_OWNED);
 	for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) {
 		if (strcmp(mtp->ks_shortdesc, desc) == 0)
 			return (mtp);
 	}
 	return (NULL);
 }
 
 static int
 sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct malloc_type_stream_header mtsh;
 	struct malloc_type_internal *mtip;
 	struct malloc_type_header mth;
 	struct malloc_type *mtp;
 	int error, i;
 	struct sbuf sbuf;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
 	mtx_lock(&malloc_mtx);
 
 	/*
 	 * Insert stream header.
 	 */
 	bzero(&mtsh, sizeof(mtsh));
 	mtsh.mtsh_version = MALLOC_TYPE_STREAM_VERSION;
 	mtsh.mtsh_maxcpus = MAXCPU;
 	mtsh.mtsh_count = kmemcount;
 	(void)sbuf_bcat(&sbuf, &mtsh, sizeof(mtsh));
 
 	/*
 	 * Insert alternating sequence of type headers and type statistics.
 	 */
 	for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) {
 		mtip = (struct malloc_type_internal *)mtp->ks_handle;
 
 		/*
 		 * Insert type header.
 		 */
 		bzero(&mth, sizeof(mth));
 		strlcpy(mth.mth_name, mtp->ks_shortdesc, MALLOC_MAX_NAME);
 		(void)sbuf_bcat(&sbuf, &mth, sizeof(mth));
 
 		/*
 		 * Insert type statistics for each CPU.
 		 */
 		for (i = 0; i < MAXCPU; i++) {
 			(void)sbuf_bcat(&sbuf, &mtip->mti_stats[i],
 			    sizeof(mtip->mti_stats[i]));
 		}
 	}
 	mtx_unlock(&malloc_mtx);
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 SYSCTL_PROC(_kern, OID_AUTO, malloc_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
     0, 0, sysctl_kern_malloc_stats, "s,malloc_type_ustats",
     "Return malloc types");
 
 SYSCTL_INT(_kern, OID_AUTO, malloc_count, CTLFLAG_RD, &kmemcount, 0,
     "Count of kernel malloc types");
 
 void
 malloc_type_list(malloc_type_list_func_t *func, void *arg)
 {
 	struct malloc_type *mtp, **bufmtp;
 	int count, i;
 	size_t buflen;
 
 	mtx_lock(&malloc_mtx);
 restart:
 	mtx_assert(&malloc_mtx, MA_OWNED);
 	count = kmemcount;
 	mtx_unlock(&malloc_mtx);
 
 	buflen = sizeof(struct malloc_type *) * count;
 	bufmtp = malloc(buflen, M_TEMP, M_WAITOK);
 
 	mtx_lock(&malloc_mtx);
 
 	if (count < kmemcount) {
 		free(bufmtp, M_TEMP);
 		goto restart;
 	}
 
 	for (mtp = kmemstatistics, i = 0; mtp != NULL; mtp = mtp->ks_next, i++)
 		bufmtp[i] = mtp;
 
 	mtx_unlock(&malloc_mtx);
 
 	for (i = 0; i < count; i++)
 		(func)(bufmtp[i], arg);
 
 	free(bufmtp, M_TEMP);
 }
 
 #ifdef DDB
 DB_SHOW_COMMAND(malloc, db_show_malloc)
 {
 	struct malloc_type_internal *mtip;
 	struct malloc_type *mtp;
 	uint64_t allocs, frees;
 	uint64_t alloced, freed;
 	int i;
 
 	db_printf("%18s %12s  %12s %12s\n", "Type", "InUse", "MemUse",
 	    "Requests");
 	for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) {
 		mtip = (struct malloc_type_internal *)mtp->ks_handle;
 		allocs = 0;
 		frees = 0;
 		alloced = 0;
 		freed = 0;
 		for (i = 0; i < MAXCPU; i++) {
 			allocs += mtip->mti_stats[i].mts_numallocs;
 			frees += mtip->mti_stats[i].mts_numfrees;
 			alloced += mtip->mti_stats[i].mts_memalloced;
 			freed += mtip->mti_stats[i].mts_memfreed;
 		}
 		db_printf("%18s %12ju %12juK %12ju\n",
 		    mtp->ks_shortdesc, allocs - frees,
 		    (alloced - freed + 1023) / 1024, allocs);
 		if (db_pager_quit)
 			break;
 	}
 }
 
 #if MALLOC_DEBUG_MAXZONES > 1
 DB_SHOW_COMMAND(multizone_matches, db_show_multizone_matches)
 {
 	struct malloc_type_internal *mtip;
 	struct malloc_type *mtp;
 	u_int subzone;
 
 	if (!have_addr) {
 		db_printf("Usage: show multizone_matches <malloc type/addr>\n");
 		return;
 	}
 	mtp = (void *)addr;
 	if (mtp->ks_magic != M_MAGIC) {
 		db_printf("Magic %lx does not match expected %x\n",
 		    mtp->ks_magic, M_MAGIC);
 		return;
 	}
 
 	mtip = mtp->ks_handle;
 	subzone = mtip->mti_zone;
 
 	for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) {
 		mtip = mtp->ks_handle;
 		if (mtip->mti_zone != subzone)
 			continue;
 		db_printf("%s\n", mtp->ks_shortdesc);
 		if (db_pager_quit)
 			break;
 	}
 }
 #endif /* MALLOC_DEBUG_MAXZONES > 1 */
 #endif /* DDB */
 
 #ifdef MALLOC_PROFILE
 
 static int
 sysctl_kern_mprof(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	uint64_t count;
 	uint64_t waste;
 	uint64_t mem;
 	int error;
 	int rsize;
 	int size;
 	int i;
 
 	waste = 0;
 	mem = 0;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	sbuf_printf(&sbuf, 
 	    "\n  Size                    Requests  Real Size\n");
 	for (i = 0; i < KMEM_ZSIZE; i++) {
 		size = i << KMEM_ZSHIFT;
 		rsize = kmemzones[kmemsize[i]].kz_size;
 		count = (long long unsigned)krequests[i];
 
 		sbuf_printf(&sbuf, "%6d%28llu%11d\n", size,
 		    (unsigned long long)count, rsize);
 
 		if ((rsize * count) > (size * count))
 			waste += (rsize * count) - (size * count);
 		mem += (rsize * count);
 	}
 	sbuf_printf(&sbuf,
 	    "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
 	    (unsigned long long)mem, (unsigned long long)waste);
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING|CTLFLAG_RD,
     NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling");
 #endif /* MALLOC_PROFILE */
Index: head/sys/kern/subr_busdma_bufalloc.c
===================================================================
--- head/sys/kern/subr_busdma_bufalloc.c	(revision 338317)
+++ head/sys/kern/subr_busdma_bufalloc.c	(revision 338318)
@@ -1,176 +1,176 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012 Ian Lepore
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Buffer allocation support routines for bus_dmamem_alloc implementations.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/busdma_bufalloc.h>
 #include <sys/malloc.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/uma.h>
 
 /*
  * We manage buffer zones up to a page in size.  Buffers larger than a page can
  * be managed by one of the kernel's page-oriented memory allocation routines as
  * efficiently as we can do here.  Also, a page is the largest size for
  * which we can guarantee contiguity when using uma, and contiguity is one of the
  * requirements we have to fulfill.
  */
 #define	MIN_ZONE_BUFSIZE	32
 #define	MAX_ZONE_BUFSIZE	PAGE_SIZE
 
 /*
  * The static array of 12 bufzones is big enough to handle all the zones for the
  * smallest supported allocation size of 32 through the largest supported page
  * size of 64K.  If you up the biggest page size number, up the array size too.
  * Basically the size of the array needs to be log2(maxsize)-log2(minsize)+1,
  * but I don't know of an easy way to express that as a compile-time constant.
  */
 #if PAGE_SIZE > 65536
 #error Unsupported page size
 #endif
 
 struct busdma_bufalloc {
 	bus_size_t		min_size;
 	size_t			num_zones;
 	struct busdma_bufzone	buf_zones[12];
 };
 
 busdma_bufalloc_t 
 busdma_bufalloc_create(const char *name, bus_size_t minimum_alignment,
     uma_alloc alloc_func, uma_free free_func, u_int32_t zcreate_flags)
 {
 	struct busdma_bufalloc *ba;
 	struct busdma_bufzone *bz;
 	int i;
 	bus_size_t cursize;
 
 	ba = malloc(sizeof(struct busdma_bufalloc), M_DEVBUF, 
 	    M_ZERO | M_WAITOK);
 
 	ba->min_size = MAX(MIN_ZONE_BUFSIZE, minimum_alignment);
 
 	/*
 	 * Each uma zone is created with an alignment of size-1, meaning that
 	 * the alignment is equal to the size (i.e., 64-byte buffers are aligned
 	 * to 64-byte boundaries, etc.).  This allows for a fast, efficient test
 	 * when deciding whether a pool buffer meets the constraints of a given
 	 * tag used for allocation: the buffer is usable if tag->alignment <=
 	 * bufzone->size.
 	 */
 	for (i = 0, bz = ba->buf_zones, cursize = ba->min_size;
 	    i < nitems(ba->buf_zones) && cursize <= MAX_ZONE_BUFSIZE;
 	    ++i, ++bz, cursize <<= 1) {
 		snprintf(bz->name, sizeof(bz->name), "dma %.10s %ju",
 		    name, (uintmax_t)cursize);
 		bz->size = cursize;
 		bz->umazone = uma_zcreate(bz->name, bz->size,
 		    NULL, NULL, NULL, NULL, bz->size - 1, zcreate_flags);
 		if (bz->umazone == NULL) {
 			busdma_bufalloc_destroy(ba);
 			return (NULL);
 		}
 		if (alloc_func != NULL)
 			uma_zone_set_allocf(bz->umazone, alloc_func);
 		if (free_func != NULL)
 			uma_zone_set_freef(bz->umazone, free_func);
 		++ba->num_zones;
 	}
 
 	return (ba);
 }
 
 void 
 busdma_bufalloc_destroy(busdma_bufalloc_t ba)
 {
 	struct busdma_bufzone *bz;
 	int i;
 
 	if (ba == NULL)
 		return;
 
 	for (i = 0, bz = ba->buf_zones; i < ba->num_zones; ++i, ++bz) {
 		uma_zdestroy(bz->umazone);
 	}
 
 	free(ba, M_DEVBUF);
 }
 
 struct busdma_bufzone * 
 busdma_bufalloc_findzone(busdma_bufalloc_t ba, bus_size_t size)
 {
 	struct busdma_bufzone *bz;
 	int i;
 
 	if (size > MAX_ZONE_BUFSIZE)
 		return (NULL);
 
 	for (i = 0, bz = ba->buf_zones; i < ba->num_zones; ++i, ++bz) {
 		if (bz->size >= size)
 			return (bz);
 	}
 
 	panic("Didn't find a buffer zone of the right size");
 }
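 
 /*
  * Worked example (illustrative): a 90-byte allocation request is served from
  * the first zone whose size covers it, the 128-byte zone.  Because that
  * zone's buffers are 128-byte aligned, any DMA tag whose alignment
  * constraint is 128 bytes or less is satisfied by the pool buffer, which is
  * exactly the tag->alignment <= bufzone->size test described in
  * busdma_bufalloc_create().
  */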
 
 void *
 busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size, int domain,
     uint8_t *pflag, int wait)
 {
 #ifdef VM_MEMATTR_UNCACHEABLE
 
 	/* Inform UMA that this allocator uses kernel_arena/object. */
 	*pflag = UMA_SLAB_KERNEL;
 
 	return ((void *)kmem_alloc_attr_domain(domain, size, wait, 0,
 	    BUS_SPACE_MAXADDR, VM_MEMATTR_UNCACHEABLE));
 
 #else
 
 	panic("VM_MEMATTR_UNCACHEABLE unavailable");
 
 #endif	/* VM_MEMATTR_UNCACHEABLE */
 }
 
 void 
 busdma_bufalloc_free_uncacheable(void *item, vm_size_t size, uint8_t pflag)
 {
 
-	kmem_free(kernel_arena, (vm_offset_t)item, size);
+	kmem_free((vm_offset_t)item, size);
 }
 
Index: head/sys/mips/ingenic/jz4780_lcd.c
===================================================================
--- head/sys/mips/ingenic/jz4780_lcd.c	(revision 338317)
+++ head/sys/mips/ingenic/jz4780_lcd.c	(revision 338318)
@@ -1,575 +1,575 @@
 /*-
  * Copyright (c) 2016 Jared McNeill <jmcneill@invisible.ca>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * Ingenic JZ4780 LCD Controller
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/rman.h>
 #include <sys/condvar.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/fbio.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/pmap.h>
 
 #include <machine/bus.h>
 
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
 
 #include <dev/videomode/videomode.h>
 #include <dev/videomode/edidvar.h>
 
 #include <dev/extres/clk/clk.h>
 
 #include <mips/ingenic/jz4780_lcd.h>
 
 #include "fb_if.h"
 #include "hdmi_if.h"
 
 #define	FB_DEFAULT_W	800
 #define	FB_DEFAULT_H	600
 #define	FB_DEFAULT_REF	60
 #define	FB_BPP		32
 #define	FB_ALIGN	(16 * 4)
 #define	FB_MAX_BW	(1920 * 1080 * 60)
 #define	FB_MAX_W	2048
 #define	FB_MAX_H	2048
 #define FB_DIVIDE(x, y)	(((x) + ((y) / 2)) / (y))
 
 #define	PCFG_MAGIC	0xc7ff2100
 
 #define	DOT_CLOCK_TO_HZ(c)	((c) * 1000)
 
 #ifndef VM_MEMATTR_WRITE_COMBINING
 #define	VM_MEMATTR_WRITE_COMBINING VM_MEMATTR_UNCACHEABLE
 #endif
 
 struct jzlcd_softc {
 	device_t		dev;
 	device_t		fbdev;
 	struct resource		*res[1];
 
 	/* Clocks */
 	clk_t			clk;
 	clk_t			clk_pix;
 
 	/* Framebuffer */
 	struct fb_info		info;
 	size_t			fbsize;
 	bus_addr_t		paddr;
 	vm_offset_t		vaddr;
 
 	/* HDMI */
 	eventhandler_tag	hdmi_evh;
 
 	/* Frame descriptor DMA */
 	bus_dma_tag_t		fdesc_tag;
 	bus_dmamap_t		fdesc_map;
 	bus_addr_t		fdesc_paddr;
 	struct lcd_frame_descriptor	*fdesc;
 };
 
 static struct resource_spec jzlcd_spec[] = {
 	{ SYS_RES_MEMORY,	0,	RF_ACTIVE },
 	{ -1, 0 }
 };
 
 #define	LCD_READ(sc, reg)		bus_read_4((sc)->res[0], (reg))
 #define	LCD_WRITE(sc, reg, val)		bus_write_4((sc)->res[0], (reg), (val))
 
 static int
 jzlcd_allocfb(struct jzlcd_softc *sc)
 {
 	sc->vaddr = kmem_alloc_contig(sc->fbsize, M_NOWAIT | M_ZERO, 0, ~0,
 	    FB_ALIGN, 0, VM_MEMATTR_WRITE_COMBINING);
 	if (sc->vaddr == 0) {
 		device_printf(sc->dev, "failed to allocate FB memory\n");
 		return (ENOMEM);
 	}
 	sc->paddr = pmap_kextract(sc->vaddr);
 
 	return (0);
 }
 
 static void
 jzlcd_freefb(struct jzlcd_softc *sc)
 {
-	kmem_free(kernel_arena, sc->vaddr, sc->fbsize);
+	kmem_free(sc->vaddr, sc->fbsize);
 }
 
 static void
 jzlcd_start(struct jzlcd_softc *sc)
 {
 	uint32_t ctrl;
 
 	/* Clear status registers */
 	LCD_WRITE(sc, LCDSTATE, 0);
 	LCD_WRITE(sc, LCDOSDS, 0);
 	/* Enable the controller */
 	ctrl = LCD_READ(sc, LCDCTRL);
 	ctrl |= LCDCTRL_ENA;
 	ctrl &= ~LCDCTRL_DIS;
 	LCD_WRITE(sc, LCDCTRL, ctrl);
 }
 
 static void
 jzlcd_stop(struct jzlcd_softc *sc)
 {
 	uint32_t ctrl;
 
 	ctrl = LCD_READ(sc, LCDCTRL);
 	if ((ctrl & LCDCTRL_ENA) != 0) {
 		/* Disable the controller and wait for it to stop */
 		ctrl |= LCDCTRL_DIS;
 		LCD_WRITE(sc, LCDCTRL, ctrl);
 		while ((LCD_READ(sc, LCDSTATE) & LCDSTATE_LDD) == 0)
 			DELAY(100);
 	}
 	/* Clear all status except for disable */
 	LCD_WRITE(sc, LCDSTATE, LCD_READ(sc, LCDSTATE) & ~LCDSTATE_LDD);
 }
 
 static void
 jzlcd_setup_descriptor(struct jzlcd_softc *sc, const struct videomode *mode,
     u_int desno)
 {
 	struct lcd_frame_descriptor *fdesc;
 	int line_sz;
 
 	/* Frame size is specified in # words */
 	line_sz = (mode->hdisplay * FB_BPP) >> 3;
 	line_sz = ((line_sz + 3) & ~3) / 4;
 
 	fdesc = sc->fdesc + desno;
 
 	if (desno == 0)
 		fdesc->next = sc->fdesc_paddr +
 		    sizeof(struct lcd_frame_descriptor);
 	else
 		fdesc->next = sc->fdesc_paddr;
 	fdesc->physaddr = sc->paddr;
 	fdesc->id = desno;
 	fdesc->cmd = LCDCMD_FRM_EN | (line_sz * mode->vdisplay);
 	fdesc->offs = 0;
 	fdesc->pw = 0;
 	fdesc->cnum_pos = LCDPOS_BPP01_18_24 |
 	    LCDPOS_PREMULTI01 |
 	    (desno == 0 ? LCDPOS_COEF_BLE01_1 : LCDPOS_COEF_SLE01);
 	fdesc->dessize = LCDDESSIZE_ALPHA |
 	    ((mode->vdisplay - 1) << LCDDESSIZE_HEIGHT_SHIFT) |
 	    ((mode->hdisplay - 1) << LCDDESSIZE_WIDTH_SHIFT);
 }
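 
 /*
  * Worked example (illustrative): for a 1920-pixel-wide mode at FB_BPP = 32,
  * line_sz above is (1920 * 32) >> 3 = 7680 bytes = 1920 words, so the
  * LCDCMD_FRM_EN command word carries 1920 * vdisplay words for the frame,
  * e.g. 2073600 words for 1920x1080.
  */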
 
 static int
 jzlcd_set_videomode(struct jzlcd_softc *sc, const struct videomode *mode)
 {
 	u_int hbp, hfp, hsw, vbp, vfp, vsw;
 	u_int hds, hde, ht, vds, vde, vt;
 	uint32_t ctrl;
 	int error;
 
 	hbp = mode->htotal - mode->hsync_end;
 	hfp = mode->hsync_start - mode->hdisplay;
 	hsw = mode->hsync_end - mode->hsync_start;
 	vbp = mode->vtotal - mode->vsync_end;
 	vfp = mode->vsync_start - mode->vdisplay;
 	vsw = mode->vsync_end - mode->vsync_start;
 
 	hds = hsw + hbp;
 	hde = hds + mode->hdisplay;
 	ht = hde + hfp;
 
 	vds = vsw + vbp;
 	vde = vds + mode->vdisplay;
 	vt = vde + vfp;
 
 	/* Setup timings */
 	LCD_WRITE(sc, LCDVAT,
 	    (ht << LCDVAT_HT_SHIFT) | (vt << LCDVAT_VT_SHIFT));
 	LCD_WRITE(sc, LCDDAH,
 	    (hds << LCDDAH_HDS_SHIFT) | (hde << LCDDAH_HDE_SHIFT));
 	LCD_WRITE(sc, LCDDAV,
 	    (vds << LCDDAV_VDS_SHIFT) | (vde << LCDDAV_VDE_SHIFT));
 	LCD_WRITE(sc, LCDHSYNC, hsw);
 	LCD_WRITE(sc, LCDVSYNC, vsw);
 
 	/* Set configuration */
 	LCD_WRITE(sc, LCDCFG, LCDCFG_NEWDES | LCDCFG_RECOVER | LCDCFG_24 |
 	    LCDCFG_PSM | LCDCFG_CLSM | LCDCFG_SPLM | LCDCFG_REVM | LCDCFG_PCP);
 	ctrl = LCD_READ(sc, LCDCTRL);
 	ctrl &= ~LCDCTRL_BST;
 	ctrl |= LCDCTRL_BST_64 | LCDCTRL_OFUM;
 	LCD_WRITE(sc, LCDCTRL, ctrl);
 	LCD_WRITE(sc, LCDPCFG, PCFG_MAGIC);
 	LCD_WRITE(sc, LCDRGBC, LCDRGBC_RGBFMT);
 
 	/* Update registers */
 	LCD_WRITE(sc, LCDSTATE, 0);
 
 	/* Setup frame descriptors */
 	jzlcd_setup_descriptor(sc, mode, 0);
 	jzlcd_setup_descriptor(sc, mode, 1);
 	bus_dmamap_sync(sc->fdesc_tag, sc->fdesc_map, BUS_DMASYNC_PREWRITE);
 
 	/* Setup DMA channels */
 	LCD_WRITE(sc, LCDDA0, sc->fdesc_paddr
 	    + sizeof(struct lcd_frame_descriptor));
 	LCD_WRITE(sc, LCDDA1, sc->fdesc_paddr);
 
 	/* Set display clock */
 	error = clk_set_freq(sc->clk_pix, DOT_CLOCK_TO_HZ(mode->dot_clock), 0);
 	if (error != 0) {
 		device_printf(sc->dev, "failed to set pixel clock to %u Hz\n",
 		    DOT_CLOCK_TO_HZ(mode->dot_clock));
 		return (error);
 	}
 
 	return (0);
 }
 
 static int
 jzlcd_configure(struct jzlcd_softc *sc, const struct videomode *mode)
 {
 	size_t fbsize;
 	int error;
 
 	fbsize = round_page(mode->hdisplay * mode->vdisplay * (FB_BPP / NBBY));
 
 	/* Detach the old FB device */
 	if (sc->fbdev != NULL) {
 		device_delete_child(sc->dev, sc->fbdev);
 		sc->fbdev = NULL;
 	}
 
 	/* If the FB size has changed, free the old FB memory */
 	if (sc->fbsize > 0 && sc->fbsize != fbsize) {
 		jzlcd_freefb(sc);
 		sc->vaddr = 0;
 	}
 
 	/* Allocate the FB if necessary */
 	sc->fbsize = fbsize;
 	if (sc->vaddr == 0) {
 		error = jzlcd_allocfb(sc);
 		if (error != 0) {
 			device_printf(sc->dev, "failed to allocate FB memory\n");
 			return (ENXIO);
 		}
 	}
 
 	/* Setup video mode */
 	error = jzlcd_set_videomode(sc, mode);
 	if (error != 0)
 		return (error);
 
 	/* Attach framebuffer device */
 	sc->info.fb_name = device_get_nameunit(sc->dev);
 	sc->info.fb_vbase = (intptr_t)sc->vaddr;
 	sc->info.fb_pbase = sc->paddr;
 	sc->info.fb_size = sc->fbsize;
 	sc->info.fb_bpp = sc->info.fb_depth = FB_BPP;
 	sc->info.fb_stride = mode->hdisplay * (FB_BPP / NBBY);
 	sc->info.fb_width = mode->hdisplay;
 	sc->info.fb_height = mode->vdisplay;
 #ifdef VM_MEMATTR_WRITE_COMBINING
 	sc->info.fb_flags = FB_FLAG_MEMATTR;
 	sc->info.fb_memattr = VM_MEMATTR_WRITE_COMBINING;
 #endif
 	sc->fbdev = device_add_child(sc->dev, "fbd", device_get_unit(sc->dev));
 	if (sc->fbdev == NULL) {
 		device_printf(sc->dev, "failed to add fbd child\n");
 		return (ENOENT);
 	}
 
 	error = device_probe_and_attach(sc->fbdev);
 	if (error != 0) {
 		device_printf(sc->dev, "failed to attach fbd device\n");
 		return (error);
 	}
 
 	return (0);
 }
 
 static int
 jzlcd_get_bandwidth(const struct videomode *mode)
 {
 	int refresh;
 
 	refresh = FB_DIVIDE(FB_DIVIDE(DOT_CLOCK_TO_HZ(mode->dot_clock),
 	    mode->htotal), mode->vtotal);
 
 	return mode->hdisplay * mode->vdisplay * refresh;
 }
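 
 /*
  * Worked example (standard CEA 1080p60 timings, used purely for
  * illustration): dot_clock 148500 kHz with htotal 2200 and vtotal 1125
  * gives a refresh of 148500000 / 2200 / 1125 = 60 Hz, so the bandwidth is
  * 1920 * 1080 * 60 = 124416000, exactly FB_MAX_BW, and the mode passes the
  * check in jzlcd_mode_supported().
  */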
 
 static int
 jzlcd_mode_supported(const struct videomode *mode)
 {
 	/* Width and height must be less than 2048 */
 	if (mode->hdisplay > FB_MAX_W || mode->vdisplay > FB_MAX_H)
 		return (0);
 
 	/* Bandwidth check */
 	if (jzlcd_get_bandwidth(mode) > FB_MAX_BW)
 		return (0);
 
 	/* Interlace modes not yet supported by the driver */
 	if ((mode->flags & VID_INTERLACE) != 0)
 		return (0);
 
 	return (1);
 }
 
 static const struct videomode *
 jzlcd_find_mode(struct edid_info *ei)
 {
 	const struct videomode *best;
 	int n, bw, best_bw;
 
 	/* If the preferred mode is OK, just use it */
 	if (jzlcd_mode_supported(ei->edid_preferred_mode) != 0)
 		return ei->edid_preferred_mode;
 
 	/* Pick the mode with the highest bandwidth requirements */
 	best = NULL;
 	best_bw = 0;
 	for (n = 0; n < ei->edid_nmodes; n++) {
 		if (jzlcd_mode_supported(&ei->edid_modes[n]) == 0)
 			continue;
 		bw = jzlcd_get_bandwidth(&ei->edid_modes[n]);
 		if (bw > FB_MAX_BW)
 			continue;
 		if (best == NULL || bw > best_bw) {
 			best = &ei->edid_modes[n];
 			best_bw = bw;
 		}
 	}
 
 	return best;
 }
 
 static void
 jzlcd_hdmi_event(void *arg, device_t hdmi_dev)
 {
 	const struct videomode *mode;
 	struct videomode hdmi_mode;
 	struct jzlcd_softc *sc;
 	struct edid_info ei;
 	uint8_t *edid;
 	uint32_t edid_len;
 	int error;
 
 	sc = arg;
 	edid = NULL;
 	edid_len = 0;
 	mode = NULL;
 
 	error = HDMI_GET_EDID(hdmi_dev, &edid, &edid_len);
 	if (error != 0) {
 		device_printf(sc->dev, "failed to get EDID: %d\n", error);
 	} else {
 		error = edid_parse(edid, &ei);
 		if (error != 0) {
 			device_printf(sc->dev, "failed to parse EDID: %d\n",
 			    error);
 		} else {
 			if (bootverbose)
 				edid_print(&ei);
 
 			mode = jzlcd_find_mode(&ei);
 		}
 	}
 
 	/* If a suitable mode could not be found, try the default */
 	if (mode == NULL)
 		mode = pick_mode_by_ref(FB_DEFAULT_W, FB_DEFAULT_H,
 		    FB_DEFAULT_REF);
 
 	if (mode == NULL) {
 		device_printf(sc->dev, "failed to find usable video mode\n");
 		return;
 	}
 
 	if (bootverbose)
 		device_printf(sc->dev, "using %dx%d\n",
 		    mode->hdisplay, mode->vdisplay);
 
 	/* Stop the controller */
 	jzlcd_stop(sc);
 
 	/* Configure LCD controller */
 	error = jzlcd_configure(sc, mode);
 	if (error != 0) {
 		device_printf(sc->dev, "failed to configure FB: %d\n", error);
 		return;
 	}
 
 	/* Enable HDMI TX */
 	hdmi_mode = *mode;
 	HDMI_SET_VIDEOMODE(hdmi_dev, &hdmi_mode);
 
 	/* Start the controller! */
 	jzlcd_start(sc);
 }
 
 static void
 jzlcd_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 	if (error != 0)
 		return;
 	*(bus_addr_t *)arg = segs[0].ds_addr;
 }
 
 static int
 jzlcd_probe(device_t dev)
 {
 	if (!ofw_bus_status_okay(dev))
 		return (ENXIO);
 
 	if (!ofw_bus_is_compatible(dev, "ingenic,jz4780-lcd"))
 		return (ENXIO);
 
 	device_set_desc(dev, "Ingenic JZ4780 LCD Controller");
 	return (BUS_PROBE_DEFAULT);
 }
 
 static int
 jzlcd_attach(device_t dev)
 {
 	struct jzlcd_softc *sc;
 	int error;
 
 	sc = device_get_softc(dev);
 
 	sc->dev = dev;
 
 	if (bus_alloc_resources(dev, jzlcd_spec, sc->res)) {
 		device_printf(dev, "cannot allocate resources for device\n");
 		goto failed;
 	}
 
 	if (clk_get_by_ofw_name(dev, 0, "lcd_clk", &sc->clk) != 0 ||
 	    clk_get_by_ofw_name(dev, 0, "lcd_pixclk", &sc->clk_pix) != 0) {
 		device_printf(dev, "cannot get clocks\n");
 		goto failed;
 	}
 	if (clk_enable(sc->clk) != 0 || clk_enable(sc->clk_pix) != 0) {
 		device_printf(dev, "cannot enable clocks\n");
 		goto failed;
 	}
 
 	error = bus_dma_tag_create(
 	    bus_get_dma_tag(dev),
 	    sizeof(struct lcd_frame_descriptor), 0,
 	    BUS_SPACE_MAXADDR_32BIT,
 	    BUS_SPACE_MAXADDR,
 	    NULL, NULL,
 	    sizeof(struct lcd_frame_descriptor) * 2, 1,
 	    sizeof(struct lcd_frame_descriptor) * 2,
 	    0,
 	    NULL, NULL,
 	    &sc->fdesc_tag);
 	if (error != 0) {
 		device_printf(dev, "cannot create bus dma tag\n");
 		goto failed;
 	}
 
 	error = bus_dmamem_alloc(sc->fdesc_tag, (void **)&sc->fdesc,
 	    BUS_DMA_NOCACHE | BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->fdesc_map);
 	if (error != 0) {
 		device_printf(dev, "cannot allocate dma descriptor\n");
 		goto dmaalloc_failed;
 	}
 
 	error = bus_dmamap_load(sc->fdesc_tag, sc->fdesc_map, sc->fdesc,
 	    sizeof(struct lcd_frame_descriptor) * 2, jzlcd_dmamap_cb,
 	    &sc->fdesc_paddr, 0);
 	if (error != 0) {
 		device_printf(dev, "cannot load dma map\n");
 		goto dmaload_failed;
 	}
 
 	sc->hdmi_evh = EVENTHANDLER_REGISTER(hdmi_event,
 	    jzlcd_hdmi_event, sc, 0);
 
 	return (0);
 
 dmaload_failed:
 	bus_dmamem_free(sc->fdesc_tag, sc->fdesc, sc->fdesc_map);
 dmaalloc_failed:
 	bus_dma_tag_destroy(sc->fdesc_tag);
 failed:
 	if (sc->clk_pix != NULL)
 		clk_release(sc->clk_pix);
 	if (sc->clk != NULL)
 		clk_release(sc->clk);
 	if (sc->res != NULL)
 		bus_release_resources(dev, jzlcd_spec, sc->res);
 
 	return (ENXIO);
 }
 
 static struct fb_info *
 jzlcd_fb_getinfo(device_t dev)
 {
 	struct jzlcd_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	return (&sc->info);
 }
 
 static device_method_t jzlcd_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		jzlcd_probe),
 	DEVMETHOD(device_attach,	jzlcd_attach),
 
 	/* FB interface */
 	DEVMETHOD(fb_getinfo,		jzlcd_fb_getinfo),
 
 	DEVMETHOD_END
 };
 
 static driver_t jzlcd_driver = {
 	"fb",
 	jzlcd_methods,
 	sizeof(struct jzlcd_softc),
 };
 
 static devclass_t jzlcd_devclass;
 
 DRIVER_MODULE(fb, simplebus, jzlcd_driver, jzlcd_devclass, 0, 0);
Index: head/sys/mips/mips/busdma_machdep.c
===================================================================
--- head/sys/mips/mips/busdma_machdep.c	(revision 338317)
+++ head/sys/mips/mips/busdma_machdep.c	(revision 338318)
@@ -1,1524 +1,1524 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2006 Oleksandr Tymoshenko
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *  From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * MIPS bus dma support routines
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/busdma_bufalloc.h>
 #include <sys/interrupt.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/ktr.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <vm/uma.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/cache.h>
 #include <machine/cpufunc.h>
 #include <machine/cpuinfo.h>
 #include <machine/md_var.h>
 
 #define MAX_BPAGES 64
 #define BUS_DMA_COULD_BOUNCE	BUS_DMA_BUS3
 #define BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4
 
 /*
  * On XBurst cores from Ingenic, cache-line writeback is local to the
  * issuing core unless it is accompanied by an invalidation.  An
  * invalidation forces dirty lines to be written out and forwards the
  * invalidation request to other cores that hold the line dirty.
  */
 #if defined(SMP) && defined(CPU_XBURST)
 #define	BUS_DMA_FORCE_WBINV
 #endif
 
 struct bounce_zone;
 
 struct bus_dma_tag {
 	bus_dma_tag_t		parent;
 	bus_size_t		alignment;
 	bus_addr_t		boundary;
 	bus_addr_t		lowaddr;
 	bus_addr_t		highaddr;
 	bus_dma_filter_t	*filter;
 	void			*filterarg;
 	bus_size_t		maxsize;
 	u_int			nsegments;
 	bus_size_t		maxsegsz;
 	int			flags;
 	int			ref_count;
 	int			map_count;
 	bus_dma_lock_t		*lockfunc;
 	void			*lockfuncarg;
 	bus_dma_segment_t	*segments;
 	struct bounce_zone *bounce_zone;
 };
 
 struct bounce_page {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	vm_offset_t	vaddr_nocache;	/* kva of bounce buffer uncached */
 	bus_addr_t	busaddr;	/* Physical address */
 	vm_offset_t	datavaddr;	/* kva of client data */
 	bus_addr_t	dataaddr;	/* client physical address */
 	bus_size_t	datacount;	/* client data count */
 	STAILQ_ENTRY(bounce_page) links;
 };
 
 struct sync_list {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	bus_addr_t	busaddr;	/* Physical address */
 	bus_size_t	datacount;	/* client data count */
 };
 
 int busdma_swi_pending;
 
 struct bounce_zone {
 	STAILQ_ENTRY(bounce_zone) links;
 	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
 	int		total_bpages;
 	int		free_bpages;
 	int		reserved_bpages;
 	int		active_bpages;
 	int		total_bounced;
 	int		total_deferred;
 	int		map_count;
 	bus_size_t	alignment;
 	bus_addr_t	lowaddr;
 	char		zoneid[8];
 	char		lowaddrid[20];
 	struct sysctl_ctx_list sysctl_tree;
 	struct sysctl_oid *sysctl_tree_top;
 };
 
 static struct mtx bounce_lock;
 static int total_bpages;
 static int busdma_zonecount;
 static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
 
 static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
 	   "Total bounce pages");
 
 #define DMAMAP_UNCACHEABLE	0x08
 #define DMAMAP_CACHE_ALIGNED	0x10
 
 struct bus_dmamap {
 	struct bp_list	bpages;
 	int		pagesneeded;
 	int		pagesreserved;
 	bus_dma_tag_t	dmat;
 	struct memdesc	mem;
 	int		flags;
 	void		*origbuffer;
 	void		*allocbuffer;
 	TAILQ_ENTRY(bus_dmamap)	freelist;
 	STAILQ_ENTRY(bus_dmamap) links;
 	bus_dmamap_callback_t *callback;
 	void		*callback_arg;
 	int		sync_count;
 	struct sync_list *slist;
 };
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
 static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
 static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
 				int commit);
 static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
 				  vm_offset_t vaddr, bus_addr_t addr,
 				  bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 
 /* Default tag, as most drivers provide no parent tag. */
 bus_dma_tag_t mips_root_dma_tag;
 
 static uma_zone_t dmamap_zone;	/* Cache of struct bus_dmamap items */
 
 static busdma_bufalloc_t coherent_allocator;	/* Cache of coherent buffers */
 static busdma_bufalloc_t standard_allocator;	/* Cache of standard buffers */
 
 MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata");
 MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages");
 
 /*
  * This is the ctor function passed to uma_zcreate() for the pool of dma maps.
  * It'll need platform-specific changes if this code is copied.
  */
 static int
 dmamap_ctor(void *mem, int size, void *arg, int flags)
 {
 	bus_dmamap_t map;
 	bus_dma_tag_t dmat;
 
 	map = (bus_dmamap_t)mem;
 	dmat = (bus_dma_tag_t)arg;
 
 	dmat->map_count++;
 
 	map->dmat = dmat;
 	map->flags = 0;
 	map->slist = NULL;
 	map->allocbuffer = NULL;
 	map->sync_count = 0;
 	STAILQ_INIT(&map->bpages);
 
 	return (0);
 }
 
 /*
  * This is the dtor function passed to uma_zcreate() for the pool of dma maps.
  * It may need platform-specific changes if this code is copied.
  */
 static void
 dmamap_dtor(void *mem, int size, void *arg)
 {
 	bus_dmamap_t map;
 
 	map = (bus_dmamap_t)mem;
 
 	map->dmat->map_count--;
 }
 
 static void
 busdma_init(void *dummy)
 {
 
 	/* Create a cache of maps for bus_dmamap_create(). */
 	dmamap_zone = uma_zcreate("dma maps", sizeof(struct bus_dmamap),
 	    dmamap_ctor, dmamap_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
 
 	/* Create a cache of buffers in standard (cacheable) memory. */
 	standard_allocator = busdma_bufalloc_create("buffer",
 	    mips_dcache_max_linesize,	/* minimum_alignment */
 	    NULL,			/* uma_alloc func */
 	    NULL,			/* uma_free func */
 	    0);				/* uma_zcreate_flags */
 
 	/*
 	 * Create a cache of buffers in uncacheable memory, to implement the
 	 * BUS_DMA_COHERENT flag.
 	 */
 	coherent_allocator = busdma_bufalloc_create("coherent",
 	    mips_dcache_max_linesize,	/* minimum_alignment */
 	    busdma_bufalloc_alloc_uncacheable,
 	    busdma_bufalloc_free_uncacheable,
 	    0);				/* uma_zcreate_flags */
 }
 SYSINIT(busdma, SI_SUB_KMEM, SI_ORDER_FOURTH, busdma_init, NULL);
 
 /*
  * Return true if a match is made.
  *
  * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'.
  *
  * If paddr is within the bounds of the dma tag then call the filter callback
  * to check for a match, if there is no filter callback then assume a match.
  */
 static int
 run_filter(bus_dma_tag_t dmat, bus_addr_t paddr)
 {
 	int retval;
 
 	retval = 0;
 
 	do {
 		if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr)
 		 || ((paddr & (dmat->alignment - 1)) != 0))
 		 && (dmat->filter == NULL
 		  || (*dmat->filter)(dmat->filterarg, paddr) != 0))
 			retval = 1;
 
 		dmat = dmat->parent;
 	} while (retval == 0 && dmat != NULL);
 	return (retval);
 }
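
 /*
  * Sketch, not part of this change: a driver-supplied bus_dma_filter_t that
  * is consistent with the convention run_filter() applies above -- returning
  * non-zero means the page at 'paddr' must be bounced.  The 16MB limit is a
  * made-up example constraint, not taken from any real device.
  */
 static int
 example_dma_filter(void *arg __unused, bus_addr_t paddr)
 {
 
 	/* Bounce anything the hypothetical device cannot address. */
 	return (paddr >= (bus_addr_t)16 * 1024 * 1024);
 }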
 
 /*
  * Check whether the tag's exclusion window overlaps available physical
  * memory, in which case bounce buffering may be required.
  */
 
 static __inline int
 _bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr)
 {
 	int i;
 	for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) {
 		if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1])
 		    || (lowaddr < phys_avail[i] &&
 		    highaddr > phys_avail[i]))
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Convenience function for manipulating driver locks from busdma (during
  * busdma_swi, for example).  Drivers that don't provide their own locks
  * should specify &Giant to dmat->lockfuncarg.  Drivers that use their own
  * non-mutex locking scheme don't have to use this at all.
  */
 void
 busdma_lock_mutex(void *arg, bus_dma_lock_op_t op)
 {
 	struct mtx *dmtx;
 
 	dmtx = (struct mtx *)arg;
 	switch (op) {
 	case BUS_DMA_LOCK:
 		mtx_lock(dmtx);
 		break;
 	case BUS_DMA_UNLOCK:
 		mtx_unlock(dmtx);
 		break;
 	default:
 		panic("Unknown operation 0x%x for busdma_lock_mutex!", op);
 	}
 }
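
 /*
  * Sketch, not part of this change: a driver that wants deferred-load
  * callbacks delivered under its own mutex passes busdma_lock_mutex and that
  * mutex as lockfunc/lockfuncarg when creating its tag.  The tag parameters
  * below are example values only.
  */
 static int
 example_tag_with_lock(device_t dev, struct mtx *sc_mtx, bus_dma_tag_t *tagp)
 {
 
 	return (bus_dma_tag_create(
 	    bus_get_dma_tag(dev),	/* parent */
 	    1, 0,			/* alignment, boundary */
 	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    MAXPHYS, 1, MAXPHYS,	/* maxsize, nsegments, maxsegsz */
 	    0,				/* flags */
 	    busdma_lock_mutex, sc_mtx,	/* lockfunc, lockfuncarg */
 	    tagp));
 }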
 
 /*
  * dflt_lock should never get called.  It gets put into the dma tag when
  * lockfunc == NULL, which is only valid if the maps that are associated
  * with the tag are never meant to be deferred.
  * XXX Should have a way to identify which driver is responsible here.
  */
 static void
 dflt_lock(void *arg, bus_dma_lock_op_t op)
 {
 #ifdef INVARIANTS
 	panic("driver error: busdma dflt_lock called");
 #else
 	printf("DRIVER_ERROR: busdma dflt_lock called\n");
 #endif
 }
 
 static __inline bus_dmamap_t
 _busdma_alloc_dmamap(bus_dma_tag_t dmat)
 {
 	struct sync_list *slist;
 	bus_dmamap_t map;
 
 	slist = malloc(sizeof(*slist) * dmat->nsegments, M_BUSDMA, M_NOWAIT);
 	if (slist == NULL)
 		return (NULL);
 	map = uma_zalloc_arg(dmamap_zone, dmat, M_NOWAIT);
 	if (map != NULL)
 		map->slist = slist;
 	else
 		free(slist, M_BUSDMA);
 	return (map);
 }
 
 static __inline void
 _busdma_free_dmamap(bus_dmamap_t map)
 {
 
 	free(map->slist, M_BUSDMA);
 	uma_zfree(dmamap_zone, map);
 }
 
 /*
  * Allocate a device specific dma_tag.
  */
 #define SEG_NB 1024
 
 int
 bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr,
     bus_addr_t highaddr, bus_dma_filter_t *filter,
     void *filterarg, bus_size_t maxsize, int nsegments,
     bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	bus_dma_tag_t newtag;
 	int error = 0;
 	/* Return a NULL tag on failure */
 	*dmat = NULL;
 	if (!parent)
 		parent = mips_root_dma_tag;
 
 	newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_NOWAIT);
 	if (newtag == NULL) {
 		CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 		    __func__, newtag, 0, error);
 		return (ENOMEM);
 	}
 
 	newtag->parent = parent;
 	newtag->alignment = alignment;
 	newtag->boundary = boundary;
 	newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1);
 	newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1);
 	newtag->filter = filter;
 	newtag->filterarg = filterarg;
 	newtag->maxsize = maxsize;
 	newtag->nsegments = nsegments;
 	newtag->maxsegsz = maxsegsz;
 	newtag->flags = flags;
 	if (cpuinfo.cache_coherent_dma)
 		newtag->flags |= BUS_DMA_COHERENT;
 	newtag->ref_count = 1; /* Count ourself */
 	newtag->map_count = 0;
 	if (lockfunc != NULL) {
 		newtag->lockfunc = lockfunc;
 		newtag->lockfuncarg = lockfuncarg;
 	} else {
 		newtag->lockfunc = dflt_lock;
 		newtag->lockfuncarg = NULL;
 	}
 	newtag->segments = NULL;
 
 	/*
 	 * Take into account any restrictions imposed by our parent tag
 	 */
 	if (parent != NULL) {
 		newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr);
 		newtag->highaddr = MAX(parent->highaddr, newtag->highaddr);
 		if (newtag->boundary == 0)
 			newtag->boundary = parent->boundary;
 		else if (parent->boundary != 0)
 			newtag->boundary =
 			    MIN(parent->boundary, newtag->boundary);
 		if ((newtag->filter != NULL) ||
 		    ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0))
 			newtag->flags |= BUS_DMA_COULD_BOUNCE;
 		if (newtag->filter == NULL) {
 			/*
 			 * Short circuit looking at our parent directly
 			 * since we have encapsulated all of its information
 			 */
 			newtag->filter = parent->filter;
 			newtag->filterarg = parent->filterarg;
 			newtag->parent = parent->parent;
 		}
 		if (newtag->parent != NULL)
 			atomic_add_int(&parent->ref_count, 1);
 	}
 	if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr)
 	 || newtag->alignment > 1)
 		newtag->flags |= BUS_DMA_COULD_BOUNCE;
 
 	if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 	    (flags & BUS_DMA_ALLOCNOW) != 0) {
 		struct bounce_zone *bz;
 
 		/* Must bounce */
 
 		if ((error = alloc_bounce_zone(newtag)) != 0) {
 			free(newtag, M_BUSDMA);
 			return (error);
 		}
 		bz = newtag->bounce_zone;
 
 		if (ptoa(bz->total_bpages) < maxsize) {
 			int pages;
 
 			pages = atop(maxsize) - bz->total_bpages;
 
 			/* Add pages to our bounce pool */
 			if (alloc_bounce_pages(newtag, pages) < pages)
 				error = ENOMEM;
 		}
 		/* Performed initial allocation */
 		newtag->flags |= BUS_DMA_MIN_ALLOC_COMP;
 	} else
 		newtag->bounce_zone = NULL;
 	if (error != 0)
 		free(newtag, M_BUSDMA);
 	else
 		*dmat = newtag;
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->flags : 0), error);
 
 	return (error);
 }
 
 int
 bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
 {
 
 	return (0);
 }
 
 int
 bus_dma_tag_destroy(bus_dma_tag_t dmat)
 {
 #ifdef KTR
 	bus_dma_tag_t dmat_copy = dmat;
 #endif
 
 	if (dmat != NULL) {
 		if (dmat->map_count != 0)
 			return (EBUSY);
 
 		while (dmat != NULL) {
 			bus_dma_tag_t parent;
 
 			parent = dmat->parent;
 			atomic_subtract_int(&dmat->ref_count, 1);
 			if (dmat->ref_count == 0) {
 				if (dmat->segments != NULL)
 					free(dmat->segments, M_BUSDMA);
 				free(dmat, M_BUSDMA);
 				/*
 				 * Last reference, so release our
 				 * reference on our parent.
 				 */
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 	CTR2(KTR_BUSDMA, "%s tag %p", __func__, dmat_copy);
 
 	return (0);
 }
 
 #include <sys/kdb.h>
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 int
 bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	bus_dmamap_t newmap;
 	int error = 0;
 
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc(
 		    sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA,
 		    M_NOWAIT);
 		if (dmat->segments == NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 
 	newmap = _busdma_alloc_dmamap(dmat);
 	if (newmap == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
 		return (ENOMEM);
 	}
 	*mapp = newmap;
 
 	/*
 	 * Bouncing might be required if the driver asks for an active
 	 * exclusion region, a data alignment that is stricter than 1, and/or
 	 * an active address boundary.
 	 */
 	if (dmat->flags & BUS_DMA_COULD_BOUNCE) {
 
 		/* Must bounce */
 		struct bounce_zone *bz;
 		int maxpages;
 
 		if (dmat->bounce_zone == NULL) {
 			if ((error = alloc_bounce_zone(dmat)) != 0) {
 				_busdma_free_dmamap(newmap);
 				*mapp = NULL;
 				return (error);
 			}
 		}
 		bz = dmat->bounce_zone;
 
 		/* Initialize the new map */
 		STAILQ_INIT(&((*mapp)->bpages));
 
 		/*
 		 * Attempt to add pages to our pool on a per-instance
 		 * basis up to a sane limit.
 		 */
 		maxpages = MAX_BPAGES;
 		if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0
 		 || (bz->map_count > 0 && bz->total_bpages < maxpages)) {
 			int pages;
 
 			pages = MAX(atop(dmat->maxsize), 1);
 			pages = MIN(maxpages - bz->total_bpages, pages);
 			pages = MAX(pages, 1);
 			if (alloc_bounce_pages(dmat, pages) < pages)
 				error = ENOMEM;
 
 			if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) {
 				if (error == 0)
 					dmat->flags |= BUS_DMA_MIN_ALLOC_COMP;
 			} else {
 				error = 0;
 			}
 		}
 		bz->map_count++;
 	}
 
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->flags, error);
 
 	return (0);
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 int
 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 
 	if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 		    __func__, dmat, EBUSY);
 		return (EBUSY);
 	}
 	if (dmat->bounce_zone)
 		dmat->bounce_zone->map_count--;
 	_busdma_free_dmamap(map);
 	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
 	return (0);
 }
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into
  * bus device space based on the constraints listed in the dma tag.
  * A dmamap for use with bus_dmamap_load is also allocated.
  */
 int
 bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddrp, int flags,
     bus_dmamap_t *mapp)
 {
 	bus_dmamap_t newmap = NULL;
 	busdma_bufalloc_t ba;
 	struct busdma_bufzone *bufzone;
 	vm_memattr_t memattr;
 	void *vaddr;
 
 	int mflags;
 
 	if (flags & BUS_DMA_NOWAIT)
 		mflags = M_NOWAIT;
 	else
 		mflags = M_WAITOK;
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc(
 		    sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA,
 		    mflags);
 		if (dmat->segments == NULL) {
 			CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 			    __func__, dmat, dmat->flags, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 
 	newmap = _busdma_alloc_dmamap(dmat);
 	if (newmap == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->flags, ENOMEM);
 		return (ENOMEM);
 	}
 
 	/*
 	 * If all the memory is coherent with DMA then we don't need to
 	 * do anything special for a coherent mapping request.
 	 */
 	if (dmat->flags & BUS_DMA_COHERENT)
 	    flags &= ~BUS_DMA_COHERENT;
 
 	if (flags & BUS_DMA_COHERENT) {
 		memattr = VM_MEMATTR_UNCACHEABLE;
 		ba = coherent_allocator;
 		newmap->flags |= DMAMAP_UNCACHEABLE;
 	} else {
 		memattr = VM_MEMATTR_DEFAULT;
 		ba = standard_allocator;
 	}
 	/* All buffers we allocate are cache-aligned. */
 	newmap->flags |= DMAMAP_CACHE_ALIGNED;
 
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
 
 	/*
 	 * Try to find a bufzone in the allocator that holds a cache of buffers
 	 * of the right size for this request.  If the buffer is too big to be
 	 * held in the allocator cache, this returns NULL.
 	 */
 	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
 
 	/*
 	 * Allocate the buffer from the uma(9) allocator if...
 	 *  - It's small enough to be in the allocator (bufzone not NULL).
 	 *  - The alignment constraint isn't larger than the allocation size
 	 *    (the allocator aligns buffers to their size boundaries).
 	 *  - There's no need to handle lowaddr/highaddr exclusion zones.
 	 * else allocate non-contiguous pages if...
  *  - The page count that could get allocated doesn't exceed
  *    nsegments, even when the maximum segment size is less
  *    than PAGE_SIZE.
 	 *  - The alignment constraint isn't larger than a page boundary.
 	 *  - There are no boundary-crossing constraints.
 	 * else allocate a block of contiguous pages because one or more of the
 	 * constraints is something that only the contig allocator can fulfill.
 	 */
 	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
 	    !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) {
 		vaddr = uma_zalloc(bufzone->umazone, mflags);
 	} else if (dmat->nsegments >=
 	    howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) &&
 	    dmat->alignment <= PAGE_SIZE &&
 	    (dmat->boundary % PAGE_SIZE) == 0) {
 		vaddr = (void *)kmem_alloc_attr(dmat->maxsize, mflags, 0,
 		    dmat->lowaddr, memattr);
 	} else {
 		vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0,
 		    dmat->lowaddr, dmat->alignment, dmat->boundary, memattr);
 	}
 	if (vaddr == NULL) {
 		_busdma_free_dmamap(newmap);
 		newmap = NULL;
 	} else {
 		newmap->sync_count = 0;
 	}
 	*vaddrp = vaddr;
 	*mapp = newmap;
 
 	return (vaddr == NULL ? ENOMEM : 0);
 }
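
 /*
  * Illustration of the allocation choices above, not part of this change: a
  * tag with maxsize 512, alignment 4 and no exclusion window is served from
  * the uma(9) bufzone cache; a tag with maxsize 64K, nsegments 16 and
  * page-or-smaller alignment can use kmem_alloc_attr() pages; a tag that
  * demands, say, 8K alignment for a 16K buffer falls through to
  * kmem_alloc_contig().  The sizes are made-up examples.
  */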
 
 /*
  * Free a piece of memory and its associated dmamap that were allocated
  * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
  */
 void
 bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 	struct busdma_bufzone *bufzone;
 	busdma_bufalloc_t ba;
 
 	if (map->flags & DMAMAP_UNCACHEABLE)
 		ba = coherent_allocator;
 	else
 		ba = standard_allocator;
 
 	free(map->slist, M_BUSDMA);
 	uma_zfree(dmamap_zone, map);
 
 	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
 
 	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
 	    !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr))
 		uma_zfree(bufzone->umazone, vaddr);
 	else
-		kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize);
+		kmem_free((vm_offset_t)vaddr, dmat->maxsize);
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
 }
 
 static void
 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags)
 {
 	bus_addr_t curaddr;
 	bus_size_t sgsize;
 
 	if (map->pagesneeded == 0) {
 		CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d",
 		    dmat->lowaddr, dmat->boundary, dmat->alignment);
 		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d",
 		    map, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		curaddr = buf;
 		while (buflen != 0) {
 			sgsize = MIN(buflen, dmat->maxsegsz);
 			if (run_filter(dmat, curaddr) != 0) {
 				sgsize = MIN(sgsize, PAGE_SIZE);
 				map->pagesneeded++;
 			}
 			curaddr += sgsize;
 			buflen -= sgsize;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
     void *buf, bus_size_t buflen, int flags)
 {
 	vm_offset_t vaddr;
 	vm_offset_t vendaddr;
 	bus_addr_t paddr;
 
 	if (map->pagesneeded == 0) {
 		CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d",
 		    dmat->lowaddr, dmat->boundary, dmat->alignment);
 		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d",
 		    map, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		vaddr = (vm_offset_t)buf;
 		vendaddr = (vm_offset_t)buf + buflen;
 
 		while (vaddr < vendaddr) {
 			bus_size_t sg_len;
 
 			KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap"));
 			sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK);
 			paddr = pmap_kextract(vaddr);
 			if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 			    run_filter(dmat, paddr) != 0) {
 				sg_len = roundup2(sg_len, dmat->alignment);
 				map->pagesneeded++;
 			}
 			vaddr += sg_len;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static int
 _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
 {
 
 	/* Reserve Necessary Bounce Pages */
 	mtx_lock(&bounce_lock);
 	if (flags & BUS_DMA_NOWAIT) {
 		if (reserve_bounce_pages(dmat, map, 0) != 0) {
 			mtx_unlock(&bounce_lock);
 			return (ENOMEM);
 		}
 	} else {
 		if (reserve_bounce_pages(dmat, map, 1) != 0) {
 			/* Queue us for resources */
 			STAILQ_INSERT_TAIL(&bounce_map_waitinglist,
 			    map, links);
 			mtx_unlock(&bounce_lock);
 			return (EINPROGRESS);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 
 	return (0);
 }
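
 /*
  * Sketch, not part of this change: when bounce pages are exhausted and the
  * caller allowed waiting, the load returns EINPROGRESS and the callback runs
  * later from busdma_swi().  A driver typically treats that as "in flight"
  * rather than as an error.  All names here are hypothetical.
  */
 static void
 example_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 
 	if (error != 0)
 		return;
 	*(bus_addr_t *)arg = segs[0].ds_addr;
 }
 
 static int
 example_load(bus_dma_tag_t tag, bus_dmamap_t map, void *buf, bus_size_t len,
     bus_addr_t *pap)
 {
 	int error;
 
 	error = bus_dmamap_load(tag, map, buf, len, example_load_cb, pap, 0);
 	if (error == EINPROGRESS)
 		error = 0;	/* The callback will fire once pages free up. */
 	return (error);
 }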
 
 /*
  * Add a single contiguous physical range to the segment list.
  */
 static int
 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
     bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t baddr, bmask;
 	int seg;
 
 	/*
 	 * Make sure we don't cross any boundaries.
 	 */
 	bmask = ~(dmat->boundary - 1);
 	if (dmat->boundary > 0) {
 		baddr = (curaddr + dmat->boundary) & bmask;
 		if (sgsize > (baddr - curaddr))
 			sgsize = (baddr - curaddr);
 	}
 	/*
 	 * Insert chunk into a segment, coalescing with
 	 * the previous segment if possible.
 	 */
 	seg = *segp;
 	if (seg >= 0 &&
 	    curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
 	    (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
 	    (dmat->boundary == 0 ||
 	     (segs[seg].ds_addr & bmask) == (curaddr & bmask))) {
 		segs[seg].ds_len += sgsize;
 	} else {
 		if (++seg >= dmat->nsegments)
 			return (0);
 		segs[seg].ds_addr = curaddr;
 		segs[seg].ds_len = sgsize;
 	}
 	*segp = seg;
 	return (sgsize);
 }
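
 /*
  * Worked example for the boundary handling above, illustration only: with
  * dmat->boundary = 0x1000, curaddr = 0x3f80 and sgsize = 0x200, bmask is
  * ~0xfff and baddr = (0x3f80 + 0x1000) & bmask = 0x4000, so sgsize is
  * clipped to 0x4000 - 0x3f80 = 0x80; the remaining 0x180 bytes start a new
  * segment on the next pass around the caller's loop.
  */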
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 int
 _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	bus_addr_t curaddr;
 	bus_size_t sgsize;
 	int error;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	while (buflen > 0) {
 		curaddr = buf;
 		sgsize = MIN(buflen, dmat->maxsegsz);
 		if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 && run_filter(dmat, curaddr)) {
 			sgsize = MIN(sgsize, PAGE_SIZE);
 			curaddr = add_bounce_page(dmat, map, 0, curaddr,
 			    sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		buf += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	if (buflen != 0) {
 		bus_dmamap_unload(dmat, map);
 		return (EFBIG); /* XXX better return value here? */
 	}
 	return (0);
 }
 
 int
 _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp)
 {
 
 	return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags,
 	    segs, segp));
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 int
 _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	bus_size_t sgsize;
 	bus_addr_t curaddr;
 	struct sync_list *sl;
 	vm_offset_t vaddr = (vm_offset_t)buf;
 	int error = 0;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 	if ((flags & BUS_DMA_LOAD_MBUF) != 0)
 		map->flags |= DMAMAP_CACHE_ALIGNED;
 
 	if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 	CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, "
 	    "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment);
 
 	while (buflen > 0) {
 		/*
 		 * Get the physical address for this segment.
 		 *
 		 * XXX Don't support checking for coherent mappings
 		 * XXX in user address space.
 		 */
 		KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap"));
 		curaddr = pmap_kextract(vaddr);
 
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK);
 		if (sgsize > dmat->maxsegsz)
 			sgsize = dmat->maxsegsz;
 		if (buflen < sgsize)
 			sgsize = buflen;
 
 		if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 && run_filter(dmat, curaddr)) {
 			curaddr = add_bounce_page(dmat, map, vaddr, curaddr,
 			    sgsize);
 		} else {
 			sl = &map->slist[map->sync_count - 1];
 			if (map->sync_count == 0 ||
 			    vaddr != sl->vaddr + sl->datacount) {
 				if (++map->sync_count > dmat->nsegments)
 					goto cleanup;
 				sl++;
 				sl->vaddr = vaddr;
 				sl->datacount = sgsize;
 				sl->busaddr = curaddr;
 			} else
 				sl->datacount += sgsize;
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		vaddr += sgsize;
 		buflen -= sgsize;
 	}
 
 cleanup:
 	/*
 	 * Did we fit?
 	 */
 	if (buflen != 0) {
 		bus_dmamap_unload(dmat, map);
 		error = EFBIG; /* XXX better return value here? */
 	}
 	return (error);
 }
 
 void
 _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
 {
 
 	KASSERT(dmat != NULL, ("dmatag is NULL"));
 	KASSERT(map != NULL, ("dmamap is NULL"));
 	map->mem = *mem;
 	map->callback = callback;
 	map->callback_arg = callback_arg;
 }
 
 bus_dma_segment_t *
 _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	if (segs == NULL)
 		segs = dmat->segments;
 	return (segs);
 }
 
 /*
  * Release the mapping held by map.
  */
 void
 bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bounce_page *bpage;
 
 	while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		STAILQ_REMOVE_HEAD(&map->bpages, links);
 		free_bounce_page(dmat, bpage);
 	}
 	map->sync_count = 0;
 	return;
 }
 
 static void
 bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op, int aligned)
 {
 	char tmp_cl[mips_dcache_max_linesize], tmp_clend[mips_dcache_max_linesize];
 	vm_offset_t buf_cl, buf_clend;
 	vm_size_t size_cl, size_clend;
 	int cache_linesize_mask = mips_dcache_max_linesize - 1;
 
 	/*
 	 * dcache invalidation operates on cache line aligned addresses
 	 * and could modify areas of memory that share the same cache line
 	 * at the beginning and the ending of the buffer. In order to
 	 * prevent data loss we save these chunks in a temporary buffer
 	 * before invalidation and restore them after it.
 	 *
 	 * If the aligned flag is set the buffer is either an mbuf or came from
 	 * our allocator caches.  In both cases they are always sized and
 	 * aligned to cacheline boundaries, so we can skip preserving nearby
 	 * data if a transfer appears to overlap cachelines.  An mbuf in
 	 * particular will usually appear to be overlapped because of offsetting
 	 * within the buffer to align the L3 headers, but we know that the bytes
 	 * preceding that offset are part of the same mbuf memory and are not
 	 * unrelated adjacent data (and a rule of mbuf handling is that the cpu
 	 * is not allowed to touch the mbuf while dma is in progress, including
 	 * header fields).
 	 */
 	if (aligned) {
 		size_cl = 0;
 		size_clend = 0;
 	} else {
 		buf_cl = buf & ~cache_linesize_mask;
 		size_cl = buf & cache_linesize_mask;
 		buf_clend = buf + len;
 		size_clend = (mips_dcache_max_linesize -
 		    (buf_clend & cache_linesize_mask)) & cache_linesize_mask;
 	}
 
 	switch (op) {
 	case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
 	case BUS_DMASYNC_POSTREAD:
 
 		/*
 		 * Save buffers that might be modified by invalidation
 		 */
 		if (size_cl)
 			memcpy (tmp_cl, (void*)buf_cl, size_cl);
 		if (size_clend)
 			memcpy (tmp_clend, (void*)buf_clend, size_clend);
 		mips_dcache_inv_range(buf, len);
 		/*
 		 * Restore them
 		 */
 		if (size_cl)
 			memcpy ((void*)buf_cl, tmp_cl, size_cl);
 		if (size_clend)
 			memcpy ((void*)buf_clend, tmp_clend, size_clend);
 		/*
 		 * Copies above have brought corresponding memory
 		 * cache lines back into dirty state. Write them back
 		 * out and invalidate affected cache lines again if
 		 * necessary.
 		 */
 		if (size_cl)
 			mips_dcache_wbinv_range(buf_cl, size_cl);
 		if (size_clend && (size_cl == 0 ||
                     buf_clend - buf_cl > mips_dcache_max_linesize))
 			mips_dcache_wbinv_range(buf_clend, size_clend);
 		break;
 
 	case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
 		mips_dcache_wbinv_range(buf, len);
 		break;
 
 	case BUS_DMASYNC_PREREAD:
 		/*
 		 * Save buffers that might be modified by invalidation
 		 */
 		if (size_cl)
 			memcpy (tmp_cl, (void *)buf_cl, size_cl);
 		if (size_clend)
 			memcpy (tmp_clend, (void *)buf_clend, size_clend);
 		mips_dcache_inv_range(buf, len);
 		/*
 		 * Restore them
 		 */
 		if (size_cl)
 			memcpy ((void *)buf_cl, tmp_cl, size_cl);
 		if (size_clend)
 			memcpy ((void *)buf_clend, tmp_clend, size_clend);
 		/*
 		 * Copies above have brought corresponding memory
 		 * cache lines back into dirty state. Write them back
 		 * out and invalidate affected cache lines again if
 		 * necessary.
 		 */
 		if (size_cl)
 			mips_dcache_wbinv_range(buf_cl, size_cl);
 		if (size_clend && (size_cl == 0 ||
                     buf_clend - buf_cl > mips_dcache_max_linesize))
 			mips_dcache_wbinv_range(buf_clend, size_clend);
 		break;
 
 	case BUS_DMASYNC_PREWRITE:
 #ifdef BUS_DMA_FORCE_WBINV
 		mips_dcache_wbinv_range(buf, len);
 #else
 		mips_dcache_wb_range(buf, len);
 #endif
 		break;
 	}
 }
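
 /*
  * Worked example for the partial cache line handling above, illustration
  * only: with a 32-byte line size and an unaligned buffer at 0x80001234 of
  * length 0x100, buf_cl = 0x80001220 and size_cl = 0x14 (the 20 bytes that
  * share the first line), while buf_clend = 0x80001334 and size_clend =
  * (32 - (0x80001334 & 31)) & 31 = 0xc (the 12 bytes that share the last
  * line).  Those fragments are copied aside, the range is invalidated, and
  * the fragments are copied back and written out again.
  */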
 
 static void
 _bus_dmamap_sync_bp(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 {
 	struct bounce_page *bpage;
 
 	STAILQ_FOREACH(bpage, &map->bpages, links) {
 		if (op & BUS_DMASYNC_PREWRITE) {
 			if (bpage->datavaddr != 0)
 				bcopy((void *)bpage->datavaddr,
 				    (void *)(bpage->vaddr_nocache != 0 ?
 					     bpage->vaddr_nocache :
 					     bpage->vaddr),
 				    bpage->datacount);
 			else
 				physcopyout(bpage->dataaddr,
 				    (void *)(bpage->vaddr_nocache != 0 ?
 					     bpage->vaddr_nocache :
 					     bpage->vaddr),
 				    bpage->datacount);
 			if (bpage->vaddr_nocache == 0) {
 #ifdef BUS_DMA_FORCE_WBINV
 				mips_dcache_wbinv_range(bpage->vaddr,
 				    bpage->datacount);
 #else
 				mips_dcache_wb_range(bpage->vaddr,
 				    bpage->datacount);
 #endif
 			}
 			dmat->bounce_zone->total_bounced++;
 		}
 		if (op & BUS_DMASYNC_POSTREAD) {
 			if (bpage->vaddr_nocache == 0) {
 				mips_dcache_inv_range(bpage->vaddr,
 				    bpage->datacount);
 			}
 			if (bpage->datavaddr != 0)
 				bcopy((void *)(bpage->vaddr_nocache != 0 ?
 				    bpage->vaddr_nocache : bpage->vaddr),
 				    (void *)bpage->datavaddr, bpage->datacount);
 			else
 				physcopyin((void *)(bpage->vaddr_nocache != 0 ?
 				    bpage->vaddr_nocache : bpage->vaddr),
 				    bpage->dataaddr, bpage->datacount);
 			dmat->bounce_zone->total_bounced++;
 		}
 	}
 }
 
 void
 bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 {
 	struct sync_list *sl, *end;
 	int aligned;
 
 	if (op == BUS_DMASYNC_POSTWRITE)
 		return;
 	if (STAILQ_FIRST(&map->bpages))
 		_bus_dmamap_sync_bp(dmat, map, op);
 
 	if ((dmat->flags & BUS_DMA_COHERENT) ||
 	    (map->flags & DMAMAP_UNCACHEABLE)) {
 		if (op & BUS_DMASYNC_PREWRITE)
 			mips_sync();
 		return;
 	}
 
 	aligned = (map->flags & DMAMAP_CACHE_ALIGNED) ? 1 : 0;
 
 	CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags);
 	if (map->sync_count) {
 		end = &map->slist[map->sync_count];
 		for (sl = &map->slist[0]; sl != end; sl++)
 			bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op,
 			    aligned);
 	}
 }
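
 /*
  * Sketch, not part of this change: the usual driver-side pattern the sync
  * operations above support -- PRE ops before handing the buffer to the
  * device, POST ops before the CPU looks at the data again.
  */
 static void
 example_sync_around_dma(bus_dma_tag_t tag, bus_dmamap_t map)
 {
 
 	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	/* ... start the transfer and wait for it to complete ... */
 	bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 }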
 
 static void
 init_bounce_pages(void *dummy __unused)
 {
 
 	total_bpages = 0;
 	STAILQ_INIT(&bounce_zone_list);
 	STAILQ_INIT(&bounce_map_waitinglist);
 	STAILQ_INIT(&bounce_map_callbacklist);
 	mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
 }
 SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
 	return (&bz->sysctl_tree);
 }
 
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
 	return (bz->sysctl_tree_top);
 }
 
 static int
 alloc_bounce_zone(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *bz;
 
 	/* Check to see if we already have a suitable zone */
 	STAILQ_FOREACH(bz, &bounce_zone_list, links) {
 		if ((dmat->alignment <= bz->alignment)
 		 && (dmat->lowaddr >= bz->lowaddr)) {
 			dmat->bounce_zone = bz;
 			return (0);
 		}
 	}
 
 	if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA,
 	    M_NOWAIT | M_ZERO)) == NULL)
 		return (ENOMEM);
 
 	STAILQ_INIT(&bz->bounce_page_list);
 	bz->free_bpages = 0;
 	bz->reserved_bpages = 0;
 	bz->active_bpages = 0;
 	bz->lowaddr = dmat->lowaddr;
 	bz->alignment = MAX(dmat->alignment, PAGE_SIZE);
 	bz->map_count = 0;
 	snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount);
 	busdma_zonecount++;
 	snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr);
 	STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links);
 	dmat->bounce_zone = bz;
 
 	sysctl_ctx_init(&bz->sysctl_tree);
 	bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree,
 	    SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid,
 	    CTLFLAG_RD, 0, "");
 	if (bz->sysctl_tree_top == NULL) {
 		sysctl_ctx_free(&bz->sysctl_tree);
 		return (0);	/* XXX error code? */
 	}
 
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0,
 	    "Total bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0,
 	    "Free bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0,
 	    "Reserved bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0,
 	    "Active bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0,
 	    "Total bounce requests");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0,
 	    "Total bounce requests that were deferred");
 	SYSCTL_ADD_STRING(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, "");
 	SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "alignment", CTLFLAG_RD, &bz->alignment, "");
 
 	return (0);
 }
 
 static int
 alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
 {
 	struct bounce_zone *bz;
 	int count;
 
 	bz = dmat->bounce_zone;
 	count = 0;
 	while (numpages > 0) {
 		struct bounce_page *bpage;
 
 		bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA,
 						     M_NOWAIT | M_ZERO);
 
 		if (bpage == NULL)
 			break;
 		bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE,
 							 M_NOWAIT, 0ul,
 							 bz->lowaddr,
 							 PAGE_SIZE,
 							 0);
 		if (bpage->vaddr == 0) {
 			free(bpage, M_BUSDMA);
 			break;
 		}
 		bpage->busaddr = pmap_kextract(bpage->vaddr);
 		bpage->vaddr_nocache =
 		    (vm_offset_t)pmap_mapdev(bpage->busaddr, PAGE_SIZE);
 		mtx_lock(&bounce_lock);
 		STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links);
 		total_bpages++;
 		bz->total_bpages++;
 		bz->free_bpages++;
 		mtx_unlock(&bounce_lock);
 		count++;
 		numpages--;
 	}
 	return (count);
 }
 
 static int
 reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
 {
 	struct bounce_zone *bz;
 	int pages;
 
 	mtx_assert(&bounce_lock, MA_OWNED);
 	bz = dmat->bounce_zone;
 	pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved);
 	if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages))
 		return (map->pagesneeded - (map->pagesreserved + pages));
 	bz->free_bpages -= pages;
 	bz->reserved_bpages += pages;
 	map->pagesreserved += pages;
 	pages = map->pagesneeded - map->pagesreserved;
 
 	return (pages);
 }
 
 static bus_addr_t
 add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
 		bus_addr_t addr, bus_size_t size)
 {
 	struct bounce_zone *bz;
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
 	KASSERT(map != NULL, ("add_bounce_page: bad map %p", map));
 
 	bz = dmat->bounce_zone;
 	if (map->pagesneeded == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesneeded--;
 
 	if (map->pagesreserved == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesreserved--;
 
 	mtx_lock(&bounce_lock);
 	bpage = STAILQ_FIRST(&bz->bounce_page_list);
 	if (bpage == NULL)
 		panic("add_bounce_page: free page list is empty");
 
 	STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
 	bz->reserved_bpages--;
 	bz->active_bpages++;
 	mtx_unlock(&bounce_lock);
 
 	if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/* Page offset needs to be preserved. */
 		bpage->vaddr |= addr & PAGE_MASK;
 		bpage->busaddr |= addr & PAGE_MASK;
 	}
 	bpage->datavaddr = vaddr;
 	bpage->dataaddr = addr;
 	bpage->datacount = size;
 	STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
 	return (bpage->busaddr);
 }
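
 /*
  * Worked example for BUS_DMA_KEEP_PG_OFFSET above, illustration only: for a
  * client address of 0x12345678 and 4K pages, addr & PAGE_MASK = 0x678 is
  * OR'ed into the bounce page's vaddr and busaddr so the data keeps the same
  * offset within the page; free_bounce_page() clears it again afterwards.
  */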
 
 static void
 free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
 {
 	struct bus_dmamap *map;
 	struct bounce_zone *bz;
 
 	bz = dmat->bounce_zone;
 	bpage->datavaddr = 0;
 	bpage->datacount = 0;
 	if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * Reset the bounce page to start at offset 0.  Other uses
 		 * of this bounce page may need to store a full page of
 		 * data and/or assume it starts on a page boundary.
 		 */
 		bpage->vaddr &= ~PAGE_MASK;
 		bpage->busaddr &= ~PAGE_MASK;
 	}
 
 	mtx_lock(&bounce_lock);
 	STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links);
 	bz->free_bpages++;
 	bz->active_bpages--;
 	if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
 		if (reserve_bounce_pages(map->dmat, map, 1) == 0) {
 			STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
 			STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
 					   map, links);
 			busdma_swi_pending = 1;
 			bz->total_deferred++;
 			swi_sched(vm_ih, 0);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 void
 busdma_swi(void)
 {
 	bus_dma_tag_t dmat;
 	struct bus_dmamap *map;
 
 	mtx_lock(&bounce_lock);
 	while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
 		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
 		mtx_unlock(&bounce_lock);
 		dmat = map->dmat;
 		(dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK);
 		bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback,
 		    map->callback_arg, BUS_DMA_WAITOK);
 		(dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK);
 		mtx_lock(&bounce_lock);
 	}
 	mtx_unlock(&bounce_lock);
 }
Index: head/sys/powerpc/powerpc/busdma_machdep.c
===================================================================
--- head/sys/powerpc/powerpc/busdma_machdep.c	(revision 338317)
+++ head/sys/powerpc/powerpc/busdma_machdep.c	(revision 338318)
@@ -1,1229 +1,1229 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1997, 1998 Justin T. Gibbs.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * From amd64/busdma_machdep.c, r204214
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 
 #include "iommu_if.h"
 
 #define MAX_BPAGES MIN(8192, physmem/40)
 
 struct bounce_zone;
 
 struct bus_dma_tag {
 	bus_dma_tag_t	  parent;
 	bus_size_t	  alignment;
 	bus_addr_t	  boundary;
 	bus_addr_t	  lowaddr;
 	bus_addr_t	  highaddr;
 	bus_dma_filter_t *filter;
 	void		 *filterarg;
 	bus_size_t	  maxsize;
 	u_int		  nsegments;
 	bus_size_t	  maxsegsz;
 	int		  flags;
 	int		  ref_count;
 	int		  map_count;
 	bus_dma_lock_t	 *lockfunc;
 	void		 *lockfuncarg;
 	struct bounce_zone *bounce_zone;
 	device_t	  iommu;
 	void		 *iommu_cookie;
 };
 
 struct bounce_page {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	bus_addr_t	busaddr;	/* Physical address */
 	vm_offset_t	datavaddr;	/* kva of client data */
 	vm_page_t	datapage;	/* physical page of client data */
 	vm_offset_t	dataoffs;	/* page offset of client data */
 	bus_size_t	datacount;	/* client data count */
 	STAILQ_ENTRY(bounce_page) links;
 };
 
 int busdma_swi_pending;
 
 struct bounce_zone {
 	STAILQ_ENTRY(bounce_zone) links;
 	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
 	int		total_bpages;
 	int		free_bpages;
 	int		reserved_bpages;
 	int		active_bpages;
 	int		total_bounced;
 	int		total_deferred;
 	int		map_count;
 	bus_size_t	alignment;
 	bus_addr_t	lowaddr;
 	char		zoneid[8];
 	char		lowaddrid[20];
 	struct sysctl_ctx_list sysctl_tree;
 	struct sysctl_oid *sysctl_tree_top;
 };
 
 static struct mtx bounce_lock;
 static int total_bpages;
 static int busdma_zonecount;
 static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
 
 static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
 	   "Total bounce pages");
 
 struct bus_dmamap {
 	struct bp_list	       bpages;
 	int		       pagesneeded;
 	int		       pagesreserved;
 	bus_dma_tag_t	       dmat;
 	struct memdesc	       mem;
 	bus_dma_segment_t     *segments;
 	int		       nsegs;
 	bus_dmamap_callback_t *callback;
 	void		      *callback_arg;
 	STAILQ_ENTRY(bus_dmamap) links;
 	int		       contigalloc;
 };
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
 static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
 static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
 				int commit);
 static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
 				  vm_offset_t vaddr, bus_addr_t addr,
 				  bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
 
 /*
  * Return true if a match is made.
  *
  * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'.
  *
  * If paddr is within the bounds of the dma tag then call the filter callback
  * to check for a match, if there is no filter callback then assume a match.
  */
 static __inline int
 run_filter(bus_dma_tag_t dmat, bus_addr_t paddr)
 {
 	int retval;
 
 	retval = 0;
 
 	do {
 		if (dmat->filter == NULL && dmat->iommu == NULL &&
 		    paddr > dmat->lowaddr && paddr <= dmat->highaddr)
 			retval = 1;
 		if (dmat->filter == NULL &&
 		    (paddr & (dmat->alignment - 1)) != 0)
 			retval = 1;
 		if (dmat->filter != NULL &&
 		    (*dmat->filter)(dmat->filterarg, paddr) != 0)
 			retval = 1;
 
 		dmat = dmat->parent;
 	} while (retval == 0 && dmat != NULL);
 	return (retval);
 }
 
 /*
  * Convenience function for manipulating driver locks from busdma (during
  * busdma_swi, for example).  Drivers that don't provide their own locks
  * should specify &Giant to dmat->lockfuncarg.  Drivers that use their own
  * non-mutex locking scheme don't have to use this at all.
  */
 void
 busdma_lock_mutex(void *arg, bus_dma_lock_op_t op)
 {
 	struct mtx *dmtx;
 
 	dmtx = (struct mtx *)arg;
 	switch (op) {
 	case BUS_DMA_LOCK:
 		mtx_lock(dmtx);
 		break;
 	case BUS_DMA_UNLOCK:
 		mtx_unlock(dmtx);
 		break;
 	default:
 		panic("Unknown operation 0x%x for busdma_lock_mutex!", op);
 	}
 }
 
 /*
  * dflt_lock should never get called.  It gets put into the dma tag when
  * lockfunc == NULL, which is only valid if the maps that are associated
  * with the tag are never meant to be deferred.
  * XXX Should have a way to identify which driver is responsible here.
  */
 static void
 dflt_lock(void *arg, bus_dma_lock_op_t op)
 {
 	panic("driver error: busdma dflt_lock called");
 }
 
 #define BUS_DMA_COULD_BOUNCE	BUS_DMA_BUS3
 #define BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4
 /*
  * Allocate a device specific dma_tag.
  */
 int
 bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
 		   bus_addr_t boundary, bus_addr_t lowaddr,
 		   bus_addr_t highaddr, bus_dma_filter_t *filter,
 		   void *filterarg, bus_size_t maxsize, int nsegments,
 		   bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
 		   void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	bus_dma_tag_t newtag;
 	int error = 0;
 
 	/* Basic sanity checking */
 	if (boundary != 0 && boundary < maxsegsz)
 		maxsegsz = boundary;
 
 	if (maxsegsz == 0) {
 		return (EINVAL);
 	}
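
 	/*
 	 * Worked example of the sanity check above, illustration only: a tag
 	 * created with boundary = 0x1000 and maxsegsz = 0x10000 gets maxsegsz
 	 * clamped to 0x1000, so no single segment can ever cross a boundary.
 	 */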
 
 	/* Return a NULL tag on failure */
 	*dmat = NULL;
 
 	newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF,
 	    M_ZERO | M_NOWAIT);
 	if (newtag == NULL) {
 		CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 		    __func__, newtag, 0, error);
 		return (ENOMEM);
 	}
 
 	newtag->parent = parent;
 	newtag->alignment = alignment;
 	newtag->boundary = boundary;
 	newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1);
 	newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1);
 	newtag->filter = filter;
 	newtag->filterarg = filterarg;
 	newtag->maxsize = maxsize;
 	newtag->nsegments = nsegments;
 	newtag->maxsegsz = maxsegsz;
 	newtag->flags = flags;
 	newtag->ref_count = 1; /* Count ourself */
 	newtag->map_count = 0;
 	if (lockfunc != NULL) {
 		newtag->lockfunc = lockfunc;
 		newtag->lockfuncarg = lockfuncarg;
 	} else {
 		newtag->lockfunc = dflt_lock;
 		newtag->lockfuncarg = NULL;
 	}
 
 	/* Take into account any restrictions imposed by our parent tag */
 	if (parent != NULL) {
 		newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr);
 		newtag->highaddr = MAX(parent->highaddr, newtag->highaddr);
 		if (newtag->boundary == 0)
 			newtag->boundary = parent->boundary;
 		else if (parent->boundary != 0)
 			newtag->boundary = MIN(parent->boundary,
 					       newtag->boundary);
 		if (newtag->filter == NULL) {
 			/*
 			 * Short circuit looking at our parent directly
 			 * since we have encapsulated all of its information
 			 */
 			newtag->filter = parent->filter;
 			newtag->filterarg = parent->filterarg;
 			newtag->parent = parent->parent;
 		}
 		if (newtag->parent != NULL)
 			atomic_add_int(&parent->ref_count, 1);
 		newtag->iommu = parent->iommu;
 		newtag->iommu_cookie = parent->iommu_cookie;
 	}
 
 	if (newtag->lowaddr < ptoa((vm_paddr_t)Maxmem) && newtag->iommu == NULL)
 		newtag->flags |= BUS_DMA_COULD_BOUNCE;
 
 	if (newtag->alignment > 1)
 		newtag->flags |= BUS_DMA_COULD_BOUNCE;
 
 	if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 	    (flags & BUS_DMA_ALLOCNOW) != 0) {
 		struct bounce_zone *bz;
 
 		/* Must bounce */
 
 		if ((error = alloc_bounce_zone(newtag)) != 0) {
 			free(newtag, M_DEVBUF);
 			return (error);
 		}
 		bz = newtag->bounce_zone;
 
 		if (ptoa(bz->total_bpages) < maxsize) {
 			int pages;
 
 			pages = atop(maxsize) - bz->total_bpages;
 
 			/* Add pages to our bounce pool */
 			if (alloc_bounce_pages(newtag, pages) < pages)
 				error = ENOMEM;
 		}
 		/* Performed initial allocation */
 		newtag->flags |= BUS_DMA_MIN_ALLOC_COMP;
 	}
 	
 	if (error != 0) {
 		free(newtag, M_DEVBUF);
 	} else {
 		*dmat = newtag;
 	}
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->flags : 0), error);
 	return (error);
 }
 
 int
 bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain)
 {
 
 	return (0);
 }
 
 int
 bus_dma_tag_destroy(bus_dma_tag_t dmat)
 {
 	bus_dma_tag_t dmat_copy;
 	int error;
 
 	error = 0;
 	dmat_copy = dmat;
 
 	if (dmat != NULL) {
 
 		if (dmat->map_count != 0) {
 			error = EBUSY;
 			goto out;
 		}
 
 		while (dmat != NULL) {
 			bus_dma_tag_t parent;
 
 			parent = dmat->parent;
 			atomic_subtract_int(&dmat->ref_count, 1);
 			if (dmat->ref_count == 0) {
 				free(dmat, M_DEVBUF);
 				/*
 				 * Last reference, so release
 				 * our reference on our parent.
 				 */
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 out:
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
 	return (error);
 }
 
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 int
 bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	int error;
 
 	error = 0;
 
 	*mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF,
 				     M_NOWAIT | M_ZERO);
 	if (*mapp == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 		    __func__, dmat, ENOMEM);
 		return (ENOMEM);
 	}
 
 
 	/*
 	 * Bouncing might be required if the driver asks for an active
 	 * exclusion region, a data alignment that is stricter than 1, and/or
 	 * an active address boundary.
 	 */
 	if (dmat->flags & BUS_DMA_COULD_BOUNCE) {
 
 		/* Must bounce */
 		struct bounce_zone *bz;
 		int maxpages;
 
 		if (dmat->bounce_zone == NULL) {
 			if ((error = alloc_bounce_zone(dmat)) != 0)
 				return (error);
 		}
 		bz = dmat->bounce_zone;
 
 		/* Initialize the new map */
 		STAILQ_INIT(&((*mapp)->bpages));
 
 		/*
 		 * Attempt to add pages to our pool on a per-instance
 		 * basis up to a sane limit.
 		 */
 		if (dmat->alignment > 1)
 			maxpages = MAX_BPAGES;
 		else
 			maxpages = MIN(MAX_BPAGES,
 			    Maxmem - atop(dmat->lowaddr));
 		if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0
 		 || (bz->map_count > 0 && bz->total_bpages < maxpages)) {
 			int pages;
 
 			pages = MAX(atop(dmat->maxsize), 1);
 			pages = MIN(maxpages - bz->total_bpages, pages);
 			pages = MAX(pages, 1);
 			if (alloc_bounce_pages(dmat, pages) < pages)
 				error = ENOMEM;
 
 			if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) {
 				if (error == 0)
 					dmat->flags |= BUS_DMA_MIN_ALLOC_COMP;
 			} else {
 				error = 0;
 			}
 		}
 		bz->map_count++;
 	}
 
 	(*mapp)->nsegs = 0;
 	(*mapp)->segments = (bus_dma_segment_t *)malloc(
 	    sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF,
 	    M_NOWAIT);
 	if ((*mapp)->segments == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 		    __func__, dmat, ENOMEM);
 		return (ENOMEM);
 	}
 
 	if (error == 0)
 		dmat->map_count++;
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->flags, error);
 	return (error);
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 int
 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	if (dmat->flags & BUS_DMA_COULD_BOUNCE) {
 		if (STAILQ_FIRST(&map->bpages) != NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, EBUSY);
 			return (EBUSY);
 		}
 		if (dmat->bounce_zone)
 			dmat->bounce_zone->map_count--;
 	}
 	free(map->segments, M_DEVBUF);
 	free(map, M_DEVBUF);
 	dmat->map_count--;
 	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
 	return (0);
 }
 
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into
  * bus device space based on the constraints listed in the dma tag.
  * A dmamap for use with dmamap_load is also allocated.
  */
 int
 bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
 		 bus_dmamap_t *mapp)
 {
 	vm_memattr_t attr;
 	int mflags;
 
 	if (flags & BUS_DMA_NOWAIT)
 		mflags = M_NOWAIT;
 	else
 		mflags = M_WAITOK;
 
 	bus_dmamap_create(dmat, flags, mapp);
 
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
 #ifdef NOTYET
 	if (flags & BUS_DMA_NOCACHE)
 		attr = VM_MEMATTR_UNCACHEABLE;
 	else
 #endif
 		attr = VM_MEMATTR_DEFAULT;
 
 	/* 
 	 * XXX:
 	 * (dmat->alignment <= dmat->maxsize) is just a quick hack; the exact
 	 * alignment guarantees of malloc need to be nailed down, and the
 	 * code below should be rewritten to take that into account.
 	 *
 	 * In the meantime, we'll warn the user if malloc gets it wrong.
 	 */
 	if ((dmat->maxsize <= PAGE_SIZE) &&
 	   (dmat->alignment <= dmat->maxsize) &&
 	    dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
 	    attr == VM_MEMATTR_DEFAULT) {
 		*vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags);
 	} else {
 		/*
 		 * XXX Use Contigmalloc until it is merged into this facility
 		 *     and handles multi-seg allocations.  Nobody is doing
 		 *     multi-seg allocations yet though.
 		 * XXX Certain AGP hardware does.
 		 */
 		*vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0ul,
 		    dmat->lowaddr, dmat->alignment ? dmat->alignment : 1ul,
 		    dmat->boundary, attr);
 		(*mapp)->contigalloc = 1;
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->flags, ENOMEM);
 		return (ENOMEM);
 	} else if (vtophys(*vaddr) & (dmat->alignment - 1)) {
 		printf("bus_dmamem_alloc failed to align memory properly.\n");
 	}
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->flags, 0);
 	return (0);
 }
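A hedged usage sketch of this allocation path: the foo_* names and softc fields extend the earlier hypothetical driver and FOO_RING_SIZE is an invented constant, while bus_dmamem_alloc(), bus_dmamap_load(), and the callback signature are the real interfaces.

static void
foo_ring_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *busaddrp = arg;

	/* A small contiguous ring is expected to load as one segment. */
	if (error == 0 && nseg == 1)
		*busaddrp = segs[0].ds_addr;
}

static int
foo_ring_alloc(struct foo_softc *sc)
{
	int error;

	error = bus_dmamem_alloc(sc->sc_dmat, &sc->sc_ring,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->sc_ring_map);
	if (error != 0)
		return (error);
	return (bus_dmamap_load(sc->sc_dmat, sc->sc_ring_map, sc->sc_ring,
	    FOO_RING_SIZE, foo_ring_cb, &sc->sc_ring_busaddr,
	    BUS_DMA_NOWAIT));
}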
 
 /*
  * Free a piece of memory and its associated dmamap that were allocated
  * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
  */
 void
 bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 
 	if (!map->contigalloc)
 		free(vaddr, M_DEVBUF);
 	else
-		kmem_free(kmem_arena, (vm_offset_t)vaddr, dmat->maxsize);
+		kmem_free((vm_offset_t)vaddr, dmat->maxsize);
 	bus_dmamap_destroy(dmat, map);
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
 }
 
 static void
 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags)
 {
 	bus_addr_t curaddr;
 	bus_size_t sgsize;
 
 	if (map->pagesneeded == 0) {
 		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
 		    "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem),
 		    dmat->boundary, dmat->alignment);
 		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		curaddr = buf;
 		while (buflen != 0) {
 			sgsize = MIN(buflen, dmat->maxsegsz);
 			if (run_filter(dmat, curaddr) != 0) {
 				sgsize = MIN(sgsize,
 				    PAGE_SIZE - (curaddr & PAGE_MASK));
 				map->pagesneeded++;
 			}
 			curaddr += sgsize;
 			buflen -= sgsize;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
     void *buf, bus_size_t buflen, int flags)
 {
 	vm_offset_t vaddr;
 	vm_offset_t vendaddr;
 	bus_addr_t paddr;
 
 	if (map->pagesneeded == 0) {
 		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
 		    "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem),
 		    dmat->boundary, dmat->alignment);
 		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		vaddr = (vm_offset_t)buf;
 		vendaddr = (vm_offset_t)buf + buflen;
 
 		while (vaddr < vendaddr) {
 			bus_size_t sg_len;
 
 			sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK);
 			if (pmap == kernel_pmap)
 				paddr = pmap_kextract(vaddr);
 			else
 				paddr = pmap_extract(pmap, vaddr);
 			if (run_filter(dmat, paddr) != 0) {
 				sg_len = roundup2(sg_len, dmat->alignment);
 				map->pagesneeded++;
 			}
 			vaddr += sg_len;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static int
 _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
 {
 
 	/* Reserve Necessary Bounce Pages */
 	mtx_lock(&bounce_lock);
 	if (flags & BUS_DMA_NOWAIT) {
 		if (reserve_bounce_pages(dmat, map, 0) != 0) {
 			mtx_unlock(&bounce_lock);
 			return (ENOMEM);
 		}
 	} else {
 		if (reserve_bounce_pages(dmat, map, 1) != 0) {
 			/* Queue us for resources */
 			STAILQ_INSERT_TAIL(&bounce_map_waitinglist,
 			    map, links);
 			mtx_unlock(&bounce_lock);
 			return (EINPROGRESS);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 
 	return (0);
 }
 
 /*
  * Add a single contiguous physical range to the segment list.
  */
 static int
 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
 		   bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t baddr, bmask;
 	int seg;
 
 	/*
 	 * Make sure we don't cross any boundaries.
 	 */
 	bmask = ~(dmat->boundary - 1);
 	if (dmat->boundary > 0) {
 		baddr = (curaddr + dmat->boundary) & bmask;
 		if (sgsize > (baddr - curaddr))
 			sgsize = (baddr - curaddr);
 	}
 
 	/*
 	 * Insert chunk into a segment, coalescing with
 	 * previous segment if possible.
 	 */
 	seg = *segp;
 	if (seg == -1) {
 		seg = 0;
 		segs[seg].ds_addr = curaddr;
 		segs[seg].ds_len = sgsize;
 	} else {
 		if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
 		    (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
 		    (dmat->boundary == 0 ||
 		     (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
 			segs[seg].ds_len += sgsize;
 		else {
 			if (++seg >= dmat->nsegments)
 				return (0);
 			segs[seg].ds_addr = curaddr;
 			segs[seg].ds_len = sgsize;
 		}
 	}
 	*segp = seg;
 	return (sgsize);
 }
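A worked example with invented numbers may make the boundary clamp above concrete:

/*
 * With dmat->boundary = 0x1000 and a chunk arriving at curaddr = 0x12f80
 * with sgsize = 0x200:
 *
 *	bmask  = ~(0x1000 - 1)			= ~0xfff
 *	baddr  = (0x12f80 + 0x1000) & bmask	= 0x13000
 *	sgsize = MIN(0x200, 0x13000 - 0x12f80)	= 0x80
 *
 * so the segment is cut at the 4 KiB boundary and the remaining 0x180
 * bytes are added as a new segment on the caller's next pass.
 */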
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 int
 _bus_dmamap_load_phys(bus_dma_tag_t dmat,
 		      bus_dmamap_t map,
 		      vm_paddr_t buf, bus_size_t buflen,
 		      int flags,
 		      bus_dma_segment_t *segs,
 		      int *segp)
 {
 	bus_addr_t curaddr;
 	bus_size_t sgsize;
 	int error;
 
 	if (segs == NULL)
 		segs = map->segments;
 
 	if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	while (buflen > 0) {
 		curaddr = buf;
 		sgsize = MIN(buflen, dmat->maxsegsz);
 		if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) {
 			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
 			curaddr = add_bounce_page(dmat, map, 0, curaddr,
 			    sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		buf += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 int
 _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp)
 {
 
 	return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags,
 	    segs, segp));
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  */
 int
 _bus_dmamap_load_buffer(bus_dma_tag_t dmat,
     			bus_dmamap_t map,
 			void *buf, bus_size_t buflen,
 			pmap_t pmap,
 			int flags,
 			bus_dma_segment_t *segs,
 			int *segp)
 {
 	bus_size_t sgsize;
 	bus_addr_t curaddr;
 	vm_offset_t kvaddr, vaddr;
 	int error;
 
 	if (segs == NULL)
 		segs = map->segments;
 
 	if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	vaddr = (vm_offset_t)buf;
 
 	while (buflen > 0) {
 		bus_size_t max_sgsize;
 
 		/*
 		 * Get the physical address for this segment.
 		 */
 		if (pmap == kernel_pmap) {
 			curaddr = pmap_kextract(vaddr);
 			kvaddr = vaddr;
 		} else {
 			curaddr = pmap_extract(pmap, vaddr);
 			kvaddr = 0;
 		}
 
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		max_sgsize = MIN(buflen, dmat->maxsegsz);
 		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 		if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) {
 			sgsize = roundup2(sgsize, dmat->alignment);
 			sgsize = MIN(sgsize, max_sgsize);
 			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
 			    sgsize);
 		} else {
 			sgsize = MIN(sgsize, max_sgsize);
 		}
 
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		vaddr += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 void
 _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
 		    struct memdesc *mem, bus_dmamap_callback_t *callback,
 		    void *callback_arg)
 {
 
 	if (dmat->flags & BUS_DMA_COULD_BOUNCE) {
 		map->dmat = dmat;
 		map->mem = *mem;
 		map->callback = callback;
 		map->callback_arg = callback_arg;
 	}
 }
 
 bus_dma_segment_t *
 _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
 		     bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	map->nsegs = nsegs;
 	if (segs != NULL)
 		memcpy(map->segments, segs, map->nsegs*sizeof(segs[0]));
 	if (dmat->iommu != NULL)
 		IOMMU_MAP(dmat->iommu, map->segments, &map->nsegs,
 		    dmat->lowaddr, dmat->highaddr, dmat->alignment,
 		    dmat->boundary, dmat->iommu_cookie);
 
 	if (segs != NULL)
 		memcpy(segs, map->segments, map->nsegs*sizeof(segs[0]));
 	else
 		segs = map->segments;
 
 	return (segs);
 }
 
 /*
  * Release the mapping held by map.
  */
 void
 bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bounce_page *bpage;
 
 	if (dmat->iommu) {
 		IOMMU_UNMAP(dmat->iommu, map->segments, map->nsegs, dmat->iommu_cookie);
 		map->nsegs = 0;
 	}
 
 	while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		STAILQ_REMOVE_HEAD(&map->bpages, links);
 		free_bounce_page(dmat, bpage);
 	}
 }
 
 void
 bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 {
 	struct bounce_page *bpage;
 	vm_offset_t datavaddr, tempvaddr;
 
 	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 
 		/*
 		 * Handle data bouncing.  We might also
 		 * want to add support for invalidating
 		 * the caches on broken hardware
 		 */
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 		    "performing bounce", __func__, dmat, dmat->flags, op);
 
 		if (op & BUS_DMASYNC_PREWRITE) {
 			while (bpage != NULL) {
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr |
 					    bpage->dataoffs;
 				}
 
 				bcopy((void *)datavaddr,
 				    (void *)bpage->vaddr, bpage->datacount);
 
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			dmat->bounce_zone->total_bounced++;
 		}
 
 		if (op & BUS_DMASYNC_POSTREAD) {
 			while (bpage != NULL) {
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr |
 					    bpage->dataoffs;
 				}
 
 				bcopy((void *)bpage->vaddr,
 				    (void *)datavaddr, bpage->datacount);
 
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			dmat->bounce_zone->total_bounced++;
 		}
 	}
 
 	powerpc_sync();
 }
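A driver-side sketch of how these sync operations bracket device access; FOO_WRITE, the register name, and the softc fields are hypothetical, while bus_dmamap_sync() and the sync ops are the real interface:

static void
foo_start_tx(struct foo_softc *sc)
{

	/* Flush CPU writes; copies into bounce pages when bouncing is used. */
	bus_dmamap_sync(sc->sc_dmat, sc->sc_tx_map, BUS_DMASYNC_PREWRITE);
	FOO_WRITE(sc, FOO_REG_TX_KICK, 1);	/* hypothetical doorbell */
}

static void
foo_rx_complete(struct foo_softc *sc)
{

	/* Copy bounced data back before the CPU inspects the buffer. */
	bus_dmamap_sync(sc->sc_dmat, sc->sc_rx_map, BUS_DMASYNC_POSTREAD);
}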
 
 static void
 init_bounce_pages(void *dummy __unused)
 {
 
 	total_bpages = 0;
 	STAILQ_INIT(&bounce_zone_list);
 	STAILQ_INIT(&bounce_map_waitinglist);
 	STAILQ_INIT(&bounce_map_callbacklist);
 	mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
 }
 SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
 	return (&bz->sysctl_tree);
 }
 
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
 	return (bz->sysctl_tree_top);
 }
 
 static int
 alloc_bounce_zone(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *bz;
 
 	/* Check to see if we already have a suitable zone */
 	STAILQ_FOREACH(bz, &bounce_zone_list, links) {
 		if ((dmat->alignment <= bz->alignment)
 		 && (dmat->lowaddr >= bz->lowaddr)) {
 			dmat->bounce_zone = bz;
 			return (0);
 		}
 	}
 
 	if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF,
 	    M_NOWAIT | M_ZERO)) == NULL)
 		return (ENOMEM);
 
 	STAILQ_INIT(&bz->bounce_page_list);
 	bz->free_bpages = 0;
 	bz->reserved_bpages = 0;
 	bz->active_bpages = 0;
 	bz->lowaddr = dmat->lowaddr;
 	bz->alignment = MAX(dmat->alignment, PAGE_SIZE);
 	bz->map_count = 0;
 	snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount);
 	busdma_zonecount++;
 	snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr);
 	STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links);
 	dmat->bounce_zone = bz;
 
 	sysctl_ctx_init(&bz->sysctl_tree);
 	bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree,
 	    SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid,
 	    CTLFLAG_RD, 0, "");
 	if (bz->sysctl_tree_top == NULL) {
 		sysctl_ctx_free(&bz->sysctl_tree);
 		return (0);	/* XXX error code? */
 	}
 
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0,
 	    "Total bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0,
 	    "Free bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0,
 	    "Reserved bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0,
 	    "Active bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0,
 	    "Total bounce requests");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0,
 	    "Total bounce requests that were deferred");
 	SYSCTL_ADD_STRING(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, "");
 	SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "alignment", CTLFLAG_RD, &bz->alignment, "");
 
 	return (0);
 }
 
 static int
 alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
 {
 	struct bounce_zone *bz;
 	int count;
 
 	bz = dmat->bounce_zone;
 	count = 0;
 	while (numpages > 0) {
 		struct bounce_page *bpage;
 
 		bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF,
 						     M_NOWAIT | M_ZERO);
 
 		if (bpage == NULL)
 			break;
 		bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF,
 							 M_NOWAIT, 0ul,
 							 bz->lowaddr,
 							 PAGE_SIZE,
 							 0);
 		if (bpage->vaddr == 0) {
 			free(bpage, M_DEVBUF);
 			break;
 		}
 		bpage->busaddr = pmap_kextract(bpage->vaddr);
 		mtx_lock(&bounce_lock);
 		STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links);
 		total_bpages++;
 		bz->total_bpages++;
 		bz->free_bpages++;
 		mtx_unlock(&bounce_lock);
 		count++;
 		numpages--;
 	}
 	return (count);
 }
 
 static int
 reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
 {
 	struct bounce_zone *bz;
 	int pages;
 
 	mtx_assert(&bounce_lock, MA_OWNED);
 	bz = dmat->bounce_zone;
 	pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved);
 	if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages))
 		return (map->pagesneeded - (map->pagesreserved + pages));
 	bz->free_bpages -= pages;
 	bz->reserved_bpages += pages;
 	map->pagesreserved += pages;
 	pages = map->pagesneeded - map->pagesreserved;
 
 	return (pages);
 }
 
 static bus_addr_t
 add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
 		bus_addr_t addr, bus_size_t size)
 {
 	struct bounce_zone *bz;
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
 
 	bz = dmat->bounce_zone;
 	if (map->pagesneeded == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesneeded--;
 
 	if (map->pagesreserved == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesreserved--;
 
 	mtx_lock(&bounce_lock);
 	bpage = STAILQ_FIRST(&bz->bounce_page_list);
 	if (bpage == NULL)
 		panic("add_bounce_page: free page list is empty");
 
 	STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
 	bz->reserved_bpages--;
 	bz->active_bpages++;
 	mtx_unlock(&bounce_lock);
 
 	if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/* Page offset needs to be preserved. */
 		bpage->vaddr |= addr & PAGE_MASK;
 		bpage->busaddr |= addr & PAGE_MASK;
 	}
 	bpage->datavaddr = vaddr;
 	bpage->datapage = PHYS_TO_VM_PAGE(addr);
 	bpage->dataoffs = addr & PAGE_MASK;
 	bpage->datacount = size;
 	STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
 	return (bpage->busaddr);
 }
 
 static void
 free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
 {
 	struct bus_dmamap *map;
 	struct bounce_zone *bz;
 
 	bz = dmat->bounce_zone;
 	bpage->datavaddr = 0;
 	bpage->datacount = 0;
 	if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * Reset the bounce page to start at offset 0.  Other uses
 		 * of this bounce page may need to store a full page of
 		 * data and/or assume it starts on a page boundary.
 		 */
 		bpage->vaddr &= ~PAGE_MASK;
 		bpage->busaddr &= ~PAGE_MASK;
 	}
 
 	mtx_lock(&bounce_lock);
 	STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links);
 	bz->free_bpages++;
 	bz->active_bpages--;
 	if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
 		if (reserve_bounce_pages(map->dmat, map, 1) == 0) {
 			STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
 			STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
 					   map, links);
 			busdma_swi_pending = 1;
 			bz->total_deferred++;
 			swi_sched(vm_ih, 0);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 void
 busdma_swi(void)
 {
 	bus_dma_tag_t dmat;
 	struct bus_dmamap *map;
 
 	mtx_lock(&bounce_lock);
 	while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
 		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
 		mtx_unlock(&bounce_lock);
 		dmat = map->dmat;
 		(dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK);
 		bus_dmamap_load_mem(map->dmat, map, &map->mem,
 				    map->callback, map->callback_arg,
 				    BUS_DMA_WAITOK);
 		(dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK);
 		mtx_lock(&bounce_lock);
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 int
 bus_dma_tag_set_iommu(bus_dma_tag_t tag, device_t iommu, void *cookie)
 {
 	tag->iommu = iommu;
 	tag->iommu_cookie = cookie;
 
 	return (0);
 }
 
Index: head/sys/vm/uma.h
===================================================================
--- head/sys/vm/uma.h	(revision 338317)
+++ head/sys/vm/uma.h	(revision 338318)
@@ -1,717 +1,716 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  */
 
 /*
  * uma.h - External definitions for the Universal Memory Allocator
  *
  */
 
 #ifndef _VM_UMA_H_
 #define _VM_UMA_H_
 
 #include <sys/param.h>		/* For NULL */
 #include <sys/malloc.h>		/* For M_* */
 
 /* User visible parameters */
 #define UMA_SMALLEST_UNIT       (PAGE_SIZE / 256) /* Smallest item allocated */
 
 /* Types and type defs */
 
 struct uma_zone;
 /* Opaque type used as a handle to the zone */
 typedef struct uma_zone * uma_zone_t;
 
 void zone_drain(uma_zone_t);
 
 /*
  * Item constructor
  *
  * Arguments:
  *	item  A pointer to the memory which has been allocated.
  *	arg   The arg field passed to uma_zalloc_arg
  *	size  The size of the allocated item
  *	flags See zalloc flags
  *
  * Returns:
  *	0      on success
  *      errno  on failure
  *
  * Discussion:
  *	The constructor is called just before the memory is returned
  *	to the user. It may block if necessary.
  */
 typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
 
 /*
  * Item destructor
  *
  * Arguments:
  *	item  A pointer to the memory which has been allocated.
  *	size  The size of the item being destructed.
  *	arg   Argument passed through uma_zfree_arg
  *
  * Returns:
  *	Nothing
  *
  * Discussion:
  *	The destructor may perform operations that differ from those performed
  *	by the initializer, but it must leave the object in the same state.
  *	This IS type stable storage.  This is called after EVERY zfree call.
  */
 typedef void (*uma_dtor)(void *mem, int size, void *arg);
 
 /*
  * Item initializer
  *
  * Arguments:
  *	item  A pointer to the memory which has been allocated.
  *	size  The size of the item being initialized.
  *	flags See zalloc flags
  *
  * Returns:
  *	0      on success
  *      errno  on failure
  *
  * Discussion:
  *	The initializer is called when the memory is cached in the uma zone.
  *	The initializer and the destructor should leave the object in the same
  *	state.
  */
 typedef int (*uma_init)(void *mem, int size, int flags);
 
 /*
  * Item discard function
  *
  * Arguments:
  *	item  A pointer to memory which has been 'freed' but has not left the
  *	      zone's cache.
  *	size  The size of the item being discarded.
  *
  * Returns:
  *	Nothing
  *
  * Discussion:
  *	This routine is called when memory leaves a zone and is returned to the
  *	system for other uses.  It is the counterpart to the init function.
  */
 typedef void (*uma_fini)(void *mem, int size);
 
 /*
  * Import new memory into a cache zone.
  */
 typedef int (*uma_import)(void *arg, void **store, int count, int domain,
     int flags);
 
 /*
  * Free memory from a cache zone.
  */
 typedef void (*uma_release)(void *arg, void **store, int count);
 
 /*
  * What's the difference between initializing and constructing?
  *
  * The item is initialized when it is cached, and this is the state that the
  * object should be in when returned to the allocator. The purpose of this is
  * to remove some code which would otherwise be called on each allocation by
  * utilizing a known, stable state.  This differs from the constructor which
  * will be called on EVERY allocation.
  *
  * For example, in the initializer you may want to initialize embedded locks,
  * NULL list pointers, set up initial states, magic numbers, etc.  This way if
  * the object is held in the allocator and re-used it won't be necessary to
  * re-initialize it.
  *
  * The constructor may be used to lock a data structure, link it on to lists,
  * bump reference counts or total counts of outstanding structures, etc.
  *
  */
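A hedged example of the split: struct foo and its fields are invented, the callback signatures are the typedefs above, and the mutex calls are the standard mtx(9) interface.

struct foo {
	struct mtx	f_lock;
	int		f_refs;
};

static int
foo_init(void *mem, int size, int flags)
{
	struct foo *f = mem;

	/* Runs once, when the item first enters the zone's cache. */
	mtx_init(&f->f_lock, "foo", NULL, MTX_DEF);
	return (0);
}

static void
foo_fini(void *mem, int size)
{
	struct foo *f = mem;

	/* Undoes foo_init() when the item is handed back to the VM. */
	mtx_destroy(&f->f_lock);
}

static int
foo_ctor(void *mem, int size, void *arg, int flags)
{
	struct foo *f = mem;

	/* Runs on every allocation; set up per-use state only. */
	f->f_refs = 1;
	return (0);
}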
 
 
 /* Function proto types */
 
 /*
  * Create a new uma zone
  *
  * Arguments:
  *	name  The text name of the zone for debugging and stats. This memory
  *		should not be freed until the zone has been deallocated.
  *	size  The size of the object that is being created.
  *	ctor  The constructor that is called when the object is allocated.
  *	dtor  The destructor that is called when the object is freed.
  *	init  An initializer that sets up the initial state of the memory.
  *	fini  A discard function that undoes initialization done by init.
  *		ctor/dtor/init/fini may all be null, see notes above.
  *	align A bitmask that corresponds to the requested alignment
  *		e.g. an alignment of 4 would be 0x3
  *	flags A set of parameters that control the behavior of the zone.
  *
  * Returns:
  *	A pointer to a structure which is intended to be opaque to users of
  *	the interface.  The value may be null if the wait flag is not set.
  */
 uma_zone_t uma_zcreate(const char *name, size_t size, uma_ctor ctor,
 		    uma_dtor dtor, uma_init uminit, uma_fini fini,
 		    int align, uint32_t flags);
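Continuing the hypothetical foo example from above, a zone might be created like this; the SYSINIT placement is illustrative only, and UMA_ALIGN_CACHE is defined later in this header.

static uma_zone_t foo_zone;

static void
foo_zone_setup(void *unused)
{

	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    foo_ctor, NULL,		/* ctor, no dtor */
	    foo_init, foo_fini,		/* keg-level init/fini */
	    UMA_ALIGN_CACHE, 0);
}
SYSINIT(foo_zone_setup, SI_SUB_DRIVERS, SI_ORDER_ANY, foo_zone_setup, NULL);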
 
 /*
  * Create a secondary uma zone
  *
  * Arguments:
  *	name  The text name of the zone for debugging and stats. This memory
  *		should not be freed until the zone has been deallocated.
  *	ctor  The constructor that is called when the object is allocated.
  *	dtor  The destructor that is called when the object is freed.
  *	zinit  An initializer that sets up the initial state of the memory
  *		as the object passes from the Keg's slab to the Zone's cache.
  *	zfini  A discard function that undoes initialization done by init
  *		as the object passes from the Zone's cache to the Keg's slab.
  *
  *		ctor/dtor/zinit/zfini may all be null, see notes above.
  *		Note that the zinit and zfini specified here are NOT
  *		exactly the same as the init/fini specified to uma_zcreate()
  *		when creating a master zone.  These zinit/zfini are called
  *		on the TRANSITION from keg to zone (and vice-versa). Once
  *		these are set, the primary zone may alter its init/fini
  *		(which are called when the object passes from VM to keg)
  *		using uma_zone_set_init/fini()) as well as its own
  *		zinit/zfini (unset by default for master zone) with
  *		uma_zone_set_zinit/zfini() (note subtle 'z' prefix).
  *
  *	master  A reference to this zone's Master Zone (Primary Zone),
  *		which contains the backing Keg for the Secondary Zone
  *		being added.
  *
  * Returns:
  *	A pointer to a structure which is intended to be opaque to users of
  *	the interface.  The value may be null if the wait flag is not set.
  */
 uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_zone_t master);
 
 /*
  * Add a second master to a secondary zone.  This provides multiple data
  * backends for objects with the same size.  Both masters must have
  * compatible allocation flags.  Presently, UMA_ZONE_MALLOC type zones are
  * the only type supported.
  *
  * Returns:
  *	Error on failure, 0 on success.
  */
 int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
 
 /*
  * Create cache-only zones.
  *
  * This allows uma's per-cpu cache facilities to handle arbitrary
  * pointers.  Consumers must specify the import and release functions to
  * fill and destroy caches.  UMA does not allocate any memory for these
  * zones.  The 'arg' parameter is passed to import/release and is caller
  * specific.
  */
 uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_import zimport,
 		    uma_release zrelease, void *arg, int flags);
 
 /*
  * Definitions for uma_zcreate flags
  *
  * These flags share space with UMA_ZFLAGs in uma_int.h.  Be careful not to
  * overlap when adding new features.  0xff000000 is in use by uma_int.h.
  */
 #define UMA_ZONE_PAGEABLE	0x0001	/* Return items not fully backed by
 					   physical memory XXX Not yet */
 #define UMA_ZONE_ZINIT		0x0002	/* Initialize with zeros */
 #define UMA_ZONE_STATIC		0x0004	/* Statically sized zone */
 #define UMA_ZONE_OFFPAGE	0x0008	/* Force the slab structure allocation
 					   off of the real memory */
 #define UMA_ZONE_MALLOC		0x0010	/* For use by malloc(9) only! */
 #define UMA_ZONE_NOFREE		0x0020	/* Do not free slabs of this type! */
 #define UMA_ZONE_MTXCLASS	0x0040	/* Create a new lock class */
 #define	UMA_ZONE_VM		0x0080	/*
 					 * Used for internal vm datastructures
 					 * only.
 					 */
 #define	UMA_ZONE_HASH		0x0100	/*
 					 * Use a hash table instead of caching
 					 * information in the vm_page.
 					 */
 #define	UMA_ZONE_SECONDARY	0x0200	/* Zone is a Secondary Zone */
 #define	UMA_ZONE_NOBUCKET	0x0400	/* Do not use buckets. */
 #define	UMA_ZONE_MAXBUCKET	0x0800	/* Use largest buckets. */
 #define	UMA_ZONE_CACHESPREAD	0x1000	/*
 					 * Spread memory start locations across
 					 * all possible cache lines.  May
 					 * require many virtually contiguous
 					 * backend pages and can fail early.
 					 */
 #define	UMA_ZONE_VTOSLAB	0x2000	/* Zone uses vtoslab for lookup. */
 #define	UMA_ZONE_NODUMP		0x4000	/*
 					 * Zone's pages will not be included in
 					 * mini-dumps.
 					 */
 #define	UMA_ZONE_PCPU		0x8000	/*
 					 * Allocates mp_maxid + 1 slabs of PAGE_SIZE
 					 */
 #define	UMA_ZONE_NUMA		0x10000	/*
 					 * NUMA aware Zone.  Implements a best
 					 * effort first-touch policy.
 					 */
 #define	UMA_ZONE_NOBUCKETCACHE	0x20000	/*
 					 * Don't cache full buckets.  Limit
 					 * UMA to per-cpu state.
 					 */
 
 /*
  * These flags are shared between the keg and zone.  In zones wishing to add
  * new kegs these flags must be compatible.  Some are determined based on
  * physical parameters of the request and may not be provided by the consumer.
  */
 #define	UMA_ZONE_INHERIT						\
     (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE |		\
     UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
 
 /* Definitions for align */
 #define UMA_ALIGN_PTR	(sizeof(void *) - 1)	/* Alignment fit for ptr */
 #define UMA_ALIGN_LONG	(sizeof(long) - 1)	/* "" long */
 #define UMA_ALIGN_INT	(sizeof(int) - 1)	/* "" int */
 #define UMA_ALIGN_SHORT	(sizeof(short) - 1)	/* "" short */
 #define UMA_ALIGN_CHAR	(sizeof(char) - 1)	/* "" char */
 #define UMA_ALIGN_CACHE	(0 - 1)			/* Cache line size align */
 #define	UMA_ALIGNOF(type) (_Alignof(type) - 1)	/* Alignment fit for 'type' */
 
 /*
  * Destroys an empty uma zone.  If the zone is not empty uma complains loudly.
  *
  * Arguments:
  *	zone  The zone we want to destroy.
  *
  */
 void uma_zdestroy(uma_zone_t zone);
 
 /*
  * Allocates an item out of a zone
  *
  * Arguments:
  *	zone  The zone we are allocating from
  *	arg   This data is passed to the ctor function
  *	flags See sys/malloc.h for available flags.
  *
  * Returns:
  *	A non-null pointer to an initialized element from the zone is
  *	guaranteed if the wait flag is M_WAITOK.  Otherwise a null pointer
  *	may be returned if the zone is empty or the ctor failed.
  */
 
 void *uma_zalloc_arg(uma_zone_t zone, void *arg, int flags);
 void *uma_zalloc_pcpu_arg(uma_zone_t zone, void *arg, int flags);
 
 /*
  * Allocate an item from a specific NUMA domain.  This uses a slow path in
  * the allocator but is guaranteed to allocate memory from the requested
  * domain if M_WAITOK is set.
  *
  * Arguments:
  *	zone  The zone we are allocating from
  *	arg   This data is passed to the ctor function
  *	domain The domain to allocate from.
  *	flags See sys/malloc.h for available flags.
  */
 void *uma_zalloc_domain(uma_zone_t zone, void *arg, int domain, int flags);
 
 /*
  * Allocates an item out of a zone without supplying an argument
  *
  * This is just a wrapper for uma_zalloc_arg for convenience.
  *
  */
 static __inline void *uma_zalloc(uma_zone_t zone, int flags);
 static __inline void *uma_zalloc_pcpu(uma_zone_t zone, int flags);
 
 static __inline void *
 uma_zalloc(uma_zone_t zone, int flags)
 {
 	return uma_zalloc_arg(zone, NULL, flags);
 }
 
 static __inline void *
 uma_zalloc_pcpu(uma_zone_t zone, int flags)
 {
 	return uma_zalloc_pcpu_arg(zone, NULL, flags);
 }
 
 /*
  * Frees an item back into the specified zone.
  *
  * Arguments:
  *	zone  The zone the item was originally allocated out of.
  *	item  The memory to be freed.
  *	arg   Argument passed to the destructor
  *
  * Returns:
  *	Nothing.
  */
 
 void uma_zfree_arg(uma_zone_t zone, void *item, void *arg);
 void uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *arg);
 
 /*
  * Frees an item back to the specified zone's domain specific pool.
  *
  * Arguments:
  *	zone  The zone the item was originally allocated out of.
  *	item  The memory to be freed.
  *	arg   Argument passed to the destructor
  */
 void uma_zfree_domain(uma_zone_t zone, void *item, void *arg);
 
 /*
  * Frees an item back to a zone without supplying an argument
  *
  * This is just a wrapper for uma_zfree_arg for convenience.
  *
  */
 static __inline void uma_zfree(uma_zone_t zone, void *item);
 static __inline void uma_zfree_pcpu(uma_zone_t zone, void *item);
 
 static __inline void
 uma_zfree(uma_zone_t zone, void *item)
 {
 	uma_zfree_arg(zone, item, NULL);
 }
 
 static __inline void
 uma_zfree_pcpu(uma_zone_t zone, void *item)
 {
 	uma_zfree_pcpu_arg(zone, item, NULL);
 }
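A short usage sketch tying the hypothetical foo_zone together; with M_NOWAIT the allocation may legitimately return NULL, as documented above.

static struct foo *
foo_alloc(void)
{
	struct foo *f;

	f = uma_zalloc(foo_zone, M_NOWAIT);
	if (f == NULL)
		return (NULL);		/* zone empty or ctor failed */
	return (f);
}

static void
foo_release(struct foo *f)
{

	uma_zfree(foo_zone, f);
}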
 
 /*
  * Wait until the specified zone can allocate an item.
  */
 void uma_zwait(uma_zone_t zone);
 
 /*
  * Backend page supplier routines
  *
  * Arguments:
  *	zone  The zone that is requesting pages.
  *	size  The number of bytes being requested.
  *	pflag Flags for these memory pages, see below.
  *	domain The NUMA domain that we prefer for this allocation.
  *	wait  Indicates our willingness to block.
  *
  * Returns:
  *	A pointer to the allocated memory or NULL on failure.
  */
 
 typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
     uint8_t *pflag, int wait);
 
 /*
  * Backend page free routines
  *
  * Arguments:
  *	item  A pointer to the previously allocated pages.
  *	size  The original size of the allocation.
  *	pflag The flags for the slab.  See UMA_SLAB_* below.
  *
  * Returns:
  *	None
  */
 typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
 
 /*
  * Reclaims unused memory for all zones
  *
  * Arguments:
  *	None
  * Returns:
  *	None
  *
  * This should only be called by the page out daemon.
  */
 
 void uma_reclaim(void);
 
 /*
  * Sets the alignment mask to be used for all zones requesting cache
  * alignment.  Should be called by MD boot code prior to starting VM/UMA.
  *
  * Arguments:
  *	align The alignment mask
  *
  * Returns:
  *	Nothing
  */
 void uma_set_align(int align);
 
 /*
  * Set a reserved number of items to hold for M_USE_RESERVE allocations.  All
  * other requests must allocate new backing pages.
  */
 void uma_zone_reserve(uma_zone_t zone, int nitems);
 
 /*
  * Reserves the maximum KVA space required by the zone and configures the zone
  * to use a VM_ALLOC_NOOBJ-based backend allocator.
  *
  * Arguments:
  *	zone  The zone to update.
  *	nitems  The upper limit on the number of items that can be allocated.
  *
  * Returns:
  *	0  if KVA space can not be allocated
  *	1  if successful
  *
  * Discussion:
  *	When the machine supports a direct map and the zone's items are smaller
  *	than a page, the zone will use the direct map instead of allocating KVA
  *	space.
  */
 int uma_zone_reserve_kva(uma_zone_t zone, int nitems);
 
 /*
  * Sets a high limit on the number of items allowed in a zone
  *
  * Arguments:
  *	zone  The zone to limit
  *	nitems  The requested upper limit on the number of items allowed
  *
  * Returns:
  *	int  The effective value of nitems after rounding up based on page size
  */
 int uma_zone_set_max(uma_zone_t zone, int nitems);
 
 /*
  * Obtains the effective limit on the number of items in a zone
  *
  * Arguments:
  *	zone  The zone to obtain the effective limit from
  *
  * Return:
  *	0  No limit
  *	int  The effective limit of the zone
  */
 int uma_zone_get_max(uma_zone_t zone);
 
 /*
  * Sets a warning to be printed when limit is reached
  *
  * Arguments:
  *	zone  The zone we will warn about
  *	warning  Warning content
  *
  * Returns:
  *	Nothing
  */
 void uma_zone_set_warning(uma_zone_t zone, const char *warning);
 
 /*
  * Sets a function to run when limit is reached
  *
  * Arguments:
  *	zone  The zone to which this applies
  *	fx  The function to run
  *
  * Returns:
  *	Nothing
  */
 typedef void (*uma_maxaction_t)(uma_zone_t, int);
 void uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t);
 
 /*
  * Obtains the approximate current number of items allocated from a zone
  *
  * Arguments:
  *	zone  The zone to obtain the current allocation count from
  *
  * Return:
  *	int  The approximate current number of items allocated from the zone
  */
 int uma_zone_get_cur(uma_zone_t zone);
 
 /*
  * The following two routines (uma_zone_set_init/fini)
  * are used to set the backend init/fini pair which acts on an
  * object as it becomes allocated and is placed in a slab within
  * the specified zone's backing keg.  These should probably not
  * be changed once allocations have already begun, but only be set
  * immediately upon zone creation.
  */
 void uma_zone_set_init(uma_zone_t zone, uma_init uminit);
 void uma_zone_set_fini(uma_zone_t zone, uma_fini fini);
 
 /*
  * The following two routines (uma_zone_set_zinit/zfini) are
  * used to set the zinit/zfini pair which acts on an object as
  * it passes from the backing Keg's slab cache to the
  * specified Zone's bucket cache.  These should probably not
  * be changed once allocations have already begun, but only be set
  * immediately upon zone creation.
  */
 void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit);
 void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini);
 
 /*
  * Replaces the standard backend allocator for this zone.
  *
  * Arguments:
  *	zone   The zone whose backend allocator is being changed.
  *	allocf A pointer to the allocation function
  *
  * Returns:
  *	Nothing
  *
  * Discussion:
  *	This could be used to implement pageable allocation, or perhaps
  *	even DMA allocators if used in conjunction with the OFFPAGE
  *	zone flag.
  */
 
 void uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf);
 
 /*
  * Used for freeing memory provided by the allocf above
  *
  * Arguments:
  *	zone  The zone that intends to use this free routine.
  *	freef The page freeing routine.
  *
  * Returns:
  *	Nothing
  */
 
 void uma_zone_set_freef(uma_zone_t zone, uma_free freef);
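As a hedged illustration of these hooks, a custom backend could hand out physically contiguous, 32-bit addressable pages; the DMA constraints and the foo_dma_* names are invented, while contigmalloc(9)/contigfree(9) and UMA_SLAB_PRIV are real.  The NUMA domain argument is simply ignored in this sketch.

static void *
foo_dma_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
    int wait)
{

	*pflag = UMA_SLAB_PRIV;		/* freed by foo_dma_free() below */
	return (contigmalloc(bytes, M_DEVBUF, wait, 0,
	    BUS_SPACE_MAXADDR_32BIT, PAGE_SIZE, 0));
}

static void
foo_dma_free(void *item, vm_size_t size, uint8_t pflag)
{

	contigfree(item, size, M_DEVBUF);
}

These would then be installed right after uma_zcreate() with uma_zone_set_allocf(zone, foo_dma_alloc) and uma_zone_set_freef(zone, foo_dma_free).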
 
 /*
  * These flags are settable in the allocf and visible in the freef.
  */
 #define UMA_SLAB_BOOT	0x01		/* Slab alloced from boot pages */
-#define UMA_SLAB_KRWX	0x02		/* Slab alloced from kernel_rwx_arena */
-#define UMA_SLAB_KERNEL	0x04		/* Slab alloced from kernel_map */
+#define UMA_SLAB_KERNEL	0x04		/* Slab alloced from kmem */
 #define UMA_SLAB_PRIV	0x08		/* Slab alloced from priv allocator */
 #define UMA_SLAB_OFFP	0x10		/* Slab is managed separately  */
 #define UMA_SLAB_MALLOC	0x20		/* Slab is a large malloc slab */
-/* 0x40 and 0x80 are available */
+/* 0x02, 0x40, and 0x80 are available */
 
 /*
  * Used to pre-fill a zone with some number of items
  *
  * Arguments:
  *	zone    The zone to fill
  *	itemcnt The number of items to reserve
  *
  * Returns:
  *	Nothing
  *
  * NOTE: This is blocking and should only be done at startup
  */
 void uma_prealloc(uma_zone_t zone, int itemcnt);
 
 /*
  * Used to determine if a fixed-size zone is exhausted.
  *
  * Arguments:
  *	zone    The zone to check
  *
  * Returns:
  *	Non-zero if zone is exhausted.
  */
 int uma_zone_exhausted(uma_zone_t zone);
 int uma_zone_exhausted_nolock(uma_zone_t zone);
 
 /*
  * Common UMA_ZONE_PCPU zones.
  */
 extern uma_zone_t pcpu_zone_64;
 extern uma_zone_t pcpu_zone_ptr;
 
 /*
  * Exported statistics structures to be used by user space monitoring tools.
  * Statistics stream consists of a uma_stream_header, followed by a series of
  * alternating uma_type_header and uma_type_stat structures.
  */
 #define	UMA_STREAM_VERSION	0x00000001
 struct uma_stream_header {
 	uint32_t	ush_version;	/* Stream format version. */
 	uint32_t	ush_maxcpus;	/* Value of MAXCPU for stream. */
 	uint32_t	ush_count;	/* Number of records. */
 	uint32_t	_ush_pad;	/* Pad/reserved field. */
 };
 
 #define	UTH_MAX_NAME	32
 #define	UTH_ZONE_SECONDARY	0x00000001
 struct uma_type_header {
 	/*
 	 * Static per-zone data, some extracted from the supporting keg.
 	 */
 	char		uth_name[UTH_MAX_NAME];
 	uint32_t	uth_align;	/* Keg: alignment. */
 	uint32_t	uth_size;	/* Keg: requested size of item. */
 	uint32_t	uth_rsize;	/* Keg: real size of item. */
 	uint32_t	uth_maxpages;	/* Keg: maximum number of pages. */
 	uint32_t	uth_limit;	/* Keg: max items to allocate. */
 
 	/*
 	 * Current dynamic zone/keg-derived statistics.
 	 */
 	uint32_t	uth_pages;	/* Keg: pages allocated. */
 	uint32_t	uth_keg_free;	/* Keg: items free. */
 	uint32_t	uth_zone_free;	/* Zone: items free. */
 	uint32_t	uth_bucketsize;	/* Zone: desired bucket size. */
 	uint32_t	uth_zone_flags;	/* Zone: flags. */
 	uint64_t	uth_allocs;	/* Zone: number of allocations. */
 	uint64_t	uth_frees;	/* Zone: number of frees. */
 	uint64_t	uth_fails;	/* Zone: number of alloc failures. */
 	uint64_t	uth_sleeps;	/* Zone: number of alloc sleeps. */
 	uint64_t	_uth_reserved1[2];	/* Reserved. */
 };
 
 struct uma_percpu_stat {
 	uint64_t	ups_allocs;	/* Cache: number of allocations. */
 	uint64_t	ups_frees;	/* Cache: number of frees. */
 	uint64_t	ups_cache_free;	/* Cache: free items in cache. */
 	uint64_t	_ups_reserved[5];	/* Reserved. */
 };
 
 void uma_reclaim_wakeup(void);
 void uma_reclaim_worker(void *);
 
 unsigned long uma_limit(void);
 
 /* Return the amount of memory managed by UMA. */
 unsigned long uma_size(void);
 
 /* Return the amount of memory remaining.  May be negative. */
 long uma_avail(void);
 
 #endif	/* _VM_UMA_H_ */
Index: head/sys/vm/uma_core.c
===================================================================
--- head/sys/vm/uma_core.c	(revision 338317)
+++ head/sys/vm/uma_core.c	(revision 338318)
@@ -1,4235 +1,4219 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
  * Copyright (c) 2004-2006 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * uma_core.c  Implementation of the Universal Memory allocator
  *
  * This allocator is intended to replace the multitude of similar object caches
  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
  * efficient.  A primary design goal is to return unused memory to the rest of
  * the system.  This will make the system as a whole more flexible due to the
  * ability to move memory to subsystems which most need it instead of leaving
  * pools of reserved memory unused.
  *
  * The basic ideas stem from similar slab/zone based allocators whose algorithms
  * are well known.
  *
  */
 
 /*
  * TODO:
  *	- Improve memory usage for large allocations
  *	- Investigate cache size adjustments
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_param.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitset.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/types.h>
 #include <sys/limits.h>
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/sysctl.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/random.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/taskqueue.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_param.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 #include <vm/uma_dbg.h>
 
 #include <ddb/ddb.h>
 
 #ifdef DEBUG_MEMGUARD
 #include <vm/memguard.h>
 #endif
 
 /*
  * This is the zone and keg from which all zones are spawned.
  */
 static uma_zone_t kegs;
 static uma_zone_t zones;
 
 /* This is the zone from which all offpage uma_slab_ts are allocated. */
 static uma_zone_t slabzone;
 
 /*
  * The initial hash tables come out of this zone so they can be allocated
  * prior to malloc coming up.
  */
 static uma_zone_t hashzone;
 
 /* The boot-time adjusted value for cache line alignment. */
 int uma_align_cache = 64 - 1;
 
 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
 
 /*
  * Are we allowed to allocate buckets?
  */
 static int bucketdisable = 1;
 
 /* Linked list of all kegs in the system */
 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
 
 /* Linked list of all cache-only zones in the system */
 static LIST_HEAD(,uma_zone) uma_cachezones =
     LIST_HEAD_INITIALIZER(uma_cachezones);
 
 /* This RW lock protects the keg list */
 static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
 
 /*
  * Pointer and counter for the pool of pages that is preallocated at
  * startup to bootstrap UMA.
  */
 static char *bootmem;
 static int boot_pages;
 
 static struct sx uma_drain_lock;
 
 /* kmem soft limit. */
 static unsigned long uma_kmem_limit = LONG_MAX;
 static volatile unsigned long uma_kmem_total;
 
 /* Is the VM done starting up? */
 static enum { BOOT_COLD = 0, BOOT_STRAPPED, BOOT_PAGEALLOC, BOOT_BUCKETS,
     BOOT_RUNNING } booted = BOOT_COLD;
 
 /*
  * This is the handle used to schedule events that need to happen
  * outside of the allocation fast path.
  */
 static struct callout uma_callout;
 #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
 
 /*
  * This structure is passed as the zone ctor arg so that I don't have to create
  * a special allocation function just for zones.
  */
 struct uma_zctor_args {
 	const char *name;
 	size_t size;
 	uma_ctor ctor;
 	uma_dtor dtor;
 	uma_init uminit;
 	uma_fini fini;
 	uma_import import;
 	uma_release release;
 	void *arg;
 	uma_keg_t keg;
 	int align;
 	uint32_t flags;
 };
 
 struct uma_kctor_args {
 	uma_zone_t zone;
 	size_t size;
 	uma_init uminit;
 	uma_fini fini;
 	int align;
 	uint32_t flags;
 };
 
 struct uma_bucket_zone {
 	uma_zone_t	ubz_zone;
 	char		*ubz_name;
 	int		ubz_entries;	/* Number of items it can hold. */
 	int		ubz_maxsize;	/* Maximum allocation size per-item. */
 };
 
 /*
  * Compute the actual number of bucket entries so that buckets pack into
  * power-of-two sizes for more efficient space utilization.
  */
 #define	BUCKET_SIZE(n)						\
     (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
 
 #define	BUCKET_MAX	BUCKET_SIZE(256)
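To see what the macro buys, a worked example, assuming for illustration that struct uma_bucket occupies three pointer-sized words on LP64 (a list entry plus two 16-bit counters, padded):

/*
 *	BUCKET_SIZE(32) = (32 * 8 - 24) / 8 = 29
 *
 * so a "32 Bucket" stores 29 item pointers and the whole bucket fills
 * exactly 32 pointer-sized slots, i.e. a power-of-two footprint.
 */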
 
 struct uma_bucket_zone bucket_zones[] = {
 	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
 	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
 	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
 	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
 	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
 	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
 	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
 	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
 	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
 	{ NULL, NULL, 0}
 };
 
 /*
  * Flags and enumerations to be passed to internal functions.
  */
 enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
 
 #define	UMA_ANYDOMAIN	-1	/* Special value for domain search. */
 
 /* Prototypes.. */
 
 int	uma_startup_count(int);
 void	uma_startup(void *, int);
 void	uma_startup1(void);
 void	uma_startup2(void);
 
 static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void page_free(void *, vm_size_t, uint8_t);
 static void pcpu_page_free(void *, vm_size_t, uint8_t);
 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int);
 static void cache_drain(uma_zone_t);
 static void bucket_drain(uma_zone_t, uma_bucket_t);
 static void bucket_cache_drain(uma_zone_t zone);
 static int keg_ctor(void *, int, void *, int);
 static void keg_dtor(void *, int, void *);
 static int zone_ctor(void *, int, void *, int);
 static void zone_dtor(void *, int, void *);
 static int zero_init(void *, int, int);
 static void keg_small_init(uma_keg_t keg);
 static void keg_large_init(uma_keg_t keg);
 static void zone_foreach(void (*zfunc)(uma_zone_t));
 static void zone_timeout(uma_zone_t zone);
 static int hash_alloc(struct uma_hash *);
 static int hash_expand(struct uma_hash *, struct uma_hash *);
 static void hash_free(struct uma_hash *hash);
 static void uma_timeout(void *);
 static void uma_startup3(void);
 static void *zone_alloc_item(uma_zone_t, void *, int, int);
 static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
 static void bucket_enable(void);
 static void bucket_init(void);
 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
 static void bucket_zone_drain(void);
 static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
 static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int);
 static uma_slab_t zone_fetch_slab_multi(uma_zone_t, uma_keg_t, int, int);
 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
 static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
 static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
     uma_fini fini, int align, uint32_t flags);
 static int zone_import(uma_zone_t, void **, int, int, int);
 static void zone_release(uma_zone_t, void **, int);
 static void uma_zero_item(void *, uma_zone_t);
 
 void uma_print_zone(uma_zone_t);
 void uma_print_stats(void);
 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
 
 #ifdef INVARIANTS
 static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
 static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
 static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
 static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
 
 static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0,
     "Memory allocation debugging");
 
 static u_int dbg_divisor = 1;
 SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
     "Debug & thrash every this item in memory allocator");
 
 static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
 static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
 SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
     &uma_dbg_cnt, "memory items debugged");
 SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
     &uma_skip_cnt, "memory items skipped, not debugged");
 #endif
 
 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
 
 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
 
 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
 
 static int zone_warnings = 1;
 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
     "Warn when UMA zones becomes full");
 
 /* Adjust bytes under management by UMA. */
 static inline void
 uma_total_dec(unsigned long size)
 {
 
 	atomic_subtract_long(&uma_kmem_total, size);
 }
 
 static inline void
 uma_total_inc(unsigned long size)
 {
 
 	if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
 		uma_reclaim_wakeup();
 }
 
 /*
  * This routine checks whether it is safe to enable buckets.
  */
 static void
 bucket_enable(void)
 {
 	bucketdisable = vm_page_count_min();
 }
 
 /*
  * Initialize bucket_zones, the array of zones of buckets of various sizes.
  *
  * For each zone, calculate the memory required for each bucket, consisting
  * of the header and an array of pointers.
  */
 static void
 bucket_init(void)
 {
 	struct uma_bucket_zone *ubz;
 	int size;
 
 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
 		size += sizeof(void *) * ubz->ubz_entries;
 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
 	}
 }
 
 /*
  * Given a desired number of entries for a bucket, return the zone from which
  * to allocate the bucket.
  */
 static struct uma_bucket_zone *
 bucket_zone_lookup(int entries)
 {
 	struct uma_bucket_zone *ubz;
 
 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
 		if (ubz->ubz_entries >= entries)
 			return (ubz);
 	ubz--;
 	return (ubz);
 }
 
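 /*
  * Given an item size, select the default number of items to cache per
  * bucket for a zone of that size.
  */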
 static int
 bucket_select(int size)
 {
 	struct uma_bucket_zone *ubz;
 
 	ubz = &bucket_zones[0];
 	if (size > ubz->ubz_maxsize)
 		return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
 
 	for (; ubz->ubz_entries != 0; ubz++)
 		if (ubz->ubz_maxsize < size)
 			break;
 	ubz--;
 	return (ubz->ubz_entries);
 }
 
 static uma_bucket_t
 bucket_alloc(uma_zone_t zone, void *udata, int flags)
 {
 	struct uma_bucket_zone *ubz;
 	uma_bucket_t bucket;
 
 	/*
 	 * Don't allocate per-CPU buckets while we're running out of
 	 * vm.boot_pages; otherwise we would exhaust the boot pages.  This
 	 * also prevents us from allocating buckets in low memory situations.
 	 */
 	if (bucketdisable)
 		return (NULL);
 	/*
 	 * To limit bucket recursion we store the original zone flags
 	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
 	 * NOVM flag to persist even through deep recursions.  We also
 	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
 	 * a bucket for a bucket zone so we do not allow infinite bucket
 	 * recursion.  This cookie will even persist to frees of unused
 	 * buckets via the allocation path or bucket allocations in the
 	 * free path.
 	 */
 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
 		udata = (void *)(uintptr_t)zone->uz_flags;
 	else {
 		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
 			return (NULL);
 		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
 	}
 	if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
 		flags |= M_NOVM;
 	ubz = bucket_zone_lookup(zone->uz_count);
 	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
 		ubz++;
 	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
 	if (bucket) {
 #ifdef INVARIANTS
 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
 #endif
 		bucket->ub_cnt = 0;
 		bucket->ub_entries = ubz->ubz_entries;
 	}
 
 	return (bucket);
 }
 
 static void
 bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
 {
 	struct uma_bucket_zone *ubz;
 
 	KASSERT(bucket->ub_cnt == 0,
 	    ("bucket_free: Freeing a non free bucket."));
 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
 		udata = (void *)(uintptr_t)zone->uz_flags;
 	ubz = bucket_zone_lookup(bucket->ub_entries);
 	uma_zfree_arg(ubz->ubz_zone, bucket, udata);
 }
 
 static void
 bucket_zone_drain(void)
 {
 	struct uma_bucket_zone *ubz;
 
 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
 		zone_drain(ubz->ubz_zone);
 }
 
 static void
 zone_log_warning(uma_zone_t zone)
 {
 	static const struct timeval warninterval = { 300, 0 };
 
 	if (!zone_warnings || zone->uz_warning == NULL)
 		return;
 
 	if (ratecheck(&zone->uz_ratecheck, &warninterval))
 		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
 }
 
 static inline void
 zone_maxaction(uma_zone_t zone)
 {
 
 	if (zone->uz_maxaction.ta_func != NULL)
 		taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
 }
 
 static void
 zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
 {
 	uma_klink_t klink;
 
 	LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
 		kegfn(klink->kl_keg);
 }
 
 /*
  * Routine called by timeout which is used to fire off some time interval
  * based calculations.  (stats, hash size, etc.)
  *
  * Arguments:
  *	arg   Unused
  *
  * Returns:
  *	Nothing
  */
 static void
 uma_timeout(void *unused)
 {
 	bucket_enable();
 	zone_foreach(zone_timeout);
 
 	/* Reschedule this event */
 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 }
 
 /*
  * Routine to perform timeout driven calculations.  This expands the
  * keg hash tables as needed.
  *
  *  Returns nothing.
  */
 static void
 keg_timeout(uma_keg_t keg)
 {
 
 	KEG_LOCK(keg);
 	/*
 	 * Expand the keg hash table.
 	 *
 	 * This is done if the number of slabs is larger than the hash size.
 	 * What we are trying to do here is to eliminate collisions entirely.
 	 * This may be a little aggressive.  Should two collisions at most be
 	 * allowed instead?
 	 */
 	if (keg->uk_flags & UMA_ZONE_HASH &&
 	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
 		struct uma_hash newhash;
 		struct uma_hash oldhash;
 		int ret;
 
 		/*
 		 * This is so involved because allocating and freeing
 		 * while the keg lock is held will lead to deadlock.
 		 * I have to do everything in stages and check for
 		 * races.
 		 */
 		newhash = keg->uk_hash;
 		KEG_UNLOCK(keg);
 		ret = hash_alloc(&newhash);
 		KEG_LOCK(keg);
 		if (ret) {
 			if (hash_expand(&keg->uk_hash, &newhash)) {
 				oldhash = keg->uk_hash;
 				keg->uk_hash = newhash;
 			} else
 				oldhash = newhash;
 
 			KEG_UNLOCK(keg);
 			hash_free(&oldhash);
 			return;
 		}
 	}
 	KEG_UNLOCK(keg);
 }
 
 static void
 zone_timeout(uma_zone_t zone)
 {
 
 	zone_foreach_keg(zone, &keg_timeout);
 }
 
 /*
  * Allocate and zero fill the next sized hash table from the appropriate
  * backing store.
  *
  * Arguments:
  *	hash  A new hash structure with the old hash size in uh_hashsize
  *
  * Returns:
  *	1 on success and 0 on failure.
  */
 static int
 hash_alloc(struct uma_hash *hash)
 {
 	int oldsize;
 	int alloc;
 
 	oldsize = hash->uh_hashsize;
 
 	/* We just grow to the next greater power of two. */
 	if (oldsize)  {
 		hash->uh_hashsize = oldsize * 2;
 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
 		    M_UMAHASH, M_NOWAIT);
 	} else {
 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
 		    UMA_ANYDOMAIN, M_WAITOK);
 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
 	}
 	if (hash->uh_slab_hash) {
 		bzero(hash->uh_slab_hash, alloc);
 		hash->uh_hashmask = hash->uh_hashsize - 1;
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Expands the hash table for HASH zones.  This is done from zone_timeout
  * to reduce collisions.  This must not be done in the regular allocation
  * path, otherwise, we can recurse on the vm while allocating pages.
  *
  * Arguments:
  *	oldhash  The hash you want to expand
  *	newhash  The hash structure for the new table
  *
  * Returns:
  *	1 if the hash was successfully expanded, 0 otherwise.
  */
 static int
 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
 {
 	uma_slab_t slab;
 	int hval;
 	int i;
 
 	if (!newhash->uh_slab_hash)
 		return (0);
 
 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
 		return (0);
 
 	/*
 	 * I need to investigate hash algorithms for resizing without a
 	 * full rehash.
 	 */
 
 	for (i = 0; i < oldhash->uh_hashsize; i++)
 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
 			hval = UMA_HASH(newhash, slab->us_data);
 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
 			    slab, us_hlink);
 		}
 
 	return (1);
 }
 
 /*
  * Free the hash bucket to the appropriate backing store.
  *
  * Arguments:
  *	hash  The hash table whose backing storage is being freed
  *
  * Returns:
  *	Nothing
  */
 static void
 hash_free(struct uma_hash *hash)
 {
 	if (hash->uh_slab_hash == NULL)
 		return;
 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
 		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
 	else
 		free(hash->uh_slab_hash, M_UMAHASH);
 }
 
 /*
  * Frees all outstanding items in a bucket
  *
  * Arguments:
  *	zone   The zone to free to, must be unlocked.
  *	bucket The free/alloc bucket with items, cpu queue must be locked.
  *
  * Returns:
  *	Nothing
  */
 
 static void
 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
 {
 	int i;
 
 	if (bucket == NULL)
 		return;
 
 	if (zone->uz_fini)
 		for (i = 0; i < bucket->ub_cnt; i++) 
 			zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
 	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
 	bucket->ub_cnt = 0;
 }
 
 /*
  * Drains the per cpu caches for a zone.
  *
  * NOTE: This may only be called while the zone is being torn down, and not
  * during normal operation.  This is necessary so that we do not have to
  * migrate CPUs in order to drain the per-CPU caches.
  *
  * Arguments:
  *	zone     The zone to drain, must be unlocked.
  *
  * Returns:
  *	Nothing
  */
 static void
 cache_drain(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	int cpu;
 
 	/*
 	 * XXX: It is safe to not lock the per-CPU caches, because we're
 	 * tearing down the zone anyway.  I.e., there will be no further use
 	 * of the caches at this point.
 	 *
 	 * XXX: It would be good to be able to assert that the zone is being
 	 * torn down to prevent improper use of cache_drain().
 	 *
 	 * XXX: We lock the zone before passing into bucket_cache_drain() as
 	 * it is used elsewhere.  Should the tear-down path be made special
 	 * there in some form?
 	 */
 	CPU_FOREACH(cpu) {
 		cache = &zone->uz_cpu[cpu];
 		bucket_drain(zone, cache->uc_allocbucket);
 		bucket_drain(zone, cache->uc_freebucket);
 		if (cache->uc_allocbucket != NULL)
 			bucket_free(zone, cache->uc_allocbucket, NULL);
 		if (cache->uc_freebucket != NULL)
 			bucket_free(zone, cache->uc_freebucket, NULL);
 		cache->uc_allocbucket = cache->uc_freebucket = NULL;
 	}
 	ZONE_LOCK(zone);
 	bucket_cache_drain(zone);
 	ZONE_UNLOCK(zone);
 }
 
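 /*
  * Reduce the zone's target bucket size halfway toward its minimum.
  */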
 static void
 cache_shrink(uma_zone_t zone)
 {
 
 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
 		return;
 
 	ZONE_LOCK(zone);
 	zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
 	ZONE_UNLOCK(zone);
 }
 
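 /*
  * Drain the current CPU's cache of the given zone: non-empty buckets are
  * moved to the zone's per-domain bucket list, empty ones are freed.  The
  * caller is expected to have bound the thread to this CPU.
  */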
 static void
 cache_drain_safe_cpu(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	uma_bucket_t b1, b2;
 	int domain;
 
 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
 		return;
 
 	b1 = b2 = NULL;
 	ZONE_LOCK(zone);
 	critical_enter();
 	if (zone->uz_flags & UMA_ZONE_NUMA)
 		domain = PCPU_GET(domain);
 	else
 		domain = 0;
 	cache = &zone->uz_cpu[curcpu];
 	if (cache->uc_allocbucket) {
 		if (cache->uc_allocbucket->ub_cnt != 0)
 			LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
 			    cache->uc_allocbucket, ub_link);
 		else
 			b1 = cache->uc_allocbucket;
 		cache->uc_allocbucket = NULL;
 	}
 	if (cache->uc_freebucket) {
 		if (cache->uc_freebucket->ub_cnt != 0)
 			LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
 			    cache->uc_freebucket, ub_link);
 		else
 			b2 = cache->uc_freebucket;
 		cache->uc_freebucket = NULL;
 	}
 	critical_exit();
 	ZONE_UNLOCK(zone);
 	if (b1)
 		bucket_free(zone, b1, NULL);
 	if (b2)
 		bucket_free(zone, b2, NULL);
 }
 
 /*
  * Safely drain the per-CPU caches of a zone (or of all zones) into the
  * zone bucket lists.  This is an expensive call because it needs to bind
  * to each CPU one by one and enter a critical section on it in order to
  * safely access its cache buckets.
  * The zone lock must not be held when calling this function.
  */
 static void
 cache_drain_safe(uma_zone_t zone)
 {
 	int cpu;
 
 	/*
 	 * Polite bucket size shrinking was not enough; shrink aggressively.
 	 */
 	if (zone)
 		cache_shrink(zone);
 	else
 		zone_foreach(cache_shrink);
 
 	CPU_FOREACH(cpu) {
 		thread_lock(curthread);
 		sched_bind(curthread, cpu);
 		thread_unlock(curthread);
 
 		if (zone)
 			cache_drain_safe_cpu(zone);
 		else
 			zone_foreach(cache_drain_safe_cpu);
 	}
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
 }
 
 /*
  * Drain the cached buckets from a zone.  Expects a locked zone on entry.
  */
 static void
 bucket_cache_drain(uma_zone_t zone)
 {
 	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
 	int i;
 
 	/*
 	 * Drain the bucket queues and free the buckets.
 	 */
 	for (i = 0; i < vm_ndomains; i++) {
 		zdom = &zone->uz_domain[i];
 		while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
 			LIST_REMOVE(bucket, ub_link);
 			ZONE_UNLOCK(zone);
 			bucket_drain(zone, bucket);
 			bucket_free(zone, bucket, NULL);
 			ZONE_LOCK(zone);
 		}
 	}
 
 	/*
 	 * Shrink further bucket sizes.  The price of a single zone lock
 	 * collision is probably lower than the price of a global cache drain.
 	 */
 	if (zone->uz_count > zone->uz_count_min)
 		zone->uz_count--;
 }
 
 static void
 keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
 {
 	uint8_t *mem;
 	int i;
 	uint8_t flags;
 
 	CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes",
 	    keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera);
 
 	mem = slab->us_data;
 	flags = slab->us_flags;
 	i = start;
 	if (keg->uk_fini != NULL) {
 		for (i--; i > -1; i--)
 #ifdef INVARIANTS
 		/*
 		 * trash_fini implies that the dtor was trash_dtor:
 		 * trash_fini checks that the memory has not been modified
 		 * since the free, which executed trash_dtor.  That is why
 		 * we need to run the uma_dbg_kskip() check here, even
 		 * though we make no skip check for other init/fini
 		 * invocations.
 		 */
 		if (!uma_dbg_kskip(keg, slab->us_data + (keg->uk_rsize * i)) ||
 		    keg->uk_fini != trash_fini)
 #endif
 			keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
 			    keg->uk_size);
 	}
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
 	uma_total_dec(PAGE_SIZE * keg->uk_ppera);
 }
 
 /*
  * Frees pages from a keg back to the system.  This is done on demand from
  * the pageout daemon.
  *
  * Returns nothing.
  */
 static void
 keg_drain(uma_keg_t keg)
 {
 	struct slabhead freeslabs = { 0 };
 	uma_domain_t dom;
 	uma_slab_t slab, tmp;
 	int i;
 
 	/*
 	 * We don't want to take pages from statically allocated kegs at this
 	 * time.
 	 */
 	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
 		return;
 
 	CTR3(KTR_UMA, "keg_drain %s(%p) free items: %u",
 	    keg->uk_name, keg, keg->uk_free);
 	KEG_LOCK(keg);
 	if (keg->uk_free == 0)
 		goto finished;
 
 	for (i = 0; i < vm_ndomains; i++) {
 		dom = &keg->uk_domain[i];
 		LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
 			/* We have nowhere to free these to. */
 			if (slab->us_flags & UMA_SLAB_BOOT)
 				continue;
 
 			LIST_REMOVE(slab, us_link);
 			keg->uk_pages -= keg->uk_ppera;
 			keg->uk_free -= keg->uk_ipers;
 
 			if (keg->uk_flags & UMA_ZONE_HASH)
 				UMA_HASH_REMOVE(&keg->uk_hash, slab,
 				    slab->us_data);
 
 			SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
 		}
 	}
 
 finished:
 	KEG_UNLOCK(keg);
 
 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
 		keg_free_slab(keg, slab, keg->uk_ipers);
 	}
 }
 
 static void
 zone_drain_wait(uma_zone_t zone, int waitok)
 {
 
 	/*
 	 * Set draining to interlock with zone_dtor() so we can release our
 	 * locks as we go.  Only dtor() should do a WAITOK call since it
 	 * is the only call that knows the structure will still be available
 	 * when it wakes up.
 	 */
 	ZONE_LOCK(zone);
 	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
 		if (waitok == M_NOWAIT)
 			goto out;
 		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
 	}
 	zone->uz_flags |= UMA_ZFLAG_DRAINING;
 	bucket_cache_drain(zone);
 	ZONE_UNLOCK(zone);
 	/*
 	 * The DRAINING flag protects us from being freed while
 	 * we're running.  Normally the uma_rwlock would protect us but we
 	 * must be able to release and acquire the right lock for each keg.
 	 */
 	zone_foreach_keg(zone, &keg_drain);
 	ZONE_LOCK(zone);
 	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
 	wakeup(zone);
 out:
 	ZONE_UNLOCK(zone);
 }
 
 void
 zone_drain(uma_zone_t zone)
 {
 
 	zone_drain_wait(zone, M_NOWAIT);
 }
 
 /*
  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
  *
  * Arguments:
  *	wait  Shall we wait?
  *
  * Returns:
  *	The slab that was allocated or NULL if there is no memory and the
  *	caller specified M_NOWAIT.
  */
 static uma_slab_t
 keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
 {
 	uma_alloc allocf;
 	uma_slab_t slab;
 	unsigned long size;
 	uint8_t *mem;
 	uint8_t flags;
 	int i;
 
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("keg_alloc_slab: domain %d out of range", domain));
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	slab = NULL;
 	mem = NULL;
 
 	allocf = keg->uk_allocf;
 	KEG_UNLOCK(keg);
 	size = keg->uk_ppera * PAGE_SIZE;
 
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, wait);
 		if (slab == NULL)
 			goto out;
 	}
 
 	/*
 	 * This reproduces the old vm_zone behavior of zero filling pages the
 	 * first time they are added to a zone.
 	 *
 	 * Malloced items are zeroed in uma_zalloc.
 	 */
 
 	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
 		wait |= M_ZERO;
 	else
 		wait &= ~M_ZERO;
 
 	if (keg->uk_flags & UMA_ZONE_NODUMP)
 		wait |= M_NODUMP;
 
 	/* zone is passed for legacy reasons. */
 	mem = allocf(zone, size, domain, &flags, wait);
 	if (mem == NULL) {
 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 		slab = NULL;
 		goto out;
 	}
 	uma_total_inc(size);
 
 	/* Point the slab into the allocated memory */
 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
 		slab = (uma_slab_t )(mem + keg->uk_pgoff);
 
 	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
 		for (i = 0; i < keg->uk_ppera; i++)
 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
 
 	slab->us_keg = keg;
 	slab->us_data = mem;
 	slab->us_freecount = keg->uk_ipers;
 	slab->us_flags = flags;
 	slab->us_domain = domain;
 	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
 #ifdef INVARIANTS
 	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
 #endif
 
 	if (keg->uk_init != NULL) {
 		for (i = 0; i < keg->uk_ipers; i++)
 			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
 			    keg->uk_size, wait) != 0)
 				break;
 		if (i != keg->uk_ipers) {
 			keg_free_slab(keg, slab, i);
 			slab = NULL;
 			goto out;
 		}
 	}
 out:
 	KEG_LOCK(keg);
 
 	CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
 	    slab, keg->uk_name, keg);
 
 	if (slab != NULL) {
 		if (keg->uk_flags & UMA_ZONE_HASH)
 			UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
 
 		keg->uk_pages += keg->uk_ppera;
 		keg->uk_free += keg->uk_ipers;
 	}
 
 	return (slab);
 }
 
 /*
  * This function is intended to be used early on in place of page_alloc() so
  * that we may use the boot time page cache to satisfy allocations before
  * the VM is ready.
  */
 static void *
 startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
     int wait)
 {
 	uma_keg_t keg;
 	void *mem;
 	int pages;
 
 	keg = zone_first_keg(zone);
 
 	/*
 	 * If we are in BOOT_BUCKETS or higher, then switch to the real
 	 * allocator.  Zones with page sized slabs switch at BOOT_PAGEALLOC.
 	 */
 	switch (booted) {
 		case BOOT_COLD:
 		case BOOT_STRAPPED:
 			break;
 		case BOOT_PAGEALLOC:
 			if (keg->uk_ppera > 1)
 				break;
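 			/* FALLTHROUGH */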
 		case BOOT_BUCKETS:
 		case BOOT_RUNNING:
 #ifdef UMA_MD_SMALL_ALLOC
 			keg->uk_allocf = (keg->uk_ppera > 1) ?
 			    page_alloc : uma_small_alloc;
 #else
 			keg->uk_allocf = page_alloc;
 #endif
 			return (keg->uk_allocf(zone, bytes, domain, pflag,
 			    wait));
 	}
 
 	/*
 	 * Check our small startup cache to see if it has pages remaining.
 	 */
 	pages = howmany(bytes, PAGE_SIZE);
 	KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
 	if (pages > boot_pages)
 		panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
 #ifdef DIAGNOSTIC
 	printf("%s from \"%s\", %d boot pages left\n", __func__, zone->uz_name,
 	    boot_pages);
 #endif
 	mem = bootmem;
 	boot_pages -= pages;
 	bootmem += pages * PAGE_SIZE;
 	*pflag = UMA_SLAB_BOOT;
 
 	return (mem);
 }
 
 /*
  * Allocates a number of pages from the system
  *
  * Arguments:
  *	bytes  The number of bytes requested
  *	wait  Shall we wait?
  *
  * Returns:
  *	A pointer to the allocated memory or possibly
  *	NULL if M_NOWAIT is set.
  */
 static void *
 page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
     int wait)
 {
 	void *p;	/* Returned page */
 
 	*pflag = UMA_SLAB_KERNEL;
 	p = (void *) kmem_malloc_domain(domain, bytes, wait);
 
 	return (p);
 }
 
 static void *
 pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
     int wait)
 {
 	struct pglist alloctail;
 	vm_offset_t addr, zkva;
 	int cpu, flags;
 	vm_page_t p, p_next;
 #ifdef NUMA
 	struct pcpu *pc;
 #endif
 
 	MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE);
 
 	TAILQ_INIT(&alloctail);
 	flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
 	    malloc2vm_flags(wait);
 	*pflag = UMA_SLAB_KERNEL;
 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
 		if (CPU_ABSENT(cpu)) {
 			p = vm_page_alloc(NULL, 0, flags);
 		} else {
 #ifndef NUMA
 			p = vm_page_alloc(NULL, 0, flags);
 #else
 			pc = pcpu_find(cpu);
 			p = vm_page_alloc_domain(NULL, 0, pc->pc_domain, flags);
 			if (__predict_false(p == NULL))
 				p = vm_page_alloc(NULL, 0, flags);
 #endif
 		}
 		if (__predict_false(p == NULL))
 			goto fail;
 		TAILQ_INSERT_TAIL(&alloctail, p, listq);
 	}
 	if ((addr = kva_alloc(bytes)) == 0)
 		goto fail;
 	zkva = addr;
 	TAILQ_FOREACH(p, &alloctail, listq) {
 		pmap_qenter(zkva, &p, 1);
 		zkva += PAGE_SIZE;
 	}
 	return ((void*)addr);
  fail:
 	TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
 		vm_page_unwire(p, PQ_NONE);
 		vm_page_free(p);
 	}
 	return (NULL);
 }
 
 /*
  * Allocates a number of pages not belonging to a VM object
  *
  * Arguments:
  *	bytes  The number of bytes requested
  *	wait   Shall we wait?
  *
  * Returns:
  *	A pointer to the allocated memory or possibly
  *	NULL if M_NOWAIT is set.
  */
 static void *
 noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
     int wait)
 {
 	TAILQ_HEAD(, vm_page) alloctail;
 	u_long npages;
 	vm_offset_t retkva, zkva;
 	vm_page_t p, p_next;
 	uma_keg_t keg;
 
 	TAILQ_INIT(&alloctail);
 	keg = zone_first_keg(zone);
 
 	npages = howmany(bytes, PAGE_SIZE);
 	while (npages > 0) {
 		p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT |
 		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
 		    ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
 		    VM_ALLOC_NOWAIT));
 		if (p != NULL) {
 			/*
 			 * Since the page does not belong to an object, its
 			 * listq is unused.
 			 */
 			TAILQ_INSERT_TAIL(&alloctail, p, listq);
 			npages--;
 			continue;
 		}
 		/*
 		 * Page allocation failed, free intermediate pages and
 		 * exit.
 		 */
 		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
 			vm_page_unwire(p, PQ_NONE);
 			vm_page_free(p); 
 		}
 		return (NULL);
 	}
 	*flags = UMA_SLAB_PRIV;
 	zkva = keg->uk_kva +
 	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
 	retkva = zkva;
 	TAILQ_FOREACH(p, &alloctail, listq) {
 		pmap_qenter(zkva, &p, 1);
 		zkva += PAGE_SIZE;
 	}
 
 	return ((void *)retkva);
 }
 
 /*
  * Frees a number of pages to the system
  *
  * Arguments:
  *	mem   A pointer to the memory to be freed
  *	size  The size of the memory being freed
  *	flags The original p->us_flags field
  *
  * Returns:
  *	Nothing
  */
 static void
 page_free(void *mem, vm_size_t size, uint8_t flags)
 {
-	struct vmem *vmem;
 
-	if (flags & UMA_SLAB_KERNEL)
-		vmem = kernel_arena;
-	else
+	if ((flags & UMA_SLAB_KERNEL) == 0)
 		panic("UMA: page_free used with invalid flags %x", flags);
 
-	kmem_free(vmem, (vm_offset_t)mem, size);
+	kmem_free((vm_offset_t)mem, size);
 }
 
 /*
  * Frees pcpu zone allocations
  *
  * Arguments:
  *	mem   A pointer to the memory to be freed
  *	size  The size of the memory being freed
  *	flags The original p->us_flags field
  *
  * Returns:
  *	Nothing
  */
 static void
 pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
 {
 	vm_offset_t sva, curva;
 	vm_paddr_t paddr;
 	vm_page_t m;
 
 	MPASS(size == (mp_maxid + 1) * PAGE_SIZE);
 	sva = (vm_offset_t)mem;
 	for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
 		paddr = pmap_kextract(curva);
 		m = PHYS_TO_VM_PAGE(paddr);
 		vm_page_unwire(m, PQ_NONE);
 		vm_page_free(m);
 	}
 	pmap_qremove(sva, size >> PAGE_SHIFT);
 	kva_free(sva, size);
 }
 
 
 /*
  * Zero fill initializer
  *
  * Arguments/Returns follow uma_init specifications
  */
 static int
 zero_init(void *mem, int size, int flags)
 {
 	bzero(mem, size);
 	return (0);
 }
 
 /*
  * Finish creating a small uma keg.  This calculates ipers and the keg size.
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_small_init(uma_keg_t keg)
 {
 	u_int rsize;
 	u_int memused;
 	u_int wastedspace;
 	u_int shsize;
 	u_int slabsize;
 
 	if (keg->uk_flags & UMA_ZONE_PCPU) {
 		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
 
 		slabsize = UMA_PCPU_ALLOC_SIZE;
 		keg->uk_ppera = ncpus;
 	} else {
 		slabsize = UMA_SLAB_SIZE;
 		keg->uk_ppera = 1;
 	}
 
 	/*
 	 * Calculate the size of each allocation (rsize) according to
 	 * alignment.  If the requested size is smaller than the smallest
 	 * unit we can track in the allocation bitmap, round it up.
 	 */
 	rsize = keg->uk_size;
 	if (rsize < slabsize / SLAB_SETSIZE)
 		rsize = slabsize / SLAB_SETSIZE;
 	if (rsize & keg->uk_align)
 		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
 	keg->uk_rsize = rsize;
 
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
 	    keg->uk_rsize < UMA_PCPU_ALLOC_SIZE,
 	    ("%s: size %u too large", __func__, keg->uk_rsize));
 
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		shsize = 0;
 	else 
 		shsize = sizeof(struct uma_slab);
 
 	if (rsize <= slabsize - shsize)
 		keg->uk_ipers = (slabsize - shsize) / rsize;
 	else {
 		/* Handle special case when we have 1 item per slab, so
 		 * alignment requirement can be relaxed. */
 		KASSERT(keg->uk_size <= slabsize - shsize,
 		    ("%s: size %u greater than slab", __func__, keg->uk_size));
 		keg->uk_ipers = 1;
 	}
 	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
 	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 
 	memused = keg->uk_ipers * rsize + shsize;
 	wastedspace = slabsize - memused;
 
 	/*
 	 * We can't do OFFPAGE if we're internal or if we've been
 	 * asked not to go to the VM for buckets.  If we do this we
 	 * may end up going to the VM for slabs, which we do not want
 	 * to do if we're UMA_ZFLAG_CACHEONLY as a result of
 	 * UMA_ZONE_VM, which clearly forbids it.
 	 */
 	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
 	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
 		return;
 
 	/*
 	 * See if using an OFFPAGE slab will limit our waste.  Only do
 	 * this if it permits more items per-slab.
 	 *
 	 * XXX We could try growing slabsize to limit max waste as well.
 	 * Historically this was not done because the VM could not
 	 * efficiently handle contiguous allocations.
 	 */
 	if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
 	    (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
 		keg->uk_ipers = slabsize / keg->uk_rsize;
 		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
 		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 		CTR6(KTR_UMA, "UMA decided we need offpage slab headers for "
 		    "keg: %s(%p), calculated wastedspace = %d, "
 		    "maximum wasted space allowed = %d, "
 		    "calculated ipers = %d, "
 		    "new wasted space = %d\n", keg->uk_name, keg, wastedspace,
 		    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
 		    slabsize - keg->uk_ipers * keg->uk_rsize);
 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
 	}
 
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
 		keg->uk_flags |= UMA_ZONE_HASH;
 }
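 /*
  * A worked example with purely hypothetical numbers (they are not taken
  * from this file): with slabsize = 4096, an in-slab header of shsize = 128
  * and a requested size of 250 with pointer (8 byte) alignment, rsize is
  * rounded up to 256, ipers = (4096 - 128) / 256 = 15, memused = 15 * 256 +
  * 128 = 3968 and wastedspace = 128.  OFFPAGE is only chosen when the waste
  * exceeds slabsize / UMA_MAX_WASTE and an offpage layout fits more items
  * (4096 / 256 = 16 > 15), so with modest waste the keg stays on-page.
  */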
 
 /*
  * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
  * more complicated.
  *
  * Arguments
  *	keg  The keg we should initialize
  *
  * Returns
  *	Nothing
  */
 static void
 keg_large_init(uma_keg_t keg)
 {
 	u_int shsize;
 
 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
 	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
 	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
 
 	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
 	keg->uk_ipers = 1;
 	keg->uk_rsize = keg->uk_size;
 
 	/* Check whether we have enough space to not do OFFPAGE. */
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
 		shsize = sizeof(struct uma_slab);
 		if (shsize & UMA_ALIGN_PTR)
 			shsize = (shsize & ~UMA_ALIGN_PTR) +
 			    (UMA_ALIGN_PTR + 1);
 
 		if (PAGE_SIZE * keg->uk_ppera - keg->uk_rsize < shsize) {
 			/*
 			 * We can't do OFFPAGE if we're internal, in which case
 			 * we need an extra page per allocation to contain the
 			 * slab header.
 			 */
 			if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0)
 				keg->uk_flags |= UMA_ZONE_OFFPAGE;
 			else
 				keg->uk_ppera++;
 		}
 	}
 
 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
 		keg->uk_flags |= UMA_ZONE_HASH;
 }
 
 static void
 keg_cachespread_init(uma_keg_t keg)
 {
 	int alignsize;
 	int trailer;
 	int pages;
 	int rsize;
 
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
 
 	alignsize = keg->uk_align + 1;
 	rsize = keg->uk_size;
 	/*
 	 * We want one item to start on every align boundary in a page.  To
 	 * do this we will span pages.  We will also extend the item by the
 	 * size of align if it is an even multiple of align.  Otherwise, it
 	 * would fall on the same boundary every time.
 	 */
 	if (rsize & keg->uk_align)
 		rsize = (rsize & ~keg->uk_align) + alignsize;
 	if ((rsize & alignsize) == 0)
 		rsize += alignsize;
 	trailer = rsize - keg->uk_size;
 	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
 	keg->uk_rsize = rsize;
 	keg->uk_ppera = pages;
 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
 	KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
 	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
 	    keg->uk_ipers));
 }
 
 /*
  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
  * the keg onto the global keg list.
  *
  * Arguments/Returns follow uma_ctor specifications
  *	udata  Actually uma_kctor_args
  */
 static int
 keg_ctor(void *mem, int size, void *udata, int flags)
 {
 	struct uma_kctor_args *arg = udata;
 	uma_keg_t keg = mem;
 	uma_zone_t zone;
 
 	bzero(keg, size);
 	keg->uk_size = arg->size;
 	keg->uk_init = arg->uminit;
 	keg->uk_fini = arg->fini;
 	keg->uk_align = arg->align;
 	keg->uk_cursor = 0;
 	keg->uk_free = 0;
 	keg->uk_reserve = 0;
 	keg->uk_pages = 0;
 	keg->uk_flags = arg->flags;
 	keg->uk_slabzone = NULL;
 
 	/*
 	 * The master zone is passed to us at keg-creation time.
 	 */
 	zone = arg->zone;
 	keg->uk_name = zone->uz_name;
 
 	if (arg->flags & UMA_ZONE_VM)
 		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
 
 	if (arg->flags & UMA_ZONE_ZINIT)
 		keg->uk_init = zero_init;
 
 	if (arg->flags & UMA_ZONE_MALLOC)
 		keg->uk_flags |= UMA_ZONE_VTOSLAB;
 
 	if (arg->flags & UMA_ZONE_PCPU)
 #ifdef SMP
 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
 #else
 		keg->uk_flags &= ~UMA_ZONE_PCPU;
 #endif
 
 	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
 		keg_cachespread_init(keg);
 	} else {
 		if (keg->uk_size > UMA_SLAB_SPACE)
 			keg_large_init(keg);
 		else
 			keg_small_init(keg);
 	}
 
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		keg->uk_slabzone = slabzone;
 
 	/*
 	 * If we haven't booted yet we need allocations to go through the
 	 * startup cache until the vm is ready.
 	 */
 	if (booted < BOOT_PAGEALLOC)
 		keg->uk_allocf = startup_alloc;
 #ifdef UMA_MD_SMALL_ALLOC
 	else if (keg->uk_ppera == 1)
 		keg->uk_allocf = uma_small_alloc;
 #endif
 	else if (keg->uk_flags & UMA_ZONE_PCPU)
 		keg->uk_allocf = pcpu_page_alloc;
 	else
 		keg->uk_allocf = page_alloc;
 #ifdef UMA_MD_SMALL_ALLOC
 	if (keg->uk_ppera == 1)
 		keg->uk_freef = uma_small_free;
 	else
 #endif
 	if (keg->uk_flags & UMA_ZONE_PCPU)
 		keg->uk_freef = pcpu_page_free;
 	else
 		keg->uk_freef = page_free;
 
 	/*
 	 * Initialize keg's lock
 	 */
 	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
 
 	/*
 	 * If we're putting the slab header in the actual page we need to
 	 * figure out where in each page it goes.  This calculates a right
 	 * justified offset into the memory on an ALIGN_PTR boundary.
 	 */
 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
 		u_int totsize;
 
 		/* Size of the slab struct and free list */
 		totsize = sizeof(struct uma_slab);
 
 		if (totsize & UMA_ALIGN_PTR)
 			totsize = (totsize & ~UMA_ALIGN_PTR) +
 			    (UMA_ALIGN_PTR + 1);
 		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
 
 		/*
 		 * The only way the following overflow is possible is if with
 		 * our UMA_ALIGN_PTR adjustments we are now bigger than
 		 * UMA_SLAB_SIZE.  It has not been verified that this is
 		 * mathematically impossible in all cases, so we check here
 		 * anyway.
 		 */
 		totsize = keg->uk_pgoff + sizeof(struct uma_slab);
 		if (totsize > PAGE_SIZE * keg->uk_ppera) {
 			printf("zone %s ipers %d rsize %d size %d\n",
 			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
 			    keg->uk_size);
 			panic("UMA slab won't fit.");
 		}
 	}
 
 	if (keg->uk_flags & UMA_ZONE_HASH)
 		hash_alloc(&keg->uk_hash);
 
 	CTR5(KTR_UMA, "keg_ctor %p zone %s(%p) out %d free %d\n",
 	    keg, zone->uz_name, zone,
 	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
 	    keg->uk_free);
 
 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
 
 	rw_wlock(&uma_rwlock);
 	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
 	rw_wunlock(&uma_rwlock);
 	return (0);
 }
 
 /*
  * Zone header ctor.  This initializes all fields, locks, etc.
  *
  * Arguments/Returns follow uma_ctor specifications
  *	udata  Actually uma_zctor_args
  */
 static int
 zone_ctor(void *mem, int size, void *udata, int flags)
 {
 	struct uma_zctor_args *arg = udata;
 	uma_zone_t zone = mem;
 	uma_zone_t z;
 	uma_keg_t keg;
 
 	bzero(zone, size);
 	zone->uz_name = arg->name;
 	zone->uz_ctor = arg->ctor;
 	zone->uz_dtor = arg->dtor;
 	zone->uz_slab = zone_fetch_slab;
 	zone->uz_init = NULL;
 	zone->uz_fini = NULL;
 	zone->uz_allocs = 0;
 	zone->uz_frees = 0;
 	zone->uz_fails = 0;
 	zone->uz_sleeps = 0;
 	zone->uz_count = 0;
 	zone->uz_count_min = 0;
 	zone->uz_flags = 0;
 	zone->uz_warning = NULL;
 	/* The domain structures follow the cpu structures. */
 	zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
 	timevalclear(&zone->uz_ratecheck);
 	keg = arg->keg;
 
 	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
 
 	/*
 	 * This is a pure cache zone, no kegs.
 	 */
 	if (arg->import) {
 		if (arg->flags & UMA_ZONE_VM)
 			arg->flags |= UMA_ZFLAG_CACHEONLY;
 		zone->uz_flags = arg->flags;
 		zone->uz_size = arg->size;
 		zone->uz_import = arg->import;
 		zone->uz_release = arg->release;
 		zone->uz_arg = arg->arg;
 		zone->uz_lockptr = &zone->uz_lock;
 		rw_wlock(&uma_rwlock);
 		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
 		rw_wunlock(&uma_rwlock);
 		goto out;
 	}
 
 	/*
 	 * Use the regular zone/keg/slab allocator.
 	 */
 	zone->uz_import = (uma_import)zone_import;
 	zone->uz_release = (uma_release)zone_release;
 	zone->uz_arg = zone; 
 
 	if (arg->flags & UMA_ZONE_SECONDARY) {
 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
 		zone->uz_init = arg->uminit;
 		zone->uz_fini = arg->fini;
 		zone->uz_lockptr = &keg->uk_lock;
 		zone->uz_flags |= UMA_ZONE_SECONDARY;
 		rw_wlock(&uma_rwlock);
 		ZONE_LOCK(zone);
 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
 			if (LIST_NEXT(z, uz_link) == NULL) {
 				LIST_INSERT_AFTER(z, zone, uz_link);
 				break;
 			}
 		}
 		ZONE_UNLOCK(zone);
 		rw_wunlock(&uma_rwlock);
 	} else if (keg == NULL) {
 		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
 		    arg->align, arg->flags)) == NULL)
 			return (ENOMEM);
 	} else {
 		struct uma_kctor_args karg;
 		int error;
 
 		/* We should only be here from uma_startup() */
 		karg.size = arg->size;
 		karg.uminit = arg->uminit;
 		karg.fini = arg->fini;
 		karg.align = arg->align;
 		karg.flags = arg->flags;
 		karg.zone = zone;
 		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
 		    flags);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * Link in the first keg.
 	 */
 	zone->uz_klink.kl_keg = keg;
 	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
 	zone->uz_lockptr = &keg->uk_lock;
 	zone->uz_size = keg->uk_size;
 	zone->uz_flags |= (keg->uk_flags &
 	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
 
 	/*
 	 * Some internal zones don't have room allocated for the per cpu
 	 * caches.  If we're internal, bail out here.
 	 */
 	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
 		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
 		return (0);
 	}
 
 out:
 	KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
 	    (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
 	    ("Invalid zone flag combination"));
 	if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0)
 		zone->uz_count = BUCKET_MAX;
 	else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
 		zone->uz_count = 0;
 	else
 		zone->uz_count = bucket_select(zone->uz_size);
 	zone->uz_count_min = zone->uz_count;
 
 	return (0);
 }
 
 /*
  * Keg header dtor.  This frees all data, destroys locks, frees the hash
  * table and removes the keg from the global list.
  *
  * Arguments/Returns follow uma_dtor specifications
  *	udata  unused
  */
 static void
 keg_dtor(void *arg, int size, void *udata)
 {
 	uma_keg_t keg;
 
 	keg = (uma_keg_t)arg;
 	KEG_LOCK(keg);
 	if (keg->uk_free != 0) {
 		printf("Freed UMA keg (%s) was not empty (%d items). "
 		    " Lost %d pages of memory.\n",
 		    keg->uk_name ? keg->uk_name : "",
 		    keg->uk_free, keg->uk_pages);
 	}
 	KEG_UNLOCK(keg);
 
 	hash_free(&keg->uk_hash);
 
 	KEG_LOCK_FINI(keg);
 }
 
 /*
  * Zone header dtor.
  *
  * Arguments/Returns follow uma_dtor specifications
  *	udata  unused
  */
 static void
 zone_dtor(void *arg, int size, void *udata)
 {
 	uma_klink_t klink;
 	uma_zone_t zone;
 	uma_keg_t keg;
 
 	zone = (uma_zone_t)arg;
 	keg = zone_first_keg(zone);
 
 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
 		cache_drain(zone);
 
 	rw_wlock(&uma_rwlock);
 	LIST_REMOVE(zone, uz_link);
 	rw_wunlock(&uma_rwlock);
 	/*
 	 * XXX there are some races here where the zone can be drained but
 	 * the zone lock released and then refilled before we remove it...
 	 * we don't care for now.
 	 */
 	zone_drain_wait(zone, M_WAITOK);
 	/*
 	 * Unlink all of our kegs.
 	 */
 	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
 		klink->kl_keg = NULL;
 		LIST_REMOVE(klink, kl_link);
 		if (klink == &zone->uz_klink)
 			continue;
 		free(klink, M_TEMP);
 	}
 	/*
 	 * We only destroy kegs from non secondary zones.
 	 */
 	if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
 		rw_wlock(&uma_rwlock);
 		LIST_REMOVE(keg, uk_link);
 		rw_wunlock(&uma_rwlock);
 		zone_free_item(kegs, keg, NULL, SKIP_NONE);
 	}
 	ZONE_LOCK_FINI(zone);
 }
 
 /*
  * Traverses every zone in the system and calls a callback
  *
  * Arguments:
  *	zfunc  A pointer to a function which accepts a zone
  *		as an argument.
  *
  * Returns:
  *	Nothing
  */
 static void
 zone_foreach(void (*zfunc)(uma_zone_t))
 {
 	uma_keg_t keg;
 	uma_zone_t zone;
 
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
 			zfunc(zone);
 	}
 	rw_runlock(&uma_rwlock);
 }
 
 /*
  * Count how many pages we need to bootstrap.  The VM supplies its count
  * of early zones in the argument; we add our own zones, which consist of
  * the UMA Slabs, UMA Hash and 9 bucket zones.  The zone of zones and zone
  * of kegs are accounted for separately.
  */
 #define	UMA_BOOT_ZONES	11
 /* Zone of zones and zone of kegs have arbitrary alignment. */
 #define	UMA_BOOT_ALIGN	32
 static int zsize, ksize;
 int
 uma_startup_count(int vm_zones)
 {
 	int zones, pages;
 
 	ksize = sizeof(struct uma_keg) +
 	    (sizeof(struct uma_domain) * vm_ndomains);
 	zsize = sizeof(struct uma_zone) +
 	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
 
 	/*
 	 * Memory for the zone of kegs and its keg,
 	 * and for zone of zones.
 	 */
 	pages = howmany(roundup(zsize, CACHE_LINE_SIZE) * 2 +
 	    roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE);
 
 #ifdef	UMA_MD_SMALL_ALLOC
 	zones = UMA_BOOT_ZONES;
 #else
 	zones = UMA_BOOT_ZONES + vm_zones;
 	vm_zones = 0;
 #endif
 
 	/* Memory for the rest of startup zones, UMA and VM, ... */
 	if (zsize > UMA_SLAB_SPACE)
 		pages += (zones + vm_zones) *
 		    howmany(roundup2(zsize, UMA_BOOT_ALIGN), UMA_SLAB_SIZE);
 	else if (roundup2(zsize, UMA_BOOT_ALIGN) > UMA_SLAB_SPACE)
 		pages += zones;
 	else
 		pages += howmany(zones,
 		    UMA_SLAB_SPACE / roundup2(zsize, UMA_BOOT_ALIGN));
 
 	/* ... and their kegs. Note that zone of zones allocates a keg! */
 	pages += howmany(zones + 1,
 	    UMA_SLAB_SPACE / roundup2(ksize, UMA_BOOT_ALIGN));
 
 	/*
 	 * Most of the startup zones are not going to be offpage, which is
 	 * why we use UMA_SLAB_SPACE instead of UMA_SLAB_SIZE in all
 	 * calculations.  Some of the large bucket zones will be offpage,
 	 * and thus will allocate hashes.  We take the conservative approach
 	 * and assume that all zones may allocate a hash.  This may give us
 	 * some positive inaccuracy, usually an extra single page.
 	 */
 	pages += howmany(zones, UMA_SLAB_SPACE /
 	    (sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT));
 
 	return (pages);
 }
 
 void
 uma_startup(void *mem, int npages)
 {
 	struct uma_zctor_args args;
 	uma_keg_t masterkeg;
 	uintptr_t m;
 
 #ifdef DIAGNOSTIC
 	printf("Entering %s with %d boot pages configured\n", __func__, npages);
 #endif
 
 	rw_init(&uma_rwlock, "UMA lock");
 
 	/* Use bootpages memory for the zone of zones and zone of kegs. */
 	m = (uintptr_t)mem;
 	zones = (uma_zone_t)m;
 	m += roundup(zsize, CACHE_LINE_SIZE);
 	kegs = (uma_zone_t)m;
 	m += roundup(zsize, CACHE_LINE_SIZE);
 	masterkeg = (uma_keg_t)m;
 	m += roundup(ksize, CACHE_LINE_SIZE);
 	m = roundup(m, PAGE_SIZE);
 	npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
 	mem = (void *)m;
 
 	/* "manually" create the initial zone */
 	memset(&args, 0, sizeof(args));
 	args.name = "UMA Kegs";
 	args.size = ksize;
 	args.ctor = keg_ctor;
 	args.dtor = keg_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = masterkeg;
 	args.align = UMA_BOOT_ALIGN - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	zone_ctor(kegs, zsize, &args, M_WAITOK);
 
 	bootmem = mem;
 	boot_pages = npages;
 
 	args.name = "UMA Zones";
 	args.size = zsize;
 	args.ctor = zone_ctor;
 	args.dtor = zone_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = NULL;
 	args.align = UMA_BOOT_ALIGN - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	zone_ctor(zones, zsize, &args, M_WAITOK);
 
 	/* Now make a zone for slab headers */
 	slabzone = uma_zcreate("UMA Slabs",
 				sizeof(struct uma_slab),
 				NULL, NULL, NULL, NULL,
 				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 
 	hashzone = uma_zcreate("UMA Hash",
 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
 	    NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 
 	bucket_init();
 
 	booted = BOOT_STRAPPED;
 }
 
 void
 uma_startup1(void)
 {
 
 #ifdef DIAGNOSTIC
 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
 #endif
 	booted = BOOT_PAGEALLOC;
 }
 
 void
 uma_startup2(void)
 {
 
 #ifdef DIAGNOSTIC
 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
 #endif
 	booted = BOOT_BUCKETS;
 	sx_init(&uma_drain_lock, "umadrain");
 	bucket_enable();
 }
 
 /*
  * Initialize our callout handle and schedule the periodic UMA timeout.
  */
 static void
 uma_startup3(void)
 {
 
 #ifdef INVARIANTS
 	TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
 	uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
 	uma_skip_cnt = counter_u64_alloc(M_WAITOK);
 #endif
 	callout_init(&uma_callout, 1);
 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 	booted = BOOT_RUNNING;
 }
 
 static uma_keg_t
 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
 		int align, uint32_t flags)
 {
 	struct uma_kctor_args args;
 
 	args.size = size;
 	args.uminit = uminit;
 	args.fini = fini;
 	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
 	args.flags = flags;
 	args.zone = zone;
 	return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK));
 }
 
 /* Public functions */
 /* See uma.h */
 void
 uma_set_align(int align)
 {
 
 	if (align != UMA_ALIGN_CACHE)
 		uma_align_cache = align;
 }
 
 /* See uma.h */
 uma_zone_t
 uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
 		uma_init uminit, uma_fini fini, int align, uint32_t flags)
 
 {
 	struct uma_zctor_args args;
 	uma_zone_t res;
 	bool locked;
 
 	KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
 	    align, name));
 
 	/* This stuff is essential for the zone ctor */
 	memset(&args, 0, sizeof(args));
 	args.name = name;
 	args.size = size;
 	args.ctor = ctor;
 	args.dtor = dtor;
 	args.uminit = uminit;
 	args.fini = fini;
 #ifdef  INVARIANTS
 	/*
 	 * If a zone is being created with an empty constructor and
 	 * destructor, pass UMA constructor/destructor which checks for
 	 * memory use after free.
 	 */
 	if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
 	    ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
 		args.ctor = trash_ctor;
 		args.dtor = trash_dtor;
 		args.uminit = trash_init;
 		args.fini = trash_fini;
 	}
 #endif
 	args.align = align;
 	args.flags = flags;
 	args.keg = NULL;
 
 	if (booted < BOOT_BUCKETS) {
 		locked = false;
 	} else {
 		sx_slock(&uma_drain_lock);
 		locked = true;
 	}
 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
 	if (locked)
 		sx_sunlock(&uma_drain_lock);
 	return (res);
 }
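 /*
  * Illustrative usage sketch (not part of this change; "struct foo" and
  * foo_zone are hypothetical names):
  *
  *	static uma_zone_t foo_zone;
  *
  *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
  *	    NULL, NULL, UMA_ALIGN_PTR, 0);
  *	item = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
  *	...
  *	uma_zfree(foo_zone, item);
  *	uma_zdestroy(foo_zone);
  */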
 
 /* See uma.h */
 uma_zone_t
 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_zone_t master)
 {
 	struct uma_zctor_args args;
 	uma_keg_t keg;
 	uma_zone_t res;
 	bool locked;
 
 	keg = zone_first_keg(master);
 	memset(&args, 0, sizeof(args));
 	args.name = name;
 	args.size = keg->uk_size;
 	args.ctor = ctor;
 	args.dtor = dtor;
 	args.uminit = zinit;
 	args.fini = zfini;
 	args.align = keg->uk_align;
 	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
 	args.keg = keg;
 
 	if (booted < BOOT_BUCKETS) {
 		locked = false;
 	} else {
 		sx_slock(&uma_drain_lock);
 		locked = true;
 	}
 	/* XXX Attaches only one keg of potentially many. */
 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
 	if (locked)
 		sx_sunlock(&uma_drain_lock);
 	return (res);
 }
 
 /* See uma.h */
 uma_zone_t
 uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 		    uma_init zinit, uma_fini zfini, uma_import zimport,
 		    uma_release zrelease, void *arg, int flags)
 {
 	struct uma_zctor_args args;
 
 	memset(&args, 0, sizeof(args));
 	args.name = name;
 	args.size = size;
 	args.ctor = ctor;
 	args.dtor = dtor;
 	args.uminit = zinit;
 	args.fini = zfini;
 	args.import = zimport;
 	args.release = zrelease;
 	args.arg = arg;
 	args.align = 0;
 	args.flags = flags;
 
 	return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
 }
 
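 /*
  * Lock two zones in a consistent order (lowest address first) so that
  * transfers between them cannot produce a lock order reversal.
  */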
 static void
 zone_lock_pair(uma_zone_t a, uma_zone_t b)
 {
 	if (a < b) {
 		ZONE_LOCK(a);
 		mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
 	} else {
 		ZONE_LOCK(b);
 		mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
 	}
 }
 
 static void
 zone_unlock_pair(uma_zone_t a, uma_zone_t b)
 {
 
 	ZONE_UNLOCK(a);
 	ZONE_UNLOCK(b);
 }
 
 int
 uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
 {
 	uma_klink_t klink;
 	uma_klink_t kl;
 	int error;
 
 	error = 0;
 	klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
 
 	zone_lock_pair(zone, master);
 	/*
 	 * zone must use vtoslab() to resolve objects and must already be
 	 * a secondary.
 	 */
 	if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
 	    != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The new master must also use vtoslab().
 	 */
 	if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * The underlying object must be the same size.  rsize
 	 * may be different.
 	 */
 	if (master->uz_size != zone->uz_size) {
 		error = E2BIG;
 		goto out;
 	}
 	/*
 	 * Put it at the end of the list.
 	 */
 	klink->kl_keg = zone_first_keg(master);
 	LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
 		if (LIST_NEXT(kl, kl_link) == NULL) {
 			LIST_INSERT_AFTER(kl, klink, kl_link);
 			break;
 		}
 	}
 	klink = NULL;
 	zone->uz_flags |= UMA_ZFLAG_MULTI;
 	zone->uz_slab = zone_fetch_slab_multi;
 
 out:
 	zone_unlock_pair(zone, master);
 	if (klink != NULL)
 		free(klink, M_TEMP);
 
 	return (error);
 }
 
 
 /* See uma.h */
 void
 uma_zdestroy(uma_zone_t zone)
 {
 
 	sx_slock(&uma_drain_lock);
 	zone_free_item(zones, zone, NULL, SKIP_NONE);
 	sx_sunlock(&uma_drain_lock);
 }
 
 void
 uma_zwait(uma_zone_t zone)
 {
 	void *item;
 
 	item = uma_zalloc_arg(zone, NULL, M_WAITOK);
 	uma_zfree(zone, item);
 }
 
 void *
 uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags)
 {
 	void *item;
 #ifdef SMP
 	int i;
 
 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
 #endif
 	item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO);
 	if (item != NULL && (flags & M_ZERO)) {
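 		/* Zero each CPU's private copy, not just the base address. */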
 #ifdef SMP
 		for (i = 0; i <= mp_maxid; i++)
 			bzero(zpcpu_get_cpu(item, i), zone->uz_size);
 #else
 		bzero(item, zone->uz_size);
 #endif
 	}
 	return (item);
 }
 
 /*
  * A stub while both regular and pcpu cases are identical.
  */
 void
 uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *udata)
 {
 
 #ifdef SMP
 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
 #endif
 	uma_zfree_arg(zone, item, udata);
 }
 
 /* See uma.h */
 void *
 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 {
 	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
 	uma_cache_t cache;
 	void *item;
 	int cpu, domain, lockfail;
 #ifdef INVARIANTS
 	bool skipdbg;
 #endif
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
 
 	/* This is the fast path allocation */
 	CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d",
 	    curthread, zone->uz_name, zone, flags);
 
 	if (flags & M_WAITOK) {
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
 	}
 	KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC"));
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zalloc_arg: called with spinlock or critical section held"));
 	if (zone->uz_flags & UMA_ZONE_PCPU)
 		KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone "
 		    "with M_ZERO passed"));
 
 #ifdef DEBUG_MEMGUARD
 	if (memguard_cmp_zone(zone)) {
 		item = memguard_alloc(zone->uz_size, flags);
 		if (item != NULL) {
 			if (zone->uz_init != NULL &&
 			    zone->uz_init(item, zone->uz_size, flags) != 0)
 				return (NULL);
 			if (zone->uz_ctor != NULL &&
 			    zone->uz_ctor(item, zone->uz_size, udata,
 			    flags) != 0) {
 			    	zone->uz_fini(item, zone->uz_size);
 				return (NULL);
 			}
 			return (item);
 		}
 		/* This is unfortunate but should not be fatal. */
 	}
 #endif
 	/*
 	 * If possible, allocate from the per-CPU cache.  There are two
 	 * requirements for safe access to the per-CPU cache: (1) the thread
 	 * accessing the cache must not be preempted or yield during access,
 	 * and (2) the thread must not migrate CPUs without switching which
 	 * cache it accesses.  We rely on a critical section to prevent
 	 * preemption and migration.  We release the critical section in
 	 * order to acquire the zone mutex if we are unable to allocate from
 	 * the current cache; when we re-acquire the critical section, we
 	 * must detect and handle migration if it has occurred.
 	 */
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 zalloc_start:
 	bucket = cache->uc_allocbucket;
 	if (bucket != NULL && bucket->ub_cnt > 0) {
 		bucket->ub_cnt--;
 		item = bucket->ub_bucket[bucket->ub_cnt];
 #ifdef INVARIANTS
 		bucket->ub_bucket[bucket->ub_cnt] = NULL;
 #endif
 		KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
 		cache->uc_allocs++;
 		critical_exit();
 #ifdef INVARIANTS
 		skipdbg = uma_dbg_zskip(zone, item);
 #endif
 		if (zone->uz_ctor != NULL &&
 #ifdef INVARIANTS
 		    (!skipdbg || zone->uz_ctor != trash_ctor ||
 		    zone->uz_dtor != trash_dtor) &&
 #endif
 		    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
 			atomic_add_long(&zone->uz_fails, 1);
 			zone_free_item(zone, item, udata, SKIP_DTOR);
 			return (NULL);
 		}
 #ifdef INVARIANTS
 		if (!skipdbg)
 			uma_dbg_alloc(zone, NULL, item);
 #endif
 		if (flags & M_ZERO)
 			uma_zero_item(item, zone);
 		return (item);
 	}
 
 	/*
 	 * We have run out of items in our alloc bucket.
 	 * See if we can switch with our free bucket.
 	 */
 	bucket = cache->uc_freebucket;
 	if (bucket != NULL && bucket->ub_cnt > 0) {
 		CTR2(KTR_UMA,
 		    "uma_zalloc: zone %s(%p) swapping empty with alloc",
 		    zone->uz_name, zone);
 		cache->uc_freebucket = cache->uc_allocbucket;
 		cache->uc_allocbucket = bucket;
 		goto zalloc_start;
 	}
 
 	/*
 	 * Discard any empty allocation bucket while we hold no locks.
 	 */
 	bucket = cache->uc_allocbucket;
 	cache->uc_allocbucket = NULL;
 	critical_exit();
 	if (bucket != NULL)
 		bucket_free(zone, bucket, udata);
 
 	if (zone->uz_flags & UMA_ZONE_NUMA)
 		domain = PCPU_GET(domain);
 	else
 		domain = UMA_ANYDOMAIN;
 
 	/* Short-circuit for zones without buckets and low memory. */
 	if (zone->uz_count == 0 || bucketdisable)
 		goto zalloc_item;
 
 	/*
 	 * Our attempt to fetch the item from the per-CPU cache has failed, so
 	 * we must go back to the zone.  This requires the zone lock, so we
 	 * must drop the critical section, then re-acquire it when we go back
 	 * to the cache.  Since the critical section is released, we may be
 	 * preempted or migrate.  As such, make sure not to maintain any
 	 * thread-local state specific to the cache from prior to releasing
 	 * the critical section.
 	 */
 	lockfail = 0;
 	if (ZONE_TRYLOCK(zone) == 0) {
 		/* Record contention to size the buckets. */
 		ZONE_LOCK(zone);
 		lockfail = 1;
 	}
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 	/* See if we lost the race to fill the cache. */
 	if (cache->uc_allocbucket != NULL) {
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
 
 	/*
 	 * Check the zone's cache of buckets.
 	 */
 	if (domain == UMA_ANYDOMAIN)
 		zdom = &zone->uz_domain[0];
 	else
 		zdom = &zone->uz_domain[domain];
 	if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zalloc_arg: Returning an empty bucket."));
 
 		LIST_REMOVE(bucket, ub_link);
 		cache->uc_allocbucket = bucket;
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
 	/* We are no longer associated with this CPU. */
 	critical_exit();
 
 	/*
 	 * We bump the uz count when the cache size is insufficient to
 	 * handle the working set.
 	 */
 	if (lockfail && zone->uz_count < BUCKET_MAX)
 		zone->uz_count++;
 	ZONE_UNLOCK(zone);
 
 	/*
 	 * Now let's just fill a bucket and put it on the free list.  If that
 	 * works we'll restart the allocation from the beginning and it
 	 * will use the just-filled bucket.
 	 */
 	bucket = zone_alloc_bucket(zone, udata, domain, flags);
 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
 	    zone->uz_name, zone, bucket);
 	if (bucket != NULL) {
 		ZONE_LOCK(zone);
 		critical_enter();
 		cpu = curcpu;
 		cache = &zone->uz_cpu[cpu];
 		/*
 		 * See if we lost the race or were migrated.  Cache the
 		 * initialized bucket to make this less likely or claim
 		 * the memory directly.
 		 */
 		if (cache->uc_allocbucket != NULL ||
 		    (zone->uz_flags & UMA_ZONE_NUMA &&
 		    domain != PCPU_GET(domain)))
 			LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
 		else
 			cache->uc_allocbucket = bucket;
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
 
 	/*
 	 * We may not be able to get a bucket so return an actual item.
 	 */
 zalloc_item:
 	item = zone_alloc_item(zone, udata, domain, flags);
 
 	return (item);
 }
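 
 /*
  * Illustrative caller-side sketch (the "foo" names are hypothetical): a
  * consumer typically creates a zone once and then exercises the per-CPU
  * fast path above on every allocation:
  *
  *	static uma_zone_t foo_zone;
  *
  *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
  *	    NULL, NULL, UMA_ALIGN_PTR, 0);
  *	...
  *	p = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
  *	...
  *	uma_zfree(foo_zone, p);
  */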
 
 void *
 uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
 {
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
 
 	/* This is the fast path allocation */
 	CTR5(KTR_UMA,
 	    "uma_zalloc_domain thread %x zone %s(%p) domain %d flags %d",
 	    curthread, zone->uz_name, zone, domain, flags);
 
 	if (flags & M_WAITOK) {
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "uma_zalloc_domain: zone \"%s\"", zone->uz_name);
 	}
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zalloc_domain: called with spinlock or critical section held"));
 
 	return (zone_alloc_item(zone, udata, domain, flags));
 }
 
 /*
  * Find a slab with some space.  Prefer slabs that are partially used over
  * those that are completely free.  This helps to reduce fragmentation.
  *
  * If 'rr' is 1, search all domains starting from 'domain'.  Otherwise check
  * only 'domain'.
  */
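 /*
  * Illustrative example (hypothetical numbers): with vm_ndomains == 4 and
  * rr != 0, a search starting at domain 2 visits the per-domain lists in
  * the order 2, 3, 0, 1 and returns the first partial or free slab found.
  */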
 static uma_slab_t
 keg_first_slab(uma_keg_t keg, int domain, int rr)
 {
 	uma_domain_t dom;
 	uma_slab_t slab;
 	int start;
 
 	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("keg_first_slab: domain %d out of range", domain));
 
 	slab = NULL;
 	start = domain;
 	do {
 		dom = &keg->uk_domain[domain];
 		if (!LIST_EMPTY(&dom->ud_part_slab))
 			return (LIST_FIRST(&dom->ud_part_slab));
 		if (!LIST_EMPTY(&dom->ud_free_slab)) {
 			slab = LIST_FIRST(&dom->ud_free_slab);
 			LIST_REMOVE(slab, us_link);
 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
 			return (slab);
 		}
 		if (rr)
 			domain = (domain + 1) % vm_ndomains;
 	} while (domain != start);
 
 	return (NULL);
 }
 
 static uma_slab_t
 keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
 {
 	uma_domain_t dom;
 	uma_slab_t slab;
 	int allocflags, domain, reserve, rr, start;
 
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	slab = NULL;
 	reserve = 0;
 	allocflags = flags;
 	if ((flags & M_USE_RESERVE) == 0)
 		reserve = keg->uk_reserve;
 
 	/*
 	 * Round-robin for non-first-touch zones when there is more than one
 	 * domain.
 	 */
 	if (vm_ndomains == 1)
 		rdomain = 0;
 	rr = rdomain == UMA_ANYDOMAIN;
 	if (rr) {
 		keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
 		domain = start = keg->uk_cursor;
 		/* Only block on the second pass. */
 		if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK)
 			allocflags = (allocflags & ~M_WAITOK) | M_NOWAIT;
 	} else
 		domain = start = rdomain;
 
 again:
 	do {
 		if (keg->uk_free > reserve &&
 		    (slab = keg_first_slab(keg, domain, rr)) != NULL) {
 			MPASS(slab->us_keg == keg);
 			return (slab);
 		}
 
 		/*
 		 * M_NOVM means don't ask at all!
 		 */
 		if (flags & M_NOVM)
 			break;
 
 		if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
 			keg->uk_flags |= UMA_ZFLAG_FULL;
 			/*
 			 * If this is not a multi-zone, set the FULL bit.
 			 * Otherwise zone_fetch_slab_multi() takes care of it.
 			 */
 			if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
 				zone->uz_flags |= UMA_ZFLAG_FULL;
 				zone_log_warning(zone);
 				zone_maxaction(zone);
 			}
 			if (flags & M_NOWAIT)
 				return (NULL);
 			zone->uz_sleeps++;
 			msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
 			continue;
 		}
 		slab = keg_alloc_slab(keg, zone, domain, allocflags);
 		/*
 		 * If we got a slab here it's safe to mark it partially used
 		 * and return.  We assume that the caller is going to remove
 		 * at least one item.
 		 */
 		if (slab) {
 			MPASS(slab->us_keg == keg);
 			dom = &keg->uk_domain[slab->us_domain];
 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
 			return (slab);
 		}
 		if (rr) {
 			keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
 			domain = keg->uk_cursor;
 		}
 	} while (domain != start);
 
 	/* Retry domain scan with blocking. */
 	if (allocflags != flags) {
 		allocflags = flags;
 		goto again;
 	}
 
 	/*
 	 * We might not have been able to get a slab but another cpu
 	 * could have while we were unlocked.  Check again before we
 	 * fail.
 	 */
 	if (keg->uk_free > reserve &&
 	    (slab = keg_first_slab(keg, domain, rr)) != NULL) {
 		MPASS(slab->us_keg == keg);
 		return (slab);
 	}
 	return (NULL);
 }
 
 static uma_slab_t
 zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int domain, int flags)
 {
 	uma_slab_t slab;
 
 	if (keg == NULL) {
 		keg = zone_first_keg(zone);
 		KEG_LOCK(keg);
 	}
 
 	for (;;) {
 		slab = keg_fetch_slab(keg, zone, domain, flags);
 		if (slab)
 			return (slab);
 		if (flags & (M_NOWAIT | M_NOVM))
 			break;
 	}
 	KEG_UNLOCK(keg);
 	return (NULL);
 }
 
 /*
  * zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
  * with the keg locked.  On NULL no lock is held.
  *
  * The last pointer is used to seed the search.  It is not required.
  */
 static uma_slab_t
 zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int domain, int rflags)
 {
 	uma_klink_t klink;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	int flags;
 	int empty;
 	int full;
 
 	/*
 	 * Don't wait on the first pass.  This will skip limit tests
 	 * as well.  We don't want to block if we can find a provider
 	 * without blocking.
 	 */
 	flags = (rflags & ~M_WAITOK) | M_NOWAIT;
 	/*
 	 * Use the last slab allocated as a hint for where to start
 	 * the search.
 	 */
 	if (last != NULL) {
 		slab = keg_fetch_slab(last, zone, domain, flags);
 		if (slab)
 			return (slab);
 		KEG_UNLOCK(last);
 	}
 	/*
 	 * Loop until we have a slab in case of transient failures
 	 * while M_WAITOK is specified.  I'm not sure this is 100%
 	 * required but we've done it for so long now.
 	 */
 	for (;;) {
 		empty = 0;
 		full = 0;
 		/*
 		 * Search the available kegs for slabs.  Be careful to hold the
 		 * correct lock while calling into the keg layer.
 		 */
 		LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
 			keg = klink->kl_keg;
 			KEG_LOCK(keg);
 			if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
 				slab = keg_fetch_slab(keg, zone, domain, flags);
 				if (slab)
 					return (slab);
 			}
 			if (keg->uk_flags & UMA_ZFLAG_FULL)
 				full++;
 			else
 				empty++;
 			KEG_UNLOCK(keg);
 		}
 		if (rflags & (M_NOWAIT | M_NOVM))
 			break;
 		flags = rflags;
 		/*
 		 * All kegs are full.  XXX We can't atomically check all kegs
 		 * and sleep so just sleep for a short period and retry.
 		 */
 		if (full && !empty) {
 			ZONE_LOCK(zone);
 			zone->uz_flags |= UMA_ZFLAG_FULL;
 			zone->uz_sleeps++;
 			zone_log_warning(zone);
 			zone_maxaction(zone);
 			msleep(zone, zone->uz_lockptr, PVM,
 			    "zonelimit", hz/100);
 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
 			ZONE_UNLOCK(zone);
 			continue;
 		}
 	}
 	return (NULL);
 }
 
 static void *
 slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
 {
 	uma_domain_t dom;
 	void *item;
 	uint8_t freei;
 
 	MPASS(keg == slab->us_keg);
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 
 	freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
 	BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
 	item = slab->us_data + (keg->uk_rsize * freei);
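 	/*
 	 * Illustrative example (hypothetical numbers): if the lowest set
 	 * bit in us_free is index 5 and uk_rsize == 256, the item computed
 	 * above is us_data + 5 * 256 == us_data + 1280.
 	 */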
 	slab->us_freecount--;
 	keg->uk_free--;
 
 	/* Move this slab to the full list */
 	if (slab->us_freecount == 0) {
 		LIST_REMOVE(slab, us_link);
 		dom = &keg->uk_domain[slab->us_domain];
 		LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
 	}
 
 	return (item);
 }
 
 static int
 zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
 {
 	uma_slab_t slab;
 	uma_keg_t keg;
 #ifdef NUMA
 	int stripe;
 #endif
 	int i;
 
 	slab = NULL;
 	keg = NULL;
 	/* Try to keep the buckets totally full */
 	for (i = 0; i < max; ) {
 		if ((slab = zone->uz_slab(zone, keg, domain, flags)) == NULL)
 			break;
 		keg = slab->us_keg;
 #ifdef NUMA
 		stripe = howmany(max, vm_ndomains);
 #endif
 		while (slab->us_freecount && i < max) { 
 			bucket[i++] = slab_alloc_item(keg, slab);
 			if (keg->uk_free <= keg->uk_reserve)
 				break;
 #ifdef NUMA
 			/*
 			 * If the zone is striped we pick a new slab for every
 			 * N allocations.  Eliminating this conditional will
 			 * instead pick a new domain for each bucket rather
 			 * than stripe within each bucket.  The current option
 			 * produces more fragmentation and requires more cpu
 			 * time but yields better distribution.
 			 */
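 			/*
 			 * Illustrative example (hypothetical numbers):
 			 * with max == 128 and vm_ndomains == 4, stripe
 			 * is 32, so at most 32 consecutive items come
 			 * from one slab before a new slab (and possibly
 			 * a new domain) is chosen.
 			 */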
 			if ((zone->uz_flags & UMA_ZONE_NUMA) == 0 &&
 			    vm_ndomains > 1 && --stripe == 0)
 				break;
 #endif
 		}
 		/* Don't block if we allocated any successfully. */
 		flags &= ~M_WAITOK;
 		flags |= M_NOWAIT;
 	}
 	if (slab != NULL)
 		KEG_UNLOCK(keg);
 
 	return (i);
 }
 
 static uma_bucket_t
 zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
 {
 	uma_bucket_t bucket;
 	int max;
 
 	/* Don't wait for buckets, preserve caller's NOVM setting. */
 	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
 	if (bucket == NULL)
 		return (NULL);
 
 	max = MIN(bucket->ub_entries, zone->uz_count);
 	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
 	    max, domain, flags);
 
 	/*
 	 * Initialize the memory if necessary.
 	 */
 	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
 		int i;
 
 		for (i = 0; i < bucket->ub_cnt; i++)
 			if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
 			    flags) != 0)
 				break;
 		/*
 		 * If we couldn't initialize the whole bucket, put the
 		 * rest back onto the freelist.
 		 */
 		if (i != bucket->ub_cnt) {
 			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
 			    bucket->ub_cnt - i);
 #ifdef INVARIANTS
 			bzero(&bucket->ub_bucket[i],
 			    sizeof(void *) * (bucket->ub_cnt - i));
 #endif
 			bucket->ub_cnt = i;
 		}
 	}
 
 	if (bucket->ub_cnt == 0) {
 		bucket_free(zone, bucket, udata);
 		atomic_add_long(&zone->uz_fails, 1);
 		return (NULL);
 	}
 
 	return (bucket);
 }
 
 /*
  * Allocates a single item from a zone.
  *
  * Arguments
  *	zone   The zone to alloc for.
  *	udata  The data to be passed to the constructor.
  *	domain The domain to allocate from or UMA_ANYDOMAIN.
  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
  *
  * Returns
  *	NULL if there is no memory and M_NOWAIT is set
  *	An item if successful
  */
 
 static void *
 zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
 {
 	void *item;
 #ifdef INVARIANTS
 	bool skipdbg;
 #endif
 
 	item = NULL;
 
 	if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
 		goto fail;
 	atomic_add_long(&zone->uz_allocs, 1);
 
 #ifdef INVARIANTS
 	skipdbg = uma_dbg_zskip(zone, item);
 #endif
 	/*
 	 * We have to call both the zone's init (not the keg's init)
 	 * and the zone's ctor.  This is because the item is going from
 	 * a keg slab directly to the user, and the user is expecting it
 	 * to be both zone-init'd as well as zone-ctor'd.
 	 */
 	if (zone->uz_init != NULL) {
 		if (zone->uz_init(item, zone->uz_size, flags) != 0) {
 			zone_free_item(zone, item, udata, SKIP_FINI);
 			goto fail;
 		}
 	}
 	if (zone->uz_ctor != NULL &&
 #ifdef INVARIANTS
 	    (!skipdbg || zone->uz_ctor != trash_ctor ||
 	    zone->uz_dtor != trash_dtor) &&
 #endif
 	    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
 		zone_free_item(zone, item, udata, SKIP_DTOR);
 		goto fail;
 	}
 #ifdef INVARIANTS
 	if (!skipdbg)
 		uma_dbg_alloc(zone, NULL, item);
 #endif
 	if (flags & M_ZERO)
 		uma_zero_item(item, zone);
 
 	CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item,
 	    zone->uz_name, zone);
 
 	return (item);
 
 fail:
 	CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
 	    zone->uz_name, zone);
 	atomic_add_long(&zone->uz_fails, 1);
 	return (NULL);
 }
 
 /* See uma.h */
 void
 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 {
 	uma_cache_t cache;
 	uma_bucket_t bucket;
 	uma_zone_domain_t zdom;
 	int cpu, domain, lockfail;
 #ifdef INVARIANTS
 	bool skipdbg;
 #endif
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
 
 	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
 	    zone->uz_name);
 
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zfree_arg: called with spinlock or critical section held"));
 
 	/* uma_zfree(..., NULL) does nothing, to match free(9). */
 	if (item == NULL)
 		return;
 #ifdef DEBUG_MEMGUARD
 	if (is_memguard_addr(item)) {
 		if (zone->uz_dtor != NULL)
 			zone->uz_dtor(item, zone->uz_size, udata);
 		if (zone->uz_fini != NULL)
 			zone->uz_fini(item, zone->uz_size);
 		memguard_free(item);
 		return;
 	}
 #endif
 #ifdef INVARIANTS
 	skipdbg = uma_dbg_zskip(zone, item);
 	if (skipdbg == false) {
 		if (zone->uz_flags & UMA_ZONE_MALLOC)
 			uma_dbg_free(zone, udata, item);
 		else
 			uma_dbg_free(zone, NULL, item);
 	}
 	if (zone->uz_dtor != NULL && (!skipdbg ||
 	    zone->uz_dtor != trash_dtor || zone->uz_ctor != trash_ctor))
 #else
 	if (zone->uz_dtor != NULL)
 #endif
 		zone->uz_dtor(item, zone->uz_size, udata);
 
 	/*
 	 * The race here is acceptable.  If we miss it we'll just have to wait
 	 * a little longer for the limits to be reset.
 	 */
 	if (zone->uz_flags & UMA_ZFLAG_FULL)
 		goto zfree_item;
 
 	/*
 	 * If possible, free to the per-CPU cache.  There are two
 	 * requirements for safe access to the per-CPU cache: (1) the thread
 	 * accessing the cache must not be preempted or yield during access,
 	 * and (2) the thread must not migrate CPUs without switching which
 	 * cache it accesses.  We rely on a critical section to prevent
 	 * preemption and migration.  We release the critical section in
 	 * order to acquire the zone mutex if we are unable to free to the
 	 * current cache; when we re-acquire the critical section, we must
 	 * detect and handle migration if it has occurred.
 	 */
 zfree_restart:
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 zfree_start:
 	/*
 	 * Try to free into the allocbucket first to give LIFO ordering
 	 * for cache-hot data structures.  Spill over into the freebucket
 	 * if necessary.  Alloc will swap them if one runs dry.
 	 */
 	bucket = cache->uc_allocbucket;
 	if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
 		bucket = cache->uc_freebucket;
 	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
 		KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
 		    ("uma_zfree: Freeing to non free bucket index."));
 		bucket->ub_bucket[bucket->ub_cnt] = item;
 		bucket->ub_cnt++;
 		cache->uc_frees++;
 		critical_exit();
 		return;
 	}
 
 	/*
 	 * We must go back to the zone, which requires acquiring the zone lock,
 	 * which in turn means we must release and re-acquire the critical
 	 * section.  Since the critical section is released, we may be
 	 * preempted or migrate.  As such, make sure not to maintain any
 	 * thread-local state specific to the cache from prior to releasing
 	 * the critical section.
 	 */
 	critical_exit();
 	if (zone->uz_count == 0 || bucketdisable)
 		goto zfree_item;
 
 	lockfail = 0;
 	if (ZONE_TRYLOCK(zone) == 0) {
 		/* Record contention to size the buckets. */
 		ZONE_LOCK(zone);
 		lockfail = 1;
 	}
 	critical_enter();
 	cpu = curcpu;
 	cache = &zone->uz_cpu[cpu];
 
 	bucket = cache->uc_freebucket;
 	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
 		ZONE_UNLOCK(zone);
 		goto zfree_start;
 	}
 	cache->uc_freebucket = NULL;
 	/* We are no longer associated with this CPU. */
 	critical_exit();
 
 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
 		domain = PCPU_GET(domain);
 	else 
 		domain = 0;
 	zdom = &zone->uz_domain[0];
 
 	/* Can we throw this on the zone full list? */
 	if (bucket != NULL) {
 		CTR3(KTR_UMA,
 		    "uma_zfree: zone %s(%p) putting bucket %p on free list",
 		    zone->uz_name, zone, bucket);
 		/* ub_cnt is pointing to the last free item */
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
 		if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
 			ZONE_UNLOCK(zone);
 			bucket_drain(zone, bucket);
 			bucket_free(zone, bucket, udata);
 			goto zfree_restart;
 		} else
 			LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
 	}
 
 	/*
 	 * We bump the uz count when the cache size is insufficient to
 	 * handle the working set.
 	 */
 	if (lockfail && zone->uz_count < BUCKET_MAX)
 		zone->uz_count++;
 	ZONE_UNLOCK(zone);
 
 	bucket = bucket_alloc(zone, udata, M_NOWAIT);
 	CTR3(KTR_UMA, "uma_zfree: zone %s(%p) allocated bucket %p",
 	    zone->uz_name, zone, bucket);
 	if (bucket) {
 		critical_enter();
 		cpu = curcpu;
 		cache = &zone->uz_cpu[cpu];
 		if (cache->uc_freebucket == NULL &&
 		    ((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
 		    domain == PCPU_GET(domain))) {
 			cache->uc_freebucket = bucket;
 			goto zfree_start;
 		}
 		/*
 		 * We lost the race, start over.  We have to drop our
 		 * critical section to free the bucket.
 		 */
 		critical_exit();
 		bucket_free(zone, bucket, udata);
 		goto zfree_restart;
 	}
 
 	/*
 	 * If nothing else caught this, we'll just do an internal free.
 	 */
 zfree_item:
 	zone_free_item(zone, item, udata, SKIP_DTOR);
 
 	return;
 }
 
 void
 uma_zfree_domain(uma_zone_t zone, void *item, void *udata)
 {
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
 
 	CTR2(KTR_UMA, "uma_zfree_domain thread %x zone %s", curthread,
 	    zone->uz_name);
 
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zfree_domain: called with spinlock or critical section held"));
 
 	/* uma_zfree(..., NULL) does nothing, to match free(9). */
 	if (item == NULL)
 		return;
 	zone_free_item(zone, item, udata, SKIP_NONE);
 }
 
 static void
 slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
 {
 	uma_domain_t dom;
 	uint8_t freei;
 
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	MPASS(keg == slab->us_keg);
 
 	dom = &keg->uk_domain[slab->us_domain];
 
 	/* Do we need to remove from any lists? */
 	if (slab->us_freecount+1 == keg->uk_ipers) {
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
 	} else if (slab->us_freecount == 0) {
 		LIST_REMOVE(slab, us_link);
 		LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
 	}
 
 	/* Slab management. */
 	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 	BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
 	slab->us_freecount++;
 
 	/* Keg statistics. */
 	keg->uk_free++;
 }
 
 static void
 zone_release(uma_zone_t zone, void **bucket, int cnt)
 {
 	void *item;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	uint8_t *mem;
 	int clearfull;
 	int i;
 
 	clearfull = 0;
 	keg = zone_first_keg(zone);
 	KEG_LOCK(keg);
 	for (i = 0; i < cnt; i++) {
 		item = bucket[i];
 		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
 			if (zone->uz_flags & UMA_ZONE_HASH) {
 				slab = hash_sfind(&keg->uk_hash, mem);
 			} else {
 				mem += keg->uk_pgoff;
 				slab = (uma_slab_t)mem;
 			}
 		} else {
 			slab = vtoslab((vm_offset_t)item);
 			if (slab->us_keg != keg) {
 				KEG_UNLOCK(keg);
 				keg = slab->us_keg;
 				KEG_LOCK(keg);
 			}
 		}
 		slab_free_item(keg, slab, item);
 		if (keg->uk_flags & UMA_ZFLAG_FULL) {
 			if (keg->uk_pages < keg->uk_maxpages) {
 				keg->uk_flags &= ~UMA_ZFLAG_FULL;
 				clearfull = 1;
 			}
 
 			/* 
 			 * We can handle one more allocation. Since we're
 			 * clearing ZFLAG_FULL, wake up all procs blocked
 			 * on pages. This should be uncommon, so keeping this
 			 * simple for now (rather than adding a count of
 			 * blocked threads, etc.).
 			 */
 			wakeup(keg);
 		}
 	}
 	KEG_UNLOCK(keg);
 	if (clearfull) {
 		ZONE_LOCK(zone);
 		zone->uz_flags &= ~UMA_ZFLAG_FULL;
 		wakeup(zone);
 		ZONE_UNLOCK(zone);
 	}
 
 }
 
 /*
  * Frees a single item to any zone.
  *
  * Arguments:
  *	zone   The zone to free to
  *	item   The item we're freeing
  *	udata  User supplied data for the dtor
  *	skip   Skip dtors and finis
  */
 static void
 zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
 {
 #ifdef INVARIANTS
 	bool skipdbg;
 
 	skipdbg = uma_dbg_zskip(zone, item);
 	if (skip == SKIP_NONE && !skipdbg) {
 		if (zone->uz_flags & UMA_ZONE_MALLOC)
 			uma_dbg_free(zone, udata, item);
 		else
 			uma_dbg_free(zone, NULL, item);
 	}
 
 	if (skip < SKIP_DTOR && zone->uz_dtor != NULL &&
 	    (!skipdbg || zone->uz_dtor != trash_dtor ||
 	    zone->uz_ctor != trash_ctor))
 #else
 	if (skip < SKIP_DTOR && zone->uz_dtor != NULL)
 #endif
 		zone->uz_dtor(item, zone->uz_size, udata);
 
 	if (skip < SKIP_FINI && zone->uz_fini)
 		zone->uz_fini(item, zone->uz_size);
 
 	atomic_add_long(&zone->uz_frees, 1);
 	zone->uz_release(zone->uz_arg, &item, 1);
 }
 
 /* See uma.h */
 int
 uma_zone_set_max(uma_zone_t zone, int nitems)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return (0);
 	KEG_LOCK(keg);
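 	/*
 	 * Round the requested limit up to whole slabs/pages.  Illustrative
 	 * example (hypothetical numbers): with uk_ipers == 10 and
 	 * uk_ppera == 1, nitems == 25 first gives uk_maxpages == 2; since
 	 * 2 * 10 < 25 it is bumped to 3, and the value returned to the
 	 * caller becomes 30.
 	 */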
 	keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
 	if (keg->uk_maxpages * keg->uk_ipers < nitems)
 		keg->uk_maxpages += keg->uk_ppera;
 	nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
 	KEG_UNLOCK(keg);
 
 	return (nitems);
 }
 
 /* See uma.h */
 int
 uma_zone_get_max(uma_zone_t zone)
 {
 	int nitems;
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return (0);
 	KEG_LOCK(keg);
 	nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
 	KEG_UNLOCK(keg);
 
 	return (nitems);
 }
 
 /* See uma.h */
 void
 uma_zone_set_warning(uma_zone_t zone, const char *warning)
 {
 
 	ZONE_LOCK(zone);
 	zone->uz_warning = warning;
 	ZONE_UNLOCK(zone);
 }
 
 /* See uma.h */
 void
 uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
 {
 
 	ZONE_LOCK(zone);
 	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
 	ZONE_UNLOCK(zone);
 }
 
 /* See uma.h */
 int
 uma_zone_get_cur(uma_zone_t zone)
 {
 	int64_t nitems;
 	u_int i;
 
 	ZONE_LOCK(zone);
 	nitems = zone->uz_allocs - zone->uz_frees;
 	CPU_FOREACH(i) {
 		/*
 		 * See the comment in sysctl_vm_zone_stats() regarding the
 		 * safety of accessing the per-cpu caches. With the zone lock
 		 * held, it is safe, but can potentially result in stale data.
 		 */
 		nitems += zone->uz_cpu[i].uc_allocs -
 		    zone->uz_cpu[i].uc_frees;
 	}
 	ZONE_UNLOCK(zone);
 
 	return (nitems < 0 ? 0 : nitems);
 }
 
 /* See uma.h */
 void
 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
 	KEG_LOCK(keg);
 	KASSERT(keg->uk_pages == 0,
 	    ("uma_zone_set_init on non-empty keg"));
 	keg->uk_init = uminit;
 	KEG_UNLOCK(keg);
 }
 
 /* See uma.h */
 void
 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
 	KEG_LOCK(keg);
 	KASSERT(keg->uk_pages == 0,
 	    ("uma_zone_set_fini on non-empty keg"));
 	keg->uk_fini = fini;
 	KEG_UNLOCK(keg);
 }
 
 /* See uma.h */
 void
 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
 {
 
 	ZONE_LOCK(zone);
 	KASSERT(zone_first_keg(zone)->uk_pages == 0,
 	    ("uma_zone_set_zinit on non-empty keg"));
 	zone->uz_init = zinit;
 	ZONE_UNLOCK(zone);
 }
 
 /* See uma.h */
 void
 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
 {
 
 	ZONE_LOCK(zone);
 	KASSERT(zone_first_keg(zone)->uk_pages == 0,
 	    ("uma_zone_set_zfini on non-empty keg"));
 	zone->uz_fini = zfini;
 	ZONE_UNLOCK(zone);
 }
 
 /* See uma.h */
 /* XXX uk_freef is not actually used with the zone locked */
 void
 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
 	KEG_LOCK(keg);
 	keg->uk_freef = freef;
 	KEG_UNLOCK(keg);
 }
 
 /* See uma.h */
 /* XXX uk_allocf is not actually used with the zone locked */
 void
 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	KEG_LOCK(keg);
 	keg->uk_allocf = allocf;
 	KEG_UNLOCK(keg);
 }
 
 /* See uma.h */
 void
 uma_zone_reserve(uma_zone_t zone, int items)
 {
 	uma_keg_t keg;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return;
 	KEG_LOCK(keg);
 	keg->uk_reserve = items;
 	KEG_UNLOCK(keg);
 
 	return;
 }
 
 /* See uma.h */
 int
 uma_zone_reserve_kva(uma_zone_t zone, int count)
 {
 	uma_keg_t keg;
 	vm_offset_t kva;
 	u_int pages;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return (0);
 	pages = count / keg->uk_ipers;
 
 	if (pages * keg->uk_ipers < count)
 		pages++;
 	pages *= keg->uk_ppera;
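 	/*
 	 * Illustrative example (hypothetical numbers): reserving
 	 * count == 1000 items in a keg with uk_ipers == 146 and
 	 * uk_ppera == 1 needs 6 full slabs plus a seventh partial one,
 	 * so the computation above arrives at 7 pages of KVA.
 	 */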
 
 #ifdef UMA_MD_SMALL_ALLOC
 	if (keg->uk_ppera > 1) {
 #else
 	if (1) {
 #endif
 		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
 		if (kva == 0)
 			return (0);
 	} else
 		kva = 0;
 	KEG_LOCK(keg);
 	keg->uk_kva = kva;
 	keg->uk_offset = 0;
 	keg->uk_maxpages = pages;
 #ifdef UMA_MD_SMALL_ALLOC
 	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
 #else
 	keg->uk_allocf = noobj_alloc;
 #endif
 	keg->uk_flags |= UMA_ZONE_NOFREE;
 	KEG_UNLOCK(keg);
 
 	return (1);
 }
 
 /* See uma.h */
 void
 uma_prealloc(uma_zone_t zone, int items)
 {
 	uma_domain_t dom;
 	uma_slab_t slab;
 	uma_keg_t keg;
 	int domain, slabs;
 
 	keg = zone_first_keg(zone);
 	if (keg == NULL)
 		return;
 	KEG_LOCK(keg);
 	slabs = items / keg->uk_ipers;
 	domain = 0;
 	if (slabs * keg->uk_ipers < items)
 		slabs++;
 	while (slabs > 0) {
 		slab = keg_alloc_slab(keg, zone, domain, M_WAITOK);
 		if (slab == NULL)
 			break;
 		MPASS(slab->us_keg == keg);
 		dom = &keg->uk_domain[slab->us_domain];
 		LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
 		slabs--;
 		domain = (domain + 1) % vm_ndomains;
 	}
 	KEG_UNLOCK(keg);
 }
 
 /* See uma.h */
 static void
 uma_reclaim_locked(bool kmem_danger)
 {
 
 	CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
 	sx_assert(&uma_drain_lock, SA_XLOCKED);
 	bucket_enable();
 	zone_foreach(zone_drain);
 	if (vm_page_count_min() || kmem_danger) {
 		cache_drain_safe(NULL);
 		zone_foreach(zone_drain);
 	}
 	/*
 	 * Some slabs may have been freed while the other zones were being
 	 * drained, but this zone was visited early in the pass.  Visit it
 	 * again so that we can free pages that became empty once the other
 	 * zones were drained.  We have to do the same for buckets.
 	 */
 	zone_drain(slabzone);
 	bucket_zone_drain();
 }
 
 void
 uma_reclaim(void)
 {
 
 	sx_xlock(&uma_drain_lock);
 	uma_reclaim_locked(false);
 	sx_xunlock(&uma_drain_lock);
 }
 
 static volatile int uma_reclaim_needed;
 
 void
 uma_reclaim_wakeup(void)
 {
 
 	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0)
 		wakeup(uma_reclaim);
 }
 
 void
 uma_reclaim_worker(void *arg __unused)
 {
 
 	for (;;) {
 		sx_xlock(&uma_drain_lock);
 		while (atomic_load_int(&uma_reclaim_needed) == 0)
 			sx_sleep(uma_reclaim, &uma_drain_lock, PVM, "umarcl",
 			    hz);
 		sx_xunlock(&uma_drain_lock);
 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
 		sx_xlock(&uma_drain_lock);
 		uma_reclaim_locked(true);
 		atomic_store_int(&uma_reclaim_needed, 0);
 		sx_xunlock(&uma_drain_lock);
 		/* Don't fire more than once per second. */
 		pause("umarclslp", hz);
 	}
 }
 
 /* See uma.h */
 int
 uma_zone_exhausted(uma_zone_t zone)
 {
 	int full;
 
 	ZONE_LOCK(zone);
 	full = (zone->uz_flags & UMA_ZFLAG_FULL);
 	ZONE_UNLOCK(zone);
 	return (full);	
 }
 
 int
 uma_zone_exhausted_nolock(uma_zone_t zone)
 {
 	return (zone->uz_flags & UMA_ZFLAG_FULL);
 }
 
 void *
 uma_large_malloc_domain(vm_size_t size, int domain, int wait)
 {
 	vm_offset_t addr;
 	uma_slab_t slab;
 
 	slab = zone_alloc_item(slabzone, NULL, domain, wait);
 	if (slab == NULL)
 		return (NULL);
 	if (domain == UMA_ANYDOMAIN)
 		addr = kmem_malloc(size, wait);
 	else
 		addr = kmem_malloc_domain(domain, size, wait);
 	if (addr != 0) {
 		vsetslab(addr, slab);
 		slab->us_data = (void *)addr;
 		slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
-#if VM_NRESERVLEVEL > 0
-		if (__predict_false((wait & M_EXEC) != 0))
-			slab->us_flags |= UMA_SLAB_KRWX;
-#endif
 		slab->us_size = size;
 		slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
 		    pmap_kextract(addr)));
 		uma_total_inc(size);
 	} else {
 		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 	}
 
 	return ((void *)addr);
 }
 
 void *
 uma_large_malloc(vm_size_t size, int wait)
 {
 
 	return uma_large_malloc_domain(size, UMA_ANYDOMAIN, wait);
 }
 
 void
 uma_large_free(uma_slab_t slab)
 {
-	struct vmem *arena;
 
 	KASSERT((slab->us_flags & UMA_SLAB_KERNEL) != 0,
 	    ("uma_large_free:  Memory not allocated with uma_large_malloc."));
-#if VM_NRESERVLEVEL > 0
-	if (__predict_true((slab->us_flags & UMA_SLAB_KRWX) == 0))
-		arena = kernel_arena;
-	else
-		arena = kernel_rwx_arena;
-#else
-	arena = kernel_arena;
-#endif
-	kmem_free(arena, (vm_offset_t)slab->us_data, slab->us_size);
+	kmem_free((vm_offset_t)slab->us_data, slab->us_size);
 	uma_total_dec(slab->us_size);
 	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 }
 
 static void
 uma_zero_item(void *item, uma_zone_t zone)
 {
 
 	bzero(item, zone->uz_size);
 }
 
 unsigned long
 uma_limit(void)
 {
 
 	return (uma_kmem_limit);
 }
 
 void
 uma_set_limit(unsigned long limit)
 {
 
 	uma_kmem_limit = limit;
 }
 
 unsigned long
 uma_size(void)
 {
 
 	return (uma_kmem_total);
 }
 
 long
 uma_avail(void)
 {
 
 	return (uma_kmem_limit - uma_kmem_total);
 }
 
 void
 uma_print_stats(void)
 {
 	zone_foreach(uma_print_zone);
 }
 
 static void
 slab_print(uma_slab_t slab)
 {
 	printf("slab: keg %p, data %p, freecount %d\n",
 		slab->us_keg, slab->us_data, slab->us_freecount);
 }
 
 static void
 cache_print(uma_cache_t cache)
 {
 	printf("alloc: %p(%d), free: %p(%d)\n",
 		cache->uc_allocbucket,
 		cache->uc_allocbucket ? cache->uc_allocbucket->ub_cnt : 0,
 		cache->uc_freebucket,
 		cache->uc_freebucket ? cache->uc_freebucket->ub_cnt : 0);
 }
 
 static void
 uma_print_keg(uma_keg_t keg)
 {
 	uma_domain_t dom;
 	uma_slab_t slab;
 	int i;
 
 	printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
 	    "out %d free %d limit %d\n",
 	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
 	    keg->uk_ipers, keg->uk_ppera,
 	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
 	    keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
 	for (i = 0; i < vm_ndomains; i++) {
 		dom = &keg->uk_domain[i];
 		printf("Part slabs:\n");
 		LIST_FOREACH(slab, &dom->ud_part_slab, us_link)
 			slab_print(slab);
 		printf("Free slabs:\n");
 		LIST_FOREACH(slab, &dom->ud_free_slab, us_link)
 			slab_print(slab);
 		printf("Full slabs:\n");
 		LIST_FOREACH(slab, &dom->ud_full_slab, us_link)
 			slab_print(slab);
 	}
 }
 
 void
 uma_print_zone(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	uma_klink_t kl;
 	int i;
 
 	printf("zone: %s(%p) size %d flags %#x\n",
 	    zone->uz_name, zone, zone->uz_size, zone->uz_flags);
 	LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
 		uma_print_keg(kl->kl_keg);
 	CPU_FOREACH(i) {
 		cache = &zone->uz_cpu[i];
 		printf("CPU %d Cache:\n", i);
 		cache_print(cache);
 	}
 }
 
 #ifdef DDB
 /*
  * Generate statistics across both the zone and its per-CPU caches.  Return
  * desired statistics if the pointer is non-NULL for that statistic.
  *
  * Note: does not update the zone statistics, as it can't safely clear the
  * per-CPU cache statistic.
  *
  * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
  * safe from off-CPU; we should modify the caches to track this information
  * directly so that we don't have to.
  */
 static void
 uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
     uint64_t *freesp, uint64_t *sleepsp)
 {
 	uma_cache_t cache;
 	uint64_t allocs, frees, sleeps;
 	int cachefree, cpu;
 
 	allocs = frees = sleeps = 0;
 	cachefree = 0;
 	CPU_FOREACH(cpu) {
 		cache = &z->uz_cpu[cpu];
 		if (cache->uc_allocbucket != NULL)
 			cachefree += cache->uc_allocbucket->ub_cnt;
 		if (cache->uc_freebucket != NULL)
 			cachefree += cache->uc_freebucket->ub_cnt;
 		allocs += cache->uc_allocs;
 		frees += cache->uc_frees;
 	}
 	allocs += z->uz_allocs;
 	frees += z->uz_frees;
 	sleeps += z->uz_sleeps;
 	if (cachefreep != NULL)
 		*cachefreep = cachefree;
 	if (allocsp != NULL)
 		*allocsp = allocs;
 	if (freesp != NULL)
 		*freesp = frees;
 	if (sleepsp != NULL)
 		*sleepsp = sleeps;
 }
 #endif /* DDB */
 
 static int
 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
 {
 	uma_keg_t kz;
 	uma_zone_t z;
 	int count;
 
 	count = 0;
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
 			count++;
 	}
 	rw_runlock(&uma_rwlock);
 	return (sysctl_handle_int(oidp, &count, 0, req));
 }
 
 static int
 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct uma_stream_header ush;
 	struct uma_type_header uth;
 	struct uma_percpu_stat *ups;
 	uma_bucket_t bucket;
 	uma_zone_domain_t zdom;
 	struct sbuf sbuf;
 	uma_cache_t cache;
 	uma_klink_t kl;
 	uma_keg_t kz;
 	uma_zone_t z;
 	uma_keg_t k;
 	int count, error, i;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
 	ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK);
 
 	count = 0;
 	rw_rlock(&uma_rwlock);
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
 			count++;
 	}
 
 	/*
 	 * Insert stream header.
 	 */
 	bzero(&ush, sizeof(ush));
 	ush.ush_version = UMA_STREAM_VERSION;
 	ush.ush_maxcpus = (mp_maxid + 1);
 	ush.ush_count = count;
 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
 
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 			bzero(&uth, sizeof(uth));
 			ZONE_LOCK(z);
 			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
 			uth.uth_align = kz->uk_align;
 			uth.uth_size = kz->uk_size;
 			uth.uth_rsize = kz->uk_rsize;
 			LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
 				k = kl->kl_keg;
 				uth.uth_maxpages += k->uk_maxpages;
 				uth.uth_pages += k->uk_pages;
 				uth.uth_keg_free += k->uk_free;
 				uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
 				    * k->uk_ipers;
 			}
 
 			/*
 			 * A zone is secondary if it is not the first entry
 			 * on the keg's zone list.
 			 */
 			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
 			    (LIST_FIRST(&kz->uk_zones) != z))
 				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
 
 			for (i = 0; i < vm_ndomains; i++) {
 				zdom = &z->uz_domain[i];
 				LIST_FOREACH(bucket, &zdom->uzd_buckets,
 				    ub_link)
 					uth.uth_zone_free += bucket->ub_cnt;
 			}
 			uth.uth_allocs = z->uz_allocs;
 			uth.uth_frees = z->uz_frees;
 			uth.uth_fails = z->uz_fails;
 			uth.uth_sleeps = z->uz_sleeps;
 			/*
 			 * While it is not normally safe to access the cache
 			 * bucket pointers while not on the CPU that owns the
 			 * cache, we only allow the pointers to be exchanged
 			 * without the zone lock held, not invalidated, so
 			 * accept the possible race associated with bucket
 			 * exchange during monitoring.
 			 */
 			for (i = 0; i < mp_maxid + 1; i++) {
 				bzero(&ups[i], sizeof(*ups));
 				if (kz->uk_flags & UMA_ZFLAG_INTERNAL ||
 				    CPU_ABSENT(i))
 					continue;
 				cache = &z->uz_cpu[i];
 				if (cache->uc_allocbucket != NULL)
 					ups[i].ups_cache_free +=
 					    cache->uc_allocbucket->ub_cnt;
 				if (cache->uc_freebucket != NULL)
 					ups[i].ups_cache_free +=
 					    cache->uc_freebucket->ub_cnt;
 				ups[i].ups_allocs = cache->uc_allocs;
 				ups[i].ups_frees = cache->uc_frees;
 			}
 			ZONE_UNLOCK(z);
 			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
 			for (i = 0; i < mp_maxid + 1; i++)
 				(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
 		}
 	}
 	rw_runlock(&uma_rwlock);
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	free(ups, M_TEMP);
 	return (error);
 }
 
 int
 sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone = *(uma_zone_t *)arg1;
 	int error, max;
 
 	max = uma_zone_get_max(zone);
 	error = sysctl_handle_int(oidp, &max, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	uma_zone_set_max(zone, max);
 
 	return (0);
 }
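 
 /*
  * Illustrative (hypothetical) consumer of the handler above: a subsystem
  * can expose its zone's limit as a writable sysctl by pointing arg1 at a
  * uma_zone_t variable, e.g.:
  *
  *	SYSCTL_PROC(_kern_example, OID_AUTO, zone_max,
  *	    CTLTYPE_INT | CTLFLAG_RW, &example_zone, 0,
  *	    sysctl_handle_uma_zone_max, "I", "Maximum example zone items");
  */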
 
 int
 sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
 {
 	uma_zone_t zone = *(uma_zone_t *)arg1;
 	int cur;
 
 	cur = uma_zone_get_cur(zone);
 	return (sysctl_handle_int(oidp, &cur, 0, req));
 }
 
 #ifdef INVARIANTS
 static uma_slab_t
 uma_dbg_getslab(uma_zone_t zone, void *item)
 {
 	uma_slab_t slab;
 	uma_keg_t keg;
 	uint8_t *mem;
 
 	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
 	if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
 		slab = vtoslab((vm_offset_t)mem);
 	} else {
 		/*
 		 * It is safe to return the slab here even though the
 		 * zone is unlocked because the item's allocation state
 		 * essentially holds a reference.
 		 */
 		ZONE_LOCK(zone);
 		keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
 		if (keg->uk_flags & UMA_ZONE_HASH)
 			slab = hash_sfind(&keg->uk_hash, mem);
 		else
 			slab = (uma_slab_t)(mem + keg->uk_pgoff);
 		ZONE_UNLOCK(zone);
 	}
 
 	return (slab);
 }
 
 static bool
 uma_dbg_zskip(uma_zone_t zone, void *mem)
 {
 	uma_keg_t keg;
 
 	if ((keg = zone_first_keg(zone)) == NULL)
 		return (true);
 
 	return (uma_dbg_kskip(keg, mem));
 }
 
 static bool
 uma_dbg_kskip(uma_keg_t keg, void *mem)
 {
 	uintptr_t idx;
 
 	if (dbg_divisor == 0)
 		return (true);
 
 	if (dbg_divisor == 1)
 		return (false);
 
 	idx = (uintptr_t)mem >> PAGE_SHIFT;
 	if (keg->uk_ipers > 1) {
 		idx *= keg->uk_ipers;
 		idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize;
 	}
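 
 	/*
 	 * Illustrative example (hypothetical tuning): with dbg_divisor == 3,
 	 * roughly one out of every three item indices is selected for the
 	 * full debugging treatment below; the rest are skipped and accounted
 	 * in uma_skip_cnt.
 	 */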
 
 	if ((idx / dbg_divisor) * dbg_divisor != idx) {
 		counter_u64_add(uma_skip_cnt, 1);
 		return (true);
 	}
 	counter_u64_add(uma_dbg_cnt, 1);
 
 	return (false);
 }
 
 /*
  * Set up the slab's freei data such that uma_dbg_free can function.
  */
 static void
 uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
 {
 	uma_keg_t keg;
 	int freei;
 
 	if (slab == NULL) {
 		slab = uma_dbg_getslab(zone, item);
 		if (slab == NULL) 
 			panic("uma: item %p did not belong to zone %s\n",
 			    item, zone->uz_name);
 	}
 	keg = slab->us_keg;
 	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 
 	if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
 		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, freei);
 	BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
 
 	return;
 }
 
 /*
  * Verifies freed addresses.  Checks for alignment, valid slab membership
  * and duplicate frees.
  */
 static void
 uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
 {
 	uma_keg_t keg;
 	int freei;
 
 	if (slab == NULL) {
 		slab = uma_dbg_getslab(zone, item);
 		if (slab == NULL) 
 			panic("uma: Freed item %p did not belong to zone %s\n",
 			    item, zone->uz_name);
 	}
 	keg = slab->us_keg;
 	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 
 	if (freei >= keg->uk_ipers)
 		panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, freei);
 
 	if (((freei * keg->uk_rsize) + slab->us_data) != item) 
 		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, freei);
 
 	if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
 		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
 		    item, zone, zone->uz_name, slab, freei);
 
 	BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
 }
 #endif /* INVARIANTS */
 
 #ifdef DDB
 DB_SHOW_COMMAND(uma, db_show_uma)
 {
 	uma_bucket_t bucket;
 	uma_keg_t kz;
 	uma_zone_t z;
 	uma_zone_domain_t zdom;
 	uint64_t allocs, frees, sleeps;
 	int cachefree, i;
 
 	db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
 	    "Free", "Requests", "Sleeps", "Bucket");
 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 			if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
 				allocs = z->uz_allocs;
 				frees = z->uz_frees;
 				sleeps = z->uz_sleeps;
 				cachefree = 0;
 			} else
 				uma_zone_sumstat(z, &cachefree, &allocs,
 				    &frees, &sleeps);
 			if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
 			    (LIST_FIRST(&kz->uk_zones) != z)))
 				cachefree += kz->uk_free;
 			for (i = 0; i < vm_ndomains; i++) {
 				zdom = &z->uz_domain[i];
 				LIST_FOREACH(bucket, &zdom->uzd_buckets,
 				    ub_link)
 					cachefree += bucket->ub_cnt;
 			}
 			db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
 			    z->uz_name, (uintmax_t)kz->uk_size,
 			    (intmax_t)(allocs - frees), cachefree,
 			    (uintmax_t)allocs, sleeps, z->uz_count);
 			if (db_pager_quit)
 				return;
 		}
 	}
 }
 
 DB_SHOW_COMMAND(umacache, db_show_umacache)
 {
 	uma_bucket_t bucket;
 	uma_zone_t z;
 	uma_zone_domain_t zdom;
 	uint64_t allocs, frees;
 	int cachefree, i;
 
 	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
 	    "Requests", "Bucket");
 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
 		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
 		for (i = 0; i < vm_ndomains; i++) {
 			zdom = &z->uz_domain[i];
 			LIST_FOREACH(bucket, &zdom->uzd_buckets, ub_link)
 				cachefree += bucket->ub_cnt;
 		}
 		db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
 		    z->uz_name, (uintmax_t)z->uz_size,
 		    (intmax_t)(allocs - frees), cachefree,
 		    (uintmax_t)allocs, z->uz_count);
 		if (db_pager_quit)
 			return;
 	}
 }
 #endif	/* DDB */
Index: head/sys/vm/vm_extern.h
===================================================================
--- head/sys/vm/vm_extern.h	(revision 338317)
+++ head/sys/vm/vm_extern.h	(revision 338318)
@@ -1,131 +1,131 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vm_extern.h	8.2 (Berkeley) 1/12/94
  * $FreeBSD$
  */
 
 #ifndef _VM_EXTERN_H_
 #define	_VM_EXTERN_H_
 
 struct pmap;
 struct proc;
 struct vmspace;
 struct vnode;
 struct vmem;
 
 #ifdef _KERNEL
 struct cdev;
 struct cdevsw;
 
 /* These operate on kernel virtual addresses only. */
 vm_offset_t kva_alloc(vm_size_t);
 void kva_free(vm_offset_t, vm_size_t);
 
 /* These operate on pageable virtual addresses. */
 vm_offset_t kmap_alloc_wait(vm_map_t, vm_size_t);
 void kmap_free_wakeup(vm_map_t, vm_offset_t, vm_size_t);
 
 /* These operate on virtual addresses backed by memory. */
 vm_offset_t kmem_alloc_attr(vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
 vm_offset_t kmem_alloc_attr_domain(int domain, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
 vm_offset_t kmem_alloc_contig(vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr);
 vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr);
 vm_offset_t kmem_malloc(vm_size_t size, int flags);
 vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags);
-void kmem_free(struct vmem *, vm_offset_t, vm_size_t);
+void kmem_free(vm_offset_t addr, vm_size_t size);
 
 /* This provides memory for previously allocated address space. */
 int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int);
 int kmem_back_domain(int, vm_object_t, vm_offset_t, vm_size_t, int);
 void kmem_unback(vm_object_t, vm_offset_t, vm_size_t);
 
 /* Bootstrapping. */
 void kmem_bootstrap_free(vm_offset_t, vm_size_t);
 vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t,
     boolean_t);
 void kmem_init(vm_offset_t, vm_offset_t);
 void kmem_init_zero_region(void);
 void kmeminit(void);
 
 int kernacc(void *, int, int);
 int useracc(void *, int, int);
 int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
 void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
     vm_ooffset_t *);
 int vm_fault_disable_pagefaults(void);
 void vm_fault_enable_pagefaults(int save);
 int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
 int vm_forkproc(struct thread *, struct proc *, struct thread *,
     struct vmspace *, int);
 void vm_waitproc(struct proc *);
 int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int,
     objtype_t, void *, vm_ooffset_t);
 int vm_mmap_object(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t,
     vm_prot_t, int, vm_object_t, vm_ooffset_t, boolean_t, struct thread *);
 int vm_mmap_to_errno(int rv);
 int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
     int *, struct cdev *, struct cdevsw *, vm_ooffset_t *, vm_object_t *);
 int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *,
     struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
 void vm_set_page_size(void);
 void vm_sync_icache(vm_map_t, vm_offset_t, vm_size_t);
 typedef int (*pmap_pinit_t)(struct pmap *pmap);
 struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t, pmap_pinit_t);
 struct vmspace *vmspace_fork(struct vmspace *, vm_ooffset_t *);
 int vmspace_exec(struct proc *, vm_offset_t, vm_offset_t);
 int vmspace_unshare(struct proc *);
 void vmspace_exit(struct thread *);
 struct vmspace *vmspace_acquire_ref(struct proc *);
 void vmspace_free(struct vmspace *);
 void vmspace_exitfree(struct proc *);
 void vmspace_switch_aio(struct vmspace *);
 void vnode_pager_setsize(struct vnode *, vm_ooffset_t);
 int vslock(void *, size_t);
 void vsunlock(void *, size_t);
 struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
 void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
 u_int vm_active_count(void);
 u_int vm_inactive_count(void);
 u_int vm_laundry_count(void);
 u_int vm_wait_count(void);
 #endif				/* _KERNEL */
 #endif				/* !_VM_EXTERN_H_ */
Index: head/sys/vm/vm_kern.c
===================================================================
--- head/sys/vm/vm_kern.c	(revision 338317)
+++ head/sys/vm/vm_kern.c	(revision 338318)
@@ -1,739 +1,736 @@
 /*-
  * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  *	Kernel memory management.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>		/* for ticks and hz */
 #include <sys/domainset.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
 #include <sys/vmem.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_domainset.h>
 #include <vm/vm_kern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_pagequeue.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 vm_map_t kernel_map;
 vm_map_t exec_map;
 vm_map_t pipe_map;
 
 const void *zero_region;
 CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);
 
 /* NB: Used by kernel debuggers. */
 const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS;
 
 u_int exec_map_entry_size;
 u_int exec_map_entries;
 
 SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD,
     SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address");
 
 SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD,
 #if defined(__arm__) || defined(__sparc64__)
     &vm_max_kernel_address, 0,
 #else
     SYSCTL_NULL_ULONG_PTR, VM_MAX_KERNEL_ADDRESS,
 #endif
     "Max kernel address");
 
 /*
  *	kva_alloc:
  *
  *	Allocate a virtual address range with no underlying object and
  *	no initial mapping to physical memory.  Any mapping from this
  *	range to physical memory must be explicitly created prior to
  *	its use, typically with pmap_qenter().  Any attempt to create
  *	a mapping on demand through vm_fault() will result in a panic. 
  */
 vm_offset_t
 kva_alloc(vm_size_t size)
 {
 	vm_offset_t addr;
 
 	size = round_page(size);
 	if (vmem_alloc(kernel_arena, size, M_BESTFIT | M_NOWAIT, &addr))
 		return (0);
 
 	return (addr);
 }
 
 /*
  *	kva_free:
  *
  *	Release a region of kernel virtual memory allocated
  *	with kva_alloc, and return the physical pages
  *	associated with that region.
  *
  *	This routine may not block on kernel maps.
  */
 void
 kva_free(vm_offset_t addr, vm_size_t size)
 {
 
 	size = round_page(size);
 	vmem_free(kernel_arena, addr, size);
 }
 
 /*
  *	Allocates a region from the kernel address map and physical pages
  *	within the specified address range to the kernel object.  Creates a
  *	wired mapping from this region to these pages, and returns the
  *	region's starting virtual address.  The allocated pages are not
  *	necessarily physically contiguous.  If M_ZERO is specified through the
  *	given flags, then the pages are zeroed before they are mapped.
  */
 vm_offset_t
 kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, vm_memattr_t memattr)
 {
 	vmem_t *vmem;
 	vm_object_t object = kernel_object;
 	vm_offset_t addr, i, offset;
 	vm_page_t m;
 	int pflags, tries;
 
 	size = round_page(size);
 	vmem = vm_dom[domain].vmd_kernel_arena;
 	if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
 		return (0);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
 	pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
 	pflags |= VM_ALLOC_NOWAIT;
 	VM_OBJECT_WLOCK(object);
 	for (i = 0; i < size; i += PAGE_SIZE) {
 		tries = 0;
 retry:
 		m = vm_page_alloc_contig_domain(object, atop(offset + i),
 		    domain, pflags, 1, low, high, PAGE_SIZE, 0, memattr);
 		if (m == NULL) {
 			VM_OBJECT_WUNLOCK(object);
 			if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
 				if (!vm_page_reclaim_contig_domain(domain,
 				    pflags, 1, low, high, PAGE_SIZE, 0) &&
 				    (flags & M_WAITOK) != 0)
 					vm_wait_domain(domain);
 				VM_OBJECT_WLOCK(object);
 				tries++;
 				goto retry;
 			}
 			kmem_unback(object, addr, i);
 			vmem_free(vmem, addr, size);
 			return (0);
 		}
 		KASSERT(vm_phys_domain(m) == domain,
 		    ("kmem_alloc_attr_domain: Domain mismatch %d != %d",
 		    vm_phys_domain(m), domain));
 		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		m->valid = VM_PAGE_BITS_ALL;
 		pmap_enter(kernel_pmap, addr + i, m, VM_PROT_RW,
 		    VM_PROT_RW | PMAP_ENTER_WIRED, 0);
 	}
 	VM_OBJECT_WUNLOCK(object);
 	return (addr);
 }
 
 vm_offset_t
 kmem_alloc_attr(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high,
     vm_memattr_t memattr)
 {
 	struct vm_domainset_iter di;
 	vm_offset_t addr;
 	int domain;
 
 	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
 	do {
 		addr = kmem_alloc_attr_domain(domain, size, flags, low, high,
 		    memattr);
 		if (addr != 0)
 			break;
 	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
 
 	return (addr);
 }
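 
 /*
  * A usage sketch: allocating wired, zeroed memory whose backing pages lie
  * below 4GB.  "size" is hypothetical; the memory is released with
  * kmem_free().
  *
  *	vm_offset_t va;
  *
  *	va = kmem_alloc_attr(size, M_WAITOK | M_ZERO, 0,
  *	    (vm_paddr_t)0xffffffff, VM_MEMATTR_DEFAULT);
  *	...
  *	kmem_free(va, size);
  */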
 
 /*
  *	Allocates a region from the kernel address map and physically
  *	contiguous pages within the specified address range to the kernel
  *	object.  Creates a wired mapping from this region to these pages, and
  *	returns the region's starting virtual address.  If M_ZERO is specified
  *	through the given flags, then the pages are zeroed before they are
  *	mapped.
  */
 vm_offset_t
 kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr)
 {
 	vmem_t *vmem;
 	vm_object_t object = kernel_object;
 	vm_offset_t addr, offset, tmp;
 	vm_page_t end_m, m;
 	u_long npages;
 	int pflags, tries;
  
 	size = round_page(size);
 	vmem = vm_dom[domain].vmd_kernel_arena;
 	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
 		return (0);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
 	pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
 	pflags |= VM_ALLOC_NOWAIT;
 	npages = atop(size);
 	VM_OBJECT_WLOCK(object);
 	tries = 0;
 retry:
 	m = vm_page_alloc_contig_domain(object, atop(offset), domain, pflags,
 	    npages, low, high, alignment, boundary, memattr);
 	if (m == NULL) {
 		VM_OBJECT_WUNLOCK(object);
 		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
 			if (!vm_page_reclaim_contig_domain(domain, pflags,
 			    npages, low, high, alignment, boundary) &&
 			    (flags & M_WAITOK) != 0)
 				vm_wait_domain(domain);
 			VM_OBJECT_WLOCK(object);
 			tries++;
 			goto retry;
 		}
 		vmem_free(vmem, addr, size);
 		return (0);
 	}
 	KASSERT(vm_phys_domain(m) == domain,
 	    ("kmem_alloc_contig_domain: Domain mismatch %d != %d",
 	    vm_phys_domain(m), domain));
 	end_m = m + npages;
 	tmp = addr;
 	for (; m < end_m; m++) {
 		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		m->valid = VM_PAGE_BITS_ALL;
 		pmap_enter(kernel_pmap, tmp, m, VM_PROT_RW,
 		    VM_PROT_RW | PMAP_ENTER_WIRED, 0);
 		tmp += PAGE_SIZE;
 	}
 	VM_OBJECT_WUNLOCK(object);
 	return (addr);
 }
 
 vm_offset_t
 kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr)
 {
 	struct vm_domainset_iter di;
 	vm_offset_t addr;
 	int domain;
 
 	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
 	do {
 		addr = kmem_alloc_contig_domain(domain, size, flags, low, high,
 		    alignment, boundary, memattr);
 		if (addr != 0)
 			break;
 	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
 
 	return (addr);
 }
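 
 /*
  * A usage sketch: allocating a physically contiguous, page-aligned buffer
  * with no boundary restriction.  "size" is hypothetical.
  *
  *	vm_offset_t va;
  *
  *	va = kmem_alloc_contig(size, M_WAITOK | M_ZERO, 0, ~(vm_paddr_t)0,
  *	    PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
  *	...
  *	kmem_free(va, size);
  */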
 
 /*
  *	kmem_suballoc:
  *
  *	Allocates a map to manage a subrange
  *	of the kernel virtual address space.
  *
  *	Arguments are as follows:
  *
  *	parent		Map to take range from
  *	min, max	Returned endpoints of map
  *	size		Size of range to find
  *	superpage_align	Request that min is superpage aligned
  */
 vm_map_t
 kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
     vm_size_t size, boolean_t superpage_align)
 {
 	int ret;
 	vm_map_t result;
 
 	size = round_page(size);
 
 	*min = vm_map_min(parent);
 	ret = vm_map_find(parent, NULL, 0, min, size, 0, superpage_align ?
 	    VMFS_SUPER_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
 	    MAP_ACC_NO_CHARGE);
 	if (ret != KERN_SUCCESS)
 		panic("kmem_suballoc: bad status return of %d", ret);
 	*max = *min + size;
 	result = vm_map_create(vm_map_pmap(parent), *min, *max);
 	if (result == NULL)
 		panic("kmem_suballoc: cannot create submap");
 	if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
 		panic("kmem_suballoc: unable to change range to submap");
 	return (result);
 }
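 
 /*
  * A usage sketch: submaps such as exec_map and pipe_map, declared above,
  * are carved out of kernel_map with this routine during startup.  A call
  * generally takes the following form, where "size" is hypothetical.
  *
  *	vm_offset_t minaddr, maxaddr;
  *
  *	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, size,
  *	    FALSE);
  */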
 
 /*
  *	kmem_malloc:
  *
  *	Allocate wired-down pages in the kernel's address space.
  */
 vm_offset_t
 kmem_malloc_domain(int domain, vm_size_t size, int flags)
 {
 	vmem_t *arena;
 	vm_offset_t addr;
 	int rv;
 
 #if VM_NRESERVLEVEL > 0
 	if (__predict_true((flags & M_EXEC) == 0))
 		arena = vm_dom[domain].vmd_kernel_arena;
 	else
 		arena = vm_dom[domain].vmd_kernel_rwx_arena;
 #else
 	arena = vm_dom[domain].vmd_kernel_arena;
 #endif
 	size = round_page(size);
 	if (vmem_alloc(arena, size, flags | M_BESTFIT, &addr))
 		return (0);
 
 	rv = kmem_back_domain(domain, kernel_object, addr, size, flags);
 	if (rv != KERN_SUCCESS) {
 		vmem_free(arena, addr, size);
 		return (0);
 	}
 	return (addr);
 }
 
 vm_offset_t
 kmem_malloc(vm_size_t size, int flags)
 {
 	struct vm_domainset_iter di;
 	vm_offset_t addr;
 	int domain;
 
 	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
 	do {
 		addr = kmem_malloc_domain(domain, size, flags);
 		if (addr != 0)
 			break;
 	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
 
 	return (addr);
 }
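 
 /*
  * A usage sketch: the common pattern pairs kmem_malloc() with kmem_free().
  * "size" is hypothetical.
  *
  *	vm_offset_t va;
  *
  *	va = kmem_malloc(size, M_WAITOK | M_ZERO);
  *	...
  *	kmem_free(va, size);
  */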
 
 /*
  *	kmem_back:
  *
  *	Allocate physical pages for the specified virtual address range.
  */
 int
 kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
     vm_size_t size, int flags)
 {
 	vm_offset_t offset, i;
 	vm_page_t m, mpred;
 	vm_prot_t prot;
 	int pflags;
 
 	KASSERT(object == kernel_object,
 	    ("kmem_back_domain: only supports kernel object."));
 
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
 	pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
 	if (flags & M_WAITOK)
 		pflags |= VM_ALLOC_WAITFAIL;
 	prot = (flags & M_EXEC) != 0 ? VM_PROT_ALL : VM_PROT_RW;
 
 	i = 0;
 	VM_OBJECT_WLOCK(object);
 retry:
 	mpred = vm_radix_lookup_le(&object->rtree, atop(offset + i));
 	for (; i < size; i += PAGE_SIZE, mpred = m) {
 		m = vm_page_alloc_domain_after(object, atop(offset + i),
 		    domain, pflags, mpred);
 
 		/*
 		 * Ran out of space, free everything up and return. Don't need
 		 * to lock page queues here as we know that the pages we got
 		 * aren't on any queues.
 		 */
 		if (m == NULL) {
 			if ((flags & M_NOWAIT) == 0)
 				goto retry;
 			VM_OBJECT_WUNLOCK(object);
 			kmem_unback(object, addr, i);
 			return (KERN_NO_SPACE);
 		}
 		KASSERT(vm_phys_domain(m) == domain,
 		    ("kmem_back_domain: Domain mismatch %d != %d",
 		    vm_phys_domain(m), domain));
 		if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
 		    ("kmem_malloc: page %p is managed", m));
 		m->valid = VM_PAGE_BITS_ALL;
 		pmap_enter(kernel_pmap, addr + i, m, prot,
 		    prot | PMAP_ENTER_WIRED, 0);
+#if VM_NRESERVLEVEL > 0
+		if (__predict_false((prot & VM_PROT_EXECUTE) != 0))
+			m->oflags |= VPO_KMEM_EXEC;
+#endif
 	}
 	VM_OBJECT_WUNLOCK(object);
 
 	return (KERN_SUCCESS);
 }
 
 int
 kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
 {
 	struct vm_domainset_iter di;
 	int domain;
 	int ret;
 
 	KASSERT(object == kernel_object,
 	    ("kmem_back: only supports kernel object."));
 
 	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
 	do {
 		ret = kmem_back_domain(domain, object, addr, size, flags);
 		if (ret == KERN_SUCCESS)
 			break;
 	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
 
 	return (ret);
 }
 
 /*
  *	kmem_unback:
  *
  *	Unmap and free the physical pages underlying the specified virtual
  *	address range.
  *
  *	A physical page must exist within the specified object at each index
  *	that is being unmapped.
  */
-static int
+static struct vmem *
 _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 {
+	struct vmem *arena;
 	vm_page_t m, next;
 	vm_offset_t end, offset;
 	int domain;
 
 	KASSERT(object == kernel_object,
 	    ("kmem_unback: only supports kernel object."));
 
 	if (size == 0)
-		return (0);
+		return (NULL);
 	pmap_remove(kernel_pmap, addr, addr + size);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	end = offset + size;
 	VM_OBJECT_WLOCK(object);
 	m = vm_page_lookup(object, atop(offset));
 	domain = vm_phys_domain(m);
+#if VM_NRESERVLEVEL > 0
+	if (__predict_true((m->oflags & VPO_KMEM_EXEC) == 0))
+		arena = vm_dom[domain].vmd_kernel_arena;
+	else
+		arena = vm_dom[domain].vmd_kernel_rwx_arena;
+#else
+	arena = vm_dom[domain].vmd_kernel_arena;
+#endif
 	for (; offset < end; offset += PAGE_SIZE, m = next) {
 		next = vm_page_next(m);
 		vm_page_unwire(m, PQ_NONE);
 		vm_page_free(m);
 	}
 	VM_OBJECT_WUNLOCK(object);
 
-	return (domain);
+	return (arena);
 }
 
 void
 kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 {
 
-	_kmem_unback(object, addr, size);
+	(void)_kmem_unback(object, addr, size);
 }
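 
 /*
  * A usage sketch: kmem_back() and kmem_unback() can be paired with
  * kva_alloc()/kva_free() to manage the backing pages of a KVA range
  * explicitly, roughly mirroring what kmem_malloc_domain() does above.
  * "size" is hypothetical.
  *
  *	vm_offset_t va;
  *
  *	size = round_page(size);
  *	va = kva_alloc(size);
  *	if (va == 0)
  *		return (ENOMEM);
  *	if (kmem_back(kernel_object, va, size, M_WAITOK) != KERN_SUCCESS) {
  *		kva_free(va, size);
  *		return (ENOMEM);
  *	}
  *	...
  *	kmem_unback(kernel_object, va, size);
  *	kva_free(va, size);
  */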
 
 /*
  *	kmem_free:
  *
  *	Free memory allocated with kmem_malloc.  The size must match the
  *	original allocation.
  */
 void
-kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
+kmem_free(vm_offset_t addr, vm_size_t size)
 {
 	struct vmem *arena;
-	int domain;
 
-#if VM_NRESERVLEVEL > 0
-	KASSERT(vmem == kernel_arena || vmem == kernel_rwx_arena,
-	    ("kmem_free: Only kernel_arena or kernel_rwx_arena are supported."));
-#else
-	KASSERT(vmem == kernel_arena,
-	    ("kmem_free: Only kernel_arena is supported."));
-#endif
-
 	size = round_page(size);
-	domain = _kmem_unback(kernel_object, addr, size);
-#if VM_NRESERVLEVEL > 0
-	if (__predict_true(vmem == kernel_arena))
-		arena = vm_dom[domain].vmd_kernel_arena;
-	else
-		arena = vm_dom[domain].vmd_kernel_rwx_arena;
-#else
-	arena = vm_dom[domain].vmd_kernel_arena;
-#endif
-	vmem_free(arena, addr, size);
+	arena = _kmem_unback(kernel_object, addr, size);
+	if (arena != NULL)
+		vmem_free(arena, addr, size);
 }
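 
 /*
  * With the interface above, callers no longer pass an arena to kmem_free();
  * when reservations are configured, the correct arena is recovered from the
  * VPO_KMEM_EXEC flag on the backing pages.  A sketch of an executable
  * allocation, with a hypothetical "size":
  *
  *	vm_offset_t va;
  *
  *	va = kmem_malloc(size, M_EXEC | M_WAITOK);
  *	...
  *	kmem_free(va, size);
  */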
 
 /*
  *	kmap_alloc_wait:
  *
  *	Allocates pageable memory from a sub-map of the kernel.  If the submap
  *	has no room, the caller sleeps waiting for more memory in the submap.
  *
  *	This routine may block.
  */
 vm_offset_t
 kmap_alloc_wait(vm_map_t map, vm_size_t size)
 {
 	vm_offset_t addr;
 
 	size = round_page(size);
 	if (!swap_reserve(size))
 		return (0);
 
 	for (;;) {
 		/*
 		 * To make this work for more than one map, use the map's lock
 		 * to lock out sleepers/wakers.
 		 */
 		vm_map_lock(map);
 		if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0)
 			break;
 		/* no space now; see if we can ever get space */
 		if (vm_map_max(map) - vm_map_min(map) < size) {
 			vm_map_unlock(map);
 			swap_release(size);
 			return (0);
 		}
 		map->needs_wakeup = TRUE;
 		vm_map_unlock_and_wait(map, 0);
 	}
 	vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL,
 	    VM_PROT_ALL, MAP_ACC_CHARGED);
 	vm_map_unlock(map);
 	return (addr);
 }
 
 /*
  *	kmap_free_wakeup:
  *
  *	Returns memory to a submap of the kernel, and wakes up any processes
  *	waiting for memory in that map.
  */
 void
 kmap_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size)
 {
 
 	vm_map_lock(map);
 	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
 	if (map->needs_wakeup) {
 		map->needs_wakeup = FALSE;
 		vm_map_wakeup(map);
 	}
 	vm_map_unlock(map);
 }
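 
 /*
  * A usage sketch: consumers of a pageable submap pair these two routines.
  * "submap" and "size" are hypothetical.
  *
  *	vm_offset_t va;
  *
  *	va = kmap_alloc_wait(submap, size);
  *	if (va == 0)
  *		return (ENOMEM);
  *	...
  *	kmap_free_wakeup(submap, va, size);
  */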
 
 void
 kmem_init_zero_region(void)
 {
 	vm_offset_t addr, i;
 	vm_page_t m;
 
 	/*
 	 * Map a single physical page of zeros to a larger virtual range.
 	 * This requires less looping in places that want large amounts of
 	 * zeros, while not using much more physical resources.
 	 */
 	addr = kva_alloc(ZERO_REGION_SIZE);
 	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 	if ((m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
 	for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE)
 		pmap_qenter(addr + i, &m, 1);
 	pmap_protect(kernel_pmap, addr, addr + ZERO_REGION_SIZE, VM_PROT_READ);
 
 	zero_region = (const void *)addr;
 }
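 
 /*
  * A usage sketch: zero_region supplies up to ZERO_REGION_SIZE bytes of
  * zeros at a time without consuming extra physical memory, e.g. when
  * copying zeros out via a hypothetical "uio":
  *
  *	while (uio->uio_resid > 0) {
  *		len = MIN(uio->uio_resid, ZERO_REGION_SIZE);
  *		error = uiomove(__DECONST(void *, zero_region), len, uio);
  *		if (error != 0)
  *			break;
  *	}
  */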
 
 /*
  * 	kmem_init:
  *
  *	Create the kernel map; insert a mapping covering kernel text,
  *	data, bss, and all space allocated thus far (`bootstrap' data).  The
  *	new map will thus map the range between VM_MIN_KERNEL_ADDRESS and
  *	`start' as allocated, and the range between `start' and `end' as free.
  */
 void
 kmem_init(vm_offset_t start, vm_offset_t end)
 {
 	vm_map_t m;
 
 	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
 	m->system_map = 1;
 	vm_map_lock(m);
 	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
 	kernel_map = m;
 	(void) vm_map_insert(m, NULL, (vm_ooffset_t) 0,
 #ifdef __amd64__
 	    KERNBASE,
 #else		     
 	    VM_MIN_KERNEL_ADDRESS,
 #endif
 	    start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
 	/* ... and ending with the completion of the above `insert' */
 	vm_map_unlock(m);
 }
 
 /*
  *	kmem_bootstrap_free:
  *
  *	Free pages backing preloaded data (e.g., kernel modules) to the
  *	system.  Currently only supported on platforms that create a
  *	vm_phys segment for preloaded data.
  */
 void
 kmem_bootstrap_free(vm_offset_t start, vm_size_t size)
 {
 #if defined(__i386__) || defined(__amd64__)
 	struct vm_domain *vmd;
 	vm_offset_t end, va;
 	vm_paddr_t pa;
 	vm_page_t m;
 
 	end = trunc_page(start + size);
 	start = round_page(start);
 
 	for (va = start; va < end; va += PAGE_SIZE) {
 		pa = pmap_kextract(va);
 		m = PHYS_TO_VM_PAGE(pa);
 
 		vmd = vm_pagequeue_domain(m);
 		vm_domain_free_lock(vmd);
 		vm_phys_free_pages(m, 0);
 		vmd->vmd_page_count++;
 		vm_domain_free_unlock(vmd);
 
 		vm_domain_freecnt_inc(vmd, 1);
 		vm_cnt.v_page_count++;
 	}
 	pmap_remove(kernel_pmap, start, end);
 	(void)vmem_add(kernel_arena, start, end - start, M_WAITOK);
 #endif
 }
 
 #ifdef DIAGNOSTIC
 /*
  * Allow userspace to directly trigger the VM drain routine for testing
  * purposes.
  */
 static int
 debug_vm_lowmem(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	i = 0;
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error)
 		return (error);
 	if ((i & ~(VM_LOW_KMEM | VM_LOW_PAGES)) != 0)
 		return (EINVAL);
 	if (i != 0)
 		EVENTHANDLER_INVOKE(vm_lowmem, i);
 	return (0);
 }
 
 SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_RW, 0, 0,
     debug_vm_lowmem, "I", "set to trigger vm_lowmem event with given flags");
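 
 /*
  * On a kernel built with DIAGNOSTIC, the event can be triggered from
  * userland by writing a combination of the VM_LOW_KMEM and VM_LOW_PAGES
  * flag values, e.g. via sysctl(8):
  *
  *	# sysctl debug.vm_lowmem=<flags>
  */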
 #endif
Index: head/sys/vm/vm_page.h
===================================================================
--- head/sys/vm/vm_page.h	(revision 338317)
+++ head/sys/vm/vm_page.h	(revision 338318)
@@ -1,840 +1,840 @@
 /*-
  * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_page.h	8.2 (Berkeley) 12/13/93
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
  * $FreeBSD$
  */
 
 /*
  *	Resident memory system definitions.
  */
 
 #ifndef	_VM_PAGE_
 #define	_VM_PAGE_
 
 #include <vm/pmap.h>
 
 /*
  *	Management of resident (logical) pages.
  *
  *	A small structure is kept for each resident
  *	page, indexed by page number.  Each structure
  *	is an element of several collections:
  *
  *		A radix tree used to quickly
  *		perform object/offset lookups
  *
  *		A list of all pages for a given object,
  *		so they can be quickly deactivated at
  *		time of deallocation.
  *
  *		An ordered list of pages due for pageout.
  *
  *	In addition, the structure contains the object
  *	and offset to which this page belongs (for pageout),
  *	and sundry status bits.
  *
  *	In general, operations on this structure's mutable fields are
  *	synchronized using either one of or a combination of the lock on the
  *	object that the page belongs to (O), the page lock (P),
  *	the per-domain lock for the free queues (F), or the page's queue
  *	lock (Q).  The physical address of a page is used to select its page
  *	lock from a pool.  The queue lock for a page depends on the value of
  *	its queue field and described in detail below.  If a field is
  *	its queue field and is described in detail below.  If a field is
  *	sufficient for read access, but both locks are required for write
  *	access.  An annotation of (C) indicates that the field is immutable.
  *
  *	In contrast, the synchronization of accesses to the page's
  *	dirty field is machine dependent (M).  In the
  *	machine-independent layer, the lock on the object that the
  *	page belongs to must be held in order to operate on the field.
  *	However, the pmap layer is permitted to set all bits within
  *	the field without holding that lock.  If the underlying
  *	architecture does not support atomic read-modify-write
  *	operations on the field's type, then the machine-independent
  *	layer uses a 32-bit atomic on the aligned 32-bit word that
  *	contains the dirty field.  In the machine-independent layer,
  *	the implementation of read-modify-write operations on the
  *	field is encapsulated in vm_page_clear_dirty_mask().
  *
  *	The page structure contains two counters which prevent page reuse.
  *	Both counters are protected by the page lock (P).  The hold
  *	counter counts transient references obtained via a pmap lookup, and
  *	is also used to prevent page reclamation in situations where it is
  *	undesirable to block other accesses to the page.  The wire counter
  *	is used to implement mlock(2) and is non-zero for pages containing
  *	kernel memory.  Pages that are wired or held will not be reclaimed
  *	or laundered by the page daemon, but are treated differently during
  *	a page queue scan: held pages remain at their position in the queue,
  *	while wired pages are removed from the queue and must later be
  *	re-enqueued appropriately by the unwiring thread.  It is legal to
  *	call vm_page_free() on a held page; doing so causes it to be removed
  *	from its object and page queue, and the page is released to the
  *	allocator once the last hold reference is dropped.  In contrast,
  *	wired pages may not be freed.
  *
  *	In some pmap implementations, the wire count of a page table page is
  *	used to track the number of populated entries.
  *
  *	The busy lock is an embedded reader-writer lock which protects the
  *	page's contents and identity (i.e., its <object, pindex> tuple) and
  *	interlocks with the object lock (O).  In particular, a page may be
  *	busied or unbusied only with the object write lock held.  To avoid
  *	bloating the page structure, the busy lock lacks some of the
  *	features available to the kernel's general-purpose synchronization
  *	primitives.  As a result, busy lock ordering rules are not verified,
  *	lock recursion is not detected, and an attempt to xbusy a busy page
  *	or sbusy an xbusy page will trigger a panic rather than
  *	causing the thread to block.  vm_page_sleep_if_busy() can be used to
  *	sleep until the page's busy state changes, after which the caller
  *	must re-lookup the page and re-evaluate its state.
  *
  *	The queue field is the index of the page queue containing the
  *	page, or PQ_NONE if the page is not enqueued.  The queue lock of a
  *	page is the page queue lock corresponding to the page queue index,
  *	or the page lock (P) for the page if it is not enqueued.  To modify
  *	the queue field, the queue lock for the old value of the field must
  *	be held.  It is invalid for a page's queue field to transition
  *	between two distinct page queue indices.  That is, when updating
  *	the queue field, either the new value or the old value must be
  *	PQ_NONE.
  *
  *	To avoid contention on page queue locks, page queue operations
  *	(enqueue, dequeue, requeue) are batched using per-CPU queues.
  *	A deferred operation is requested by inserting an entry into a
  *	batch queue; the entry is simply a pointer to the page, and the
  *	request type is encoded in the page's aflags field using the values
  *	in PGA_QUEUE_STATE_MASK.  The type-stability of struct vm_page is
  *	crucial to this scheme since the processing of entries in a given
  *	batch queue may be deferred indefinitely.  In particular, a page
  *	may be freed before its pending batch queue entries have been
  *	processed.  The page lock (P) must be held to schedule a batched
  *	queue operation, and the page queue lock must be held in order to
  *	process batch queue entries for the page queue.
  */
 
 #if PAGE_SIZE == 4096
 #define VM_PAGE_BITS_ALL 0xffu
 typedef uint8_t vm_page_bits_t;
 #elif PAGE_SIZE == 8192
 #define VM_PAGE_BITS_ALL 0xffffu
 typedef uint16_t vm_page_bits_t;
 #elif PAGE_SIZE == 16384
 #define VM_PAGE_BITS_ALL 0xffffffffu
 typedef uint32_t vm_page_bits_t;
 #elif PAGE_SIZE == 32768
 #define VM_PAGE_BITS_ALL 0xfffffffffffffffflu
 typedef uint64_t vm_page_bits_t;
 #endif
 
 struct vm_page {
 	union {
 		TAILQ_ENTRY(vm_page) q; /* page queue or free list (Q) */
 		struct {
 			SLIST_ENTRY(vm_page) ss; /* private slists */
 			void *pv;
 		} s;
 		struct {
 			u_long p;
 			u_long v;
 		} memguard;
 	} plinks;
 	TAILQ_ENTRY(vm_page) listq;	/* pages in same object (O) */
 	vm_object_t object;		/* which object am I in (O,P) */
 	vm_pindex_t pindex;		/* offset into object (O,P) */
 	vm_paddr_t phys_addr;		/* physical address of page (C) */
 	struct md_page md;		/* machine dependent stuff */
 	u_int wire_count;		/* wired down maps refs (P) */
 	volatile u_int busy_lock;	/* busy owners lock */
 	uint16_t hold_count;		/* page hold count (P) */
 	uint16_t flags;			/* page PG_* flags (P) */
 	uint8_t aflags;			/* access is atomic */
 	uint8_t oflags;			/* page VPO_* flags (O) */
 	uint8_t queue;			/* page queue index (Q) */
 	int8_t psind;			/* pagesizes[] index (O) */
 	int8_t segind;			/* vm_phys segment index (C) */
 	uint8_t	order;			/* index of the buddy queue (F) */
 	uint8_t pool;			/* vm_phys freepool index (F) */
 	u_char	act_count;		/* page usage count (P) */
 	/* NOTE that these must support one bit per DEV_BSIZE in a page */
 	/* so, on normal X86 kernels, they must be at least 8 bits wide */
 	vm_page_bits_t valid;		/* map of valid DEV_BSIZE chunks (O) */
 	vm_page_bits_t dirty;		/* map of dirty DEV_BSIZE chunks (M) */
 };
 
 /*
  * Page flags stored in oflags:
  *
  * Access to these page flags is synchronized by the lock on the object
  * containing the page (O).
  *
  * Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG)
  * 	 indicates that the page is not under PV management but
  * 	 otherwise should be treated as a normal page.  Pages not
  * 	 under PV management cannot be paged out via the
  * 	 object/vm_page_t because there is no knowledge of their pte
  * 	 mappings, and such pages are also not on any PQ queue.
  *
  */
-#define	VPO_UNUSED01	0x01		/* --available-- */
+#define	VPO_KMEM_EXEC	0x01		/* kmem mapping allows execution */
 #define	VPO_SWAPSLEEP	0x02		/* waiting for swap to finish */
 #define	VPO_UNMANAGED	0x04		/* no PV management for page */
 #define	VPO_SWAPINPROG	0x08		/* swap I/O in progress on page */
 #define	VPO_NOSYNC	0x10		/* do not collect for syncer */
 
 /*
  * Busy page implementation details.
  * The algorithm is taken mostly from the rwlock(9) and sx(9) lock
  * implementations, although support for owner identity is omitted because
  * of size constraints.  Checks on lock recursion are therefore not
  * possible, and the effectiveness of the lock assertions is somewhat
  * reduced.
  */
 #define	VPB_BIT_SHARED		0x01
 #define	VPB_BIT_EXCLUSIVE	0x02
 #define	VPB_BIT_WAITERS		0x04
 #define	VPB_BIT_FLAGMASK						\
 	(VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS)
 
 #define	VPB_SHARERS_SHIFT	3
 #define	VPB_SHARERS(x)							\
 	(((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT)
 #define	VPB_SHARERS_WORD(x)	((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED)
 #define	VPB_ONE_SHARER		(1 << VPB_SHARERS_SHIFT)
 
 #define	VPB_SINGLE_EXCLUSIVER	VPB_BIT_EXCLUSIVE
 
 #define	VPB_UNBUSIED		VPB_SHARERS_WORD(0)
 
 #define	PQ_NONE		255
 #define	PQ_INACTIVE	0
 #define	PQ_ACTIVE	1
 #define	PQ_LAUNDRY	2
 #define	PQ_UNSWAPPABLE	3
 #define	PQ_COUNT	4
 
 #ifndef VM_PAGE_HAVE_PGLIST
 TAILQ_HEAD(pglist, vm_page);
 #define VM_PAGE_HAVE_PGLIST
 #endif
 SLIST_HEAD(spglist, vm_page);
 
 #ifdef _KERNEL
 extern vm_page_t bogus_page;
 #endif	/* _KERNEL */
 
 extern struct mtx_padalign pa_lock[];
 
 #if defined(__arm__)
 #define	PDRSHIFT	PDR_SHIFT
 #elif !defined(PDRSHIFT)
 #define PDRSHIFT	21
 #endif
 
 #define	pa_index(pa)	((pa) >> PDRSHIFT)
 #define	PA_LOCKPTR(pa)	((struct mtx *)(&pa_lock[pa_index(pa) % PA_LOCK_COUNT]))
 #define	PA_LOCKOBJPTR(pa)	((struct lock_object *)PA_LOCKPTR((pa)))
 #define	PA_LOCK(pa)	mtx_lock(PA_LOCKPTR(pa))
 #define	PA_TRYLOCK(pa)	mtx_trylock(PA_LOCKPTR(pa))
 #define	PA_UNLOCK(pa)	mtx_unlock(PA_LOCKPTR(pa))
 #define	PA_UNLOCK_COND(pa) 			\
 	do {		   			\
 		if ((pa) != 0) {		\
 			PA_UNLOCK((pa));	\
 			(pa) = 0;		\
 		}				\
 	} while (0)
 
 #define	PA_LOCK_ASSERT(pa, a)	mtx_assert(PA_LOCKPTR(pa), (a))
 
 #if defined(KLD_MODULE) && !defined(KLD_TIED)
 #define	vm_page_lock(m)		vm_page_lock_KBI((m), LOCK_FILE, LOCK_LINE)
 #define	vm_page_unlock(m)	vm_page_unlock_KBI((m), LOCK_FILE, LOCK_LINE)
 #define	vm_page_trylock(m)	vm_page_trylock_KBI((m), LOCK_FILE, LOCK_LINE)
 #else	/* !KLD_MODULE */
 #define	vm_page_lockptr(m)	(PA_LOCKPTR(VM_PAGE_TO_PHYS((m))))
 #define	vm_page_lock(m)		mtx_lock(vm_page_lockptr((m)))
 #define	vm_page_unlock(m)	mtx_unlock(vm_page_lockptr((m)))
 #define	vm_page_trylock(m)	mtx_trylock(vm_page_lockptr((m)))
 #endif
 #if defined(INVARIANTS)
 #define	vm_page_assert_locked(m)		\
     vm_page_assert_locked_KBI((m), __FILE__, __LINE__)
 #define	vm_page_lock_assert(m, a)		\
     vm_page_lock_assert_KBI((m), (a), __FILE__, __LINE__)
 #else
 #define	vm_page_assert_locked(m)
 #define	vm_page_lock_assert(m, a)
 #endif
 
 /*
  * The vm_page's aflags are updated using atomic operations.  To set or clear
  * these flags, the functions vm_page_aflag_set() and vm_page_aflag_clear()
  * must be used.  Neither these flags nor these functions are part of the KBI.
  *
  * PGA_REFERENCED may be cleared only if the page is locked.  It is set by
  * both the MI and MD VM layers.  However, kernel loadable modules should not
  * directly set this flag.  They should call vm_page_reference() instead.
  *
  * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter().
  * When it does so, the object must be locked, or the page must be
  * exclusive busied.  The MI VM layer must never access this flag
  * directly.  Instead, it should call pmap_page_is_write_mapped().
  *
  * PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has
  * at least one executable mapping.  It is not consumed by the MI VM layer.
  *
  * PGA_ENQUEUED is set and cleared when a page is inserted into or removed
  * from a page queue, respectively.  It determines whether the plinks.q field
  * of the page is valid.  To set or clear this flag, the queue lock for the
  * page must be held: the page queue lock corresponding to the page's "queue"
  * field if its value is not PQ_NONE, and the page lock otherwise.
  *
  * PGA_DEQUEUE is set when the page is scheduled to be dequeued from a page
  * queue, and cleared when the dequeue request is processed.  A page may
  * have PGA_DEQUEUE set and PGA_ENQUEUED cleared, for instance if a dequeue
  * is requested after the page is scheduled to be enqueued but before it is
  * actually inserted into the page queue.  The page lock must be held to set
  * this flag, and the queue lock for the page must be held to clear it.
  *
  * PGA_REQUEUE is set when the page is scheduled to be enqueued or requeued
  * in its page queue.  The page lock must be held to set this flag, and the
  * queue lock for the page must be held to clear it.
  *
  * PGA_REQUEUE_HEAD is a special flag for enqueuing pages near the head of
  * the inactive queue, thus bypassing LRU.  The page lock must be held to
  * set this flag, and the queue lock for the page must be held to clear it.
  */
 #define	PGA_WRITEABLE	0x01		/* page may be mapped writeable */
 #define	PGA_REFERENCED	0x02		/* page has been referenced */
 #define	PGA_EXECUTABLE	0x04		/* page may be mapped executable */
 #define	PGA_ENQUEUED	0x08		/* page is enqueued in a page queue */
 #define	PGA_DEQUEUE	0x10		/* page is due to be dequeued */
 #define	PGA_REQUEUE	0x20		/* page is due to be requeued */
 #define	PGA_REQUEUE_HEAD 0x40		/* page requeue should bypass LRU */
 
 #define	PGA_QUEUE_STATE_MASK	(PGA_ENQUEUED | PGA_DEQUEUE | PGA_REQUEUE | \
 				PGA_REQUEUE_HEAD)
 
 /*
  * Page flags.  If changed at any other time than page allocation or
  * freeing, the modification must be protected by the vm_page lock.
  */
 #define	PG_FICTITIOUS	0x0004		/* physical page doesn't exist */
 #define	PG_ZERO		0x0008		/* page is zeroed */
 #define	PG_MARKER	0x0010		/* special queue marker page */
 #define	PG_NODUMP	0x0080		/* don't include this page in a dump */
 #define	PG_UNHOLDFREE	0x0100		/* delayed free of a held page */
 
 /*
  * Misc constants.
  */
 #define ACT_DECLINE		1
 #define ACT_ADVANCE		3
 #define ACT_INIT		5
 #define ACT_MAX			64
 
 #ifdef _KERNEL
 
 #include <sys/systm.h>
 
 #include <machine/atomic.h>
 
 /*
  * Each pageable resident page falls into one of five lists:
  *
  *	free
  *		Available for allocation now.
  *
  *	inactive
  *		Low activity, candidates for reclamation.
  *		This list is approximately LRU ordered.
  *
  *	laundry
  *		This is the list of pages that should be
  *		paged out next.
  *
  *	unswappable
  *		Dirty anonymous pages that cannot be paged
  *		out because no swap device is configured.
  *
  *	active
  *		Pages that are "active", i.e., they have been
  *		recently referenced.
  *
  */
 
 extern vm_page_t vm_page_array;		/* First resident page in table */
 extern long vm_page_array_size;		/* number of vm_page_t's */
 extern long first_page;			/* first physical page number */
 
 #define VM_PAGE_TO_PHYS(entry)	((entry)->phys_addr)
 
 /*
  * PHYS_TO_VM_PAGE() returns the vm_page_t object that represents a memory
  * page to which the given physical address belongs. The correct vm_page_t
  * object is returned for addresses that are not page-aligned.
  */
 vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
 
 /*
  * Page allocation parameters for vm_page for the functions
  * vm_page_alloc(), vm_page_grab(), vm_page_alloc_contig() and
  * vm_page_alloc_freelist().  Some functions support only a subset
  * of the flags and ignore the others; see the flags legend.
  *
  * The meaning of VM_ALLOC_ZERO differs slightly between the vm_page_alloc*()
  * and the vm_page_grab*() functions.  See these functions for details.
  *
  * Bits 0 - 1 define class.
  * Bits 2 - 15 dedicated for flags.
  * Legend:
  * (a) - vm_page_alloc() supports the flag.
  * (c) - vm_page_alloc_contig() supports the flag.
  * (f) - vm_page_alloc_freelist() supports the flag.
  * (g) - vm_page_grab() supports the flag.
  * (p) - vm_page_grab_pages() supports the flag.
  * Bits above 15 define the count of additional pages that the caller
  * intends to allocate.
  */
 #define VM_ALLOC_NORMAL		0
 #define VM_ALLOC_INTERRUPT	1
 #define VM_ALLOC_SYSTEM		2
 #define	VM_ALLOC_CLASS_MASK	3
 #define	VM_ALLOC_WAITOK		0x0008	/* (acf) Sleep and retry */
 #define	VM_ALLOC_WAITFAIL	0x0010	/* (acf) Sleep and return error */
 #define	VM_ALLOC_WIRED		0x0020	/* (acfgp) Allocate a wired page */
 #define	VM_ALLOC_ZERO		0x0040	/* (acfgp) Allocate a prezeroed page */
 #define	VM_ALLOC_NOOBJ		0x0100	/* (acg) No associated object */
 #define	VM_ALLOC_NOBUSY		0x0200	/* (acgp) Do not excl busy the page */
 #define	VM_ALLOC_IGN_SBUSY	0x1000	/* (gp) Ignore shared busy flag */
 #define	VM_ALLOC_NODUMP		0x2000	/* (ag) don't include in dump */
 #define	VM_ALLOC_SBUSY		0x4000	/* (acgp) Shared busy the page */
 #define	VM_ALLOC_NOWAIT		0x8000	/* (acfgp) Do not sleep */
 #define	VM_ALLOC_COUNT_SHIFT	16
 #define	VM_ALLOC_COUNT(count)	((count) << VM_ALLOC_COUNT_SHIFT)
 
 #ifdef M_NOWAIT
 static inline int
 malloc2vm_flags(int malloc_flags)
 {
 	int pflags;
 
 	KASSERT((malloc_flags & M_USE_RESERVE) == 0 ||
 	    (malloc_flags & M_NOWAIT) != 0,
 	    ("M_USE_RESERVE requires M_NOWAIT"));
 	pflags = (malloc_flags & M_USE_RESERVE) != 0 ? VM_ALLOC_INTERRUPT :
 	    VM_ALLOC_SYSTEM;
 	if ((malloc_flags & M_ZERO) != 0)
 		pflags |= VM_ALLOC_ZERO;
 	if ((malloc_flags & M_NODUMP) != 0)
 		pflags |= VM_ALLOC_NODUMP;
 	if ((malloc_flags & M_NOWAIT))
 		pflags |= VM_ALLOC_NOWAIT;
 	if ((malloc_flags & M_WAITOK))
 		pflags |= VM_ALLOC_WAITOK;
 	return (pflags);
 }
 #endif
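 
 /*
  * For example, the kmem_* allocators in vm_kern.c derive their page
  * allocation flags with this helper:
  *
  *	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
  */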
 
 /*
  * Predicates supported by vm_page_ps_test():
  *
  *	PS_ALL_DIRTY is true only if the entire (super)page is dirty.
  *	However, it can be spuriously false when the (super)page has become
  *	dirty in the pmap but that information has not been propagated to the
  *	machine-independent layer.
  */
 #define	PS_ALL_DIRTY	0x1
 #define	PS_ALL_VALID	0x2
 #define	PS_NONE_BUSY	0x4
 
 void vm_page_busy_downgrade(vm_page_t m);
 void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared);
 void vm_page_flash(vm_page_t m);
 void vm_page_hold(vm_page_t mem);
 void vm_page_unhold(vm_page_t mem);
 void vm_page_free(vm_page_t m);
 void vm_page_free_zero(vm_page_t m);
 
 void vm_page_activate (vm_page_t);
 void vm_page_advise(vm_page_t m, int advice);
 vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int);
 vm_page_t vm_page_alloc_domain(vm_object_t, vm_pindex_t, int, int);
 vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t);
 vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int,
     vm_page_t);
 vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr);
 vm_page_t vm_page_alloc_contig_domain(vm_object_t object,
     vm_pindex_t pindex, int domain, int req, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr);
 vm_page_t vm_page_alloc_freelist(int, int);
 vm_page_t vm_page_alloc_freelist_domain(int, int, int);
 bool vm_page_blacklist_add(vm_paddr_t pa, bool verbose);
 void vm_page_change_lock(vm_page_t m, struct mtx **mtx);
 vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
 int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count);
 void vm_page_deactivate(vm_page_t);
 void vm_page_deactivate_noreuse(vm_page_t);
 void vm_page_dequeue(vm_page_t m);
 void vm_page_dequeue_deferred(vm_page_t m);
 void vm_page_drain_pqbatch(void);
 vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
 bool vm_page_free_prep(vm_page_t m);
 vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
 void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
 int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
 void vm_page_launder(vm_page_t m);
 vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
 vm_page_t vm_page_next(vm_page_t m);
 int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
 struct vm_pagequeue *vm_page_pagequeue(vm_page_t m);
 vm_page_t vm_page_prev(vm_page_t m);
 bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m);
 void vm_page_putfake(vm_page_t m);
 void vm_page_readahead_finish(vm_page_t m);
 bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 void vm_page_reference(vm_page_t m);
 void vm_page_remove (vm_page_t);
 int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
 vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object,
     vm_pindex_t pindex);
 void vm_page_requeue(vm_page_t m);
 int vm_page_sbusied(vm_page_t m);
 vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start,
     vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options);
 void vm_page_set_valid_range(vm_page_t m, int base, int size);
 int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
 vm_offset_t vm_page_startup(vm_offset_t vaddr);
 void vm_page_sunbusy(vm_page_t m);
 bool vm_page_try_to_free(vm_page_t m);
 int vm_page_trysbusy(vm_page_t m);
 void vm_page_unhold_pages(vm_page_t *ma, int count);
 void vm_page_unswappable(vm_page_t m);
 bool vm_page_unwire(vm_page_t m, uint8_t queue);
 bool vm_page_unwire_noq(vm_page_t m);
 void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
 void vm_page_wire (vm_page_t);
 void vm_page_xunbusy_hard(vm_page_t m);
 void vm_page_xunbusy_maybelocked(vm_page_t m);
 void vm_page_set_validclean (vm_page_t, int, int);
 void vm_page_clear_dirty (vm_page_t, int, int);
 void vm_page_set_invalid (vm_page_t, int, int);
 int vm_page_is_valid (vm_page_t, int, int);
 void vm_page_test_dirty (vm_page_t);
 vm_page_bits_t vm_page_bits(int base, int size);
 void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid);
 void vm_page_free_toq(vm_page_t m);
 void vm_page_free_pages_toq(struct spglist *free, bool update_wire_count);
 
 void vm_page_dirty_KBI(vm_page_t m);
 void vm_page_lock_KBI(vm_page_t m, const char *file, int line);
 void vm_page_unlock_KBI(vm_page_t m, const char *file, int line);
 int vm_page_trylock_KBI(vm_page_t m, const char *file, int line);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line);
 void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line);
 #endif
 
 #define	vm_page_assert_sbusied(m)					\
 	KASSERT(vm_page_sbusied(m),					\
 	    ("vm_page_assert_sbusied: page %p not shared busy @ %s:%d", \
 	    (m), __FILE__, __LINE__))
 
 #define	vm_page_assert_unbusied(m)					\
 	KASSERT(!vm_page_busied(m),					\
 	    ("vm_page_assert_unbusied: page %p busy @ %s:%d",		\
 	    (m), __FILE__, __LINE__))
 
 #define	vm_page_assert_xbusied(m)					\
 	KASSERT(vm_page_xbusied(m),					\
 	    ("vm_page_assert_xbusied: page %p not exclusive busy @ %s:%d", \
 	    (m), __FILE__, __LINE__))
 
 #define	vm_page_busied(m)						\
 	((m)->busy_lock != VPB_UNBUSIED)
 
 #define	vm_page_sbusy(m) do {						\
 	if (!vm_page_trysbusy(m))					\
 		panic("%s: page %p failed shared busying", __func__,	\
 		    (m));						\
 } while (0)
 
 #define	vm_page_tryxbusy(m)						\
 	(atomic_cmpset_acq_int(&(m)->busy_lock, VPB_UNBUSIED,		\
 	    VPB_SINGLE_EXCLUSIVER))
 
 #define	vm_page_xbusied(m)						\
 	(((m)->busy_lock & VPB_SINGLE_EXCLUSIVER) != 0)
 
 #define	vm_page_xbusy(m) do {						\
 	if (!vm_page_tryxbusy(m))					\
 		panic("%s: page %p failed exclusive busying", __func__,	\
 		    (m));						\
 } while (0)
 
 /* Note: page m's lock must not be owned by the caller. */
 #define	vm_page_xunbusy(m) do {						\
 	if (!atomic_cmpset_rel_int(&(m)->busy_lock,			\
 	    VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED))			\
 		vm_page_xunbusy_hard(m);				\
 } while (0)
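 
 /*
  * A usage sketch: a caller that must busy a page typically sleeps and
  * retries, re-looking up the page as described above.  "object", "pindex"
  * and the wait message are hypothetical.
  *
  *	VM_OBJECT_WLOCK(object);
  * retry:
  *	m = vm_page_lookup(object, pindex);
  *	if (m != NULL && vm_page_sleep_if_busy(m, "pgbusy"))
  *		goto retry;
  *	if (m != NULL)
  *		vm_page_xbusy(m);
  */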
 
 #ifdef INVARIANTS
 void vm_page_object_lock_assert(vm_page_t m);
 #define	VM_PAGE_OBJECT_LOCK_ASSERT(m)	vm_page_object_lock_assert(m)
 void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits);
 #define	VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits)				\
 	vm_page_assert_pga_writeable(m, bits)
 #else
 #define	VM_PAGE_OBJECT_LOCK_ASSERT(m)	(void)0
 #define	VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits)	(void)0
 #endif
 
 /*
  * We want to use atomic updates for the aflags field, which is 8 bits wide.
  * However, not all architectures support atomic operations on 8-bit
  * destinations.  In order that we can easily use a 32-bit operation, we
  * require that the aflags field be 32-bit aligned.
  */
 CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);
 
 /*
  *	Clear the given bits in the specified page.
  */
 static inline void
 vm_page_aflag_clear(vm_page_t m, uint8_t bits)
 {
 	uint32_t *addr, val;
 
 	/*
 	 * The PGA_REFERENCED flag can only be cleared if the page is locked.
 	 */
 	if ((bits & PGA_REFERENCED) != 0)
 		vm_page_assert_locked(m);
 
 	/*
 	 * Access the whole 32-bit word containing the aflags field with an
 	 * atomic update.  Parallel non-atomic updates to the other fields
 	 * within this word are handled properly by the atomic update.
 	 */
 	addr = (void *)&m->aflags;
 	KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
 	    ("vm_page_aflag_clear: aflags is misaligned"));
 	val = bits;
 #if BYTE_ORDER == BIG_ENDIAN
 	val <<= 24;
 #endif
 	atomic_clear_32(addr, val);
 }
 
 /*
  *	Set the given bits in the specified page.
  */
 static inline void
 vm_page_aflag_set(vm_page_t m, uint8_t bits)
 {
 	uint32_t *addr, val;
 
 	VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits);
 
 	/*
 	 * Access the whole 32-bit word containing the aflags field with an
 	 * atomic update.  Parallel non-atomic updates to the other fields
 	 * within this word are handled properly by the atomic update.
 	 */
 	addr = (void *)&m->aflags;
 	KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
 	    ("vm_page_aflag_set: aflags is misaligned"));
 	val = bits;
 #if BYTE_ORDER == BIG_ENDIAN
 	val <<= 24;
 #endif
 	atomic_set_32(addr, val);
 }
 
 /*
  *	vm_page_dirty:
  *
  *	Set all bits in the page's dirty field.
  *
  *	The object containing the specified page must be locked if the
  *	call is made from the machine-independent layer.
  *
  *	See vm_page_clear_dirty_mask().
  */
 static __inline void
 vm_page_dirty(vm_page_t m)
 {
 
 	/* Use vm_page_dirty_KBI() under INVARIANTS to save memory. */
 #if (defined(KLD_MODULE) && !defined(KLD_TIED)) || defined(INVARIANTS)
 	vm_page_dirty_KBI(m);
 #else
 	m->dirty = VM_PAGE_BITS_ALL;
 #endif
 }
 
 /*
  *	vm_page_remque:
  *
  *	If the given page is in a page queue, then remove it from that page
  *	queue.
  *
  *	The page must be locked.
  */
 static inline void
 vm_page_remque(vm_page_t m)
 {
 
 	if (m->queue != PQ_NONE)
 		vm_page_dequeue(m);
 }
 
 /*
  *	vm_page_undirty:
  *
  *	Set page to not be dirty.  Note: does not clear pmap modify bits
  */
 static __inline void
 vm_page_undirty(vm_page_t m)
 {
 
 	VM_PAGE_OBJECT_LOCK_ASSERT(m);
 	m->dirty = 0;
 }
 
 static inline void
 vm_page_replace_checked(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex,
     vm_page_t mold)
 {
 	vm_page_t mret;
 
 	mret = vm_page_replace(mnew, object, pindex);
 	KASSERT(mret == mold,
 	    ("invalid page replacement, mold=%p, mret=%p", mold, mret));
 
 	/* Unused if !INVARIANTS. */
 	(void)mold;
 	(void)mret;
 }
 
 /*
  *	vm_page_queue:
  *
  *	Return the index of the queue containing m.  This index is guaranteed
  *	not to change while the page lock is held.
  */
 static inline uint8_t
 vm_page_queue(vm_page_t m)
 {
 
 	vm_page_assert_locked(m);
 
 	if ((m->aflags & PGA_DEQUEUE) != 0)
 		return (PQ_NONE);
 	atomic_thread_fence_acq();
 	return (m->queue);
 }
 
 static inline bool
 vm_page_active(vm_page_t m)
 {
 
 	return (vm_page_queue(m) == PQ_ACTIVE);
 }
 
 static inline bool
 vm_page_inactive(vm_page_t m)
 {
 
 	return (vm_page_queue(m) == PQ_INACTIVE);
 }
 
 static inline bool
 vm_page_in_laundry(vm_page_t m)
 {
 	uint8_t queue;
 
 	queue = vm_page_queue(m);
 	return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE);
 }
 
 /*
  *	vm_page_held:
  *
  *	Return true if a reference prevents the page from being reclaimable.
  */
 static inline bool
 vm_page_held(vm_page_t m)
 {
 
 	return (m->hold_count > 0 || m->wire_count > 0);
 }
 
 #endif				/* _KERNEL */
 #endif				/* !_VM_PAGE_ */
Index: head/sys/x86/iommu/busdma_dmar.c
===================================================================
--- head/sys/x86/iommu/busdma_dmar.c	(revision 338317)
+++ head/sys/x86/iommu/busdma_dmar.c	(revision 338318)
@@ -1,925 +1,925 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/rman.h>
 #include <sys/taskqueue.h>
 #include <sys/tree.h>
 #include <sys/uio.h>
 #include <sys/vmem.h>
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 #include <x86/include/busdma_impl.h>
 #include <x86/iommu/intel_reg.h>
 #include <x86/iommu/busdma_dmar.h>
 #include <x86/iommu/intel_dmar.h>
 
 /*
  * busdma_dmar.c, the implementation of the busdma(9) interface using
  * DMAR units from Intel VT-d.
  */
 
 static bool
 dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
 {
 	char str[128], *env;
 	int default_bounce;
 	bool ret;
 	static const char bounce_str[] = "bounce";
 	static const char dmar_str[] = "dmar";
 
 	default_bounce = 0;
 	env = kern_getenv("hw.busdma.default");
 	if (env != NULL) {
 		if (strcmp(env, bounce_str) == 0)
 			default_bounce = 1;
 		else if (strcmp(env, dmar_str) == 0)
 			default_bounce = 0;
 		freeenv(env);
 	}
 
 	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
 	    domain, bus, slot, func);
 	env = kern_getenv(str);
 	if (env == NULL)
 		return (default_bounce != 0);
 	if (strcmp(env, bounce_str) == 0)
 		ret = true;
 	else if (strcmp(env, dmar_str) == 0)
 		ret = false;
 	else
 		ret = default_bounce != 0;
 	freeenv(env);
 	return (ret);
 }
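 
 /*
  * The tunables consulted above are typically set in loader.conf(5); the
  * device address below is hypothetical.
  *
  *	hw.busdma.default="bounce"
  *	hw.busdma.pci0.0.31.0="dmar"
  */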
 
 /*
  * Given original device, find the requester ID that will be seen by
  * the DMAR unit and used for page table lookup.  PCI bridges may take
  * ownership of transactions from downstream devices, so it may not be
  * the same as the BSF of the target device.  In those cases, all
  * devices downstream of the bridge must share a single mapping
  * domain, and must collectively be assigned to use either DMAR or
  * bounce mapping.
  */
 device_t
 dmar_get_requester(device_t dev, uint16_t *rid)
 {
 	devclass_t pci_class;
 	device_t l, pci, pcib, pcip, pcibp, requester;
 	int cap_offset;
 	uint16_t pcie_flags;
 	bool bridge_is_pcie;
 
 	pci_class = devclass_find("pci");
 	l = requester = dev;
 
 	*rid = pci_get_rid(dev);
 
 	/*
 	 * Walk the bridge hierarchy from the target device to the
 	 * host port to find the translating bridge nearest the DMAR
 	 * unit.
 	 */
 	for (;;) {
 		pci = device_get_parent(l);
 		KASSERT(pci != NULL, ("dmar_get_requester(%s): NULL parent "
 		    "for %s", device_get_name(dev), device_get_name(l)));
 		KASSERT(device_get_devclass(pci) == pci_class,
 		    ("dmar_get_requester(%s): non-pci parent %s for %s",
 		    device_get_name(dev), device_get_name(pci),
 		    device_get_name(l)));
 
 		pcib = device_get_parent(pci);
 		KASSERT(pcib != NULL, ("dmar_get_requester(%s): NULL bridge "
 		    "for %s", device_get_name(dev), device_get_name(pci)));
 
 		/*
 		 * The parent of our "bridge" isn't another PCI bus,
 		 * so pcib isn't a PCI->PCI bridge but rather a host
 		 * port, and the requester ID won't be translated
 		 * further.
 		 */
 		pcip = device_get_parent(pcib);
 		if (device_get_devclass(pcip) != pci_class)
 			break;
 		pcibp = device_get_parent(pcip);
 
 		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
 			/*
 			 * Do not stop the loop even if the target
 			 * device is PCIe, because it is possible (but
 			 * unlikely) to have a PCI->PCIe bridge
 			 * somewhere in the hierarchy.
 			 */
 			l = pcib;
 		} else {
 			/*
 			 * The device is not PCIe, so it cannot be seen as a
 			 * requester by the DMAR unit.  Check whether the
 			 * bridge is PCIe.
 			 */
 			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
 			    &cap_offset) == 0;
 			requester = pcib;
 
 			/*
 			 * Check for a buggy PCIe/PCI bridge that
 			 * doesn't report the express capability.  If
 			 * the bridge above it is express but isn't a
 			 * PCI bridge, then we know pcib is actually a
 			 * PCIe/PCI bridge.
 			 */
 			if (!bridge_is_pcie && pci_find_cap(pcibp,
 			    PCIY_EXPRESS, &cap_offset) == 0) {
 				pcie_flags = pci_read_config(pcibp,
 				    cap_offset + PCIER_FLAGS, 2);
 				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
 				    PCIEM_TYPE_PCI_BRIDGE)
 					bridge_is_pcie = true;
 			}
 
 			if (bridge_is_pcie) {
 				/*
 				 * The current device is not PCIe, but
 				 * the bridge above it is.  This is a
 				 * PCIe->PCI bridge.  Assume that the
 				 * requester ID will be the secondary
 				 * bus number with slot and function
 				 * set to zero.
 				 *
 				 * XXX: Doesn't handle the case where
 				 * the bridge is PCIe->PCI-X, and the
 				 * bridge will only take ownership of
 				 * requests in some cases.  We should
 				 * provide context entries with the
 				 * same page tables for taken and
 				 * non-taken transactions.
 				 */
 				*rid = PCI_RID(pci_get_bus(l), 0, 0);
 				l = pcibp;
 			} else {
 				/*
 				 * Neither the device nor the bridge
 				 * above it are PCIe.  This is a
 				 * conventional PCI->PCI bridge, which
 				 * will use the bridge's BSF as the
 				 * requester ID.
 				 */
 				*rid = pci_get_rid(pcib);
 				l = pcib;
 			}
 		}
 	}
 	return (requester);
 }
 
 struct dmar_ctx *
 dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
 {
 	device_t requester;
 	struct dmar_ctx *ctx;
 	bool disabled;
 	uint16_t rid;
 
 	requester = dmar_get_requester(dev, &rid);
 
 	/*
 	 * If the user requested that the IOMMU be disabled for the
 	 * device, we cannot disable the whole DMAR unit, because other
 	 * devices on the same unit may still require translation.
 	 * Instead, provide the identity mapping for the device context.
 	 */
 	disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(requester),
 	    pci_get_bus(requester), pci_get_slot(requester),
 	    pci_get_function(requester));
 	ctx = dmar_get_ctx_for_dev(dmar, requester, rid, disabled, rmrr);
 	if (ctx == NULL)
 		return (NULL);
 	if (disabled) {
 		/*
 		 * Keep the first reference on context, release the
 		 * later refs.
 		 */
 		DMAR_LOCK(dmar);
 		if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
 			ctx->flags |= DMAR_CTX_DISABLED;
 			DMAR_UNLOCK(dmar);
 		} else {
 			dmar_free_ctx_locked(dmar, ctx);
 		}
 		ctx = NULL;
 	}
 	return (ctx);
 }
 
 bus_dma_tag_t
 dmar_get_dma_tag(device_t dev, device_t child)
 {
 	struct dmar_unit *dmar;
 	struct dmar_ctx *ctx;
 	bus_dma_tag_t res;
 
 	dmar = dmar_find(child);
 	/* Not in the scope of any DMAR? */
 	if (dmar == NULL)
 		return (NULL);
 	if (!dmar->dma_enabled)
 		return (NULL);
 	dmar_quirks_pre_use(dmar);
 	dmar_instantiate_rmrr_ctxs(dmar);
 
 	ctx = dmar_instantiate_ctx(dmar, child, false);
 	res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag;
 	return (res);
 }
 
 static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");
 
 static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
     struct bus_dmamap_dmar *map);
 
 static int
 dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	struct bus_dma_tag_dmar *newtag, *oldtag;
 	int error;
 
 	*dmat = NULL;
 	error = common_bus_dma_tag_create(parent != NULL ?
 	    &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
 	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
 	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
 	    sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
 	if (error != 0)
 		goto out;
 
 	oldtag = (struct bus_dma_tag_dmar *)parent;
 	newtag->common.impl = &bus_dma_dmar_impl;
 	newtag->ctx = oldtag->ctx;
 	newtag->owner = oldtag->owner;
 
 	*dmat = (bus_dma_tag_t)newtag;
 out:
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
 	    error);
 	return (error);
 }
 
 static int
 dmar_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
 {
 
 	return (0);
 }
 
 static int
 dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
 {
 	struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
 	int error;
 
 	error = 0;
 	dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;
 
 	if (dmat != NULL) {
 		if (dmat->map_count != 0) {
 			error = EBUSY;
 			goto out;
 		}
 		while (dmat != NULL) {
 			parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
 			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
 			    1) {
 				if (dmat == &dmat->ctx->ctx_tag)
 					dmar_free_ctx(dmat->ctx);
 				free_domain(dmat->segments, M_DMAR_DMAMAP);
 				free(dmat, M_DEVBUF);
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 out:
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
 	return (error);
 }
 
 static int
 dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__);
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = malloc_domain(sizeof(*map), M_DMAR_DMAMAP,
 	    tag->common.domain, M_NOWAIT | M_ZERO);
 	if (map == NULL) {
 		*mapp = NULL;
 		return (ENOMEM);
 	}
 	if (tag->segments == NULL) {
 		tag->segments = malloc_domain(sizeof(bus_dma_segment_t) *
 		    tag->common.nsegments, M_DMAR_DMAMAP,
 		    tag->common.domain, M_NOWAIT);
 		if (tag->segments == NULL) {
 			free_domain(map, M_DMAR_DMAMAP);
 			*mapp = NULL;
 			return (ENOMEM);
 		}
 	}
 	TAILQ_INIT(&map->map_entries);
 	map->tag = tag;
 	map->locked = true;
 	map->cansleep = false;
 	tag->map_count++;
 	*mapp = (bus_dmamap_t)map;
 
 	return (0);
 }
 
 static int
 dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	struct dmar_domain *domain;
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 	if (map != NULL) {
 		domain = tag->ctx->domain;
 		DMAR_DOMAIN_LOCK(domain);
 		if (!TAILQ_EMPTY(&map->map_entries)) {
 			DMAR_DOMAIN_UNLOCK(domain);
 			return (EBUSY);
 		}
 		DMAR_DOMAIN_UNLOCK(domain);
 		free_domain(map, M_DMAR_DMAMAP);
 	}
 	tag->map_count--;
 	return (0);
 }
 
 
 static int
 dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	int error, mflags;
 	vm_memattr_t attr;
 
 	error = dmar_bus_dmamap_create(dmat, flags, mapp);
 	if (error != 0)
 		return (error);
 
 	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
 	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
 	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
 	    VM_MEMATTR_DEFAULT;
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)*mapp;
 
 	if (tag->common.maxsize < PAGE_SIZE &&
 	    tag->common.alignment <= tag->common.maxsize &&
 	    attr == VM_MEMATTR_DEFAULT) {
 		*vaddr = malloc_domain(tag->common.maxsize, M_DEVBUF,
 		    tag->common.domain, mflags);
 		map->flags |= BUS_DMAMAP_DMAR_MALLOC;
 	} else {
 		*vaddr = (void *)kmem_alloc_attr_domain(tag->common.domain,
 		    tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR,
 		    attr);
 		map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
 	}
 	if (*vaddr == NULL) {
 		dmar_bus_dmamap_destroy(dmat, *mapp);
 		*mapp = NULL;
 		return (ENOMEM);
 	}
 	return (0);
 }
 
 static void
 dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 
 	if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
 		free_domain(vaddr, M_DEVBUF);
 		map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
 	} else {
 		KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
 		    ("dmar_bus_dmamem_free for non alloced map %p", map));
-		kmem_free(kernel_arena, (vm_offset_t)vaddr, tag->common.maxsize);
+		kmem_free((vm_offset_t)vaddr, tag->common.maxsize);
 		map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
 	}
 
 	dmar_bus_dmamap_destroy(dmat, map1);
 }
 
 static int
 dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
     struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
     int flags, bus_dma_segment_t *segs, int *segp,
     struct dmar_map_entries_tailq *unroll_list)
 {
 	struct dmar_ctx *ctx;
 	struct dmar_domain *domain;
 	struct dmar_map_entry *entry;
 	dmar_gaddr_t size;
 	bus_size_t buflen1;
 	int error, idx, gas_flags, seg;
 
 	KASSERT(offset < DMAR_PAGE_SIZE, ("offset %d", offset));
 	if (segs == NULL)
 		segs = tag->segments;
 	ctx = tag->ctx;
 	domain = ctx->domain;
 	seg = *segp;
 	error = 0;
 	idx = 0;
 	while (buflen > 0) {
 		seg++;
 		if (seg >= tag->common.nsegments) {
 			error = EFBIG;
 			break;
 		}
 		buflen1 = buflen > tag->common.maxsegsz ?
 		    tag->common.maxsegsz : buflen;
 		size = round_page(offset + buflen1);
 
 		/*
 		 * (Too) optimistically allow a split if there is more
 		 * than one segment left.
 		 */
 		gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
 		if (seg + 1 < tag->common.nsegments)
 			gas_flags |= DMAR_GM_CANSPLIT;
 
 		error = dmar_gas_map(domain, &tag->common, size, offset,
 		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
 		    gas_flags, ma + idx, &entry);
 		if (error != 0)
 			break;
 		if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
 			KASSERT(size >= entry->end - entry->start,
 			    ("split increased entry size %jx %jx %jx",
 			    (uintmax_t)size, (uintmax_t)entry->start,
 			    (uintmax_t)entry->end));
 			size = entry->end - entry->start;
 			if (buflen1 > size)
 				buflen1 = size;
 		} else {
 			KASSERT(entry->end - entry->start == size,
 			    ("no split allowed %jx %jx %jx",
 			    (uintmax_t)size, (uintmax_t)entry->start,
 			    (uintmax_t)entry->end));
 		}
 		if (offset + buflen1 > size)
 			buflen1 = size - offset;
 		if (buflen1 > tag->common.maxsegsz)
 			buflen1 = tag->common.maxsegsz;
 
 		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
 		    == 0,
 		    ("alignment failed: ctx %p start 0x%jx offset %x "
 		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
 		    (uintmax_t)tag->common.alignment));
 		KASSERT(entry->end <= tag->common.lowaddr ||
 		    entry->start >= tag->common.highaddr,
 		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
 		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
 		    (uintmax_t)entry->start, (uintmax_t)entry->end,
 		    (uintmax_t)tag->common.lowaddr,
 		    (uintmax_t)tag->common.highaddr));
 		KASSERT(dmar_test_boundary(entry->start + offset, buflen1,
 		    tag->common.boundary),
 		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
 		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
 		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
 		KASSERT(buflen1 <= tag->common.maxsegsz,
 		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
 		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
 		    (uintmax_t)entry->start, (uintmax_t)entry->end,
 		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));
 
 		DMAR_DOMAIN_LOCK(domain);
 		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
 		entry->flags |= DMAR_MAP_ENTRY_MAP;
 		DMAR_DOMAIN_UNLOCK(domain);
 		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);
 
 		segs[seg].ds_addr = entry->start + offset;
 		segs[seg].ds_len = buflen1;
 
 		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
 		offset += buflen1;
 		offset &= DMAR_PAGE_MASK;
 		buflen -= buflen1;
 	}
 	if (error == 0)
 		*segp = seg;
 	return (error);
 }
 
 static int
 dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
     struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
     int flags, bus_dma_segment_t *segs, int *segp)
 {
 	struct dmar_ctx *ctx;
 	struct dmar_domain *domain;
 	struct dmar_map_entry *entry, *entry1;
 	struct dmar_map_entries_tailq unroll_list;
 	int error;
 
 	ctx = tag->ctx;
 	domain = ctx->domain;
 	atomic_add_long(&ctx->loads, 1);
 
 	TAILQ_INIT(&unroll_list);
 	error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
 	    buflen, flags, segs, segp, &unroll_list);
 	if (error != 0) {
 		/*
 		 * The busdma interface does not allow us to report a
 		 * partial buffer load, so unfortunately we have to
 		 * revert all the work done.
 		 */
 		DMAR_DOMAIN_LOCK(domain);
 		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
 		    entry1) {
 			/*
 			 * No entries other than those we created
 			 * during the failed run could have been
 			 * inserted in the meantime, since we own the
 			 * ctx pglock.
 			 */
 			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
 			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
 			TAILQ_INSERT_TAIL(&domain->unload_entries, entry,
 			    dmamap_link);
 		}
 		DMAR_DOMAIN_UNLOCK(domain);
 		taskqueue_enqueue(domain->dmar->delayed_taskqueue,
 		    &domain->unload_task);
 	}
 
 	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
 	    !map->cansleep)
 		error = EINPROGRESS;
 	if (error == EINPROGRESS)
 		dmar_bus_schedule_dmamap(domain->dmar, map);
 	return (error);
 }
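
The unroll_list above is the usual rollback idiom: every entry installed during one load attempt is also threaded onto a private list, so a mid-way failure can remove exactly those entries and nothing else.  A standalone sketch of the idiom using FreeBSD's <sys/queue.h> macros; struct entry and build_all() are invented for illustration.

#include <sys/queue.h>
#include <stdlib.h>

struct entry {
	TAILQ_ENTRY(entry) main_link;	/* membership in the long-lived list */
	TAILQ_ENTRY(entry) unroll_link;	/* membership in this attempt only */
};
TAILQ_HEAD(entryq, entry);

static int
build_all(struct entryq *installed, int nsteps)
{
	struct entryq unroll;
	struct entry *e, *e1;
	int i;

	TAILQ_INIT(&unroll);
	for (i = 0; i < nsteps; i++) {
		e = calloc(1, sizeof(*e));
		if (e == NULL)
			goto fail;
		TAILQ_INSERT_TAIL(installed, e, main_link);
		TAILQ_INSERT_TAIL(&unroll, e, unroll_link);
	}
	return (0);
fail:
	/* Undo only what this call installed, leaving older entries alone. */
	TAILQ_FOREACH_SAFE(e, &unroll, unroll_link, e1) {
		TAILQ_REMOVE(installed, e, main_link);
		TAILQ_REMOVE(&unroll, e, unroll_link);
		free(e);
	}
	return (-1);
}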
 
 static int
 dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 	return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
 	    flags, segs, segp));
 }
 
 static int
 dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
     vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	vm_page_t *ma;
 	vm_paddr_t pstart, pend;
 	int error, i, ma_cnt, offset;
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 	pstart = trunc_page(buf);
 	pend = round_page(buf + buflen);
 	offset = buf & PAGE_MASK;
 	ma_cnt = OFF_TO_IDX(pend - pstart);
 	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
 	    M_WAITOK : M_NOWAIT);
 	if (ma == NULL)
 		return (ENOMEM);
 	for (i = 0; i < ma_cnt; i++)
 		ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE);
 	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
 	    flags, segs, segp);
 	free(ma, M_DEVBUF);
 	return (error);
 }
 
 static int
 dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	vm_page_t *ma, fma;
 	vm_paddr_t pstart, pend, paddr;
 	int error, i, ma_cnt, offset;
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 	pstart = trunc_page((vm_offset_t)buf);
 	pend = round_page((vm_offset_t)buf + buflen);
 	offset = (vm_offset_t)buf & PAGE_MASK;
 	ma_cnt = OFF_TO_IDX(pend - pstart);
 	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
 	    M_WAITOK : M_NOWAIT);
 	if (ma == NULL)
 		return (ENOMEM);
 	if (dumping) {
 		/*
 		 * If dumping, do not attempt to call
 		 * PHYS_TO_VM_PAGE() at all.  It may return non-NULL
 		 * but the returned vm_page might not be initialized,
 		 * e.g. for the kernel itself.
 		 */
 		KASSERT(pmap == kernel_pmap, ("non-kernel address write"));
 		fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF,
 		    M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT));
 		if (fma == NULL) {
 			free(ma, M_DEVBUF);
 			return (ENOMEM);
 		}
 		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
 			paddr = pmap_kextract(pstart);
 			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
 			ma[i] = &fma[i];
 		}
 	} else {
 		fma = NULL;
 		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
 			if (pmap == kernel_pmap)
 				paddr = pmap_kextract(pstart);
 			else
 				paddr = pmap_extract(pmap, pstart);
 			ma[i] = PHYS_TO_VM_PAGE(paddr);
 			KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr,
 			    ("PHYS_TO_VM_PAGE failed %jx %jx m %p",
 			    (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]),
 			    ma[i]));
 		}
 	}
 	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
 	    flags, segs, segp);
 	free(ma, M_DEVBUF);
 	free(fma, M_DEVBUF);
 	return (error);
 }
 
 static void
 dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
 {
 	struct bus_dmamap_dmar *map;
 
 	if (map1 == NULL)
 		return;
 	map = (struct bus_dmamap_dmar *)map1;
 	map->mem = *mem;
 	map->tag = (struct bus_dma_tag_dmar *)dmat;
 	map->callback = callback;
 	map->callback_arg = callback_arg;
 }
 
 static bus_dma_segment_t *
 dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 
 	if (!map->locked) {
 		KASSERT(map->cansleep,
 		    ("map not locked and not sleepable context %p", map));
 
 		/*
 		 * We are called from the delayed context.  Relock the
 		 * driver.
 		 */
 		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
 		map->locked = true;
 	}
 
 	if (segs == NULL)
 		segs = tag->segments;
 	return (segs);
 }
 
 /*
  * The limitations of the busdma KPI force the dmar to perform the
  * actual unload, i.e. the unmapping of the map entries' page tables,
  * from the delayed context on i386, since mapping a page table page
  * might require a sleep to be successful.  The unfortunate
  * consequence is that DMA requests can still be served for some time
  * after the bus_dmamap_unload() call returns.
  *
  * On amd64, we assume that sf allocation cannot fail.
  */
 static void
 dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	struct dmar_ctx *ctx;
 	struct dmar_domain *domain;
 #if defined(__amd64__)
 	struct dmar_map_entries_tailq entries;
 #endif
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 	ctx = tag->ctx;
 	domain = ctx->domain;
 	atomic_add_long(&ctx->unloads, 1);
 
 #if defined(__i386__)
 	DMAR_DOMAIN_LOCK(domain);
 	TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link);
 	DMAR_DOMAIN_UNLOCK(domain);
 	taskqueue_enqueue(domain->dmar->delayed_taskqueue,
 	    &domain->unload_task);
 #else /* defined(__amd64__) */
 	TAILQ_INIT(&entries);
 	DMAR_DOMAIN_LOCK(domain);
 	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
 	DMAR_DOMAIN_UNLOCK(domain);
 	THREAD_NO_SLEEPING();
 	dmar_domain_unload(domain, &entries, false);
 	THREAD_SLEEPING_OK();
 	KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
 #endif
 }
 
 static void
 dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dmasync_op_t op)
 {
 }
 
 struct bus_dma_impl bus_dma_dmar_impl = {
 	.tag_create = dmar_bus_dma_tag_create,
 	.tag_destroy = dmar_bus_dma_tag_destroy,
 	.tag_set_domain = dmar_bus_dma_tag_set_domain,
 	.map_create = dmar_bus_dmamap_create,
 	.map_destroy = dmar_bus_dmamap_destroy,
 	.mem_alloc = dmar_bus_dmamem_alloc,
 	.mem_free = dmar_bus_dmamem_free,
 	.load_phys = dmar_bus_dmamap_load_phys,
 	.load_buffer = dmar_bus_dmamap_load_buffer,
 	.load_ma = dmar_bus_dmamap_load_ma,
 	.map_waitok = dmar_bus_dmamap_waitok,
 	.map_complete = dmar_bus_dmamap_complete,
 	.map_unload = dmar_bus_dmamap_unload,
 	.map_sync = dmar_bus_dmamap_sync,
 };
 
 static void
 dmar_bus_task_dmamap(void *arg, int pending)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	struct dmar_unit *unit;
 
 	unit = arg;
 	DMAR_LOCK(unit);
 	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
 		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
 		DMAR_UNLOCK(unit);
 		tag = map->tag;
 		map->cansleep = true;
 		map->locked = false;
 		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
 		    &map->mem, map->callback, map->callback_arg,
 		    BUS_DMA_WAITOK);
 		map->cansleep = false;
 		if (map->locked) {
 			(tag->common.lockfunc)(tag->common.lockfuncarg,
 			    BUS_DMA_UNLOCK);
 		} else
 			map->locked = true;
 		map->cansleep = false;
 		DMAR_LOCK(unit);
 	}
 	DMAR_UNLOCK(unit);
 }
 
 static void
 dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
 {
 
 	map->locked = false;
 	DMAR_LOCK(unit);
 	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
 	DMAR_UNLOCK(unit);
 	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
 }
 
 int
 dmar_init_busdma(struct dmar_unit *unit)
 {
 
 	unit->dma_enabled = 1;
 	TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
 	TAILQ_INIT(&unit->delayed_maps);
 	TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
 	unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
 	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
 	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
 	    "dmar%d busdma taskq", unit->unit);
 	return (0);
 }
 
 void
 dmar_fini_busdma(struct dmar_unit *unit)
 {
 
 	if (unit->delayed_taskqueue == NULL)
 		return;
 
 	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
 	taskqueue_free(unit->delayed_taskqueue);
 	unit->delayed_taskqueue = NULL;
 }
Index: head/sys/x86/iommu/intel_intrmap.c
===================================================================
--- head/sys/x86/iommu/intel_intrmap.c	(revision 338317)
+++ head/sys/x86/iommu/intel_intrmap.c	(revision 338318)
@@ -1,380 +1,380 @@
 /*-
  * Copyright (c) 2015 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memdesc.h>
 #include <sys/rman.h>
 #include <sys/rwlock.h>
 #include <sys/taskqueue.h>
 #include <sys/tree.h>
 #include <sys/vmem.h>
 #include <machine/bus.h>
 #include <machine/intr_machdep.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <x86/include/apicreg.h>
 #include <x86/include/apicvar.h>
 #include <x86/include/busdma_impl.h>
 #include <x86/iommu/intel_reg.h>
 #include <x86/iommu/busdma_dmar.h>
 #include <x86/iommu/intel_dmar.h>
 #include <dev/pci/pcivar.h>
 #include <x86/iommu/iommu_intrmap.h>
 
 static struct dmar_unit *dmar_ir_find(device_t src, uint16_t *rid,
     int *is_dmar);
 static void dmar_ir_program_irte(struct dmar_unit *unit, u_int idx,
     uint64_t low, uint16_t rid);
 static int dmar_ir_free_irte(struct dmar_unit *unit, u_int cookie);
 
 int
 iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
 {
 	struct dmar_unit *unit;
 	vmem_addr_t vmem_res;
 	u_int idx, i;
 	int error;
 
 	unit = dmar_ir_find(src, NULL, NULL);
 	if (unit == NULL || !unit->ir_enabled) {
 		for (i = 0; i < count; i++)
 			cookies[i] = -1;
 		return (EOPNOTSUPP);
 	}
 
 	error = vmem_alloc(unit->irtids, count, M_FIRSTFIT | M_NOWAIT,
 	    &vmem_res);
 	if (error != 0) {
 		KASSERT(error != EOPNOTSUPP,
 		    ("impossible EOPNOTSUPP from vmem"));
 		return (error);
 	}
 	idx = vmem_res;
 	for (i = 0; i < count; i++)
 		cookies[i] = idx + i;
 	return (0);
 }
 
 int
 iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
     uint64_t *addr, uint32_t *data)
 {
 	struct dmar_unit *unit;
 	uint64_t low;
 	uint16_t rid;
 	int is_dmar;
 
 	unit = dmar_ir_find(src, &rid, &is_dmar);
 	if (is_dmar) {
 		KASSERT(unit == NULL, ("DMAR cannot translate itself"));
 
 		/*
 		 * See VT-d specification, 5.1.6 Remapping Hardware -
 		 * Interrupt Programming.
 		 */
 		*data = vector;
 		*addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12);
 		if (x2apic_mode)
 			*addr |= ((uint64_t)cpu & 0xffffff00) << 32;
 		else
 			KASSERT(cpu <= 0xff, ("cpu id too big %d", cpu));
 		return (0);
 	}
 	if (unit == NULL || !unit->ir_enabled || cookie == -1)
 		return (EOPNOTSUPP);
 
 	low = (DMAR_X2APIC(unit) ? DMAR_IRTE1_DST_x2APIC(cpu) :
 	    DMAR_IRTE1_DST_xAPIC(cpu)) | DMAR_IRTE1_V(vector) |
 	    DMAR_IRTE1_DLM_FM | DMAR_IRTE1_TM_EDGE | DMAR_IRTE1_RH_DIRECT |
 	    DMAR_IRTE1_DM_PHYSICAL | DMAR_IRTE1_P;
 	dmar_ir_program_irte(unit, cookie, low, rid);
 
 	if (addr != NULL) {
 		/*
 		 * See VT-d specification, 5.1.5.2 MSI and MSI-X
 		 * Register Programming.
 		 */
 		*addr = MSI_INTEL_ADDR_BASE | ((cookie & 0x7fff) << 5) |
 		    ((cookie & 0x8000) << 2) | 0x18;
 		*data = 0;
 	}
 	return (0);
 }
 
 int
 iommu_unmap_msi_intr(device_t src, u_int cookie)
 {
 	struct dmar_unit *unit;
 
 	if (cookie == -1)
 		return (0);
 	unit = dmar_ir_find(src, NULL, NULL);
 	return (dmar_ir_free_irte(unit, cookie));
 }
 
 int
 iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
     bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
 {
 	struct dmar_unit *unit;
 	vmem_addr_t vmem_res;
 	uint64_t low, iorte;
 	u_int idx;
 	int error;
 	uint16_t rid;
 
 	unit = dmar_find_ioapic(ioapic_id, &rid);
 	if (unit == NULL || !unit->ir_enabled) {
 		*cookie = -1;
 		return (EOPNOTSUPP);
 	}
 
 	error = vmem_alloc(unit->irtids, 1, M_FIRSTFIT | M_NOWAIT, &vmem_res);
 	if (error != 0) {
 		KASSERT(error != EOPNOTSUPP,
 		    ("impossible EOPNOTSUPP from vmem"));
 		return (error);
 	}
 	idx = vmem_res;
 	low = 0;
 	switch (irq) {
 	case IRQ_EXTINT:
 		low |= DMAR_IRTE1_DLM_ExtINT;
 		break;
 	case IRQ_NMI:
 		low |= DMAR_IRTE1_DLM_NMI;
 		break;
 	case IRQ_SMI:
 		low |= DMAR_IRTE1_DLM_SMI;
 		break;
 	default:
 		KASSERT(vector != 0, ("No vector for IRQ %u", irq));
 		low |= DMAR_IRTE1_DLM_FM | DMAR_IRTE1_V(vector);
 		break;
 	}
 	low |= (DMAR_X2APIC(unit) ? DMAR_IRTE1_DST_x2APIC(cpu) :
 	    DMAR_IRTE1_DST_xAPIC(cpu)) |
 	    (edge ? DMAR_IRTE1_TM_EDGE : DMAR_IRTE1_TM_LEVEL) |
 	    DMAR_IRTE1_RH_DIRECT | DMAR_IRTE1_DM_PHYSICAL | DMAR_IRTE1_P;
 	dmar_ir_program_irte(unit, idx, low, rid);
 
 	if (hi != NULL) {
 		/*
 		 * See VT-d specification, 5.1.5.1 I/OxAPIC
 		 * Programming.
 		 */
 		iorte = (1ULL << 48) | ((uint64_t)(idx & 0x7fff) << 49) |
 		    ((idx & 0x8000) != 0 ? (1 << 11) : 0) |
 		    (edge ? IOART_TRGREDG : IOART_TRGRLVL) |
 		    (activehi ? IOART_INTAHI : IOART_INTALO) |
 		    IOART_DELFIXED | vector;
 		*hi = iorte >> 32;
 		*lo = iorte;
 	}
 	*cookie = idx;
 	return (0);
 }
 
 int
 iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
 {
 	struct dmar_unit *unit;
 	u_int idx;
 
 	idx = *cookie;
 	if (idx == -1)
 		return (0);
 	*cookie = -1;
 	unit = dmar_find_ioapic(ioapic_id, NULL);
 	KASSERT(unit != NULL && unit->ir_enabled,
 	    ("unmap: cookie %d unit %p", idx, unit));
 	return (dmar_ir_free_irte(unit, idx));
 }
 
 static struct dmar_unit *
 dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar)
 {
 	devclass_t src_class;
 	struct dmar_unit *unit;
 
 	/*
 	 * We need to determine whether the interrupt source generates
 	 * FSB interrupts.  If so, it is either the DMAR itself, in
 	 * which case interrupts are not remapped, or an HPET, whose
 	 * interrupts are remapped.  For HPET, the source id is
 	 * reported by the HPET record in the DMAR ACPI table.
 	 */
 	if (is_dmar != NULL)
 		*is_dmar = FALSE;
 	src_class = device_get_devclass(src);
 	if (src_class == devclass_find("dmar")) {
 		unit = NULL;
 		if (is_dmar != NULL)
 			*is_dmar = TRUE;
 	} else if (src_class == devclass_find("hpet")) {
 		unit = dmar_find_hpet(src, rid);
 	} else {
 		unit = dmar_find(src);
 		if (unit != NULL && rid != NULL)
 			dmar_get_requester(src, rid);
 	}
 	return (unit);
 }
 
 static void
 dmar_ir_program_irte(struct dmar_unit *unit, u_int idx, uint64_t low,
     uint16_t rid)
 {
 	dmar_irte_t *irte;
 	uint64_t high;
 
 	KASSERT(idx < unit->irte_cnt,
 	    ("bad cookie %d %d", idx, unit->irte_cnt));
 	irte = &(unit->irt[idx]);
 	high = DMAR_IRTE2_SVT_RID | DMAR_IRTE2_SQ_RID |
 	    DMAR_IRTE2_SID_RID(rid);
 	device_printf(unit->dev,
 	    "programming irte[%d] rid %#x high %#jx low %#jx\n",
 	    idx, rid, (uintmax_t)high, (uintmax_t)low);
 	DMAR_LOCK(unit);
 	if ((irte->irte1 & DMAR_IRTE1_P) != 0) {
 		/*
 		 * The rte is already valid.  Assume that the request
 		 * is to remap the interrupt for balancing.  Only the
 		 * low word of the rte needs to be changed.  Assert
 		 * that the high word contains the expected value.
 		 */
 		KASSERT(irte->irte2 == high,
 		    ("irte2 mismatch, %jx %jx", (uintmax_t)irte->irte2,
 		    (uintmax_t)high));
 		dmar_pte_update(&irte->irte1, low);
 	} else {
 		dmar_pte_store(&irte->irte2, high);
 		dmar_pte_store(&irte->irte1, low);
 	}
 	dmar_qi_invalidate_iec(unit, idx, 1);
 	DMAR_UNLOCK(unit);
 
 }
 
 static int
 dmar_ir_free_irte(struct dmar_unit *unit, u_int cookie)
 {
 	dmar_irte_t *irte;
 
 	KASSERT(unit != NULL && unit->ir_enabled,
 	    ("unmap: cookie %d unit %p", cookie, unit));
 	KASSERT(cookie < unit->irte_cnt,
 	    ("bad cookie %u %u", cookie, unit->irte_cnt));
 	irte = &(unit->irt[cookie]);
 	dmar_pte_clear(&irte->irte1);
 	dmar_pte_clear(&irte->irte2);
 	DMAR_LOCK(unit);
 	dmar_qi_invalidate_iec(unit, cookie, 1);
 	DMAR_UNLOCK(unit);
 	vmem_free(unit->irtids, cookie, 1);
 	return (0);
 }
 
 static u_int
 clp2(u_int v)
 {
 
 	return (powerof2(v) ? v : 1 << fls(v));
 }
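
clp2() rounds its argument up to the next power of two: a power of two is returned unchanged, anything else is rounded up via fls(), which returns the index of the most significant set bit.  A quick user-space check of that behaviour, assuming FreeBSD's fls(3) from <strings.h> and powerof2() from <sys/param.h>:

#include <sys/param.h>
#include <stdio.h>
#include <strings.h>

static u_int
clp2(u_int v)
{
	return (powerof2(v) ? v : 1 << fls(v));
}

int
main(void)
{
	/* Prints: 1 32 64 256 */
	printf("%u %u %u %u\n", clp2(1), clp2(24), clp2(64), clp2(255));
	return (0);
}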
 
 int
 dmar_init_irt(struct dmar_unit *unit)
 {
 
 	if ((unit->hw_ecap & DMAR_ECAP_IR) == 0)
 		return (0);
 	unit->ir_enabled = 1;
 	TUNABLE_INT_FETCH("hw.dmar.ir", &unit->ir_enabled);
 	if (!unit->ir_enabled)
 		return (0);
 	if (!unit->qi_enabled) {
 		unit->ir_enabled = 0;
 		if (bootverbose)
 			device_printf(unit->dev,
 	     "QI disabled, disabling interrupt remapping\n");
 		return (0);
 	}
 	unit->irte_cnt = clp2(NUM_IO_INTS);
 	unit->irt = (dmar_irte_t *)(uintptr_t)kmem_alloc_contig(
 	    unit->irte_cnt * sizeof(dmar_irte_t), M_ZERO | M_WAITOK, 0,
 	    dmar_high, PAGE_SIZE, 0, DMAR_IS_COHERENT(unit) ?
 	    VM_MEMATTR_DEFAULT : VM_MEMATTR_UNCACHEABLE);
 	if (unit->irt == NULL)
 		return (ENOMEM);
 	unit->irt_phys = pmap_kextract((vm_offset_t)unit->irt);
 	unit->irtids = vmem_create("dmarirt", 0, unit->irte_cnt, 1, 0,
 	    M_FIRSTFIT | M_NOWAIT);
 	DMAR_LOCK(unit);
 	dmar_load_irt_ptr(unit);
 	dmar_qi_invalidate_iec_glob(unit);
 	DMAR_UNLOCK(unit);
 
 	/*
 	 * Initialize mappings for already configured interrupt pins.
 	 * Required because otherwise the interrupts would fault for
 	 * lack of IRTEs.
 	 */
 	intr_reprogram();
 
 	DMAR_LOCK(unit);
 	dmar_enable_ir(unit);
 	DMAR_UNLOCK(unit);
 	return (0);
 }
 
 void
 dmar_fini_irt(struct dmar_unit *unit)
 {
 
 	unit->ir_enabled = 0;
 	if (unit->irt != NULL) {
 		dmar_disable_ir(unit);
 		dmar_qi_invalidate_iec_glob(unit);
 		vmem_destroy(unit->irtids);
-		kmem_free(kernel_arena, (vm_offset_t)unit->irt,
-		    unit->irte_cnt * sizeof(dmar_irte_t));
+		kmem_free((vm_offset_t)unit->irt, unit->irte_cnt *
+		    sizeof(dmar_irte_t));
 	}
 }
Index: head/sys/x86/iommu/intel_qi.c
===================================================================
--- head/sys/x86/iommu/intel_qi.c	(revision 338317)
+++ head/sys/x86/iommu/intel_qi.c	(revision 338318)
@@ -1,474 +1,474 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_acpi.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/memdesc.h>
 #include <sys/module.h>
 #include <sys/rman.h>
 #include <sys/taskqueue.h>
 #include <sys/time.h>
 #include <sys/tree.h>
 #include <sys/vmem.h>
 #include <machine/bus.h>
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/accommon.h>
 #include <dev/acpica/acpivar.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/cpu.h>
 #include <x86/include/busdma_impl.h>
 #include <x86/iommu/intel_reg.h>
 #include <x86/iommu/busdma_dmar.h>
 #include <x86/iommu/intel_dmar.h>
 
 static bool
 dmar_qi_seq_processed(const struct dmar_unit *unit,
     const struct dmar_qi_genseq *pseq)
 {
 
 	return (pseq->gen < unit->inv_waitd_gen ||
 	    (pseq->gen == unit->inv_waitd_gen &&
 	     pseq->seq <= unit->inv_waitd_seq_hw));
 }
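
A wait is considered processed once the (generation, sequence) pair last written back by the hardware is lexicographically at or past the waiter's target; the generation counter lets the 32-bit sequence wrap (dmar_qi_emit_wait_seq() below bumps the generation when the sequence would overflow).  A minimal sketch of that comparison, with illustrative types rather than the driver's structures:

#include <stdbool.h>
#include <stdint.h>

struct genseq {
	uint32_t gen;
	uint32_t seq;
};

/* True once the hardware-reported pair has reached the target pair. */
static bool
seq_processed(const struct genseq *hw, const struct genseq *target)
{
	return (target->gen < hw->gen ||
	    (target->gen == hw->gen && target->seq <= hw->seq));
}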
 
 static int
 dmar_enable_qi(struct dmar_unit *unit)
 {
 	int error;
 
 	DMAR_ASSERT_LOCKED(unit);
 	unit->hw_gcmd |= DMAR_GCMD_QIE;
 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES)
 	    != 0));
 	return (error);
 }
 
 static int
 dmar_disable_qi(struct dmar_unit *unit)
 {
 	int error;
 
 	DMAR_ASSERT_LOCKED(unit);
 	unit->hw_gcmd &= ~DMAR_GCMD_QIE;
 	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
 	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES)
 	    == 0));
 	return (error);
 }
 
 static void
 dmar_qi_advance_tail(struct dmar_unit *unit)
 {
 
 	DMAR_ASSERT_LOCKED(unit);
 	dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail);
 }
 
 static void
 dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
 {
 	uint32_t head;
 	int bytes;
 
 	DMAR_ASSERT_LOCKED(unit);
 	bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT;
 	for (;;) {
 		if (bytes <= unit->inv_queue_avail)
 			break;
 		/* refill */
 		head = dmar_read4(unit, DMAR_IQH_REG);
 		head &= DMAR_IQH_MASK;
 		unit->inv_queue_avail = head - unit->inv_queue_tail -
 		    DMAR_IQ_DESCR_SZ;
 		if (head <= unit->inv_queue_tail)
 			unit->inv_queue_avail += unit->inv_queue_size;
 		if (bytes <= unit->inv_queue_avail)
 			break;
 
 		/*
 		 * No space in the queue; busy-wait.  The hardware
 		 * must make progress.  But first advance the tail to
 		 * inform the descriptor streamer about the entries we
 		 * might have already filled, otherwise they could
 		 * clog the whole queue.
 		 */
 		dmar_qi_advance_tail(unit);
 		unit->inv_queue_full++;
 		cpu_spinwait();
 	}
 	unit->inv_queue_avail -= bytes;
 }
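
The free-space computation above treats the invalidation queue as a ring with one descriptor held in reserve, so that head == tail always means "empty" rather than "full".  A small user-space sketch of the same arithmetic with made-up sizes:

#include <stdio.h>
#include <stdint.h>

static uint32_t
ring_avail(uint32_t head, uint32_t tail, uint32_t size, uint32_t descr_sz)
{
	uint32_t avail;

	/* Same formula as dmar_qi_ensure(): reserve one descriptor. */
	avail = head - tail - descr_sz;
	if (head <= tail)
		avail += size;
	return (avail);
}

int
main(void)
{
	/* 4 KB ring, 16-byte descriptors. */
	printf("%u\n", ring_avail(0, 0, 4096, 16));	/* empty ring: 4080 */
	printf("%u\n", ring_avail(64, 32, 4096, 16));	/* 16 bytes free */
	return (0);
}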
 
 static void
 dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2)
 {
 
 	DMAR_ASSERT_LOCKED(unit);
 	*(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1;
 	unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
 	KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
 	    ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
 	    (uintmax_t)unit->inv_queue_size));
 	unit->inv_queue_tail &= unit->inv_queue_size - 1;
 	*(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2;
 	unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
 	KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
 	    ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
 	    (uintmax_t)unit->inv_queue_size));
 	unit->inv_queue_tail &= unit->inv_queue_size - 1;
 }
 
 static void
 dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr,
     bool memw, bool fence)
 {
 
 	DMAR_ASSERT_LOCKED(unit);
 	dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID |
 	    (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) |
 	    (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) |
 	    (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) |
 	    (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0),
 	    memw ? unit->inv_waitd_seq_hw_phys : 0);
 }
 
 static void
 dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct dmar_qi_genseq *pseq,
     bool emit_wait)
 {
 	struct dmar_qi_genseq gsec;
 	uint32_t seq;
 
 	KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
 	DMAR_ASSERT_LOCKED(unit);
 	if (unit->inv_waitd_seq == 0xffffffff) {
 		gsec.gen = unit->inv_waitd_gen;
 		gsec.seq = unit->inv_waitd_seq;
 		dmar_qi_ensure(unit, 1);
 		dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false);
 		dmar_qi_advance_tail(unit);
 		while (!dmar_qi_seq_processed(unit, &gsec))
 			cpu_spinwait();
 		unit->inv_waitd_gen++;
 		unit->inv_waitd_seq = 1;
 	}
 	seq = unit->inv_waitd_seq++;
 	pseq->gen = unit->inv_waitd_gen;
 	pseq->seq = seq;
 	if (emit_wait) {
 		dmar_qi_ensure(unit, 1);
 		dmar_qi_emit_wait_descr(unit, seq, true, true, false);
 	}
 }
 
 static void
 dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct dmar_qi_genseq *gseq,
     bool nowait)
 {
 
 	DMAR_ASSERT_LOCKED(unit);
 	unit->inv_seq_waiters++;
 	while (!dmar_qi_seq_processed(unit, gseq)) {
 		if (cold || nowait) {
 			cpu_spinwait();
 		} else {
 			msleep(&unit->inv_seq_waiters, &unit->lock, 0,
 			    "dmarse", hz);
 		}
 	}
 	unit->inv_seq_waiters--;
 }
 
 void
 dmar_qi_invalidate_locked(struct dmar_domain *domain, dmar_gaddr_t base,
     dmar_gaddr_t size, struct dmar_qi_genseq *pseq, bool emit_wait)
 {
 	struct dmar_unit *unit;
 	dmar_gaddr_t isize;
 	int am;
 
 	unit = domain->dmar;
 	DMAR_ASSERT_LOCKED(unit);
 	for (; size > 0; base += isize, size -= isize) {
 		am = calc_am(unit, base, size, &isize);
 		dmar_qi_ensure(unit, 1);
 		dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV |
 		    DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW |
 		    DMAR_IQ_DESCR_IOTLB_DR |
 		    DMAR_IQ_DESCR_IOTLB_DID(domain->domain),
 		    base | am);
 	}
 	dmar_qi_emit_wait_seq(unit, pseq, emit_wait);
 	dmar_qi_advance_tail(unit);
 }
 
 void
 dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit)
 {
 	struct dmar_qi_genseq gseq;
 
 	DMAR_ASSERT_LOCKED(unit);
 	dmar_qi_ensure(unit, 2);
 	dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0);
 	dmar_qi_emit_wait_seq(unit, &gseq, true);
 	dmar_qi_advance_tail(unit);
 	dmar_qi_wait_for_seq(unit, &gseq, false);
 }
 
 void
 dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit)
 {
 	struct dmar_qi_genseq gseq;
 
 	DMAR_ASSERT_LOCKED(unit);
 	dmar_qi_ensure(unit, 2);
 	dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB |
 	    DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0);
 	dmar_qi_emit_wait_seq(unit, &gseq, true);
 	dmar_qi_advance_tail(unit);
 	dmar_qi_wait_for_seq(unit, &gseq, false);
 }
 
 void
 dmar_qi_invalidate_iec_glob(struct dmar_unit *unit)
 {
 	struct dmar_qi_genseq gseq;
 
 	DMAR_ASSERT_LOCKED(unit);
 	dmar_qi_ensure(unit, 2);
 	dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0);
 	dmar_qi_emit_wait_seq(unit, &gseq, true);
 	dmar_qi_advance_tail(unit);
 	dmar_qi_wait_for_seq(unit, &gseq, false);
 }
 
 void
 dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
 {
 	struct dmar_qi_genseq gseq;
 	u_int c, l;
 
 	DMAR_ASSERT_LOCKED(unit);
 	KASSERT(start < unit->irte_cnt && start < start + cnt &&
 	    start + cnt <= unit->irte_cnt,
 	    ("inv iec overflow %d %d %d", unit->irte_cnt, start, cnt));
 	for (; cnt > 0; cnt -= c, start += c) {
 		l = ffs(start | cnt) - 1;
 		c = 1 << l;
 		dmar_qi_ensure(unit, 1);
 		dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV |
 		    DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) |
 		    DMAR_IQ_DESCR_IEC_IM(l), 0);
 	}
 	dmar_qi_ensure(unit, 1);
 	dmar_qi_emit_wait_seq(unit, &gseq, true);
 	dmar_qi_advance_tail(unit);
 
 	/*
 	 * The caller of this function, in particular
 	 * dmar_ir_program_irte(), may be invoked from a context where
 	 * sleeping is forbidden (in fact, the intr_table_lock mutex
 	 * may be held, locked from intr_shuffle_irqs()).  Wait for the
 	 * invalidation to complete using a busy wait.
 	 *
 	 * The impact on the interrupt setup code is small; the
 	 * expected overhead is comparable to a chipset register read.
 	 * It is more harmful to parallel DMA operations, since we own
 	 * the dmar unit lock until the whole invalidation queue is
 	 * processed, which includes requests possibly issued before
 	 * our own.
 	 */
 	dmar_qi_wait_for_seq(unit, &gseq, true);
 }
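
The chunking loop above covers [start, start + cnt) with naturally aligned power-of-two blocks, which is what the IEC descriptor's index-mask encoding requires: ffs(start | cnt) picks a block size that keeps 'start' aligned and divides the remaining count, so the loop lands exactly on start + cnt.  A standalone demonstration using plain ffs(3) from <strings.h>:

#include <stdio.h>
#include <strings.h>

int
main(void)
{
	unsigned int start = 8, cnt = 12, c, l;

	/* For start=8, cnt=12 this prints blocks (8,4) (12,4) (16,4). */
	for (; cnt > 0; cnt -= c, start += c) {
		l = ffs(start | cnt) - 1;
		c = 1U << l;
		printf("invalidate %u entries at index %u (mask order %u)\n",
		    c, start, l);
	}
	return (0);
}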
 
 int
 dmar_qi_intr(void *arg)
 {
 	struct dmar_unit *unit;
 
 	unit = arg;
 	KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled", unit->unit));
 	taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task);
 	return (FILTER_HANDLED);
 }
 
 static void
 dmar_qi_task(void *arg, int pending __unused)
 {
 	struct dmar_unit *unit;
 	struct dmar_map_entry *entry;
 	uint32_t ics;
 
 	unit = arg;
 
 	DMAR_LOCK(unit);
 	for (;;) {
 		entry = TAILQ_FIRST(&unit->tlb_flush_entries);
 		if (entry == NULL)
 			break;
 		if (!dmar_qi_seq_processed(unit, &entry->gseq))
 			break;
 		TAILQ_REMOVE(&unit->tlb_flush_entries, entry, dmamap_link);
 		DMAR_UNLOCK(unit);
 		dmar_domain_free_entry(entry, (entry->flags &
 		    DMAR_MAP_ENTRY_QI_NF) == 0);
 		DMAR_LOCK(unit);
 	}
 	ics = dmar_read4(unit, DMAR_ICS_REG);
 	if ((ics & DMAR_ICS_IWC) != 0) {
 		ics = DMAR_ICS_IWC;
 		dmar_write4(unit, DMAR_ICS_REG, ics);
 	}
 	if (unit->inv_seq_waiters > 0)
 		wakeup(&unit->inv_seq_waiters);
 	DMAR_UNLOCK(unit);
 }
 
 int
 dmar_init_qi(struct dmar_unit *unit)
 {
 	uint64_t iqa;
 	uint32_t ics;
 	int qi_sz;
 
 	if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0)
 		return (0);
 	unit->qi_enabled = 1;
 	TUNABLE_INT_FETCH("hw.dmar.qi", &unit->qi_enabled);
 	if (!unit->qi_enabled)
 		return (0);
 
 	TAILQ_INIT(&unit->tlb_flush_entries);
 	TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit);
 	unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK,
 	    taskqueue_thread_enqueue, &unit->qi_taskqueue);
 	taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV,
 	    "dmar%d qi taskq", unit->unit);
 
 	unit->inv_waitd_gen = 0;
 	unit->inv_waitd_seq = 1;
 
 	qi_sz = DMAR_IQA_QS_DEF;
 	TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz);
 	if (qi_sz > DMAR_IQA_QS_MAX)
 		qi_sz = DMAR_IQA_QS_MAX;
 	unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
 	/* Reserve one descriptor to prevent wraparound. */
 	unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ;
 
 	/* The DMAR's reads of the invalidation queue are always coherent. */
 	unit->inv_queue = kmem_alloc_contig(unit->inv_queue_size, M_WAITOK |
 	    M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
 	unit->inv_waitd_seq_hw_phys = pmap_kextract(
 	    (vm_offset_t)&unit->inv_waitd_seq_hw);
 
 	DMAR_LOCK(unit);
 	dmar_write8(unit, DMAR_IQT_REG, 0);
 	iqa = pmap_kextract(unit->inv_queue);
 	iqa |= qi_sz;
 	dmar_write8(unit, DMAR_IQA_REG, iqa);
 	dmar_enable_qi(unit);
 	ics = dmar_read4(unit, DMAR_ICS_REG);
 	if ((ics & DMAR_ICS_IWC) != 0) {
 		ics = DMAR_ICS_IWC;
 		dmar_write4(unit, DMAR_ICS_REG, ics);
 	}
 	dmar_enable_qi_intr(unit);
 	DMAR_UNLOCK(unit);
 
 	return (0);
 }
 
 void
 dmar_fini_qi(struct dmar_unit *unit)
 {
 	struct dmar_qi_genseq gseq;
 
 	if (!unit->qi_enabled)
 		return;
 	taskqueue_drain(unit->qi_taskqueue, &unit->qi_task);
 	taskqueue_free(unit->qi_taskqueue);
 	unit->qi_taskqueue = NULL;
 
 	DMAR_LOCK(unit);
 	/* quiesce */
 	dmar_qi_ensure(unit, 1);
 	dmar_qi_emit_wait_seq(unit, &gseq, true);
 	dmar_qi_advance_tail(unit);
 	dmar_qi_wait_for_seq(unit, &gseq, false);
 	/* Only after the quiesce, disable the queue. */
 	dmar_disable_qi_intr(unit);
 	dmar_disable_qi(unit);
 	KASSERT(unit->inv_seq_waiters == 0,
 	    ("dmar%d: waiters on disabled queue", unit->unit));
 	DMAR_UNLOCK(unit);
 
-	kmem_free(kernel_arena, unit->inv_queue, unit->inv_queue_size);
+	kmem_free(unit->inv_queue, unit->inv_queue_size);
 	unit->inv_queue = 0;
 	unit->inv_queue_size = 0;
 	unit->qi_enabled = 0;
 }
 
 void
 dmar_enable_qi_intr(struct dmar_unit *unit)
 {
 	uint32_t iectl;
 
 	DMAR_ASSERT_LOCKED(unit);
 	KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit));
 	iectl = dmar_read4(unit, DMAR_IECTL_REG);
 	iectl &= ~DMAR_IECTL_IM;
 	dmar_write4(unit, DMAR_IECTL_REG, iectl);
 }
 
 void
 dmar_disable_qi_intr(struct dmar_unit *unit)
 {
 	uint32_t iectl;
 
 	DMAR_ASSERT_LOCKED(unit);
 	KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit));
 	iectl = dmar_read4(unit, DMAR_IECTL_REG);
 	dmar_write4(unit, DMAR_IECTL_REG, iectl | DMAR_IECTL_IM);
 }
Index: head/sys/x86/x86/busdma_bounce.c
===================================================================
--- head/sys/x86/x86/busdma_bounce.c	(revision 338317)
+++ head/sys/x86/x86/busdma_bounce.c	(revision 338318)
@@ -1,1316 +1,1315 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1997, 1998 Justin T. Gibbs.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 #include <x86/include/busdma_impl.h>
 
 #ifdef __i386__
 #define MAX_BPAGES 512
 #else
 #define MAX_BPAGES 8192
 #endif
 
 enum {
 	BUS_DMA_COULD_BOUNCE	= 0x01,
 	BUS_DMA_MIN_ALLOC_COMP	= 0x02,
 	BUS_DMA_KMEM_ALLOC	= 0x04,
 };
 
 struct bounce_zone;
 
 struct bus_dma_tag {
 	struct bus_dma_tag_common common;
 	int			map_count;
 	int			bounce_flags;
 	bus_dma_segment_t	*segments;
 	struct bounce_zone	*bounce_zone;
 };
 
 struct bounce_page {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	bus_addr_t	busaddr;	/* Physical address */
 	vm_offset_t	datavaddr;	/* kva of client data */
 	vm_offset_t	dataoffs;	/* page offset of client data */
 	vm_page_t	datapage[2];	/* physical page(s) of client data */
 	bus_size_t	datacount;	/* client data count */
 	STAILQ_ENTRY(bounce_page) links;
 };
 
 int busdma_swi_pending;
 
 struct bounce_zone {
 	STAILQ_ENTRY(bounce_zone) links;
 	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
 	int		total_bpages;
 	int		free_bpages;
 	int		reserved_bpages;
 	int		active_bpages;
 	int		total_bounced;
 	int		total_deferred;
 	int		map_count;
 	int		domain;
 	bus_size_t	alignment;
 	bus_addr_t	lowaddr;
 	char		zoneid[8];
 	char		lowaddrid[20];
 	struct sysctl_ctx_list sysctl_tree;
 	struct sysctl_oid *sysctl_tree_top;
 };
 
 static struct mtx bounce_lock;
 static int total_bpages;
 static int busdma_zonecount;
 static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
 
 static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
 	   "Total bounce pages");
 
 struct bus_dmamap {
 	struct bp_list	       bpages;
 	int		       pagesneeded;
 	int		       pagesreserved;
 	bus_dma_tag_t	       dmat;
 	struct memdesc	       mem;
 	bus_dmamap_callback_t *callback;
 	void		      *callback_arg;
 	STAILQ_ENTRY(bus_dmamap) links;
 };
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
 static struct bus_dmamap nobounce_dmamap;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
 static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
 static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
 				int commit);
 static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
 				  vm_offset_t vaddr, bus_addr_t addr1,
 				  bus_addr_t addr2, bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
 static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
 				    pmap_t pmap, void *buf, bus_size_t buflen,
 				    int flags);
 static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
 				   vm_paddr_t buf, bus_size_t buflen,
 				   int flags);
 static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
 				     int flags);
 
 static int
 bounce_bus_dma_zone_setup(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *bz;
 	int error;
 
 	/* Must bounce */
 	if ((error = alloc_bounce_zone(dmat)) != 0)
 		return (error);
 	bz = dmat->bounce_zone;
 
 	if (ptoa(bz->total_bpages) < dmat->common.maxsize) {
 		int pages;
 
 		pages = atop(dmat->common.maxsize) - bz->total_bpages;
 
 		/* Add pages to our bounce pool */
 		if (alloc_bounce_pages(dmat, pages) < pages)
 			return (ENOMEM);
 	}
 	/* Performed initial allocation */
 	dmat->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP;
 
 	return (0);
 }
 
 /*
  * Allocate a device-specific dma_tag.
  */
 static int
 bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	bus_dma_tag_t newtag;
 	int error;
 
 	*dmat = NULL;
 	error = common_bus_dma_tag_create(parent != NULL ? &parent->common :
 	    NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg,
 	    maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
 	    sizeof (struct bus_dma_tag), (void **)&newtag);
 	if (error != 0)
 		return (error);
 
 	newtag->common.impl = &bus_dma_bounce_impl;
 	newtag->map_count = 0;
 	newtag->segments = NULL;
 
 	if (parent != NULL && ((newtag->common.filter != NULL) ||
 	    ((parent->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0)))
 		newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE;
 
 	if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) ||
 	    newtag->common.alignment > 1)
 		newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE;
 
 	if (((newtag->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 	    (flags & BUS_DMA_ALLOCNOW) != 0)
 		error = bounce_bus_dma_zone_setup(newtag);
 	else
 		error = 0;
 	
 	if (error != 0)
 		free(newtag, M_DEVBUF);
 	else
 		*dmat = newtag;
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
 	    error);
 	return (error);
 }
 
 /*
  * Update the domain for the tag.  We may need to reallocate the zone and
  * bounce pages.
  */ 
 static int
 bounce_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
 {
 
 	KASSERT(dmat->map_count == 0,
 	    ("bounce_bus_dma_tag_set_domain:  Domain set after use.\n"));
 	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) == 0 ||
 	    dmat->bounce_zone == NULL)
 		return (0);
 	dmat->bounce_flags &= ~BUS_DMA_MIN_ALLOC_COMP;
 	return (bounce_bus_dma_zone_setup(dmat));
 }
 
 static int
 bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat)
 {
 	bus_dma_tag_t dmat_copy, parent;
 	int error;
 
 	error = 0;
 	dmat_copy = dmat;
 
 	if (dmat != NULL) {
 		if (dmat->map_count != 0) {
 			error = EBUSY;
 			goto out;
 		}
 		while (dmat != NULL) {
 			parent = (bus_dma_tag_t)dmat->common.parent;
 			atomic_subtract_int(&dmat->common.ref_count, 1);
 			if (dmat->common.ref_count == 0) {
 				if (dmat->segments != NULL)
 					free_domain(dmat->segments, M_DEVBUF);
 				free(dmat, M_DEVBUF);
 				/*
 				 * That was the last reference, so
 				 * release our reference on our
 				 * parent.
 				 */
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 out:
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
 	return (error);
 }
 
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static int
 bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	struct bounce_zone *bz;
 	int error, maxpages, pages;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__);
 
 	error = 0;
 
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc_domain(
 		    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
 		    M_DEVBUF, dmat->common.domain, M_NOWAIT);
 		if (dmat->segments == NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 
 	/*
 	 * Bouncing might be required if the driver asks for an active
 	 * exclusion region, a data alignment that is stricter than 1, and/or
 	 * an active address boundary.
 	 */
 	if (dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) {
 		/* Must bounce */
 		if (dmat->bounce_zone == NULL) {
 			if ((error = alloc_bounce_zone(dmat)) != 0)
 				return (error);
 		}
 		bz = dmat->bounce_zone;
 
 		*mapp = (bus_dmamap_t)malloc_domain(sizeof(**mapp), M_DEVBUF,
 		    dmat->common.domain, M_NOWAIT | M_ZERO);
 		if (*mapp == NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, ENOMEM);
 			return (ENOMEM);
 		}
 
 		/* Initialize the new map */
 		STAILQ_INIT(&((*mapp)->bpages));
 
 		/*
 		 * Attempt to add pages to our pool on a per-instance
 		 * basis up to a sane limit.
 		 */
 		if (dmat->common.alignment > 1)
 			maxpages = MAX_BPAGES;
 		else
 			maxpages = MIN(MAX_BPAGES, Maxmem -
 			    atop(dmat->common.lowaddr));
 		if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0 ||
 		    (bz->map_count > 0 && bz->total_bpages < maxpages)) {
 			pages = MAX(atop(dmat->common.maxsize), 1);
 			pages = MIN(maxpages - bz->total_bpages, pages);
 			pages = MAX(pages, 1);
 			if (alloc_bounce_pages(dmat, pages) < pages)
 				error = ENOMEM;
 			if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP)
 			    == 0) {
 				if (error == 0) {
 					dmat->bounce_flags |=
 					    BUS_DMA_MIN_ALLOC_COMP;
 				}
 			} else
 				error = 0;
 		}
 		bz->map_count++;
 	} else {
 		*mapp = NULL;
 	}
 	if (error == 0)
 		dmat->map_count++;
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->common.flags, error);
 	return (error);
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static int
 bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 
 	if (map != NULL && map != &nobounce_dmamap) {
 		if (STAILQ_FIRST(&map->bpages) != NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, EBUSY);
 			return (EBUSY);
 		}
 		if (dmat->bounce_zone)
 			dmat->bounce_zone->map_count--;
 		free_domain(map, M_DEVBUF);
 	}
 	dmat->map_count--;
 	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
 	return (0);
 }
 
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into
  * bus device space based on the constraints listed in the dma tag.
  * A dmamap for use with dmamap_load is also allocated.
  */
 static int
 bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	vm_memattr_t attr;
 	int mflags;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__);
 
 	if (flags & BUS_DMA_NOWAIT)
 		mflags = M_NOWAIT;
 	else
 		mflags = M_WAITOK;
 
 	/* If we succeed, no mapping/bouncing will be required */
 	*mapp = NULL;
 
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc_domain(
 		    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
 		    M_DEVBUF, dmat->common.domain, mflags);
 		if (dmat->segments == NULL) {
 			CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 			    __func__, dmat, dmat->common.flags, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
 	if (flags & BUS_DMA_NOCACHE)
 		attr = VM_MEMATTR_UNCACHEABLE;
 	else
 		attr = VM_MEMATTR_DEFAULT;
 
 	/*
 	 * Allocate the buffer from the malloc(9) allocator if...
 	 *  - It's small enough to fit into a single power-of-two-sized bucket.
 	 *  - The alignment is less than or equal to the maximum size.
 	 *  - The low address requirement is fulfilled.
 	 * else allocate non-contiguous pages if...
 	 *  - The number of pages that could be allocated does not exceed
 	 *    nsegments, even when the maximum segment size is less
 	 *    than PAGE_SIZE.
 	 *  - The alignment constraint isn't larger than a page boundary.
 	 *  - There are no boundary-crossing constraints.
 	 * else allocate a block of contiguous pages because one or more of the
 	 * constraints is something that only the contig allocator can fulfill.
 	 *
 	 * NOTE: The (dmat->common.alignment <= dmat->maxsize) check
 	 * below is just a quick hack.  The exact alignment guarantees
 	 * of malloc(9) need to be nailed down, and the code below
 	 * should be rewritten to take that into account.
 	 *
 	 * In the meantime, warn the user if malloc gets it wrong.
 	 */
 	if ((dmat->common.maxsize <= PAGE_SIZE) &&
 	   (dmat->common.alignment <= dmat->common.maxsize) &&
 	    dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
 	    attr == VM_MEMATTR_DEFAULT) {
 		*vaddr = malloc_domain(dmat->common.maxsize, M_DEVBUF,
 		    dmat->common.domain, mflags);
 	} else if (dmat->common.nsegments >=
 	    howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) &&
 	    dmat->common.alignment <= PAGE_SIZE &&
 	    (dmat->common.boundary % PAGE_SIZE) == 0) {
 		/* Page-based multi-segment allocations allowed */
 		*vaddr = (void *)kmem_alloc_attr_domain(dmat->common.domain,
 		    dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr,
 		    attr);
 		dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC;
 	} else {
 		*vaddr = (void *)kmem_alloc_contig_domain(dmat->common.domain,
 		    dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr,
 		    dmat->common.alignment != 0 ? dmat->common.alignment : 1ul,
 		    dmat->common.boundary, attr);
 		dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC;
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->common.flags, ENOMEM);
 		return (ENOMEM);
 	} else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) {
 		printf("bus_dmamem_alloc failed to align memory properly.\n");
 	}
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->common.flags, 0);
 	return (0);
 }
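/*
 * A minimal, userspace-only sketch of the allocator-selection heuristic
 * described in the comment above, assuming 4 KiB pages.  The sketch_tag
 * struct, the maxphysaddr parameter, and all sketch_* names are hypothetical
 * stand-ins for the real tag fields and ptoa(Maxmem); the BUS_DMA_NOCACHE
 * memory-attribute check is omitted.  Only the three-way decision itself
 * mirrors the code.
 */
#include <stdio.h>
#include <stdint.h>

#define	SKETCH_PAGE_SIZE	4096ULL

/* Round up x/y, as howmany() does in <sys/param.h>. */
#define	SKETCH_HOWMANY(x, y)	(((x) + ((y) - 1)) / (y))

struct sketch_tag {
	uint64_t maxsize;	/* largest allocation the tag allows */
	uint64_t maxsegsz;	/* largest single DMA segment */
	uint64_t alignment;	/* required start alignment */
	uint64_t boundary;	/* boundary a segment may not cross (0 = none) */
	uint64_t lowaddr;	/* highest acceptable physical address */
	uint64_t nsegments;	/* scatter/gather entries available */
};

static const char *
sketch_pick_allocator(const struct sketch_tag *t, uint64_t maxphysaddr)
{
	uint64_t persegsz;

	/* Small, loosely aligned, no low-address restriction: malloc(9). */
	if (t->maxsize <= SKETCH_PAGE_SIZE &&
	    t->alignment <= t->maxsize &&
	    t->lowaddr >= maxphysaddr)
		return ("malloc(9)");

	/* Enough segments and page-friendly constraints: loose pages. */
	persegsz = t->maxsegsz < SKETCH_PAGE_SIZE ? t->maxsegsz :
	    SKETCH_PAGE_SIZE;
	if (t->nsegments >= SKETCH_HOWMANY(t->maxsize, persegsz) &&
	    t->alignment <= SKETCH_PAGE_SIZE &&
	    t->boundary % SKETCH_PAGE_SIZE == 0)
		return ("kmem_alloc_attr (non-contiguous pages)");

	/* Everything else needs physically contiguous memory. */
	return ("kmem_alloc_contig (contiguous pages)");
}

int
main(void)
{
	struct sketch_tag t = {
		.maxsize = 64 * 1024, .maxsegsz = 64 * 1024,
		.alignment = 4096, .boundary = 0,
		.lowaddr = UINT64_MAX, .nsegments = 16,
	};

	printf("chosen allocator: %s\n",
	    sketch_pick_allocator(&t, 1ULL << 34));
	return (0);
}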
 
 /*
  * Free a piece of memory and its associated dmamap that were allocated
  * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
  */
 static void
 bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 	/*
 	 * dmamem does not need to be bounced, so the map should be
 	 * NULL; the BUS_DMA_KMEM_ALLOC flag is clear if malloc() was
 	 * used and set if one of the kmem_alloc_*() variants was used.
 	 */
 	if (map != NULL)
 		panic("bus_dmamem_free: Invalid map freed\n");
 	if ((dmat->bounce_flags & BUS_DMA_KMEM_ALLOC) == 0)
 		free_domain(vaddr, M_DEVBUF);
 	else
-		kmem_free(kernel_arena, (vm_offset_t)vaddr,
-		    dmat->common.maxsize);
+		kmem_free((vm_offset_t)vaddr, dmat->common.maxsize);
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat,
 	    dmat->bounce_flags);
 }
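/*
 * The hunk above tracks the kmem_free(9) interface change in this revision:
 * the arena argument is gone, and kernel-virtual allocations are now freed
 * by address and size alone.  A hedged sketch of the before/after calling
 * convention, with "buf" and "size" as illustrative names only:
 *
 *	old:	kmem_free(kernel_arena, (vm_offset_t)buf, size);
 *	new:	kmem_free((vm_offset_t)buf, size);
 */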
 
 static void
 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags)
 {
 	bus_addr_t curaddr;
 	bus_size_t sgsize;
 
 	if ((map != &nobounce_dmamap && map->pagesneeded == 0)) {
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		curaddr = buf;
 		while (buflen != 0) {
 			sgsize = MIN(buflen, dmat->common.maxsegsz);
 			if (bus_dma_run_filter(&dmat->common, curaddr)) {
 				sgsize = MIN(sgsize,
 				    PAGE_SIZE - (curaddr & PAGE_MASK));
 				map->pagesneeded++;
 			}
 			curaddr += sgsize;
 			buflen -= sgsize;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
     void *buf, bus_size_t buflen, int flags)
 {
 	vm_offset_t vaddr;
 	vm_offset_t vendaddr;
 	bus_addr_t paddr;
 	bus_size_t sg_len;
 
 	if ((map != &nobounce_dmamap && map->pagesneeded == 0)) {
 		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
 		    "alignment= %d", dmat->common.lowaddr,
 		    ptoa((vm_paddr_t)Maxmem),
 		    dmat->common.boundary, dmat->common.alignment);
 		CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d",
 		    map, &nobounce_dmamap, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		vaddr = (vm_offset_t)buf;
 		vendaddr = (vm_offset_t)buf + buflen;
 
 		while (vaddr < vendaddr) {
 			sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK);
 			if (pmap == kernel_pmap)
 				paddr = pmap_kextract(vaddr);
 			else
 				paddr = pmap_extract(pmap, vaddr);
 			if (bus_dma_run_filter(&dmat->common, paddr) != 0) {
 				sg_len = roundup2(sg_len,
 				    dmat->common.alignment);
 				map->pagesneeded++;
 			}
 			vaddr += sg_len;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma,
     int ma_offs, bus_size_t buflen, int flags)
 {
 	bus_size_t sg_len, max_sgsize;
 	int page_index;
 	vm_paddr_t paddr;
 
 	if ((map != &nobounce_dmamap && map->pagesneeded == 0)) {
 		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
 		    "alignment= %d", dmat->common.lowaddr,
 		    ptoa((vm_paddr_t)Maxmem),
 		    dmat->common.boundary, dmat->common.alignment);
 		CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d",
 		    map, &nobounce_dmamap, map->pagesneeded);
 
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		page_index = 0;
 		while (buflen > 0) {
 			paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs;
 			sg_len = PAGE_SIZE - ma_offs;
 			max_sgsize = MIN(buflen, dmat->common.maxsegsz);
 			sg_len = MIN(sg_len, max_sgsize);
 			if (bus_dma_run_filter(&dmat->common, paddr) != 0) {
 				sg_len = roundup2(sg_len,
 				    dmat->common.alignment);
 				sg_len = MIN(sg_len, max_sgsize);
 				KASSERT((sg_len & (dmat->common.alignment - 1))
 				    == 0, ("Segment size is not aligned"));
 				map->pagesneeded++;
 			}
 			if (((ma_offs + sg_len) & ~PAGE_MASK) != 0)
 				page_index++;
 			ma_offs = (ma_offs + sg_len) & PAGE_MASK;
 			KASSERT(buflen >= sg_len,
 			    ("Segment length overruns original buffer"));
 			buflen -= sg_len;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static int
 _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
 {
 
 	/* Reserve the necessary bounce pages. */
 	mtx_lock(&bounce_lock);
 	if (flags & BUS_DMA_NOWAIT) {
 		if (reserve_bounce_pages(dmat, map, 0) != 0) {
 			mtx_unlock(&bounce_lock);
 			return (ENOMEM);
 		}
 	} else {
 		if (reserve_bounce_pages(dmat, map, 1) != 0) {
 			/* Queue us for resources */
 			STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links);
 			mtx_unlock(&bounce_lock);
 			return (EINPROGRESS);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 
 	return (0);
 }
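/*
 * A hedged, driver-side sketch of how the two outcomes above surface through
 * bus_dmamap_load(9): a NOWAIT shortage comes back as ENOMEM, while a WAITOK
 * load that cannot reserve enough bounce pages is queued and reported as
 * EINPROGRESS, with the callback invoked later from busdma_swi().  The
 * xyz_* names and the softc layout are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <machine/bus.h>

struct xyz_softc {
	bus_dma_tag_t	sc_dtag;
	bus_dmamap_t	sc_dmap;
	void		*sc_buf;
	bus_size_t	sc_len;
};

static void
xyz_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	/* Program the device with segs[0 .. nseg - 1] once the load is done. */
}

static int
xyz_start_io(struct xyz_softc *sc)
{
	int error;

	error = bus_dmamap_load(sc->sc_dtag, sc->sc_dmap, sc->sc_buf,
	    sc->sc_len, xyz_load_cb, sc, BUS_DMA_WAITOK);
	if (error == EINPROGRESS)
		return (0);	/* Deferred; xyz_load_cb will run later. */
	return (error);
}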
 
 /*
  * Add a single contiguous physical range to the segment list.
  */
 static int
 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
     bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t baddr, bmask;
 	int seg;
 
 	/*
 	 * Make sure we don't cross any boundaries.
 	 */
 	bmask = ~(dmat->common.boundary - 1);
 	if (dmat->common.boundary > 0) {
 		baddr = (curaddr + dmat->common.boundary) & bmask;
 		if (sgsize > (baddr - curaddr))
 			sgsize = (baddr - curaddr);
 	}
 
 	/*
 	 * Insert chunk into a segment, coalescing with
 	 * previous segment if possible.
 	 */
 	seg = *segp;
 	if (seg == -1) {
 		seg = 0;
 		segs[seg].ds_addr = curaddr;
 		segs[seg].ds_len = sgsize;
 	} else {
 		if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
 		    (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz &&
 		    (dmat->common.boundary == 0 ||
 		     (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
 			segs[seg].ds_len += sgsize;
 		else {
 			if (++seg >= dmat->common.nsegments)
 				return (0);
 			segs[seg].ds_addr = curaddr;
 			segs[seg].ds_len = sgsize;
 		}
 	}
 	*segp = seg;
 	return (sgsize);
 }
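/*
 * A standalone, userspace sketch of the boundary clipping performed above.
 * With a 64 KiB boundary, a chunk starting at 0xeffff000 may only run to the
 * next boundary line at 0xf0000000, so a 0x2000-byte request is clipped to
 * 0x1000.  All names here are illustrative; only the arithmetic matches.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t
clip_to_boundary(uint64_t curaddr, uint64_t sgsize, uint64_t boundary)
{
	uint64_t baddr, bmask;

	if (boundary == 0)
		return (sgsize);		/* no boundary constraint */
	bmask = ~(boundary - 1);
	baddr = (curaddr + boundary) & bmask;	/* next boundary line */
	if (sgsize > baddr - curaddr)
		sgsize = baddr - curaddr;
	return (sgsize);
}

int
main(void)
{
	printf("clipped segment size: %#jx\n",
	    (uintmax_t)clip_to_boundary(0xeffff000ULL, 0x2000, 0x10000));
	return (0);
}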
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entry, and the ending segment on exit.
  */
 static int
 bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	bus_size_t sgsize;
 	bus_addr_t curaddr;
 	int error;
 
 	if (map == NULL)
 		map = &nobounce_dmamap;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	while (buflen > 0) {
 		curaddr = buf;
 		sgsize = MIN(buflen, dmat->common.maxsegsz);
 		if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, curaddr)) {
 			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
 			curaddr = add_bounce_page(dmat, map, 0, curaddr, 0,
 			    sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		buf += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entry, and the ending segment on exit.
  */
 static int
 bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	bus_size_t sgsize, max_sgsize;
 	bus_addr_t curaddr;
 	vm_offset_t kvaddr, vaddr;
 	int error;
 
 	if (map == NULL)
 		map = &nobounce_dmamap;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	vaddr = (vm_offset_t)buf;
 	while (buflen > 0) {
 		/*
 		 * Get the physical address for this segment.
 		 */
 		if (pmap == kernel_pmap) {
 			curaddr = pmap_kextract(vaddr);
 			kvaddr = vaddr;
 		} else {
 			curaddr = pmap_extract(pmap, vaddr);
 			kvaddr = 0;
 		}
 
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		max_sgsize = MIN(buflen, dmat->common.maxsegsz);
 		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 		if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, curaddr)) {
 			sgsize = roundup2(sgsize, dmat->common.alignment);
 			sgsize = MIN(sgsize, max_sgsize);
 			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, 0,
 			    sgsize);
 		} else {
 			sgsize = MIN(sgsize, max_sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		vaddr += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 static int
 bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct vm_page **ma, bus_size_t buflen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp)
 {
 	vm_paddr_t paddr, next_paddr;
 	int error, page_index;
 	bus_size_t sgsize, max_sgsize;
 
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * If we have to keep the offset of each page, this function
 		 * is not suitable; fall back to bus_dmamap_load_ma_triv,
 		 * which does the right thing in this case.
 		 */
 		error = bus_dmamap_load_ma_triv(dmat, map, ma, buflen, ma_offs,
 		    flags, segs, segp);
 		return (error);
 	}
 
 	if (map == NULL)
 		map = &nobounce_dmamap;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_ma(dmat, map, ma, ma_offs, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	page_index = 0;
 	while (buflen > 0) {
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs;
 		max_sgsize = MIN(buflen, dmat->common.maxsegsz);
 		sgsize = PAGE_SIZE - ma_offs;
 		if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, paddr)) {
 			sgsize = roundup2(sgsize, dmat->common.alignment);
 			sgsize = MIN(sgsize, max_sgsize);
 			KASSERT((sgsize & (dmat->common.alignment - 1)) == 0,
 			    ("Segment size is not aligned"));
 			/*
 			 * Check if two pages of the user provided buffer
 			 * are used.
 			 */
 			if ((ma_offs + sgsize) > PAGE_SIZE)
 				next_paddr =
 				    VM_PAGE_TO_PHYS(ma[page_index + 1]);
 			else
 				next_paddr = 0;
 			paddr = add_bounce_page(dmat, map, 0, paddr,
 			    next_paddr, sgsize);
 		} else {
 			sgsize = MIN(sgsize, max_sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, paddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		KASSERT(buflen >= sgsize,
 		    ("Segment length overruns original buffer"));
 		buflen -= sgsize;
 		if (((ma_offs + sgsize) & ~PAGE_MASK) != 0)
 			page_index++;
 		ma_offs = (ma_offs + sgsize) & PAGE_MASK;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 static void
 bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
 {
 
 	if (map == NULL)
 		return;
 	map->mem = *mem;
 	map->dmat = dmat;
 	map->callback = callback;
 	map->callback_arg = callback_arg;
 }
 
 static bus_dma_segment_t *
 bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	if (segs == NULL)
 		segs = dmat->segments;
 	return (segs);
 }
 
 /*
  * Release the mapping held by map.
  */
 static void
 bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bounce_page *bpage;
 
 	if (map == NULL)
 		return;
 
 	while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		STAILQ_REMOVE_HEAD(&map->bpages, links);
 		free_bounce_page(dmat, bpage);
 	}
 }
 
 static void
 bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dmasync_op_t op)
 {
 	struct bounce_page *bpage;
 	vm_offset_t datavaddr, tempvaddr;
 	bus_size_t datacount1, datacount2;
 
 	if (map == NULL || (bpage = STAILQ_FIRST(&map->bpages)) == NULL)
 		return;
 
 	/*
 	 * Handle data bouncing.  We might also want to add support for
 	 * invalidating the caches on broken hardware.
 	 */
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 	    "performing bounce", __func__, dmat, dmat->common.flags, op);
 
 	if ((op & BUS_DMASYNC_PREWRITE) != 0) {
 		while (bpage != NULL) {
 			tempvaddr = 0;
 			datavaddr = bpage->datavaddr;
 			datacount1 = bpage->datacount;
 			if (datavaddr == 0) {
 				tempvaddr =
 				    pmap_quick_enter_page(bpage->datapage[0]);
 				datavaddr = tempvaddr | bpage->dataoffs;
 				datacount1 = min(PAGE_SIZE - bpage->dataoffs,
 				    datacount1);
 			}
 
 			bcopy((void *)datavaddr,
 			    (void *)bpage->vaddr, datacount1);
 
 			if (tempvaddr != 0)
 				pmap_quick_remove_page(tempvaddr);
 
 			if (bpage->datapage[1] == 0) {
 				KASSERT(datacount1 == bpage->datacount,
 		("Mismatch between data size and provided memory space"));
 				goto next_w;
 			}
 
 			/*
 			 * We are dealing with an unmapped buffer that spans
 			 * two pages.
 			 */
 			datavaddr = pmap_quick_enter_page(bpage->datapage[1]);
 			datacount2 = bpage->datacount - datacount1;
 			bcopy((void *)datavaddr,
 			    (void *)(bpage->vaddr + datacount1), datacount2);
 			pmap_quick_remove_page(datavaddr);
 
 next_w:
 			bpage = STAILQ_NEXT(bpage, links);
 		}
 		dmat->bounce_zone->total_bounced++;
 	}
 
 	if ((op & BUS_DMASYNC_POSTREAD) != 0) {
 		while (bpage != NULL) {
 			tempvaddr = 0;
 			datavaddr = bpage->datavaddr;
 			datacount1 = bpage->datacount;
 			if (datavaddr == 0) {
 				tempvaddr =
 				    pmap_quick_enter_page(bpage->datapage[0]);
 				datavaddr = tempvaddr | bpage->dataoffs;
 				datacount1 = min(PAGE_SIZE - bpage->dataoffs,
 				    datacount1);
 			}
 
 			bcopy((void *)bpage->vaddr, (void *)datavaddr,
 			    datacount1);
 
 			if (tempvaddr != 0)
 				pmap_quick_remove_page(tempvaddr);
 
 			if (bpage->datapage[1] == 0) {
 				KASSERT(datacount1 == bpage->datacount,
 		("Mismatch between data size and provided memory space"));
 				goto next_r;
 			}
 
 			/*
 			 * We are dealing with an unmapped buffer that spans
 			 * two pages.
 			 */
 			datavaddr = pmap_quick_enter_page(bpage->datapage[1]);
 			datacount2 = bpage->datacount - datacount1;
 			bcopy((void *)(bpage->vaddr + datacount1),
 			    (void *)datavaddr, datacount2);
 			pmap_quick_remove_page(datavaddr);
 
 next_r:
 			bpage = STAILQ_NEXT(bpage, links);
 		}
 		dmat->bounce_zone->total_bounced++;
 	}
 }
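/*
 * A hedged sketch of the driver-visible contract implemented above:
 * BUS_DMASYNC_PREWRITE copies the caller's data into the bounce pages before
 * the device reads it, and BUS_DMASYNC_POSTREAD copies device-written data
 * back out afterwards.  The xyz_* names are hypothetical.
 */
#include <sys/param.h>
#include <sys/bus.h>
#include <machine/bus.h>

struct xyz_dma_state {
	bus_dma_tag_t	dtag;
	bus_dmamap_t	dmap;
};

static void
xyz_tx_prepare(struct xyz_dma_state *st)
{
	/* Host-to-device: push CPU-written data into the bounce pages. */
	bus_dmamap_sync(st->dtag, st->dmap, BUS_DMASYNC_PREWRITE);
	/* ... start the DMA transfer to the device here ... */
}

static void
xyz_rx_complete(struct xyz_dma_state *st)
{
	/* Device-to-host: pull device-written data out of the bounce pages. */
	bus_dmamap_sync(st->dtag, st->dmap, BUS_DMASYNC_POSTREAD);
	/* ... the buffer contents are now safe for the CPU to read ... */
}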
 
 static void
 init_bounce_pages(void *dummy __unused)
 {
 
 	total_bpages = 0;
 	STAILQ_INIT(&bounce_zone_list);
 	STAILQ_INIT(&bounce_map_waitinglist);
 	STAILQ_INIT(&bounce_map_callbacklist);
 	mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
 }
 SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
 
 	return (&bz->sysctl_tree);
 }
 
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
 
 	return (bz->sysctl_tree_top);
 }
 
 static int
 alloc_bounce_zone(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *bz;
 
 	/* Check to see if we already have a suitable zone */
 	STAILQ_FOREACH(bz, &bounce_zone_list, links) {
 		if ((dmat->common.alignment <= bz->alignment) &&
 		    (dmat->common.lowaddr >= bz->lowaddr) &&
 		    (dmat->common.domain == bz->domain)) {
 			dmat->bounce_zone = bz;
 			return (0);
 		}
 	}
 
 	if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF,
 	    M_NOWAIT | M_ZERO)) == NULL)
 		return (ENOMEM);
 
 	STAILQ_INIT(&bz->bounce_page_list);
 	bz->free_bpages = 0;
 	bz->reserved_bpages = 0;
 	bz->active_bpages = 0;
 	bz->lowaddr = dmat->common.lowaddr;
 	bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE);
 	bz->map_count = 0;
 	bz->domain = dmat->common.domain;
 	snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount);
 	busdma_zonecount++;
 	snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr);
 	STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links);
 	dmat->bounce_zone = bz;
 
 	sysctl_ctx_init(&bz->sysctl_tree);
 	bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree,
 	    SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid,
 	    CTLFLAG_RD, 0, "");
 	if (bz->sysctl_tree_top == NULL) {
 		sysctl_ctx_free(&bz->sysctl_tree);
 		return (0);	/* XXX error code? */
 	}
 
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0,
 	    "Total bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0,
 	    "Free bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0,
 	    "Reserved bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0,
 	    "Active bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0,
 	    "Total bounce requests");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0,
 	    "Total bounce requests that were deferred");
 	SYSCTL_ADD_STRING(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, "");
 	SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "alignment", CTLFLAG_RD, &bz->alignment, "");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "domain", CTLFLAG_RD, &bz->domain, 0,
 	    "memory domain");
 
 	return (0);
 }
 
 static int
 alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
 {
 	struct bounce_zone *bz;
 	int count;
 
 	bz = dmat->bounce_zone;
 	count = 0;
 	while (numpages > 0) {
 		struct bounce_page *bpage;
 
 		bpage = (struct bounce_page *)malloc_domain(sizeof(*bpage),
 		    M_DEVBUF, dmat->common.domain, M_NOWAIT | M_ZERO);
 
 		if (bpage == NULL)
 			break;
 		bpage->vaddr = (vm_offset_t)contigmalloc_domain(PAGE_SIZE,
 		    M_DEVBUF, dmat->common.domain, M_NOWAIT, 0ul,
 		    bz->lowaddr, PAGE_SIZE, 0);
 		if (bpage->vaddr == 0) {
 			free_domain(bpage, M_DEVBUF);
 			break;
 		}
 		bpage->busaddr = pmap_kextract(bpage->vaddr);
 		mtx_lock(&bounce_lock);
 		STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links);
 		total_bpages++;
 		bz->total_bpages++;
 		bz->free_bpages++;
 		mtx_unlock(&bounce_lock);
 		count++;
 		numpages--;
 	}
 	return (count);
 }
 
 static int
 reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
 {
 	struct bounce_zone *bz;
 	int pages;
 
 	mtx_assert(&bounce_lock, MA_OWNED);
 	bz = dmat->bounce_zone;
 	pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved);
 	if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages))
 		return (map->pagesneeded - (map->pagesreserved + pages));
 	bz->free_bpages -= pages;
 	bz->reserved_bpages += pages;
 	map->pagesreserved += pages;
 	pages = map->pagesneeded - map->pagesreserved;
 
 	return (pages);
 }
 
 static bus_addr_t
 add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
 		bus_addr_t addr1, bus_addr_t addr2, bus_size_t size)
 {
 	struct bounce_zone *bz;
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
 	KASSERT(map != NULL && map != &nobounce_dmamap,
 	    ("add_bounce_page: bad map %p", map));
 
 	bz = dmat->bounce_zone;
 	if (map->pagesneeded == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesneeded--;
 
 	if (map->pagesreserved == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesreserved--;
 
 	mtx_lock(&bounce_lock);
 	bpage = STAILQ_FIRST(&bz->bounce_page_list);
 	if (bpage == NULL)
 		panic("add_bounce_page: free page list is empty");
 
 	STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
 	bz->reserved_bpages--;
 	bz->active_bpages++;
 	mtx_unlock(&bounce_lock);
 
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/* Page offset needs to be preserved. */
 		bpage->vaddr |= addr1 & PAGE_MASK;
 		bpage->busaddr |= addr1 & PAGE_MASK;
 		KASSERT(addr2 == 0,
 	("Trying to bounce multiple pages with BUS_DMA_KEEP_PG_OFFSET"));
 	}
 	bpage->datavaddr = vaddr;
 	bpage->datapage[0] = PHYS_TO_VM_PAGE(addr1);
 	KASSERT((addr2 & PAGE_MASK) == 0, ("Second page is not aligned"));
 	bpage->datapage[1] = PHYS_TO_VM_PAGE(addr2);
 	bpage->dataoffs = addr1 & PAGE_MASK;
 	bpage->datacount = size;
 	STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
 	return (bpage->busaddr);
 }
 
 static void
 free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
 {
 	struct bus_dmamap *map;
 	struct bounce_zone *bz;
 
 	bz = dmat->bounce_zone;
 	bpage->datavaddr = 0;
 	bpage->datacount = 0;
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * Reset the bounce page to start at offset 0.  Other uses
 		 * of this bounce page may need to store a full page of
 		 * data and/or assume it starts on a page boundary.
 		 */
 		bpage->vaddr &= ~PAGE_MASK;
 		bpage->busaddr &= ~PAGE_MASK;
 	}
 
 	mtx_lock(&bounce_lock);
 	STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links);
 	bz->free_bpages++;
 	bz->active_bpages--;
 	if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
 		if (reserve_bounce_pages(map->dmat, map, 1) == 0) {
 			STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
 			STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
 			    map, links);
 			busdma_swi_pending = 1;
 			bz->total_deferred++;
 			swi_sched(vm_ih, 0);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 void
 busdma_swi(void)
 {
 	bus_dma_tag_t dmat;
 	struct bus_dmamap *map;
 
 	mtx_lock(&bounce_lock);
 	while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
 		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
 		mtx_unlock(&bounce_lock);
 		dmat = map->dmat;
 		(dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK);
 		bus_dmamap_load_mem(map->dmat, map, &map->mem,
 		    map->callback, map->callback_arg, BUS_DMA_WAITOK);
 		(dmat->common.lockfunc)(dmat->common.lockfuncarg,
 		    BUS_DMA_UNLOCK);
 		mtx_lock(&bounce_lock);
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 struct bus_dma_impl bus_dma_bounce_impl = {
 	.tag_create = bounce_bus_dma_tag_create,
 	.tag_destroy = bounce_bus_dma_tag_destroy,
 	.tag_set_domain = bounce_bus_dma_tag_set_domain,
 	.map_create = bounce_bus_dmamap_create,
 	.map_destroy = bounce_bus_dmamap_destroy,
 	.mem_alloc = bounce_bus_dmamem_alloc,
 	.mem_free = bounce_bus_dmamem_free,
 	.load_phys = bounce_bus_dmamap_load_phys,
 	.load_buffer = bounce_bus_dmamap_load_buffer,
 	.load_ma = bounce_bus_dmamap_load_ma,
 	.map_waitok = bounce_bus_dmamap_waitok,
 	.map_complete = bounce_bus_dmamap_complete,
 	.map_unload = bounce_bus_dmamap_unload,
 	.map_sync = bounce_bus_dmamap_sync,
 };
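/*
 * The structure above is a method table: the machine-independent busdma code
 * dispatches every operation through it so alternative back ends (e.g. an
 * IOMMU implementation) can be substituted for the bounce-buffer one.  A
 * minimal, userspace illustration of the same function-pointer-table pattern
 * follows; every name in it is hypothetical.
 */
#include <stdio.h>

struct toy_impl {
	int	(*mem_alloc)(size_t size);
	void	(*mem_free)(int handle);
};

static int
toy_mem_alloc(size_t size)
{
	printf("toy backend: allocated %zu bytes\n", size);
	return (1);
}

static void
toy_mem_free(int handle)
{
	printf("toy backend: freed handle %d\n", handle);
}

static const struct toy_impl toy_bounce_impl = {
	.mem_alloc = toy_mem_alloc,
	.mem_free = toy_mem_free,
};

int
main(void)
{
	int handle;

	/* Callers see only the table, never the concrete back end. */
	handle = toy_bounce_impl.mem_alloc(4096);
	toy_bounce_impl.mem_free(handle);
	return (0);
}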