Index: head/sys/riscv/include/pcb.h
===================================================================
--- head/sys/riscv/include/pcb.h	(revision 344107)
+++ head/sys/riscv/include/pcb.h	(revision 344108)
@@ -1,69 +1,68 @@
 /*-
  * Copyright (c) 2015-2016 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Portions of this software were developed by the University of Cambridge
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_MACHINE_PCB_H_
 #define	_MACHINE_PCB_H_
 
 #ifndef LOCORE
 
 struct trapframe;
 
 struct pcb {
 	uint64_t	pcb_ra;		/* Return address */
 	uint64_t	pcb_sp;		/* Stack pointer */
 	uint64_t	pcb_gp;		/* Global pointer */
 	uint64_t	pcb_tp;		/* Thread pointer */
 	uint64_t	pcb_t[7];	/* Temporary registers */
 	uint64_t	pcb_s[12];	/* Saved registers */
 	uint64_t	pcb_a[8];	/* Argument registers */
 	uint64_t	pcb_x[32][2];	/* Floating point registers */
 	uint64_t	pcb_fcsr;	/* Floating point control reg */
 	uint64_t	pcb_fpflags;	/* Floating point flags */
 #define	PCB_FP_STARTED	0x1
 #define	PCB_FP_USERMASK	0x1
 	uint64_t	pcb_sepc;	/* Supervisor exception pc */
-	vm_offset_t	pcb_l1addr;	/* L1 page tables base address */
 	vm_offset_t	pcb_onfault;	/* Copyinout fault handler */
 };
 
 #ifdef _KERNEL
 void	makectx(struct trapframe *tf, struct pcb *pcb);
 int	savectx(struct pcb *pcb) __returns_twice;
 #endif
 
 #endif /* !LOCORE */
 
 #endif /* !_MACHINE_PCB_H_ */
Index: head/sys/riscv/include/pcpu.h
===================================================================
--- head/sys/riscv/include/pcpu.h	(revision 344107)
+++ head/sys/riscv/include/pcpu.h	(revision 344108)
@@ -1,87 +1,88 @@
 /*-
  * Copyright (c) 1999 Luoqi Chen <luoqi@freebsd.org>
  * Copyright (c) 2015-2016 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Portions of this software were developed by the University of Cambridge
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: FreeBSD: src/sys/i386/include/globaldata.h,v 1.27 2001/04/27
  * $FreeBSD$
  */
 
 #ifndef	_MACHINE_PCPU_H_
 #define	_MACHINE_PCPU_H_
 
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 
 #define	ALT_STACK_SIZE	128
 
 #define	PCPU_MD_FIELDS							\
+	struct pmap *pc_curpmap;	/* Currently active pmap */	\
 	uint32_t pc_pending_ipis;	/* IPIs pending to this CPU */	\
 	char __pad[61]
 
 #ifdef _KERNEL
 
 struct pcb;
 struct pcpu;
 extern struct pcpu *pcpup;
 
 static inline struct pcpu *
 get_pcpu(void)
 {
 	struct pcpu *pcpu;
 
 	__asm __volatile("mv %0, gp" : "=&r"(pcpu));
 
 	return (pcpu);
 }
 
 static inline struct thread *
 get_curthread(void)
 {
 	struct thread *td;
 
 	__asm __volatile("ld %0, 0(gp)" : "=&r"(td));
 
 	return (td);
 }
 
 #define	curthread get_curthread()
 
 #define	PCPU_GET(member)	(get_pcpu()->pc_ ## member)
 #define	PCPU_ADD(member, value)	(get_pcpu()->pc_ ## member += (value))
 #define	PCPU_INC(member)	PCPU_ADD(member, 1)
 #define	PCPU_PTR(member)	(&get_pcpu()->pc_ ## member)
 #define	PCPU_SET(member,value)	(get_pcpu()->pc_ ## member = (value))
 
 #endif	/* _KERNEL */
 
 #endif	/* !_MACHINE_PCPU_H_ */
Index: head/sys/riscv/include/pmap.h
===================================================================
--- head/sys/riscv/include/pmap.h	(revision 344107)
+++ head/sys/riscv/include/pmap.h	(revision 344108)
@@ -1,166 +1,173 @@
 /*-
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_PMAP_H_
 #define	_MACHINE_PMAP_H_
 
 #include <machine/pte.h>
 
 #ifndef LOCORE
 
 #include <sys/queue.h>
+#include <sys/_cpuset.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 
 #include <vm/_vm_radix.h>
 
 #ifdef _KERNEL
 
 #define	vtophys(va)	pmap_kextract((vm_offset_t)(va))
 
 #endif
 
 #define	pmap_page_get_memattr(m)	((m)->md.pv_memattr)
 #define	pmap_page_is_write_mapped(m)	(((m)->aflags & PGA_WRITEABLE) != 0)
 void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
 
 /*
  * Pmap stuff
  */
 
 struct md_page {
 	TAILQ_HEAD(,pv_entry)	pv_list;
 	int			pv_gen;
 	vm_memattr_t		pv_memattr;
 };
 
 /*
  * This structure is used to hold a virtual<->physical address
  * association and is used mostly by bootstrap code
  */
 struct pv_addr {
 	SLIST_ENTRY(pv_addr) pv_list;
 	vm_offset_t	pv_va;
 	vm_paddr_t	pv_pa;
 };
 
 struct pmap {
 	struct mtx		pm_mtx;
 	struct pmap_statistics	pm_stats;	/* pmap statictics */
 	pd_entry_t		*pm_l1;
+	u_long			pm_satp;	/* value for SATP register */
+	cpuset_t		pm_active;	/* active on cpus */
 	TAILQ_HEAD(,pv_chunk)	pm_pvchunk;	/* list of mappings in pmap */
 	LIST_ENTRY(pmap)	pm_list;	/* List of all pmaps */
 	struct vm_radix		pm_root;
 };
 
 typedef struct pv_entry {
 	vm_offset_t		pv_va;	/* virtual address for mapping */
 	TAILQ_ENTRY(pv_entry)	pv_next;
 } *pv_entry_t;
 
 /*
  * pv_entries are allocated in chunks per-process.  This avoids the
  * need to track per-pmap assignments.
  */
 #define	_NPCM	3
 #define	_NPCPV	168
 struct pv_chunk {
 	struct pmap *		pc_pmap;
 	TAILQ_ENTRY(pv_chunk)	pc_list;
 	uint64_t		pc_map[_NPCM];  /* bitmap; 1 = free */
 	TAILQ_ENTRY(pv_chunk)	pc_lru;
 	struct pv_entry		pc_pventry[_NPCPV];
 };
 
 typedef struct pmap *pmap_t;
 
 #ifdef _KERNEL
 extern struct pmap	kernel_pmap_store;
 #define	kernel_pmap	(&kernel_pmap_store)
 #define	pmap_kernel()	kernel_pmap
 
 #define	PMAP_ASSERT_LOCKED(pmap) \
 				mtx_assert(&(pmap)->pm_mtx, MA_OWNED)
 #define	PMAP_LOCK(pmap)		mtx_lock(&(pmap)->pm_mtx)
 #define	PMAP_LOCK_ASSERT(pmap, type) \
 				mtx_assert(&(pmap)->pm_mtx, (type))
 #define	PMAP_LOCK_DESTROY(pmap)	mtx_destroy(&(pmap)->pm_mtx)
 #define	PMAP_LOCK_INIT(pmap)	mtx_init(&(pmap)->pm_mtx, "pmap", \
 				    NULL, MTX_DEF | MTX_DUPOK)
 #define	PMAP_OWNED(pmap)	mtx_owned(&(pmap)->pm_mtx)
 #define	PMAP_MTX(pmap)		(&(pmap)->pm_mtx)
 #define	PMAP_TRYLOCK(pmap)	mtx_trylock(&(pmap)->pm_mtx)
 #define	PMAP_UNLOCK(pmap)	mtx_unlock(&(pmap)->pm_mtx)
 
 #define	PHYS_AVAIL_SIZE	10
 extern vm_paddr_t phys_avail[];
 extern vm_paddr_t dump_avail[];
 extern vm_offset_t virtual_avail;
 extern vm_offset_t virtual_end;
 
 /*
  * Macros to test if a mapping is mappable with an L1 Section mapping
  * or an L2 Large Page mapping.
  */
 #define	L1_MAPPABLE_P(va, pa, size)					\
 	((((va) | (pa)) & L1_OFFSET) == 0 && (size) >= L1_SIZE)
 
+struct thread;
+
+void	pmap_activate_boot(pmap_t);
+void	pmap_activate_sw(struct thread *);
 void	pmap_bootstrap(vm_offset_t, vm_paddr_t, vm_size_t);
 void	pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t);
 vm_paddr_t pmap_kextract(vm_offset_t va);
 void	pmap_kremove(vm_offset_t);
 void	pmap_kremove_device(vm_offset_t, vm_size_t);
 bool	pmap_ps_enabled(pmap_t);
 
 void	*pmap_mapdev(vm_offset_t, vm_size_t);
 void	*pmap_mapbios(vm_paddr_t, vm_size_t);
 void	pmap_unmapdev(vm_offset_t, vm_size_t);
 void	pmap_unmapbios(vm_offset_t, vm_size_t);
 
 boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
 void	pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
 
 bool	pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t **, pd_entry_t **,
     pt_entry_t **);
 
 #define	pmap_page_is_mapped(m)	(!TAILQ_EMPTY(&(m)->md.pv_list))
 
 int pmap_fault_fixup(pmap_t, vm_offset_t, vm_prot_t);
 
 #endif	/* _KERNEL */
 
 #endif	/* !LOCORE */
 
 #endif	/* !_MACHINE_PMAP_H_ */
Index: head/sys/riscv/riscv/genassym.c
===================================================================
--- head/sys/riscv/riscv/genassym.c	(revision 344107)
+++ head/sys/riscv/riscv/genassym.c	(revision 344108)
@@ -1,101 +1,100 @@
 /*-
  * Copyright (c) 2015-2016 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Portions of this software were developed by the University of Cambridge
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/assym.h>
 #include <sys/proc.h>
 #include <sys/mbuf.h>
 #include <sys/vmmeter.h>
 #include <sys/bus.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 
 #include <machine/riscvreg.h>
 #include <machine/frame.h>
 #include <machine/pcb.h>
 #include <machine/cpu.h>
 #include <machine/proc.h>
 #include <machine/cpufunc.h>
 #include <machine/pte.h>
 #include <machine/intr.h>
 
 ASSYM(KERNBASE, KERNBASE);
 ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
 ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS);
 ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
 ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
 
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
-ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr));
 ASSYM(PCB_SIZE, sizeof(struct pcb));
 ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra));
 ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp));
 ASSYM(PCB_GP, offsetof(struct pcb, pcb_gp));
 ASSYM(PCB_TP, offsetof(struct pcb, pcb_tp));
 ASSYM(PCB_T, offsetof(struct pcb, pcb_t));
 ASSYM(PCB_S, offsetof(struct pcb, pcb_s));
 ASSYM(PCB_A, offsetof(struct pcb, pcb_a));
 ASSYM(PCB_X, offsetof(struct pcb, pcb_x));
 ASSYM(PCB_FCSR, offsetof(struct pcb, pcb_fcsr));
 
 ASSYM(SF_UC, offsetof(struct sigframe, sf_uc));
 
 ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
 ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
 
 ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
 ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
 ASSYM(TD_PROC, offsetof(struct thread, td_proc));
 ASSYM(TD_FRAME, offsetof(struct thread, td_frame));
 ASSYM(TD_MD, offsetof(struct thread, td_md));
 ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
 
 ASSYM(TF_SIZE, sizeof(struct trapframe));
 ASSYM(TF_RA, offsetof(struct trapframe, tf_ra));
 ASSYM(TF_SP, offsetof(struct trapframe, tf_sp));
 ASSYM(TF_GP, offsetof(struct trapframe, tf_gp));
 ASSYM(TF_TP, offsetof(struct trapframe, tf_tp));
 ASSYM(TF_T, offsetof(struct trapframe, tf_t));
 ASSYM(TF_S, offsetof(struct trapframe, tf_s));
 ASSYM(TF_A, offsetof(struct trapframe, tf_a));
 ASSYM(TF_SEPC, offsetof(struct trapframe, tf_sepc));
 ASSYM(TF_STVAL, offsetof(struct trapframe, tf_stval));
 ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause));
 ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus));
Index: head/sys/riscv/riscv/machdep.c
===================================================================
--- head/sys/riscv/riscv/machdep.c	(revision 344107)
+++ head/sys/riscv/riscv/machdep.c	(revision 344108)
@@ -1,895 +1,891 @@
 /*-
  * Copyright (c) 2014 Andrew Turner
  * Copyright (c) 2015-2017 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Portions of this software were developed by the University of Cambridge
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_platform.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/cons.h>
 #include <sys/cpu.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/msgbuf.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/reboot.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/ucontext.h>
 
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 
 #include <machine/riscvreg.h>
 #include <machine/cpu.h>
 #include <machine/kdb.h>
 #include <machine/machdep.h>
 #include <machine/pcb.h>
 #include <machine/reg.h>
 #include <machine/trap.h>
 #include <machine/vmparam.h>
 #include <machine/intr.h>
 #include <machine/sbi.h>
 
 #include <machine/asm.h>
 
 #ifdef FPE
 #include <machine/fpe.h>
 #endif
 
 #ifdef FDT
 #include <dev/fdt/fdt_common.h>
 #include <dev/ofw/openfirm.h>
 #endif
 
 struct pcpu __pcpu[MAXCPU];
 
 static struct trapframe proc0_tf;
 
 vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2];
 vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2];
 
 int early_boot = 1;
 int cold = 1;
 long realmem = 0;
 long Maxmem = 0;
 
 #define	DTB_SIZE_MAX	(1024 * 1024)
 
 #define	PHYSMAP_SIZE	(2 * (VM_PHYSSEG_MAX - 1))
 vm_paddr_t physmap[PHYSMAP_SIZE];
 u_int physmap_idx;
 
 struct kva_md_info kmi;
 
 int64_t dcache_line_size;	/* The minimum D cache line size */
 int64_t icache_line_size;	/* The minimum I cache line size */
 int64_t idcache_line_size;	/* The minimum cache line size */
 
 extern int *end;
 extern int *initstack_end;
 
 struct pcpu *pcpup;
 
 uintptr_t mcall_trap(uintptr_t mcause, uintptr_t* regs);
 
 uintptr_t
 mcall_trap(uintptr_t mcause, uintptr_t* regs)
 {
 
 	return (0);
 }
 
 static void
 cpu_startup(void *dummy)
 {
 
 	identify_cpu();
 
 	vm_ksubmap_init(&kmi);
 	bufinit();
 	vm_pager_bufferinit();
 }
 
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 
 int
 cpu_idle_wakeup(int cpu)
 {
 
 	return (0);
 }
 
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	regs->sepc = frame->tf_sepc;
 	regs->sstatus = frame->tf_sstatus;
 	regs->ra = frame->tf_ra;
 	regs->sp = frame->tf_sp;
 	regs->gp = frame->tf_gp;
 	regs->tp = frame->tf_tp;
 
 	memcpy(regs->t, frame->tf_t, sizeof(regs->t));
 	memcpy(regs->s, frame->tf_s, sizeof(regs->s));
 	memcpy(regs->a, frame->tf_a, sizeof(regs->a));
 
 	return (0);
 }
 
 int
 set_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	frame->tf_sepc = regs->sepc;
 	frame->tf_ra = regs->ra;
 	frame->tf_sp = regs->sp;
 	frame->tf_gp = regs->gp;
 	frame->tf_tp = regs->tp;
 
 	memcpy(frame->tf_t, regs->t, sizeof(frame->tf_t));
 	memcpy(frame->tf_s, regs->s, sizeof(frame->tf_s));
 	memcpy(frame->tf_a, regs->a, sizeof(frame->tf_a));
 
 	return (0);
 }
 
 int
 fill_fpregs(struct thread *td, struct fpreg *regs)
 {
 #ifdef FPE
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 
 	if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
 		/*
 		 * If we have just been running FPE instructions we will
 		 * need to save the state to memcpy it below.
 		 */
 		if (td == curthread)
 			fpe_state_save(td);
 
 		memcpy(regs->fp_x, pcb->pcb_x, sizeof(regs->fp_x));
 		regs->fp_fcsr = pcb->pcb_fcsr;
 	} else
 #endif
 		memset(regs, 0, sizeof(*regs));
 
 	return (0);
 }
 
 int
 set_fpregs(struct thread *td, struct fpreg *regs)
 {
 #ifdef FPE
 	struct trapframe *frame;
 	struct pcb *pcb;
 
 	frame = td->td_frame;
 	pcb = td->td_pcb;
 
 	memcpy(pcb->pcb_x, regs->fp_x, sizeof(regs->fp_x));
 	pcb->pcb_fcsr = regs->fp_fcsr;
 	pcb->pcb_fpflags |= PCB_FP_STARTED;
 	frame->tf_sstatus &= ~SSTATUS_FS_MASK;
 	frame->tf_sstatus |= SSTATUS_FS_CLEAN;
 #endif
 
 	return (0);
 }
 
 int
 fill_dbregs(struct thread *td, struct dbreg *regs)
 {
 
 	panic("fill_dbregs");
 }
 
 int
 set_dbregs(struct thread *td, struct dbreg *regs)
 {
 
 	panic("set_dbregs");
 }
 
 int
 ptrace_set_pc(struct thread *td, u_long addr)
 {
 
 	td->td_frame->tf_sepc = addr;
 	return (0);
 }
 
 int
 ptrace_single_step(struct thread *td)
 {
 
 	/* TODO; */
 	return (EOPNOTSUPP);
 }
 
 int
 ptrace_clear_single_step(struct thread *td)
 {
 
 	/* TODO; */
 	return (EOPNOTSUPP);
 }
 
 void
 exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
 {
 	struct trapframe *tf;
 	struct pcb *pcb;
 
 	tf = td->td_frame;
 	pcb = td->td_pcb;
 
 	memset(tf, 0, sizeof(struct trapframe));
 
 	tf->tf_a[0] = stack;
 	tf->tf_sp = STACKALIGN(stack);
 	tf->tf_ra = imgp->entry_addr;
 	tf->tf_sepc = imgp->entry_addr;
 
 	pcb->pcb_fpflags &= ~PCB_FP_STARTED;
 }
 
 /* Sanity check these are the same size, they will be memcpy'd to and fro */
 CTASSERT(sizeof(((struct trapframe *)0)->tf_a) ==
     sizeof((struct gpregs *)0)->gp_a);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_s) ==
     sizeof((struct gpregs *)0)->gp_s);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_t) ==
     sizeof((struct gpregs *)0)->gp_t);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_a) ==
     sizeof((struct reg *)0)->a);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_s) ==
     sizeof((struct reg *)0)->s);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_t) ==
     sizeof((struct reg *)0)->t);
 
 /* Support for FDT configurations only. */
 CTASSERT(FDT);
 
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret)
 {
 	struct trapframe *tf = td->td_frame;
 
 	memcpy(mcp->mc_gpregs.gp_t, tf->tf_t, sizeof(mcp->mc_gpregs.gp_t));
 	memcpy(mcp->mc_gpregs.gp_s, tf->tf_s, sizeof(mcp->mc_gpregs.gp_s));
 	memcpy(mcp->mc_gpregs.gp_a, tf->tf_a, sizeof(mcp->mc_gpregs.gp_a));
 
 	if (clear_ret & GET_MC_CLEAR_RET) {
 		mcp->mc_gpregs.gp_a[0] = 0;
 		mcp->mc_gpregs.gp_t[0] = 0; /* clear syscall error */
 	}
 
 	mcp->mc_gpregs.gp_ra = tf->tf_ra;
 	mcp->mc_gpregs.gp_sp = tf->tf_sp;
 	mcp->mc_gpregs.gp_gp = tf->tf_gp;
 	mcp->mc_gpregs.gp_tp = tf->tf_tp;
 	mcp->mc_gpregs.gp_sepc = tf->tf_sepc;
 	mcp->mc_gpregs.gp_sstatus = tf->tf_sstatus;
 
 	return (0);
 }
 
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct trapframe *tf;
 
 	tf = td->td_frame;
 
 	memcpy(tf->tf_t, mcp->mc_gpregs.gp_t, sizeof(tf->tf_t));
 	memcpy(tf->tf_s, mcp->mc_gpregs.gp_s, sizeof(tf->tf_s));
 	memcpy(tf->tf_a, mcp->mc_gpregs.gp_a, sizeof(tf->tf_a));
 
 	tf->tf_ra = mcp->mc_gpregs.gp_ra;
 	tf->tf_sp = mcp->mc_gpregs.gp_sp;
 	tf->tf_gp = mcp->mc_gpregs.gp_gp;
 	tf->tf_sepc = mcp->mc_gpregs.gp_sepc;
 	tf->tf_sstatus = mcp->mc_gpregs.gp_sstatus;
 
 	return (0);
 }
 
 static void
 get_fpcontext(struct thread *td, mcontext_t *mcp)
 {
 #ifdef FPE
 	struct pcb *curpcb;
 
 	critical_enter();
 
 	curpcb = curthread->td_pcb;
 
 	KASSERT(td->td_pcb == curpcb, ("Invalid fpe pcb"));
 
 	if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
 		/*
 		 * If we have just been running FPE instructions we will
 		 * need to save the state to memcpy it below.
 		 */
 		fpe_state_save(td);
 
 		KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
 		    ("Non-userspace FPE flags set in get_fpcontext"));
 		memcpy(mcp->mc_fpregs.fp_x, curpcb->pcb_x,
 		    sizeof(mcp->mc_fpregs));
 		mcp->mc_fpregs.fp_fcsr = curpcb->pcb_fcsr;
 		mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags;
 		mcp->mc_flags |= _MC_FP_VALID;
 	}
 
 	critical_exit();
 #endif
 }
 
 static void
 set_fpcontext(struct thread *td, mcontext_t *mcp)
 {
 #ifdef FPE
 	struct pcb *curpcb;
 
 	critical_enter();
 
 	if ((mcp->mc_flags & _MC_FP_VALID) != 0) {
 		curpcb = curthread->td_pcb;
 		/* FPE usage is enabled, override registers. */
 		memcpy(curpcb->pcb_x, mcp->mc_fpregs.fp_x,
 		    sizeof(mcp->mc_fpregs));
 		curpcb->pcb_fcsr = mcp->mc_fpregs.fp_fcsr;
 		curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK;
 	}
 
 	critical_exit();
 #endif
 }
 
 void
 cpu_idle(int busy)
 {
 
 	spinlock_enter();
 	if (!busy)
 		cpu_idleclock();
 	if (!sched_runnable())
 		__asm __volatile(
 		    "fence \n"
 		    "wfi   \n");
 	if (!busy)
 		cpu_activeclock();
 	spinlock_exit();
 }
 
 void
 cpu_halt(void)
 {
 
 	intr_disable();
 	for (;;)
 		__asm __volatile("wfi");
 }
 
 /*
  * Flush the D-cache for non-DMA I/O so that the I-cache can
  * be made coherent later.
  */
 void
 cpu_flush_dcache(void *ptr, size_t len)
 {
 
 	/* TBD */
 }
 
 /* Get current clock frequency for the given CPU ID. */
 int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
 
 	panic("cpu_est_clockrate");
 }
 
 void
 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 {
 }
 
 void
 spinlock_enter(void)
 {
 	struct thread *td;
 
 	td = curthread;
 	if (td->td_md.md_spinlock_count == 0) {
 		td->td_md.md_spinlock_count = 1;
 		td->td_md.md_saved_sstatus_ie = intr_disable();
 	} else
 		td->td_md.md_spinlock_count++;
 	critical_enter();
 }
 
 void
 spinlock_exit(void)
 {
 	struct thread *td;
 	register_t sstatus_ie;
 
 	td = curthread;
 	critical_exit();
 	sstatus_ie = td->td_md.md_saved_sstatus_ie;
 	td->td_md.md_spinlock_count--;
 	if (td->td_md.md_spinlock_count == 0)
 		intr_restore(sstatus_ie);
 }
 
 #ifndef	_SYS_SYSPROTO_H_
 struct sigreturn_args {
 	ucontext_t *ucp;
 };
 #endif
 
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	uint64_t sstatus;
 	ucontext_t uc;
 	int error;
 
 	if (uap == NULL)
 		return (EFAULT);
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
 		return (EFAULT);
 
 	/*
 	 * Make sure the processor mode has not been tampered with and
 	 * interrupts have not been disabled.
 	 * Supervisor interrupts in user mode are always enabled.
 	 */
 	sstatus = uc.uc_mcontext.mc_gpregs.gp_sstatus;
 	if ((sstatus & SSTATUS_SPP) != 0)
 		return (EINVAL);
 
 	error = set_mcontext(td, &uc.uc_mcontext);
 	if (error != 0)
 		return (error);
 
 	set_fpcontext(td, &uc.uc_mcontext);
 
 	/* Restore signal mask. */
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * Construct a PCB from a trapframe. This is called from kdb_trap() where
  * we want to start a backtrace from the function that caused us to enter
  * the debugger. We have the context in the trapframe, but base the trace
  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
  * enough for a backtrace.
  */
 void
 makectx(struct trapframe *tf, struct pcb *pcb)
 {
 
 	memcpy(pcb->pcb_t, tf->tf_t, sizeof(tf->tf_t));
 	memcpy(pcb->pcb_s, tf->tf_s, sizeof(tf->tf_s));
 	memcpy(pcb->pcb_a, tf->tf_a, sizeof(tf->tf_a));
 
 	pcb->pcb_ra = tf->tf_ra;
 	pcb->pcb_sp = tf->tf_sp;
 	pcb->pcb_gp = tf->tf_gp;
 	pcb->pcb_tp = tf->tf_tp;
 	pcb->pcb_sepc = tf->tf_sepc;
 }
 
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct sigframe *fp, frame;
 	struct sysentvec *sysent;
 	struct trapframe *tf;
 	struct sigacts *psp;
 	struct thread *td;
 	struct proc *p;
 	int onstack;
 	int sig;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 
 	tf = td->td_frame;
 	onstack = sigonstack(tf->tf_sp);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 	} else {
 		fp = (struct sigframe *)td->td_frame->tf_sp;
 	}
 
 	/* Make room, keeping the stack aligned */
 	fp--;
 	fp = (struct sigframe *)STACKALIGN(fp);
 
 	/* Fill in the frame to copy out */
 	bzero(&frame, sizeof(frame));
 	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
 	get_fpcontext(td, &frame.sf_uc.uc_mcontext);
 	frame.sf_si = ksi->ksi_info;
 	frame.sf_uc.uc_sigmask = *mask;
 	frame.sf_uc.uc_stack = td->td_sigstk;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
 	    (onstack ? SS_ONSTACK : 0) : SS_DISABLE;
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	tf->tf_a[0] = sig;
 	tf->tf_a[1] = (register_t)&fp->sf_si;
 	tf->tf_a[2] = (register_t)&fp->sf_uc;
 
 	tf->tf_sepc = (register_t)catcher;
 	tf->tf_sp = (register_t)fp;
 
 	sysent = p->p_sysent;
 	if (sysent->sv_sigcode_base != 0)
 		tf->tf_ra = (register_t)sysent->sv_sigcode_base;
 	else
 		tf->tf_ra = (register_t)(sysent->sv_psstrings -
 		    *(sysent->sv_szsigcode));
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_sepc,
 	    tf->tf_sp);
 
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 static void
 init_proc0(vm_offset_t kstack)
 {
 
 	pcpup = &__pcpu[0];
 
 	proc_linkup0(&proc0, &thread0);
 	thread0.td_kstack = kstack;
 	thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1;
 	thread0.td_pcb->pcb_fpflags = 0;
 	thread0.td_frame = &proc0_tf;
 	pcpup->pc_curpcb = thread0.td_pcb;
 }
 
 static int
 add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
     u_int *physmap_idxp)
 {
 	u_int i, insert_idx, _physmap_idx;
 
 	_physmap_idx = *physmap_idxp;
 
 	if (length == 0)
 		return (1);
 
 	/*
 	 * Find insertion point while checking for overlap.  Start off by
 	 * assuming the new entry will be added to the end.
 	 */
 	insert_idx = _physmap_idx;
 	for (i = 0; i <= _physmap_idx; i += 2) {
 		if (base < physmap[i + 1]) {
 			if (base + length <= physmap[i]) {
 				insert_idx = i;
 				break;
 			}
 			if (boothowto & RB_VERBOSE)
 				printf(
 		    "Overlapping memory regions, ignoring second region\n");
 			return (1);
 		}
 	}
 
 	/* See if we can prepend to the next entry. */
 	if (insert_idx <= _physmap_idx &&
 	    base + length == physmap[insert_idx]) {
 		physmap[insert_idx] = base;
 		return (1);
 	}
 
 	/* See if we can append to the previous entry. */
 	if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
 		physmap[insert_idx - 1] += length;
 		return (1);
 	}
 
 	_physmap_idx += 2;
 	*physmap_idxp = _physmap_idx;
 	if (_physmap_idx == PHYSMAP_SIZE) {
 		printf(
 		"Too many segments in the physical address map, giving up\n");
 		return (0);
 	}
 
 	/*
 	 * Move the last 'N' entries down to make room for the new
 	 * entry if needed.
 	 */
 	for (i = _physmap_idx; i > insert_idx; i -= 2) {
 		physmap[i] = physmap[i - 2];
 		physmap[i + 1] = physmap[i - 1];
 	}
 
 	/* Insert the new entry. */
 	physmap[insert_idx] = base;
 	physmap[insert_idx + 1] = base + length;
 
 	printf("physmap[%d] = 0x%016lx\n", insert_idx, base);
 	printf("physmap[%d] = 0x%016lx\n", insert_idx + 1, base + length);
 	return (1);
 }
 
 #ifdef FDT
 static void
 try_load_dtb(caddr_t kmdp, vm_offset_t dtbp)
 {
 
 #if defined(FDT_DTB_STATIC)
 	dtbp = (vm_offset_t)&fdt_static_dtb;
 #endif
 
 	if (dtbp == (vm_offset_t)NULL) {
 		printf("ERROR loading DTB\n");
 		return;
 	}
 
 	if (OF_install(OFW_FDT, 0) == FALSE)
 		panic("Cannot install FDT");
 
 	if (OF_init((void *)dtbp) != 0)
 		panic("OF_init failed with the found device tree");
 }
 #endif
 
 static void
 cache_setup(void)
 {
 
 	/* TODO */
 }
 
 /*
  * Fake up a boot descriptor table.
  * RISCVTODO: This needs to be done via loader (when it's available).
  */
 vm_offset_t
 fake_preload_metadata(struct riscv_bootparams *rvbp __unused)
 {
 	static uint32_t fake_preload[35];
 #ifdef DDB
 	vm_offset_t zstart = 0, zend = 0;
 #endif
 	vm_offset_t lastaddr;
 	int i;
 
 	i = 0;
 
 	fake_preload[i++] = MODINFO_NAME;
 	fake_preload[i++] = strlen("kernel") + 1;
 	strcpy((char*)&fake_preload[i++], "kernel");
 	i += 1;
 	fake_preload[i++] = MODINFO_TYPE;
 	fake_preload[i++] = strlen("elf64 kernel") + 1;
 	strcpy((char*)&fake_preload[i++], "elf64 kernel");
 	i += 3;
 	fake_preload[i++] = MODINFO_ADDR;
 	fake_preload[i++] = sizeof(vm_offset_t);
 	*(vm_offset_t *)&fake_preload[i++] =
 	    (vm_offset_t)(KERNBASE + KERNENTRY);
 	i += 1;
 	fake_preload[i++] = MODINFO_SIZE;
 	fake_preload[i++] = sizeof(vm_offset_t);
 	fake_preload[i++] = (vm_offset_t)&end -
 	    (vm_offset_t)(KERNBASE + KERNENTRY);
 	i += 1;
 #ifdef DDB
 #if 0
 	/* RISCVTODO */
 	if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) {
 		fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM;
 		fake_preload[i++] = sizeof(vm_offset_t);
 		fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4);
 		fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM;
 		fake_preload[i++] = sizeof(vm_offset_t);
 		fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8);
 		lastaddr = *(uint32_t *)(KERNVIRTADDR + 8);
 		zend = lastaddr;
 		zstart = *(uint32_t *)(KERNVIRTADDR + 4);
 		db_fetch_ksymtab(zstart, zend);
 	} else
 #endif
 #endif
 		lastaddr = (vm_offset_t)&end;
 	fake_preload[i++] = 0;
 	fake_preload[i] = 0;
 	preload_metadata = (void *)fake_preload;
 
 	return (lastaddr);
 }
 
 void
 initriscv(struct riscv_bootparams *rvbp)
 {
 	struct mem_region mem_regions[FDT_MEM_REGIONS];
 	vm_offset_t rstart, rend;
 	vm_offset_t s, e;
 	int mem_regions_sz;
 	vm_offset_t lastaddr;
 	vm_size_t kernlen;
 	caddr_t kmdp;
 	int i;
 
 	/* Set the module data location */
 	lastaddr = fake_preload_metadata(rvbp);
 
 	/* Find the kernel address */
 	kmdp = preload_search_by_type("elf kernel");
 	if (kmdp == NULL)
 		kmdp = preload_search_by_type("elf64 kernel");
 
 	boothowto = RB_VERBOSE | RB_SINGLE;
 	boothowto = RB_VERBOSE;
 
 	kern_envp = NULL;
 
 #ifdef FDT
 	try_load_dtb(kmdp, rvbp->dtbp_virt);
 #endif
 
 	/* Load the physical memory ranges */
 	physmap_idx = 0;
 
 #ifdef FDT
 	/* Grab physical memory regions information from device tree. */
 	if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0)
 		panic("Cannot get physical memory regions");
 
 	s = rvbp->dtbp_phys;
 	e = s + DTB_SIZE_MAX;
 
 	for (i = 0; i < mem_regions_sz; i++) {
 		rstart = mem_regions[i].mr_start;
 		rend = (mem_regions[i].mr_start + mem_regions[i].mr_size);
 
 		if ((rstart < s) && (rend > e)) {
 			/* Exclude DTB region. */
 			add_physmap_entry(rstart, (s - rstart), physmap, &physmap_idx);
 			add_physmap_entry(e, (rend - e), physmap, &physmap_idx);
 		} else {
 			add_physmap_entry(mem_regions[i].mr_start,
 			    mem_regions[i].mr_size, physmap, &physmap_idx);
 		}
 	}
 #endif
 
 	/* Set the pcpu data, this is needed by pmap_bootstrap */
 	pcpup = &__pcpu[0];
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 
 	/* Set the pcpu pointer */
 	__asm __volatile("mv gp, %0" :: "r"(pcpup));
 
 	PCPU_SET(curthread, &thread0);
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	cache_setup();
 
 	/* Bootstrap enough of pmap to enter the kernel proper */
 	kernlen = (lastaddr - KERNBASE);
 	pmap_bootstrap(rvbp->kern_l1pt, mem_regions[0].mr_start, kernlen);
 
 	cninit();
 
 	init_proc0(rvbp->kern_stack);
 
-	/* set page table base register for thread0 */
-	thread0.td_pcb->pcb_l1addr = \
-	    (rvbp->kern_l1pt - KERNBASE + rvbp->kern_phys);
-
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 	init_param2(physmem);
 	kdb_init();
 
 	early_boot = 0;
 }
 
 #undef bzero
 void
 bzero(void *buf, size_t len)
 {
 	uint8_t *p;
 
 	p = buf;
 	while(len-- > 0)
 		*p++ = 0;
 }
Index: head/sys/riscv/riscv/mp_machdep.c
===================================================================
--- head/sys/riscv/riscv/mp_machdep.c	(revision 344107)
+++ head/sys/riscv/riscv/mp_machdep.c	(revision 344108)
@@ -1,456 +1,460 @@
 /*-
  * Copyright (c) 2015 The FreeBSD Foundation
  * Copyright (c) 2016 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * Portions of this software were developed by Andrew Turner under
  * sponsorship from the FreeBSD Foundation.
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Portions of this software were developed by the University of Cambridge
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_kstack_pages.h"
 #include "opt_platform.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
+#include <vm/vm_map.h>
 
 #include <machine/intr.h>
 #include <machine/smp.h>
 #include <machine/sbi.h>
 
 #ifdef FDT
 #include <dev/ofw/openfirm.h>
 #include <dev/ofw/ofw_cpu.h>
 #endif
 
 boolean_t ofw_cpu_reg(phandle_t node, u_int, cell_t *);
 
 extern struct pcpu __pcpu[];
 
 uint32_t __riscv_boot_ap[MAXCPU];
 
 static enum {
 	CPUS_UNKNOWN,
 #ifdef FDT
 	CPUS_FDT,
 #endif
 } cpu_enum_method;
 
 static device_identify_t riscv64_cpu_identify;
 static device_probe_t riscv64_cpu_probe;
 static device_attach_t riscv64_cpu_attach;
 
 static int ipi_handler(void *);
 
 struct mtx ap_boot_mtx;
 struct pcb stoppcbs[MAXCPU];
 
 #ifdef INVARIANTS
 static uint32_t cpu_reg[MAXCPU][2];
 #endif
 static device_t cpu_list[MAXCPU];
 
 void mpentry(unsigned long cpuid);
 void init_secondary(uint64_t);
 
 uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 volatile int aps_ready = 0;
 
 /* Temporary variables for init_secondary()  */
 void *dpcpu[MAXCPU - 1];
 
 static device_method_t riscv64_cpu_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	riscv64_cpu_identify),
 	DEVMETHOD(device_probe,		riscv64_cpu_probe),
 	DEVMETHOD(device_attach,	riscv64_cpu_attach),
 
 	DEVMETHOD_END
 };
 
 static devclass_t riscv64_cpu_devclass;
 static driver_t riscv64_cpu_driver = {
 	"riscv64_cpu",
 	riscv64_cpu_methods,
 	0
 };
 
 DRIVER_MODULE(riscv64_cpu, cpu, riscv64_cpu_driver, riscv64_cpu_devclass, 0, 0);
 
 static void
 riscv64_cpu_identify(driver_t *driver, device_t parent)
 {
 
 	if (device_find_child(parent, "riscv64_cpu", -1) != NULL)
 		return;
 	if (BUS_ADD_CHILD(parent, 0, "riscv64_cpu", -1) == NULL)
 		device_printf(parent, "add child failed\n");
 }
 
 static int
 riscv64_cpu_probe(device_t dev)
 {
 	u_int cpuid;
 
 	cpuid = device_get_unit(dev);
 	if (cpuid >= MAXCPU || cpuid > mp_maxid)
 		return (EINVAL);
 
 	device_quiet(dev);
 	return (0);
 }
 
 static int
 riscv64_cpu_attach(device_t dev)
 {
 	const uint32_t *reg;
 	size_t reg_size;
 	u_int cpuid;
 	int i;
 
 	cpuid = device_get_unit(dev);
 
 	if (cpuid >= MAXCPU || cpuid > mp_maxid)
 		return (EINVAL);
 	KASSERT(cpu_list[cpuid] == NULL, ("Already have cpu %u", cpuid));
 
 	reg = cpu_get_cpuid(dev, &reg_size);
 	if (reg == NULL)
 		return (EINVAL);
 
 	if (bootverbose) {
 		device_printf(dev, "register <");
 		for (i = 0; i < reg_size; i++)
 			printf("%s%x", (i == 0) ? "" : " ", reg[i]);
 		printf(">\n");
 	}
 
 	/* Set the device to start it later */
 	cpu_list[cpuid] = dev;
 
 	return (0);
 }
 
 static void
 release_aps(void *dummy __unused)
 {
 	u_long mask;
 	int cpu, i;
 
 	if (mp_ncpus == 1)
 		return;
 
 	/* Setup the IPI handler */
 	riscv_setup_ipihandler(ipi_handler);
 
 	atomic_store_rel_int(&aps_ready, 1);
 
 	/* Wake up the other CPUs */
 	mask = 0;
 
 	for (i = 1; i < mp_ncpus; i++)
 		mask |= (1 << i);
 
 	sbi_send_ipi(&mask);
 
 	printf("Release APs\n");
 
 	for (i = 0; i < 2000; i++) {
 		if (smp_started) {
 			for (cpu = 0; cpu <= mp_maxid; cpu++) {
 				if (CPU_ABSENT(cpu))
 					continue;
 			}
 			return;
 		}
 		DELAY(1000);
 	}
 
 	printf("APs not started\n");
 }
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
 void
 init_secondary(uint64_t cpu)
 {
 	struct pcpu *pcpup;
 
 	/* Setup the pcpu pointer */
 	pcpup = &__pcpu[cpu];
 	__asm __volatile("mv gp, %0" :: "r"(pcpup));
 
 	/* Workaround: make sure wfi doesn't halt the hart */
 	csr_set(sie, SIE_SSIE);
 	csr_set(sip, SIE_SSIE);
 
 	/* Spin until the BSP releases the APs */
 	while (!aps_ready)
 		__asm __volatile("wfi");
 
 	/* Initialize curthread */
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	pcpup->pc_curthread = pcpup->pc_idlethread;
 	pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb;
 
 	/*
 	 * Identify current CPU. This is necessary to setup
 	 * affinity registers and to provide support for
 	 * runtime chip identification.
 	 */
 	identify_cpu();
 
 	/* Enable software interrupts */
 	riscv_unmask_ipi();
 
 	/* Start per-CPU event timers. */
 	cpu_initclocks_ap();
 
 	/* Enable external (PLIC) interrupts */
 	csr_set(sie, SIE_SEIE);
+
+	/* Activate process 0's pmap. */
+	pmap_activate_boot(vmspace_pmap(proc0.p_vmspace));
 
 	mtx_lock_spin(&ap_boot_mtx);
 
 	atomic_add_rel_32(&smp_cpus, 1);
 
 	if (smp_cpus == mp_ncpus) {
 		/* enable IPI's, tlb shootdown, freezes etc */
 		atomic_store_rel_int(&smp_started, 1);
 	}
 
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* Enter the scheduler */
 	sched_throw(NULL);
 
 	panic("scheduler returned us to init_secondary");
 	/* NOTREACHED */
 }
 
 static int
 ipi_handler(void *arg)
 {
 	u_int ipi_bitmap;
 	u_int cpu, ipi;
 	int bit;
 
 	sbi_clear_ipi();
 
 	cpu = PCPU_GET(cpuid);
 
 	mb();
 
 	ipi_bitmap = atomic_readandclear_int(PCPU_PTR(pending_ipis));
 	if (ipi_bitmap == 0)
 		return (FILTER_HANDLED);
 
 	while ((bit = ffs(ipi_bitmap))) {
 		bit = (bit - 1);
 		ipi = (1 << bit);
 		ipi_bitmap &= ~ipi;
 
 		mb();
 
 		switch (ipi) {
 		case IPI_AST:
 			CTR0(KTR_SMP, "IPI_AST");
 			break;
 		case IPI_PREEMPT:
 			CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
 			sched_preempt(curthread);
 			break;
 		case IPI_RENDEZVOUS:
 			CTR0(KTR_SMP, "IPI_RENDEZVOUS");
 			smp_rendezvous_action();
 			break;
 		case IPI_STOP:
 		case IPI_STOP_HARD:
 			CTR0(KTR_SMP, (ipi == IPI_STOP) ? "IPI_STOP" : "IPI_STOP_HARD");
 			savectx(&stoppcbs[cpu]);
 
 			/* Indicate we are stopped */
 			CPU_SET_ATOMIC(cpu, &stopped_cpus);
 
 			/* Wait for restart */
 			while (!CPU_ISSET(cpu, &started_cpus))
 				cpu_spinwait();
 
 			CPU_CLR_ATOMIC(cpu, &started_cpus);
 			CPU_CLR_ATOMIC(cpu, &stopped_cpus);
 			CTR0(KTR_SMP, "IPI_STOP (restart)");
 
 			/*
 			 * The kernel debugger might have set a breakpoint,
 			 * so flush the instruction cache.
 			 */
 			fence_i();
 			break;
 		case IPI_HARDCLOCK:
 			CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
 			hardclockintr();
 			break;
 		default:
 			panic("Unknown IPI %#0x on cpu %d", ipi, curcpu);
 		}
 	}
 
 	return (FILTER_HANDLED);
 }
 
 struct cpu_group *
 cpu_topo(void)
 {
 
 	return (smp_topo_none());
 }
 
 /* Determine if we running MP machine */
 int
 cpu_mp_probe(void)
 {
 
 	return (mp_ncpus > 1);
 }
 
 #ifdef FDT
 static boolean_t
 cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 {
 	uint64_t target_cpu;
 	struct pcpu *pcpup;
 
 	/* Check we are able to start this cpu */
 	if (id > mp_maxid)
 		return (0);
 
 	KASSERT(id < MAXCPU, ("Too many CPUs"));
 
 	KASSERT(addr_size == 1 || addr_size == 2, ("Invalid register size"));
 #ifdef INVARIANTS
 	cpu_reg[id][0] = reg[0];
 	if (addr_size == 2)
 		cpu_reg[id][1] = reg[1];
 #endif
 
 	target_cpu = reg[0];
 	if (addr_size == 2) {
 		target_cpu <<= 32;
 		target_cpu |= reg[1];
 	}
 
 	pcpup = &__pcpu[id];
 
 	/* We are already running on cpu 0 */
 	if (id == 0) {
 		return (1);
 	}
 
 	pcpu_init(pcpup, id, sizeof(struct pcpu));
 
 	dpcpu[id - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
 	dpcpu_init(dpcpu[id - 1], id);
 
 	printf("Starting CPU %u (%lx)\n", id, target_cpu);
 	__riscv_boot_ap[id] = 1;
 
 	CPU_SET(id, &all_cpus);
 
 	return (1);
 }
 #endif
 
 /* Initialize and fire up non-boot processors */
 void
 cpu_mp_start(void)
 {
 
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	CPU_SET(0, &all_cpus);
 
 	switch(cpu_enum_method) {
 #ifdef FDT
 	case CPUS_FDT:
 		ofw_cpu_early_foreach(cpu_init_fdt, true);
 		break;
 #endif
 	case CPUS_UNKNOWN:
 		break;
 	}
 }
 
 /* Introduce rest of cores to the world */
 void
 cpu_mp_announce(void)
 {
 }
 
 void
 cpu_mp_setmaxid(void)
 {
 #ifdef FDT
 	int cores;
 
 	cores = ofw_cpu_early_foreach(NULL, false);
 	if (cores > 0) {
 		cores = MIN(cores, MAXCPU);
 		if (bootverbose)
 			printf("Found %d CPUs in the device tree\n", cores);
 		mp_ncpus = cores;
 		mp_maxid = cores - 1;
 		cpu_enum_method = CPUS_FDT;
 		return;
 	}
 #endif
 
 	if (bootverbose)
 		printf("No CPU data, limiting to 1 core\n");
 	mp_ncpus = 1;
 	mp_maxid = 0;
 }
Index: head/sys/riscv/riscv/pmap.c
===================================================================
--- head/sys/riscv/riscv/pmap.c	(revision 344107)
+++ head/sys/riscv/riscv/pmap.c	(revision 344108)
@@ -1,4398 +1,4440 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  * Copyright (c) 2003 Peter Wemm
  * All rights reserved.
  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
  * Copyright (c) 2014 The FreeBSD Foundation
  * All rights reserved.
  * Copyright (c) 2015-2018 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Portions of this software were developed by Andrew Turner under
  * sponsorship from The FreeBSD Foundation.
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Portions of this software were developed by the University of Cambridge
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  */
 /*-
  * Copyright (c) 2003 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jake Burkholder,
  * Safeport Network Services, and Network Associates Laboratories, the
  * Security Research Division of Network Associates, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
  * CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Manages physical address maps.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include <sys/param.h>
+#include <sys/systm.h>
 #include <sys/bitstring.h>
 #include <sys/bus.h>
-#include <sys/systm.h>
+#include <sys/cpuset.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/vmem.h>
 #include <sys/vmmeter.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_reserv.h>
 #include <vm/uma.h>
 
 #include <machine/machdep.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/sbi.h>
 
 #define	NUL1E		(Ln_ENTRIES * Ln_ENTRIES)
 #define	NUL2E		(Ln_ENTRIES * NUL1E)
 
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
 #else
 #define PMAP_INLINE	extern inline
 #endif
 #else
 #define PMAP_INLINE
 #endif
 
 #ifdef PV_STATS
 #define PV_STAT(x)	do { x ; } while (0)
 #else
 #define PV_STAT(x)	do { } while (0)
 #endif
 
 #define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
 #define	pa_to_pvh(pa)		(&pv_table[pa_index(pa)])
 
 #define	NPV_LIST_LOCKS	MAXCPU
 
 #define	PHYS_TO_PV_LIST_LOCK(pa)	\
 			(&pv_list_locks[pmap_l2_pindex(pa) % NPV_LIST_LOCKS])
 
 #define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
 	struct rwlock **_lockp = (lockp);		\
 	struct rwlock *_new_lock;			\
 							\
 	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
 	if (_new_lock != *_lockp) {			\
 		if (*_lockp != NULL)			\
 			rw_wunlock(*_lockp);		\
 		*_lockp = _new_lock;			\
 		rw_wlock(*_lockp);			\
 	}						\
 } while (0)
 
 #define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
 			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
 
 #define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
 	struct rwlock **_lockp = (lockp);		\
 							\
 	if (*_lockp != NULL) {				\
 		rw_wunlock(*_lockp);			\
 		*_lockp = NULL;				\
 	}						\
 } while (0)
 
 #define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
 			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
 
 /* The list of all the user pmaps */
 LIST_HEAD(pmaplist, pmap);
 static struct pmaplist allpmaps = LIST_HEAD_INITIALIZER();
 
 struct pmap kernel_pmap_store;
 
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 vm_offset_t kernel_vm_end = 0;
 
 vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
 vm_paddr_t dmap_phys_max;	/* The limit of the dmap region */
 vm_offset_t dmap_max_addr;	/* The virtual address limit of the dmap */
 
 /* This code assumes all L1 DMAP entries will be used */
 CTASSERT((DMAP_MIN_ADDRESS  & ~L1_OFFSET) == DMAP_MIN_ADDRESS);
 CTASSERT((DMAP_MAX_ADDRESS  & ~L1_OFFSET) == DMAP_MAX_ADDRESS);
 
 static struct rwlock_padalign pvh_global_lock;
 static struct mtx_padalign allpmaps_lock;
 
 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0,
     "VM/pmap parameters");
 
 static int superpages_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
     CTLFLAG_RDTUN, &superpages_enabled, 0,
     "Enable support for transparent superpages");
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0,
     "2MB page mapping counters");
 
 static u_long pmap_l2_demotions;
 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
     &pmap_l2_demotions, 0,
     "2MB page demotions");
 
 static u_long pmap_l2_mappings;
 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD,
     &pmap_l2_mappings, 0,
     "2MB page mappings");
 
 static u_long pmap_l2_p_failures;
 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
     &pmap_l2_p_failures, 0,
     "2MB page promotion failures");
 
 static u_long pmap_l2_promotions;
 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
     &pmap_l2_promotions, 0,
     "2MB page promotions");
 
 /*
  * Data for the pv entry allocation mechanism
  */
 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
 static struct mtx pv_chunks_mutex;
 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
 static struct md_page *pv_table;
 static struct md_page pv_dummy;
 
 /*
  * Internal flags for pmap_enter()'s helper functions.
  */
 #define	PMAP_ENTER_NORECLAIM	0x1000000	/* Don't reclaim PV entries. */
 #define	PMAP_ENTER_NOREPLACE	0x2000000	/* Don't replace mappings. */
 
 static void	free_pv_chunk(struct pv_chunk *pc);
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
 static bool	pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va);
 static bool	pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2,
 		    vm_offset_t va, struct rwlock **lockp);
 static int	pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
 		    u_int flags, vm_page_t m, struct rwlock **lockp);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
     pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m, struct rwlock **lockp);
 
 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
 		struct rwlock **lockp);
 
 static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
     struct spglist *free);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
 
 #define	pmap_clear(pte)			pmap_store(pte, 0)
 #define	pmap_clear_bits(pte, bits)	atomic_clear_64(pte, bits)
 #define	pmap_load_store(pte, entry)	atomic_swap_64(pte, entry)
 #define	pmap_load_clear(pte)		pmap_load_store(pte, 0)
 #define	pmap_load(pte)			atomic_load_64(pte)
 #define	pmap_store(pte, entry)		atomic_store_64(pte, entry)
 #define	pmap_store_bits(pte, bits)	atomic_set_64(pte, bits)
 
 /********************/
 /* Inline functions */
 /********************/
 
 static __inline void
 pagecopy(void *s, void *d)
 {
 
 	memcpy(d, s, PAGE_SIZE);
 }
 
 static __inline void
 pagezero(void *p)
 {
 
 	bzero(p, PAGE_SIZE);
 }
 
 #define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
 #define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
 #define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)
 
 #define	PTE_TO_PHYS(pte)	((pte >> PTE_PPN0_S) * PAGE_SIZE)
 
 static __inline pd_entry_t *
 pmap_l1(pmap_t pmap, vm_offset_t va)
 {
 
 	return (&pmap->pm_l1[pmap_l1_index(va)]);
 }
 
 static __inline pd_entry_t *
 pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
 {
 	vm_paddr_t phys;
 	pd_entry_t *l2;
 
 	phys = PTE_TO_PHYS(pmap_load(l1));
 	l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
 
 	return (&l2[pmap_l2_index(va)]);
 }
 
 static __inline pd_entry_t *
 pmap_l2(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t *l1;
 
 	l1 = pmap_l1(pmap, va);
 	if ((pmap_load(l1) & PTE_V) == 0)
 		return (NULL);
 	if ((pmap_load(l1) & PTE_RX) != 0)
 		return (NULL);
 
 	return (pmap_l1_to_l2(l1, va));
 }
 
 static __inline pt_entry_t *
 pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
 {
 	vm_paddr_t phys;
 	pt_entry_t *l3;
 
 	phys = PTE_TO_PHYS(pmap_load(l2));
 	l3 = (pd_entry_t *)PHYS_TO_DMAP(phys);
 
 	return (&l3[pmap_l3_index(va)]);
 }
 
 static __inline pt_entry_t *
 pmap_l3(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t *l2;
 
 	l2 = pmap_l2(pmap, va);
 	if (l2 == NULL)
 		return (NULL);
 	if ((pmap_load(l2) & PTE_V) == 0)
 		return (NULL);
 	if ((pmap_load(l2) & PTE_RX) != 0)
 		return (NULL);
 
 	return (pmap_l2_to_l3(l2, va));
 }
 
 static __inline void
 pmap_resident_count_inc(pmap_t pmap, int count)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pmap->pm_stats.resident_count += count;
 }
 
 static __inline void
 pmap_resident_count_dec(pmap_t pmap, int count)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT(pmap->pm_stats.resident_count >= count,
 	    ("pmap %p resident count underflow %ld %d", pmap,
 	    pmap->pm_stats.resident_count, count));
 	pmap->pm_stats.resident_count -= count;
 }
 
 static void
 pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index,
     pt_entry_t entry)
 {
 	struct pmap *user_pmap;
 	pd_entry_t *l1;
 
 	/* Distribute new kernel L1 entry to all the user pmaps */
 	if (pmap != kernel_pmap)
 		return;
 
 	mtx_lock(&allpmaps_lock);
 	LIST_FOREACH(user_pmap, &allpmaps, pm_list) {
 		l1 = &user_pmap->pm_l1[l1index];
 		pmap_store(l1, entry);
 	}
 	mtx_unlock(&allpmaps_lock);
 }
 
 static pt_entry_t *
 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
     u_int *l2_slot)
 {
 	pt_entry_t *l2;
 	pd_entry_t *l1;
 
 	l1 = (pd_entry_t *)l1pt;
 	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
 
 	/* Check locore has used a table L1 map */
 	KASSERT((l1[*l1_slot] & PTE_RX) == 0,
 		("Invalid bootstrap L1 table"));
 
 	/* Find the address of the L2 table */
 	l2 = (pt_entry_t *)init_pt_va;
 	*l2_slot = pmap_l2_index(va);
 
 	return (l2);
 }
 
 static vm_paddr_t
 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
 {
 	u_int l1_slot, l2_slot;
 	pt_entry_t *l2;
 	u_int ret;
 
 	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
 
 	/* Check locore has used L2 superpages */
 	KASSERT((l2[l2_slot] & PTE_RX) != 0,
 		("Invalid bootstrap L2 table"));
 
 	/* L2 is superpages */
 	ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT;
 	ret += (va & L2_OFFSET);
 
 	return (ret);
 }
 
 static void
 pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
 {
 	vm_offset_t va;
 	vm_paddr_t pa;
 	pd_entry_t *l1;
 	u_int l1_slot;
 	pt_entry_t entry;
 	pn_t pn;
 
 	pa = dmap_phys_base = min_pa & ~L1_OFFSET;
 	va = DMAP_MIN_ADDRESS;
 	l1 = (pd_entry_t *)kern_l1;
 	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);
 
 	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
 	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
 		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
 
 		/* superpages */
 		pn = (pa / PAGE_SIZE);
 		entry = PTE_KERN;
 		entry |= (pn << PTE_PPN0_S);
 		pmap_store(&l1[l1_slot], entry);
 	}
 
 	/* Set the upper limit of the DMAP region */
 	dmap_phys_max = pa;
 	dmap_max_addr = va;
 
 	sfence_vma();
 }
 
 static vm_offset_t
 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
 {
 	vm_offset_t l3pt;
 	pt_entry_t entry;
 	pd_entry_t *l2;
 	vm_paddr_t pa;
 	u_int l2_slot;
 	pn_t pn;
 
 	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
 
 	l2 = pmap_l2(kernel_pmap, va);
 	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
 	l2_slot = pmap_l2_index(va);
 	l3pt = l3_start;
 
 	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
 		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
 
 		pa = pmap_early_vtophys(l1pt, l3pt);
 		pn = (pa / PAGE_SIZE);
 		entry = (PTE_V);
 		entry |= (pn << PTE_PPN0_S);
 		pmap_store(&l2[l2_slot], entry);
 		l3pt += PAGE_SIZE;
 	}
 
 
 	/* Clean the L2 page table */
 	memset((void *)l3_start, 0, l3pt - l3_start);
 
 	return (l3pt);
 }
 
 /*
  *	Bootstrap the system enough to run with virtual memory.
  */
 void
 pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 {
 	u_int l1_slot, l2_slot, avail_slot, map_slot;
 	vm_offset_t freemempos;
 	vm_offset_t dpcpu, msgbufpv;
 	vm_paddr_t end, max_pa, min_pa, pa, start;
 	int i;
 
 	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
 	printf("%lx\n", l1pt);
 	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
 
 	/* Set this early so we can use the pagetable walking functions */
 	kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
 	PMAP_LOCK_INIT(kernel_pmap);
 
 	rw_init(&pvh_global_lock, "pmap pv global");
 
+	CPU_FILL(&kernel_pmap->pm_active);
+
 	/* Assume the address we were loaded to is a valid physical address. */
 	min_pa = max_pa = kernstart;
 
 	/*
 	 * Find the minimum physical address. physmap is sorted,
 	 * but may contain empty ranges.
 	 */
 	for (i = 0; i < physmap_idx * 2; i += 2) {
 		if (physmap[i] == physmap[i + 1])
 			continue;
 		if (physmap[i] <= min_pa)
 			min_pa = physmap[i];
 		if (physmap[i + 1] > max_pa)
 			max_pa = physmap[i + 1];
 	}
 	printf("physmap_idx %lx\n", physmap_idx);
 	printf("min_pa %lx\n", min_pa);
 	printf("max_pa %lx\n", max_pa);
 
 	/* Create a direct map region early so we can use it for pa -> va */
 	pmap_bootstrap_dmap(l1pt, min_pa, max_pa);
 
 	/*
 	 * Read the page table to find out what is already mapped.
 	 * This assumes we have mapped a block of memory from KERNBASE
 	 * using a single L1 entry.
 	 */
 	(void)pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
 
 	/* Sanity check the index, KERNBASE should be the first VA */
 	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
 
 	freemempos = roundup2(KERNBASE + kernlen, PAGE_SIZE);
 
 	/* Create the l3 tables for the early devmap */
 	freemempos = pmap_bootstrap_l3(l1pt,
 	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
 
 	sfence_vma();
 
 #define alloc_pages(var, np)						\
 	(var) = freemempos;						\
 	freemempos += (np * PAGE_SIZE);					\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	/* Allocate dynamic per-cpu area. */
 	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu, 0);
 
 	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
 	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 	msgbufp = (void *)msgbufpv;
 
 	virtual_avail = roundup2(freemempos, L2_SIZE);
 	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
 	kernel_vm_end = virtual_avail;
 	
 	pa = pmap_early_vtophys(l1pt, freemempos);
 
 	/* Initialize phys_avail. */
 	for (avail_slot = map_slot = physmem = 0; map_slot < physmap_idx * 2;
 	    map_slot += 2) {
 		start = physmap[map_slot];
 		end = physmap[map_slot + 1];
 
 		if (start == end)
 			continue;
 		if (start >= kernstart && end <= pa)
 			continue;
 
 		if (start < kernstart && end > kernstart)
 			end = kernstart;
 		else if (start < pa && end > pa)
 			start = pa;
 		phys_avail[avail_slot] = start;
 		phys_avail[avail_slot + 1] = end;
 		physmem += (end - start) >> PAGE_SHIFT;
 		avail_slot += 2;
 
 		if (end != physmap[map_slot + 1] && end > pa) {
 			phys_avail[avail_slot] = pa;
 			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
 			physmem += (physmap[map_slot + 1] - pa) >> PAGE_SHIFT;
 			avail_slot += 2;
 		}
 	}
 	phys_avail[avail_slot] = 0;
 	phys_avail[avail_slot + 1] = 0;
 
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".
 	 */
 	Maxmem = atop(phys_avail[avail_slot - 1]);
 }
 
 /*
  *	Initialize a vm_page's machine-dependent fields.
  */
 void
 pmap_page_init(vm_page_t m)
 {
 
 	TAILQ_INIT(&m->md.pv_list);
 	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
 }
 
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
  *	system needs to map virtual memory.
  */
 void
 pmap_init(void)
 {
 	vm_size_t s;
 	int i, pv_npg;
 
 	/*
 	 * Initialize the pv chunk and pmap list mutexes.
 	 */
 	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
 	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_DEF);
 
 	/*
 	 * Initialize the pool of pv list locks.
 	 */
 	for (i = 0; i < NPV_LIST_LOCKS; i++)
 		rw_init(&pv_list_locks[i], "pmap pv list");
 
 	/*
 	 * Calculate the size of the pv head table for superpages.
 	 */
 	pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);
 
 	/*
 	 * Allocate memory for the pv head table for superpages.
 	 */
 	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
 	s = round_page(s);
 	pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
 	TAILQ_INIT(&pv_dummy.pv_list);
 
 	if (superpages_enabled)
 		pagesizes[1] = L2_SIZE;
 }
 
 #ifdef SMP
 /*
  * For SMP, these functions have to use IPIs for coherence.
  *
  * In general, the calling thread uses a plain fence to order the
  * writes to the page tables before invoking an SBI callback to invoke
  * sfence_vma() on remote CPUs.
- *
- * Since the riscv pmap does not yet have a pm_active field, IPIs are
- * sent to all CPUs in the system.
  */
 static void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 	cpuset_t mask;
 
 	sched_pin();
-	mask = all_cpus;
+	mask = pmap->pm_active;
 	CPU_CLR(PCPU_GET(cpuid), &mask);
 	fence();
-	sbi_remote_sfence_vma(mask.__bits, va, 1);
+	if (!CPU_EMPTY(&mask) && smp_started)
+		sbi_remote_sfence_vma(mask.__bits, va, 1);
 	sfence_vma_page(va);
 	sched_unpin();
 }
 
 static void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	cpuset_t mask;
 
 	sched_pin();
-	mask = all_cpus;
+	mask = pmap->pm_active;
 	CPU_CLR(PCPU_GET(cpuid), &mask);
 	fence();
-	sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1);
+	if (!CPU_EMPTY(&mask) && smp_started)
+		sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1);
 
 	/*
 	 * Might consider a loop of sfence_vma_page() for a small
 	 * number of pages in the future.
 	 */
 	sfence_vma();
 	sched_unpin();
 }
 
 static void
 pmap_invalidate_all(pmap_t pmap)
 {
 	cpuset_t mask;
 
 	sched_pin();
-	mask = all_cpus;
+	mask = pmap->pm_active;
 	CPU_CLR(PCPU_GET(cpuid), &mask);
-	fence();
 
 	/*
 	 * XXX: The SBI doc doesn't detail how to specify x0 as the
 	 * address to perform a global fence.  BBL currently treats
 	 * all sfence_vma requests as global however.
 	 */
-	sbi_remote_sfence_vma(mask.__bits, 0, 0);
+	fence();
+	if (!CPU_EMPTY(&mask) && smp_started)
+		sbi_remote_sfence_vma(mask.__bits, 0, 0);
 	sfence_vma();
 	sched_unpin();
 }
 #else
 /*
  * Normal, non-SMP, invalidation functions.
  * We inline these within pmap.c for speed.
  */
 static __inline void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 
 	sfence_vma_page(va);
 }
 
 static __inline void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 
 	/*
 	 * Might consider a loop of sfence_vma_page() for a small
 	 * number of pages in the future.
 	 */
 	sfence_vma();
 }
 
 static __inline void
 pmap_invalidate_all(pmap_t pmap)
 {
 
 	sfence_vma();
 }
 #endif
 
 /*
  *	Routine:	pmap_extract
  *	Function:
  *		Extract the physical page address associated
  *		with the given map/virtual_address pair.
  */
 vm_paddr_t 
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t *l2p, l2;
 	pt_entry_t *l3p, l3;
 	vm_paddr_t pa;
 
 	pa = 0;
 	PMAP_LOCK(pmap);
 	/*
 	 * Start with the l2 tabel. We are unable to allocate
 	 * pages in the l1 table.
 	 */
 	l2p = pmap_l2(pmap, va);
 	if (l2p != NULL) {
 		l2 = pmap_load(l2p);
 		if ((l2 & PTE_RX) == 0) {
 			l3p = pmap_l2_to_l3(l2p, va);
 			if (l3p != NULL) {
 				l3 = pmap_load(l3p);
 				pa = PTE_TO_PHYS(l3);
 				pa |= (va & L3_OFFSET);
 			}
 		} else {
 			/* L2 is superpages */
 			pa = (l2 >> PTE_PPN1_S) << L2_SHIFT;
 			pa |= (va & L2_OFFSET);
 		}
 	}
 	PMAP_UNLOCK(pmap);
 	return (pa);
 }
 
 /*
  *	Routine:	pmap_extract_and_hold
  *	Function:
  *		Atomically extract and hold the physical page
  *		with the given pmap and virtual address pair
  *		if that mapping permits the given protection.
  */
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
 	pt_entry_t *l3p, l3;
 	vm_paddr_t phys;
 	vm_paddr_t pa;
 	vm_page_t m;
 
 	pa = 0;
 	m = NULL;
 	PMAP_LOCK(pmap);
 retry:
 	l3p = pmap_l3(pmap, va);
 	if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
 		if ((l3 & PTE_W) != 0 || (prot & VM_PROT_WRITE) == 0) {
 			phys = PTE_TO_PHYS(l3);
 			if (vm_page_pa_tryrelock(pmap, phys, &pa))
 				goto retry;
 			m = PHYS_TO_VM_PAGE(phys);
 			vm_page_hold(m);
 		}
 	}
 	PA_UNLOCK_COND(pa);
 	PMAP_UNLOCK(pmap);
 	return (m);
 }
 
 vm_paddr_t
 pmap_kextract(vm_offset_t va)
 {
 	pd_entry_t *l2;
 	pt_entry_t *l3;
 	vm_paddr_t pa;
 
 	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
 		pa = DMAP_TO_PHYS(va);
 	} else {
 		l2 = pmap_l2(kernel_pmap, va);
 		if (l2 == NULL)
 			panic("pmap_kextract: No l2");
 		if ((pmap_load(l2) & PTE_RX) != 0) {
 			/* superpages */
 			pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT;
 			pa |= (va & L2_OFFSET);
 			return (pa);
 		}
 
 		l3 = pmap_l2_to_l3(l2, va);
 		if (l3 == NULL)
 			panic("pmap_kextract: No l3...");
 		pa = PTE_TO_PHYS(pmap_load(l3));
 		pa |= (va & PAGE_MASK);
 	}
 	return (pa);
 }
 
 /***************************************************
  * Low level mapping routines.....
  ***************************************************/
 
 void
 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
 {
 	pt_entry_t entry;
 	pt_entry_t *l3;
 	vm_offset_t va;
 	pn_t pn;
 
 	KASSERT((pa & L3_OFFSET) == 0,
 	   ("pmap_kenter_device: Invalid physical address"));
 	KASSERT((sva & L3_OFFSET) == 0,
 	   ("pmap_kenter_device: Invalid virtual address"));
 	KASSERT((size & PAGE_MASK) == 0,
 	    ("pmap_kenter_device: Mapping is not page-sized"));
 
 	va = sva;
 	while (size != 0) {
 		l3 = pmap_l3(kernel_pmap, va);
 		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
 
 		pn = (pa / PAGE_SIZE);
 		entry = PTE_KERN;
 		entry |= (pn << PTE_PPN0_S);
 		pmap_store(l3, entry);
 
 		va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
 /*
  * Remove a page from the kernel pagetables.
  * Note: not SMP coherent.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
 	pt_entry_t *l3;
 
 	l3 = pmap_l3(kernel_pmap, va);
 	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
 
 	pmap_clear(l3);
 	sfence_vma();
 }
 
 void
 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 {
 	pt_entry_t *l3;
 	vm_offset_t va;
 
 	KASSERT((sva & L3_OFFSET) == 0,
 	   ("pmap_kremove_device: Invalid virtual address"));
 	KASSERT((size & PAGE_MASK) == 0,
 	    ("pmap_kremove_device: Mapping is not page-sized"));
 
 	va = sva;
 	while (size != 0) {
 		l3 = pmap_l3(kernel_pmap, va);
 		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
 		pmap_clear(l3);
 
 		va += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
 /*
  *	Used to map a range of physical addresses into kernel
  *	virtual address space.
  *
  *	The value passed in '*virt' is a suggested virtual address for
  *	the mapping. Architectures which can support a direct-mapped
  *	physical to virtual region can return the appropriate address
  *	within that region, leaving '*virt' unchanged. Other
  *	architectures should map the pages starting at '*virt' and
  *	update '*virt' with the first usable address after the mapped
  *	region.
  */
 vm_offset_t
 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 {
 
 	return PHYS_TO_DMAP(start);
 }
 
 
 /*
  * Add a list of wired pages to the kva
  * this routine is only used for temporary
  * kernel mappings that do not need to have
  * page modification or references recorded.
  * Note that old mappings are simply written
  * over.  The page *must* be wired.
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
 	pt_entry_t *l3, pa;
 	vm_offset_t va;
 	vm_page_t m;
 	pt_entry_t entry;
 	pn_t pn;
 	int i;
 
 	va = sva;
 	for (i = 0; i < count; i++) {
 		m = ma[i];
 		pa = VM_PAGE_TO_PHYS(m);
 		pn = (pa / PAGE_SIZE);
 		l3 = pmap_l3(kernel_pmap, va);
 
 		entry = PTE_KERN;
 		entry |= (pn << PTE_PPN0_S);
 		pmap_store(l3, entry);
 
 		va += L3_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
 	pt_entry_t *l3;
 	vm_offset_t va;
 
 	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
 
 	for (va = sva; count-- > 0; va += PAGE_SIZE) {
 		l3 = pmap_l3(kernel_pmap, va);
 		KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
 		pmap_clear(l3);
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
 bool
 pmap_ps_enabled(pmap_t pmap __unused)
 {
 
 	return (superpages_enabled);
 }
 
 /***************************************************
  * Page table page management routines.....
  ***************************************************/
 /*
  * Schedule the specified unused page table page to be freed.  Specifically,
  * add the page to the specified list of pages that will be released to the
  * physical memory manager after the TLB has been updated.
  */
 static __inline void
 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
     boolean_t set_PG_ZERO)
 {
 
 	if (set_PG_ZERO)
 		m->flags |= PG_ZERO;
 	else
 		m->flags &= ~PG_ZERO;
 	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
 }
 
 /*
  * Inserts the specified page table page into the specified pmap's collection
  * of idle page table pages.  Each of a pmap's page table pages is responsible
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  */
 static __inline int
 pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	return (vm_radix_insert(&pmap->pm_root, ml3));
 }
 
 /*
  * Removes the page table page mapping the specified virtual address from the
  * specified pmap's collection of idle page table pages, and returns it.
  * Otherwise, returns NULL if there is no page table page corresponding to the
  * specified virtual address.
  */
 static __inline vm_page_t
 pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
 }
 	
 /*
  * Decrements a page table page's wire count, which is used to record the
  * number of valid page table entries within the page.  If the wire count
  * drops to zero, then the page table page is unmapped.  Returns TRUE if the
  * page table page was unmapped and FALSE otherwise.
  */
 static inline boolean_t
 pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 {
 
 	--m->wire_count;
 	if (m->wire_count == 0) {
 		_pmap_unwire_ptp(pmap, va, m, free);
 		return (TRUE);
 	} else {
 		return (FALSE);
 	}
 }
 
 static void
 _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 {
 	vm_paddr_t phys;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	if (m->pindex >= NUL1E) {
 		pd_entry_t *l1;
 		l1 = pmap_l1(pmap, va);
 		pmap_clear(l1);
 		pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
 	} else {
 		pd_entry_t *l2;
 		l2 = pmap_l2(pmap, va);
 		pmap_clear(l2);
 	}
 	pmap_resident_count_dec(pmap, 1);
 	if (m->pindex < NUL1E) {
 		pd_entry_t *l1;
 		vm_page_t pdpg;
 
 		l1 = pmap_l1(pmap, va);
 		phys = PTE_TO_PHYS(pmap_load(l1));
 		pdpg = PHYS_TO_VM_PAGE(phys);
 		pmap_unwire_ptp(pmap, va, pdpg, free);
 	}
 	pmap_invalidate_page(pmap, va);
 
 	vm_wire_sub(1);
 
 	/* 
 	 * Put page on a list so that it is released after
 	 * *ALL* TLB shootdown is done
 	 */
 	pmap_add_delayed_free_list(m, free, TRUE);
 }
 
 /*
  * After removing a page table entry, this routine is used to
  * conditionally free the page, and manage the hold/wire counts.
  */
 static int
 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
     struct spglist *free)
 {
 	vm_page_t mpte;
 
 	if (va >= VM_MAXUSER_ADDRESS)
 		return (0);
 	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
 	mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(ptepde));
 	return (pmap_unwire_ptp(pmap, va, mpte, free));
 }
 
 void
 pmap_pinit0(pmap_t pmap)
 {
 
 	PMAP_LOCK_INIT(pmap);
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 	pmap->pm_l1 = kernel_pmap->pm_l1;
+	pmap->pm_satp = SATP_MODE_SV39 | (vtophys(pmap->pm_l1) >> PAGE_SHIFT);
+	CPU_ZERO(&pmap->pm_active);
+	pmap_activate_boot(pmap);
 }
 
 int
 pmap_pinit(pmap_t pmap)
 {
 	vm_paddr_t l1phys;
 	vm_page_t l1pt;
 
 	/*
 	 * allocate the l1 page
 	 */
 	while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
 		vm_wait(NULL);
 
 	l1phys = VM_PAGE_TO_PHYS(l1pt);
 	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
+	pmap->pm_satp = SATP_MODE_SV39 | (l1phys >> PAGE_SHIFT);
 
 	if ((l1pt->flags & PG_ZERO) == 0)
 		pagezero(pmap->pm_l1);
 
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 
+	CPU_ZERO(&pmap->pm_active);
+
 	/* Install kernel pagetables */
 	memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE);
 
 	/* Add to the list of all user pmaps */
 	mtx_lock(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 	mtx_unlock(&allpmaps_lock);
 
 	vm_radix_init(&pmap->pm_root);
 
 	return (1);
 }
 
 /*
  * This routine is called if the desired page table page does not exist.
  *
  * If page table page allocation fails, this routine may sleep before
  * returning NULL.  It sleeps only if a lock pointer was given.
  *
  * Note: If a page allocation fails at page table level two or three,
  * one or two pages may be held during the wait, only to be released
  * afterwards.  This conservative approach is easily argued to avoid
  * race conditions.
  */
 static vm_page_t
 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 {
 	vm_page_t m, /*pdppg, */pdpg;
 	pt_entry_t entry;
 	vm_paddr_t phys;
 	pn_t pn;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * Allocate a page table page.
 	 */
 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 		if (lockp != NULL) {
 			RELEASE_PV_LIST_LOCK(lockp);
 			PMAP_UNLOCK(pmap);
 			rw_runlock(&pvh_global_lock);
 			vm_wait(NULL);
 			rw_rlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
 		}
 
 		/*
 		 * Indicate the need to retry.  While waiting, the page table
 		 * page may have been allocated.
 		 */
 		return (NULL);
 	}
 
 	if ((m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
 
 	/*
 	 * Map the pagetable page into the process address space, if
 	 * it isn't already there.
 	 */
 
 	if (ptepindex >= NUL1E) {
 		pd_entry_t *l1;
 		vm_pindex_t l1index;
 
 		l1index = ptepindex - NUL1E;
 		l1 = &pmap->pm_l1[l1index];
 
 		pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
 		entry = (PTE_V);
 		entry |= (pn << PTE_PPN0_S);
 		pmap_store(l1, entry);
 		pmap_distribute_l1(pmap, l1index, entry);
 	} else {
 		vm_pindex_t l1index;
 		pd_entry_t *l1, *l2;
 
 		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
 		l1 = &pmap->pm_l1[l1index];
 		if (pmap_load(l1) == 0) {
 			/* recurse for allocating page dir */
 			if (_pmap_alloc_l3(pmap, NUL1E + l1index,
 			    lockp) == NULL) {
 				vm_page_unwire_noq(m);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
 		} else {
 			phys = PTE_TO_PHYS(pmap_load(l1));
 			pdpg = PHYS_TO_VM_PAGE(phys);
 			pdpg->wire_count++;
 		}
 
 		phys = PTE_TO_PHYS(pmap_load(l1));
 		l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
 		l2 = &l2[ptepindex & Ln_ADDR_MASK];
 
 		pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
 		entry = (PTE_V);
 		entry |= (pn << PTE_PPN0_S);
 		pmap_store(l2, entry);
 	}
 
 	pmap_resident_count_inc(pmap, 1);
 
 	return (m);
 }
 
 static vm_page_t
 pmap_alloc_l2(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
 	pd_entry_t *l1;
 	vm_page_t l2pg;
 	vm_pindex_t l2pindex;
 
 retry:
 	l1 = pmap_l1(pmap, va);
 	if (l1 != NULL && (pmap_load(l1) & PTE_RWX) == 0) {
 		/* Add a reference to the L2 page. */
 		l2pg = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l1)));
 		l2pg->wire_count++;
 	} else {
 		/* Allocate a L2 page. */
 		l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT;
 		l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp);
 		if (l2pg == NULL && lockp != NULL)
 			goto retry;
 	}
 	return (l2pg);
 }
 
 static vm_page_t
 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
 	vm_pindex_t ptepindex;
 	pd_entry_t *l2;
 	vm_paddr_t phys;
 	vm_page_t m;
 
 	/*
 	 * Calculate pagetable page index
 	 */
 	ptepindex = pmap_l2_pindex(va);
 retry:
 	/*
 	 * Get the page directory entry
 	 */
 	l2 = pmap_l2(pmap, va);
 
 	/*
 	 * If the page table page is mapped, we just increment the
 	 * hold count, and activate it.
 	 */
 	if (l2 != NULL && pmap_load(l2) != 0) {
 		phys = PTE_TO_PHYS(pmap_load(l2));
 		m = PHYS_TO_VM_PAGE(phys);
 		m->wire_count++;
 	} else {
 		/*
 		 * Here if the pte page isn't mapped, or if it has been
 		 * deallocated.
 		 */
 		m = _pmap_alloc_l3(pmap, ptepindex, lockp);
 		if (m == NULL && lockp != NULL)
 			goto retry;
 	}
 	return (m);
 }
 
 
 /***************************************************
  * Pmap allocation/deallocation routines.
  ***************************************************/
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  */
 void
 pmap_release(pmap_t pmap)
 {
 	vm_page_t m;
 
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("pmap_release: pmap resident count %ld != 0",
 	    pmap->pm_stats.resident_count));
+	KASSERT(CPU_EMPTY(&pmap->pm_active),
+	    ("releasing active pmap %p", pmap));
 
 	mtx_lock(&allpmaps_lock);
 	LIST_REMOVE(pmap, pm_list);
 	mtx_unlock(&allpmaps_lock);
 
 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
 	vm_page_unwire_noq(m);
 	vm_page_free(m);
 }
 
 #if 0
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
 
 	return sysctl_handle_long(oidp, &ksize, 0, req);
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_size, "LU", "Size of KVM");
 
 static int
 kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 
 	return sysctl_handle_long(oidp, &kfree, 0, req);
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_free, "LU", "Amount of KVM free");
 #endif /* 0 */
 
 /*
  * grow the number of kernel page table entries, if needed
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	vm_paddr_t paddr;
 	vm_page_t nkpg;
 	pd_entry_t *l1, *l2;
 	pt_entry_t entry;
 	pn_t pn;
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 
 	addr = roundup2(addr, L2_SIZE);
 	if (addr - 1 >= vm_map_max(kernel_map))
 		addr = vm_map_max(kernel_map);
 	while (kernel_vm_end < addr) {
 		l1 = pmap_l1(kernel_pmap, kernel_vm_end);
 		if (pmap_load(l1) == 0) {
 			/* We need a new PDP entry */
 			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 			if (nkpg == NULL)
 				panic("pmap_growkernel: no memory to grow kernel");
 			if ((nkpg->flags & PG_ZERO) == 0)
 				pmap_zero_page(nkpg);
 			paddr = VM_PAGE_TO_PHYS(nkpg);
 
 			pn = (paddr / PAGE_SIZE);
 			entry = (PTE_V);
 			entry |= (pn << PTE_PPN0_S);
 			pmap_store(l1, entry);
 			pmap_distribute_l1(kernel_pmap,
 			    pmap_l1_index(kernel_vm_end), entry);
 			continue; /* try again */
 		}
 		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
 		if ((pmap_load(l2) & PTE_V) != 0 &&
 		    (pmap_load(l2) & PTE_RWX) == 0) {
 			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
 			if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
 				kernel_vm_end = vm_map_max(kernel_map);
 				break;
 			}
 			continue;
 		}
 
 		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
 		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 		    VM_ALLOC_ZERO);
 		if (nkpg == NULL)
 			panic("pmap_growkernel: no memory to grow kernel");
 		if ((nkpg->flags & PG_ZERO) == 0) {
 			pmap_zero_page(nkpg);
 		}
 		paddr = VM_PAGE_TO_PHYS(nkpg);
 
 		pn = (paddr / PAGE_SIZE);
 		entry = (PTE_V);
 		entry |= (pn << PTE_PPN0_S);
 		pmap_store(l2, entry);
 
 		pmap_invalidate_page(kernel_pmap, kernel_vm_end);
 
 		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
 		if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
 			kernel_vm_end = vm_map_max(kernel_map);
 			break;                       
 		}
 	}
 }
 
 
 /***************************************************
  * page management routines.
  ***************************************************/
 
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 CTASSERT(_NPCM == 3);
 CTASSERT(_NPCPV == 168);
 
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 
 	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
 }
 
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 
 #define	PC_FREE0	0xfffffffffffffffful
 #define	PC_FREE1	0xfffffffffffffffful
 #define	PC_FREE2	0x000000fffffffffful
 
 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
 
 #if 0
 #ifdef PV_STATS
 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
 	"Current number of pv entry chunks");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
 	"Current number of pv entry chunks allocated");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
 	"Current number of pv entry chunks frees");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
 	"Number of times tried to get a chunk page but failed.");
 
 static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
 static int pv_entry_spare;
 
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
 	"Current number of pv entry frees");
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
 	"Current number of pv entry allocs");
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 	"Current number of pv entries");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
 	"Current number of spare pv entries");
 #endif
 #endif /* 0 */
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
  * another pv entry chunk.
  *
  * Returns NULL if PV entries were reclaimed from the specified pmap.
  *
  * We do not, however, unmap 2mpages because subsequent accesses will
  * allocate per-page pv entries until repromotion occurs, thereby
  * exacerbating the shortage of free pv entries.
  */
 static vm_page_t
 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
 {
 
 	panic("RISCVTODO: reclaim_pv_chunk");
 }
 
 /*
  * free the pv_entry back to the free list
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
 	struct pv_chunk *pc;
 	int idx, field, bit;
 
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
 	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
 	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
 	pc = pv_to_chunk(pv);
 	idx = pv - &pc->pc_pventry[0];
 	field = idx / 64;
 	bit = idx % 64;
 	pc->pc_map[field] |= 1ul << bit;
 	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
 	    pc->pc_map[2] != PC_FREE2) {
 		/* 98% of the time, pc is already at the head of the list. */
 		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 		}
 		return;
 	}
 	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 	free_pv_chunk(pc);
 }
 
 static void
 free_pv_chunk(struct pv_chunk *pc)
 {
 	vm_page_t m;
 
 	mtx_lock(&pv_chunks_mutex);
  	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 	mtx_unlock(&pv_chunks_mutex);
 	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
 	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
 	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
 	/* entire chunk is free, return it */
 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 #if 0 /* TODO: For minidump */
 	dump_drop_page(m->phys_addr);
 #endif
 	vm_page_unwire(m, PQ_NONE);
 	vm_page_free(m);
 }
 
 /*
  * Returns a new PV entry, allocating a new PV chunk from the system when
  * needed.  If this PV chunk allocation fails and a PV list lock pointer was
  * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
  * returned.
  *
  * The given PV list lock may be released.
  */
 static pv_entry_t
 get_pv_entry(pmap_t pmap, struct rwlock **lockp)
 {
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
 	vm_page_t m;
 
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
 retry:
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
 		for (field = 0; field < _NPCM; field++) {
 			if (pc->pc_map[field]) {
 				bit = ffsl(pc->pc_map[field]) - 1;
 				break;
 			}
 		}
 		if (field < _NPCM) {
 			pv = &pc->pc_pventry[field * 64 + bit];
 			pc->pc_map[field] &= ~(1ul << bit);
 			/* If this was the last item, move it to tail */
 			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
 			    pc->pc_map[2] == 0) {
 				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
 				    pc_list);
 			}
 			PV_STAT(atomic_add_long(&pv_entry_count, 1));
 			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
 			return (pv);
 		}
 	}
 	/* No free items, allocate another chunk */
 	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED);
 	if (m == NULL) {
 		if (lockp == NULL) {
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
 		m = reclaim_pv_chunk(pmap, lockp);
 		if (m == NULL)
 			goto retry;
 	}
 	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
 	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
 #if 0 /* TODO: This is for minidump */
 	dump_add_page(m->phys_addr);
 #endif
 	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 	pc->pc_pmap = pmap;
 	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
 	pc->pc_map[1] = PC_FREE1;
 	pc->pc_map[2] = PC_FREE2;
 	mtx_lock(&pv_chunks_mutex);
 	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 	mtx_unlock(&pv_chunks_mutex);
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(atomic_add_long(&pv_entry_count, 1));
 	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
 	return (pv);
 }
 
 /*
  * Ensure that the number of spare PV entries in the specified pmap meets or
  * exceeds the given count, "needed".
  *
  * The given PV list lock may be released.
  */
 static void
 reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
 {
 	struct pch new_tail;
 	struct pv_chunk *pc;
 	vm_page_t m;
 	int avail, free;
 	bool reclaimed;
 
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
 
 	/*
 	 * Newly allocated PV chunks must be stored in a private list until
 	 * the required number of PV chunks have been allocated.  Otherwise,
 	 * reclaim_pv_chunk() could recycle one of these chunks.  In
 	 * contrast, these chunks must be added to the pmap upon allocation.
 	 */
 	TAILQ_INIT(&new_tail);
 retry:
 	avail = 0;
 	TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
 		bit_count((bitstr_t *)pc->pc_map, 0,
 		    sizeof(pc->pc_map) * NBBY, &free);
 		if (free == 0)
 			break;
 		avail += free;
 		if (avail >= needed)
 			break;
 	}
 	for (reclaimed = false; avail < needed; avail += _NPCPV) {
 		m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
 		    VM_ALLOC_WIRED);
 		if (m == NULL) {
 			m = reclaim_pv_chunk(pmap, lockp);
 			if (m == NULL)
 				goto retry;
 			reclaimed = true;
 		}
 		/* XXX PV STATS */
 #if 0
 		dump_add_page(m->phys_addr);
 #endif
 		pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 		pc->pc_pmap = pmap;
 		pc->pc_map[0] = PC_FREE0;
 		pc->pc_map[1] = PC_FREE1;
 		pc->pc_map[2] = PC_FREE2;
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 		TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
 
 		/*
 		 * The reclaim might have freed a chunk from the current pmap.
 		 * If that chunk contained available entries, we need to
 		 * re-count the number of available entries.
 		 */
 		if (reclaimed)
 			goto retry;
 	}
 	if (!TAILQ_EMPTY(&new_tail)) {
 		mtx_lock(&pv_chunks_mutex);
 		TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
 		mtx_unlock(&pv_chunks_mutex);
 	}
 }
 
 /*
  * First find and then remove the pv entry for the specified pmap and virtual
  * address from the specified pv list.  Returns the pv entry if found and NULL
  * otherwise.  This operation can be performed on pv lists for either 4KB or
  * 2MB page mappings.
  */
 static __inline pv_entry_t
 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 			pvh->pv_gen++;
 			break;
 		}
 	}
 	return (pv);
 }
 
 /*
  * First find and then destroy the pv entry for the specified pmap and virtual
  * address.  This operation can be performed on pv lists for either 4KB or 2MB
  * page mappings.
  */
 static void
 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t pv;
 
 	pv = pmap_pvh_remove(pvh, pmap, va);
 
 	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found for %#lx", va));
 	free_pv_entry(pmap, pv);
 }
 
 /*
  * Conditionally create the PV entry for a 4KB page mapping if the required
  * memory can be allocated without resorting to reclamation.
  */
 static boolean_t
 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
     struct rwlock **lockp)
 {
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/* Pass NULL instead of the lock pointer to disable reclamation. */
 	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
 		pv->pv_va = va;
 		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		m->md.pv_gen++;
 		return (TRUE);
 	} else
 		return (FALSE);
 }
 
 /*
  * After demotion from a 2MB page mapping to 512 4KB page mappings,
  * destroy the pv entry for the 2MB page mapping and reinstantiate the pv
  * entries for each of the 4KB page mappings.
  */
 static void __unused
 pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
     struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	struct pv_chunk *pc;
 	pv_entry_t pv;
 	vm_page_t m;
 	vm_offset_t va_last;
 	int bit, field;
 
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 
 	/*
 	 * Transfer the 2mpage's pv entry for this mapping to the first
 	 * page's pv list.  Once this transfer begins, the pv list lock
 	 * must not be released until the last pv entry is reinstantiated.
 	 */
 	pvh = pa_to_pvh(pa);
 	va &= ~L2_OFFSET;
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
 	m = PHYS_TO_VM_PAGE(pa);
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 	m->md.pv_gen++;
 	/* Instantiate the remaining 511 pv entries. */
 	va_last = va + L2_SIZE - PAGE_SIZE;
 	for (;;) {
 		pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 		KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
 		    pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare"));
 		for (field = 0; field < _NPCM; field++) {
 			while (pc->pc_map[field] != 0) {
 				bit = ffsl(pc->pc_map[field]) - 1;
 				pc->pc_map[field] &= ~(1ul << bit);
 				pv = &pc->pc_pventry[field * 64 + bit];
 				va += PAGE_SIZE;
 				pv->pv_va = va;
 				m++;
 				KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 			    ("pmap_pv_demote_l2: page %p is not managed", m));
 				TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 				m->md.pv_gen++;
 				if (va == va_last)
 					goto out;
 			}
 		}
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 	}
 out:
 	if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 	}
 	/* XXX PV stats */
 }
 
 #if VM_NRESERVLEVEL > 0
 static void
 pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
     struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_page_t m;
 	vm_offset_t va_last;
 
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	KASSERT((va & L2_OFFSET) == 0,
 	    ("pmap_pv_promote_l2: misaligned va %#lx", va));
 
 	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 
 	m = PHYS_TO_VM_PAGE(pa);
 	pv = pmap_pvh_remove(&m->md, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv for %#lx not found", va));
 	pvh = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 	pvh->pv_gen++;
 
 	va_last = va + L2_SIZE - PAGE_SIZE;
 	do {
 		m++;
 		va += PAGE_SIZE;
 		pmap_pvh_free(&m->md, pmap, va);
 	} while (va < va_last);
 }
 #endif /* VM_NRESERVLEVEL > 0 */
 
 /*
  * Create the PV entry for a 2MB page mapping.  Always returns true unless the
  * flag PMAP_ENTER_NORECLAIM is specified.  If that flag is specified, returns
  * false if the PV entry cannot be allocated without resorting to reclamation.
  */
 static bool
 pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags,
     struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_paddr_t pa;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/* Pass NULL instead of the lock pointer to disable reclamation. */
 	if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ?
 	    NULL : lockp)) == NULL)
 		return (false);
 	pv->pv_va = va;
 	pa = PTE_TO_PHYS(l2e);
 	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 	pvh = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 	pvh->pv_gen++;
 	return (true);
 }
 
 static void
 pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
 {
 	pt_entry_t newl2, oldl2;
 	vm_page_t ml3;
 	vm_paddr_t ml3pa;
 
 	KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va));
 	KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap));
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	ml3 = pmap_remove_pt_page(pmap, va);
 	if (ml3 == NULL)
 		panic("pmap_remove_kernel_l2: Missing pt page");
 
 	ml3pa = VM_PAGE_TO_PHYS(ml3);
 	newl2 = ml3pa | PTE_V;
 
 	/*
 	 * Initialize the page table page.
 	 */
 	pagezero((void *)PHYS_TO_DMAP(ml3pa));
 
 	/*
 	 * Demote the mapping.
 	 */
 	oldl2 = pmap_load_store(l2, newl2);
 	KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx",
 	    __func__, l2, oldl2));
 }
 
 /*
  * pmap_remove_l2: Do the things to unmap a level 2 superpage.
  */
 static int
 pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
     pd_entry_t l1e, struct spglist *free, struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	pt_entry_t oldl2;
 	vm_offset_t eva, va;
 	vm_page_t m, ml3;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned"));
 	oldl2 = pmap_load_clear(l2);
 	KASSERT((oldl2 & PTE_RWX) != 0,
 	    ("pmap_remove_l2: L2e %lx is not a superpage mapping", oldl2));
 
 	/*
 	 * The sfence.vma documentation states that it is sufficient to specify
 	 * a single address within a superpage mapping.  However, since we do
 	 * not perform any invalidation upon promotion, TLBs may still be
 	 * caching 4KB mappings within the superpage, so we must invalidate the
 	 * entire range.
 	 */
 	pmap_invalidate_range(pmap, sva, sva + L2_SIZE);
 	if ((oldl2 & PTE_SW_WIRED) != 0)
 		pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE;
 	pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE);
 	if ((oldl2 & PTE_SW_MANAGED) != 0) {
 		CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(oldl2));
 		pvh = pa_to_pvh(PTE_TO_PHYS(oldl2));
 		pmap_pvh_free(pvh, pmap, sva);
 		eva = sva + L2_SIZE;
 		for (va = sva, m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(oldl2));
 		    va < eva; va += PAGE_SIZE, m++) {
 			if ((oldl2 & PTE_D) != 0)
 				vm_page_dirty(m);
 			if ((oldl2 & PTE_A) != 0)
 				vm_page_aflag_set(m, PGA_REFERENCED);
 			if (TAILQ_EMPTY(&m->md.pv_list) &&
 			    TAILQ_EMPTY(&pvh->pv_list))
 				vm_page_aflag_clear(m, PGA_WRITEABLE);
 		}
 	}
 	if (pmap == kernel_pmap) {
 		pmap_remove_kernel_l2(pmap, l2, sva);
 	} else {
 		ml3 = pmap_remove_pt_page(pmap, sva);
 		if (ml3 != NULL) {
 			pmap_resident_count_dec(pmap, 1);
 			KASSERT(ml3->wire_count == Ln_ENTRIES,
 			    ("pmap_remove_l2: l3 page wire count error"));
 			ml3->wire_count = 1;
 			vm_page_unwire_noq(ml3);
 			pmap_add_delayed_free_list(ml3, free, FALSE);
 		}
 	}
 	return (pmap_unuse_pt(pmap, sva, l1e, free));
 }
 
 /*
  * pmap_remove_l3: do the things to unmap a page in a process
  */
 static int
 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 
     pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
 {
 	pt_entry_t old_l3;
 	vm_paddr_t phys;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	old_l3 = pmap_load_clear(l3);
 	pmap_invalidate_page(pmap, va);
 	if (old_l3 & PTE_SW_WIRED)
 		pmap->pm_stats.wired_count -= 1;
 	pmap_resident_count_dec(pmap, 1);
 	if (old_l3 & PTE_SW_MANAGED) {
 		phys = PTE_TO_PHYS(old_l3);
 		m = PHYS_TO_VM_PAGE(phys);
 		if ((old_l3 & PTE_D) != 0)
 			vm_page_dirty(m);
 		if (old_l3 & PTE_A)
 			vm_page_aflag_set(m, PGA_REFERENCED);
 		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 		pmap_pvh_free(&m->md, pmap, va);
 	}
 
 	return (pmap_unuse_pt(pmap, va, l2e, free));
 }
 
 /*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
  *	rounded to the page size.
  */
 void
 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	struct spglist free;
 	struct rwlock *lock;
 	vm_offset_t va, va_next;
 	pd_entry_t *l1, *l2, l2e;
 	pt_entry_t *l3;
 
 	/*
 	 * Perform an unsynchronized read.  This is, however, safe.
 	 */
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
 	SLIST_INIT(&free);
 
 	rw_rlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 
 	lock = NULL;
 	for (; sva < eva; sva = va_next) {
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
 		l1 = pmap_l1(pmap, sva);
 		if (pmap_load(l1) == 0) {
 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		/*
 		 * Calculate index for next page table.
 		 */
 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < sva)
 			va_next = eva;
 
 		l2 = pmap_l1_to_l2(l1, sva);
 		if (l2 == NULL)
 			continue;
 		if ((l2e = pmap_load(l2)) == 0)
 			continue;
 		if ((l2e & PTE_RWX) != 0) {
 			if (sva + L2_SIZE == va_next && eva >= va_next) {
 				(void)pmap_remove_l2(pmap, l2, sva,
 				    pmap_load(l1), &free, &lock);
 				continue;
 			} else if (!pmap_demote_l2_locked(pmap, l2, sva,
 			    &lock)) {
 				/*
 				 * The large page mapping was destroyed.
 				 */
 				continue;
 			}
 			l2e = pmap_load(l2);
 		}
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current page table page, or to the end of the
 		 * range being removed.
 		 */
 		if (va_next > eva)
 			va_next = eva;
 
 		va = va_next;
 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 		    sva += L3_SIZE) {
 			if (pmap_load(l3) == 0) {
 				if (va != va_next) {
 					pmap_invalidate_range(pmap, va, sva);
 					va = va_next;
 				}
 				continue;
 			}
 			if (va == va_next)
 				va = sva;
 			if (pmap_remove_l3(pmap, l3, sva, l2e, &free, &lock)) {
 				sva += L3_SIZE;
 				break;
 			}
 		}
 		if (va != va_next)
 			pmap_invalidate_range(pmap, va, sva);
 	}
 	if (lock != NULL)
 		rw_wunlock(lock);
 	rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, false);
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  */
 
 void
 pmap_remove_all(vm_page_t m)
 {
 	struct spglist free;
 	struct md_page *pvh;
 	pmap_t pmap;
 	pt_entry_t *l3, l3e;
 	pd_entry_t *l2, l2e;
 	pv_entry_t pv;
 	vm_offset_t va;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_remove_all: page %p is not managed", m));
 	SLIST_INIT(&free);
 	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
 
 	rw_wlock(&pvh_global_lock);
 	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		va = pv->pv_va;
 		l2 = pmap_l2(pmap, va);
 		(void)pmap_demote_l2(pmap, l2, va);
 		PMAP_UNLOCK(pmap);
 	}
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pmap_resident_count_dec(pmap, 1);
 		l2 = pmap_l2(pmap, pv->pv_va);
 		KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
 		l2e = pmap_load(l2);
 
 		KASSERT((l2e & PTE_RX) == 0,
 		    ("pmap_remove_all: found a superpage in %p's pv list", m));
 
 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
 		l3e = pmap_load_clear(l3);
 		pmap_invalidate_page(pmap, pv->pv_va);
 		if (l3e & PTE_SW_WIRED)
 			pmap->pm_stats.wired_count--;
 		if ((l3e & PTE_A) != 0)
 			vm_page_aflag_set(m, PGA_REFERENCED);
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if ((l3e & PTE_D) != 0)
 			vm_page_dirty(m);
 		pmap_unuse_pt(pmap, pv->pv_va, pmap_load(l2), &free);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 		m->md.pv_gen++;
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	rw_wunlock(&pvh_global_lock);
 	vm_page_free_pages_toq(&free, false);
 }
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  */
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	pd_entry_t *l1, *l2, l2e;
 	pt_entry_t *l3, l3e, mask;
 	vm_page_t m;
 	vm_paddr_t pa;
 	vm_offset_t va, va_next;
 	bool anychanged, pv_lists_locked;
 
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
 	if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
 	    (VM_PROT_WRITE | VM_PROT_EXECUTE))
 		return;
 
 	anychanged = false;
 	pv_lists_locked = false;
 	mask = 0;
 	if ((prot & VM_PROT_WRITE) == 0)
 		mask |= PTE_W | PTE_D;
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		mask |= PTE_X;
 resume:
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = va_next) {
 		l1 = pmap_l1(pmap, sva);
 		if (pmap_load(l1) == 0) {
 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < sva)
 			va_next = eva;
 
 		l2 = pmap_l1_to_l2(l1, sva);
 		if (l2 == NULL || (l2e = pmap_load(l2)) == 0)
 			continue;
 		if ((l2e & PTE_RWX) != 0) {
 			if (sva + L2_SIZE == va_next && eva >= va_next) {
 retryl2:
 				if ((l2e & (PTE_SW_MANAGED | PTE_D)) ==
 				    (PTE_SW_MANAGED | PTE_D)) {
 					pa = PTE_TO_PHYS(l2e);
 					for (va = sva, m = PHYS_TO_VM_PAGE(pa);
 					    va < va_next; m++, va += PAGE_SIZE)
 						vm_page_dirty(m);
 				}
 				if (!atomic_fcmpset_long(l2, &l2e, l2e & ~mask))
 					goto retryl2;
 				anychanged = true;
 			} else {
 				if (!pv_lists_locked) {
 					pv_lists_locked = true;
 					if (!rw_try_rlock(&pvh_global_lock)) {
 						if (anychanged)
 							pmap_invalidate_all(
 							    pmap);
 						PMAP_UNLOCK(pmap);
 						rw_rlock(&pvh_global_lock);
 						goto resume;
 					}
 				}
 				if (!pmap_demote_l2(pmap, l2, sva)) {
 					/*
 					 * The large page mapping was destroyed.
 					 */
 					continue;
 				}
 			}
 		}
 
 		if (va_next > eva)
 			va_next = eva;
 
 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 		    sva += L3_SIZE) {
 			l3e = pmap_load(l3);
 retryl3:
 			if ((l3e & PTE_V) == 0)
 				continue;
 			if ((prot & VM_PROT_WRITE) == 0 &&
 			    (l3e & (PTE_SW_MANAGED | PTE_D)) ==
 			    (PTE_SW_MANAGED | PTE_D)) {
 				m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(l3e));
 				vm_page_dirty(m);
 			}
 			if (!atomic_fcmpset_long(l3, &l3e, l3e & ~mask))
 				goto retryl3;
 			anychanged = true;
 		}
 	}
 	if (anychanged)
 		pmap_invalidate_all(pmap);
 	if (pv_lists_locked)
 		rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 int
 pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype)
 {
 	pd_entry_t *l2, l2e;
 	pt_entry_t bits, *pte, oldpte;
 	int rv;
 
 	rv = 0;
 	PMAP_LOCK(pmap);
 	l2 = pmap_l2(pmap, va);
 	if (l2 == NULL || ((l2e = pmap_load(l2)) & PTE_V) == 0)
 		goto done;
 	if ((l2e & PTE_RWX) == 0) {
 		pte = pmap_l2_to_l3(l2, va);
 		if (pte == NULL || ((oldpte = pmap_load(pte) & PTE_V)) == 0)
 			goto done;
 	} else {
 		pte = l2;
 		oldpte = l2e;
 	}
 
 	if ((pmap != kernel_pmap && (oldpte & PTE_U) == 0) ||
 	    (ftype == VM_PROT_WRITE && (oldpte & PTE_W) == 0) ||
 	    (ftype == VM_PROT_EXECUTE && (oldpte & PTE_X) == 0) ||
 	    (ftype == VM_PROT_READ && (oldpte & PTE_R) == 0))
 		goto done;
 
 	bits = PTE_A;
 	if (ftype == VM_PROT_WRITE)
 		bits |= PTE_D;
 
 	/*
 	 * Spurious faults can occur if the implementation caches invalid
 	 * entries in the TLB, or if simultaneous accesses on multiple CPUs
 	 * race with each other.
 	 */
 	if ((oldpte & bits) != bits)
 		pmap_store_bits(pte, bits);
 	sfence_vma();
 	rv = 1;
 done:
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
 
 static bool
 pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va)
 {
 	struct rwlock *lock;
 	bool rv;
 
 	lock = NULL;
 	rv = pmap_demote_l2_locked(pmap, l2, va, &lock);
 	if (lock != NULL)
 		rw_wunlock(lock);
 	return (rv);
 }
 
 /*
  * Tries to demote a 2MB page mapping.  If demotion fails, the 2MB page
  * mapping is invalidated.
  */
 static bool
 pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
     struct rwlock **lockp)
 {
 	struct spglist free;
 	vm_page_t mpte;
 	pd_entry_t newl2, oldl2;
 	pt_entry_t *firstl3, newl3;
 	vm_paddr_t mptepa;
 	int i;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	oldl2 = pmap_load(l2);
 	KASSERT((oldl2 & PTE_RWX) != 0,
 	    ("pmap_demote_l2_locked: oldl2 is not a leaf entry"));
 	if ((oldl2 & PTE_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) ==
 	    NULL) {
 		if ((oldl2 & PTE_A) == 0 || (mpte = vm_page_alloc(NULL,
 		    pmap_l2_pindex(va), (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT :
 		    VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) ==
 		    NULL) {
 			SLIST_INIT(&free);
 			(void)pmap_remove_l2(pmap, l2, va & ~L2_OFFSET,
 			    pmap_load(pmap_l1(pmap, va)), &free, lockp);
 			vm_page_free_pages_toq(&free, true);
 			CTR2(KTR_PMAP, "pmap_demote_l2_locked: "
 			    "failure for va %#lx in pmap %p", va, pmap);
 			return (false);
 		}
 		if (va < VM_MAXUSER_ADDRESS)
 			pmap_resident_count_inc(pmap, 1);
 	}
 	mptepa = VM_PAGE_TO_PHYS(mpte);
 	firstl3 = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
 	newl2 = ((mptepa / PAGE_SIZE) << PTE_PPN0_S) | PTE_V;
 	KASSERT((oldl2 & PTE_A) != 0,
 	    ("pmap_demote_l2_locked: oldl2 is missing PTE_A"));
 	KASSERT((oldl2 & (PTE_D | PTE_W)) != PTE_W,
 	    ("pmap_demote_l2_locked: oldl2 is missing PTE_D"));
 	newl3 = oldl2;
 
 	/*
 	 * If the page table page is new, initialize it.
 	 */
 	if (mpte->wire_count == 1) {
 		mpte->wire_count = Ln_ENTRIES;
 		for (i = 0; i < Ln_ENTRIES; i++)
 			pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S));
 	}
 	KASSERT(PTE_TO_PHYS(pmap_load(firstl3)) == PTE_TO_PHYS(newl3),
 	    ("pmap_demote_l2_locked: firstl3 and newl3 map different physical "
 	    "addresses"));
 
 	/*
 	 * If the mapping has changed attributes, update the page table
 	 * entries.
 	 */
 	if ((pmap_load(firstl3) & PTE_PROMOTE) != (newl3 & PTE_PROMOTE))
 		for (i = 0; i < Ln_ENTRIES; i++)
 			pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S));
 
 	/*
 	 * The spare PV entries must be reserved prior to demoting the
 	 * mapping, that is, prior to changing the L2 entry.  Otherwise, the
 	 * state of the L2 entry and the PV lists will be inconsistent, which
 	 * can result in reclaim_pv_chunk() attempting to remove a PV entry from
 	 * the wrong PV list and pmap_pv_demote_l2() failing to find the
 	 * expected PV entry for the 2MB page mapping that is being demoted.
 	 */
 	if ((oldl2 & PTE_SW_MANAGED) != 0)
 		reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
 
 	/*
 	 * Demote the mapping.
 	 */
 	pmap_store(l2, newl2);
 
 	/*
 	 * Demote the PV entry.
 	 */
 	if ((oldl2 & PTE_SW_MANAGED) != 0)
 		pmap_pv_demote_l2(pmap, va, PTE_TO_PHYS(oldl2), lockp);
 
 	atomic_add_long(&pmap_l2_demotions, 1);
 	CTR2(KTR_PMAP, "pmap_demote_l2_locked: success for va %#lx in pmap %p",
 	    va, pmap);
 	return (true);
 }
 
 #if VM_NRESERVLEVEL > 0
 static void
 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
     struct rwlock **lockp)
 {
 	pt_entry_t *firstl3, *l3;
 	vm_paddr_t pa;
 	vm_page_t ml3;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	va &= ~L2_OFFSET;
 	KASSERT((pmap_load(l2) & PTE_RWX) == 0,
 	    ("pmap_promote_l2: invalid l2 entry %p", l2));
 
 	firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2)));
 	pa = PTE_TO_PHYS(pmap_load(firstl3));
 	if ((pa & L2_OFFSET) != 0) {
 		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p",
 		    va, pmap);
 		atomic_add_long(&pmap_l2_p_failures, 1);
 		return;
 	}
 
 	pa += PAGE_SIZE;
 	for (l3 = firstl3 + 1; l3 < firstl3 + Ln_ENTRIES; l3++) {
 		if (PTE_TO_PHYS(pmap_load(l3)) != pa) {
 			CTR2(KTR_PMAP,
 			    "pmap_promote_l2: failure for va %#lx pmap %p",
 			    va, pmap);
 			atomic_add_long(&pmap_l2_p_failures, 1);
 			return;
 		}
 		if ((pmap_load(l3) & PTE_PROMOTE) !=
 		    (pmap_load(firstl3) & PTE_PROMOTE)) {
 			CTR2(KTR_PMAP,
 			    "pmap_promote_l2: failure for va %#lx pmap %p",
 			    va, pmap);
 			atomic_add_long(&pmap_l2_p_failures, 1);
 			return;
 		}
 		pa += PAGE_SIZE;
 	}
 
 	ml3 = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
 	KASSERT(ml3->pindex == pmap_l2_pindex(va),
 	    ("pmap_promote_l2: page table page's pindex is wrong"));
 	if (pmap_insert_pt_page(pmap, ml3)) {
 		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p",
 		    va, pmap);
 		atomic_add_long(&pmap_l2_p_failures, 1);
 		return;
 	}
 
 	if ((pmap_load(firstl3) & PTE_SW_MANAGED) != 0)
 		pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(pmap_load(firstl3)),
 		    lockp);
 
 	pmap_store(l2, pmap_load(firstl3));
 
 	atomic_add_long(&pmap_l2_promotions, 1);
 	CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
 	    pmap);
 }
 #endif
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  */
 int
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     u_int flags, int8_t psind)
 {
 	struct rwlock *lock;
 	pd_entry_t *l1, *l2, l2e;
 	pt_entry_t new_l3, orig_l3;
 	pt_entry_t *l3;
 	pv_entry_t pv;
 	vm_paddr_t opa, pa, l2_pa, l3_pa;
 	vm_page_t mpte, om, l2_m, l3_m;
 	pt_entry_t entry;
 	pn_t l2_pn, l3_pn, pn;
 	int rv;
 	bool nosleep;
 
 	va = trunc_page(va);
 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
 	pa = VM_PAGE_TO_PHYS(m);
 	pn = (pa / PAGE_SIZE);
 
 	new_l3 = PTE_V | PTE_R | PTE_A;
 	if (prot & VM_PROT_EXECUTE)
 		new_l3 |= PTE_X;
 	if (flags & VM_PROT_WRITE)
 		new_l3 |= PTE_D;
 	if (prot & VM_PROT_WRITE)
 		new_l3 |= PTE_W;
 	if (va < VM_MAX_USER_ADDRESS)
 		new_l3 |= PTE_U;
 
 	new_l3 |= (pn << PTE_PPN0_S);
 	if ((flags & PMAP_ENTER_WIRED) != 0)
 		new_l3 |= PTE_SW_WIRED;
 
 	/*
 	 * Set modified bit gratuitously for writeable mappings if
 	 * the page is unmanaged. We do not want to take a fault
 	 * to do the dirty bit accounting for these mappings.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) != 0) {
 		if (prot & VM_PROT_WRITE)
 			new_l3 |= PTE_D;
 	} else
 		new_l3 |= PTE_SW_MANAGED;
 
 	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
 
 	lock = NULL;
 	mpte = NULL;
 	rw_rlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	if (psind == 1) {
 		/* Assert the required virtual and physical alignment. */
 		KASSERT((va & L2_OFFSET) == 0,
 		    ("pmap_enter: va %#lx unaligned", va));
 		KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
 		rv = pmap_enter_l2(pmap, va, new_l3, flags, m, &lock);
 		goto out;
 	}
 
 	l2 = pmap_l2(pmap, va);
 	if (l2 != NULL && ((l2e = pmap_load(l2)) & PTE_V) != 0 &&
 	    ((l2e & PTE_RWX) == 0 || pmap_demote_l2_locked(pmap, l2,
 	    va, &lock))) {
 		l3 = pmap_l2_to_l3(l2, va);
 		if (va < VM_MAXUSER_ADDRESS) {
 			mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
 			mpte->wire_count++;
 		}
 	} else if (va < VM_MAXUSER_ADDRESS) {
 		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
 		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
 		if (mpte == NULL && nosleep) {
 			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
 			if (lock != NULL)
 				rw_wunlock(lock);
 			rw_runlock(&pvh_global_lock);
 			PMAP_UNLOCK(pmap);
 			return (KERN_RESOURCE_SHORTAGE);
 		}
 		l3 = pmap_l3(pmap, va);
 	} else {
 		l3 = pmap_l3(pmap, va);
 		/* TODO: This is not optimal, but should mostly work */
 		if (l3 == NULL) {
 			if (l2 == NULL) {
 				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 				    VM_ALLOC_ZERO);
 				if (l2_m == NULL)
 					panic("pmap_enter: l2 pte_m == NULL");
 				if ((l2_m->flags & PG_ZERO) == 0)
 					pmap_zero_page(l2_m);
 
 				l2_pa = VM_PAGE_TO_PHYS(l2_m);
 				l2_pn = (l2_pa / PAGE_SIZE);
 
 				l1 = pmap_l1(pmap, va);
 				entry = (PTE_V);
 				entry |= (l2_pn << PTE_PPN0_S);
 				pmap_store(l1, entry);
 				pmap_distribute_l1(pmap, pmap_l1_index(va), entry);
 				l2 = pmap_l1_to_l2(l1, va);
 			}
 
 			l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 			    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 			if (l3_m == NULL)
 				panic("pmap_enter: l3 pte_m == NULL");
 			if ((l3_m->flags & PG_ZERO) == 0)
 				pmap_zero_page(l3_m);
 
 			l3_pa = VM_PAGE_TO_PHYS(l3_m);
 			l3_pn = (l3_pa / PAGE_SIZE);
 			entry = (PTE_V);
 			entry |= (l3_pn << PTE_PPN0_S);
 			pmap_store(l2, entry);
 			l3 = pmap_l2_to_l3(l2, va);
 		}
 		pmap_invalidate_page(pmap, va);
 	}
 
 	orig_l3 = pmap_load(l3);
 	opa = PTE_TO_PHYS(orig_l3);
 	pv = NULL;
 
 	/*
 	 * Is the specified virtual address already mapped?
 	 */
 	if ((orig_l3 & PTE_V) != 0) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT page will be also.
 		 */
 		if ((flags & PMAP_ENTER_WIRED) != 0 &&
 		    (orig_l3 & PTE_SW_WIRED) == 0)
 			pmap->pm_stats.wired_count++;
 		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
 		    (orig_l3 & PTE_SW_WIRED) != 0)
 			pmap->pm_stats.wired_count--;
 
 		/*
 		 * Remove the extra PT page reference.
 		 */
 		if (mpte != NULL) {
 			mpte->wire_count--;
 			KASSERT(mpte->wire_count > 0,
 			    ("pmap_enter: missing reference to page table page,"
 			     " va: 0x%lx", va));
 		}
 
 		/*
 		 * Has the physical page changed?
 		 */
 		if (opa == pa) {
 			/*
 			 * No, might be a protection or wiring change.
 			 */
 			if ((orig_l3 & PTE_SW_MANAGED) != 0 &&
 			    (new_l3 & PTE_W) != 0)
 				vm_page_aflag_set(m, PGA_WRITEABLE);
 			goto validate;
 		}
 
 		/*
 		 * The physical page has changed.  Temporarily invalidate
 		 * the mapping.  This ensures that all threads sharing the
 		 * pmap keep a consistent view of the mapping, which is
 		 * necessary for the correct handling of COW faults.  It
 		 * also permits reuse of the old mapping's PV entry,
 		 * avoiding an allocation.
 		 *
 		 * For consistency, handle unmanaged mappings the same way.
 		 */
 		orig_l3 = pmap_load_clear(l3);
 		KASSERT(PTE_TO_PHYS(orig_l3) == opa,
 		    ("pmap_enter: unexpected pa update for %#lx", va));
 		if ((orig_l3 & PTE_SW_MANAGED) != 0) {
 			om = PHYS_TO_VM_PAGE(opa);
 
 			/*
 			 * The pmap lock is sufficient to synchronize with
 			 * concurrent calls to pmap_page_test_mappings() and
 			 * pmap_ts_referenced().
 			 */
 			if ((orig_l3 & PTE_D) != 0)
 				vm_page_dirty(om);
 			if ((orig_l3 & PTE_A) != 0)
 				vm_page_aflag_set(om, PGA_REFERENCED);
 			CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
 			pv = pmap_pvh_remove(&om->md, pmap, va);
 			KASSERT(pv != NULL,
 			    ("pmap_enter: no PV entry for %#lx", va));
 			if ((new_l3 & PTE_SW_MANAGED) == 0)
 				free_pv_entry(pmap, pv);
 			if ((om->aflags & PGA_WRITEABLE) != 0 &&
 			    TAILQ_EMPTY(&om->md.pv_list))
 				vm_page_aflag_clear(om, PGA_WRITEABLE);
 		}
 		pmap_invalidate_page(pmap, va);
 		orig_l3 = 0;
 	} else {
 		/*
 		 * Increment the counters.
 		 */
 		if ((new_l3 & PTE_SW_WIRED) != 0)
 			pmap->pm_stats.wired_count++;
 		pmap_resident_count_inc(pmap, 1);
 	}
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((new_l3 & PTE_SW_MANAGED) != 0) {
 		if (pv == NULL) {
 			pv = get_pv_entry(pmap, &lock);
 			pv->pv_va = va;
 		}
 		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		m->md.pv_gen++;
 		if ((new_l3 & PTE_W) != 0)
 			vm_page_aflag_set(m, PGA_WRITEABLE);
 	}
 
 validate:
 	/*
 	 * Sync the i-cache on all harts before updating the PTE
 	 * if the new PTE is executable.
 	 */
 	if (prot & VM_PROT_EXECUTE)
 		pmap_sync_icache(pmap, va, PAGE_SIZE);
 
 	/*
 	 * Update the L3 entry.
 	 */
 	if (orig_l3 != 0) {
 		orig_l3 = pmap_load_store(l3, new_l3);
 		pmap_invalidate_page(pmap, va);
 		KASSERT(PTE_TO_PHYS(orig_l3) == pa,
 		    ("pmap_enter: invalid update"));
 		if ((orig_l3 & (PTE_D | PTE_SW_MANAGED)) ==
 		    (PTE_D | PTE_SW_MANAGED))
 			vm_page_dirty(m);
 	} else {
 		pmap_store(l3, new_l3);
 	}
 
 #if VM_NRESERVLEVEL > 0
 	if (mpte != NULL && mpte->wire_count == Ln_ENTRIES &&
 	    pmap_ps_enabled(pmap) &&
 	    (m->flags & PG_FICTITIOUS) == 0 &&
 	    vm_reserv_level_iffullpop(m) == 0)
 		pmap_promote_l2(pmap, l2, va, &lock);
 #endif
 
 	rv = KERN_SUCCESS;
 out:
 	if (lock != NULL)
 		rw_wunlock(lock);
 	rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
 
 /*
  * Tries to create a read- and/or execute-only 2MB page mapping.  Returns true
  * if successful.  Returns false if (1) a page table page cannot be allocated
  * without sleeping, (2) a mapping already exists at the specified virtual
  * address, or (3) a PV entry cannot be allocated without reclaiming another
  * PV entry.
  */
 static bool
 pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     struct rwlock **lockp)
 {
 	pd_entry_t new_l2;
 	pn_t pn;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE;
 	new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V);
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		new_l2 |= PTE_SW_MANAGED;
 	if ((prot & VM_PROT_EXECUTE) != 0)
 		new_l2 |= PTE_X;
 	if (va < VM_MAXUSER_ADDRESS)
 		new_l2 |= PTE_U;
 	return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
 	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
 	    KERN_SUCCESS);
 }
 
 /*
  * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
  * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
  * otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
  * a mapping already exists at the specified virtual address.  Returns
  * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table
  * page allocation failed.  Returns KERN_RESOURCE_SHORTAGE if
  * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
  *
  * The parameter "m" is only used when creating a managed, writeable mapping.
  */
 static int
 pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
     vm_page_t m, struct rwlock **lockp)
 {
 	struct spglist free;
 	pd_entry_t *l2, *l3, oldl2;
 	vm_offset_t sva;
 	vm_page_t l2pg, mt;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	if ((l2pg = pmap_alloc_l2(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
 	    NULL : lockp)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p",
 		    va, pmap);
 		return (KERN_RESOURCE_SHORTAGE);
 	}
 
 	l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg));
 	l2 = &l2[pmap_l2_index(va)];
 	if ((oldl2 = pmap_load(l2)) != 0) {
 		KASSERT(l2pg->wire_count > 1,
 		    ("pmap_enter_l2: l2pg's wire count is too low"));
 		if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
 			l2pg->wire_count--;
 			CTR2(KTR_PMAP,
 			    "pmap_enter_l2: failure for va %#lx in pmap %p",
 			    va, pmap);
 			return (KERN_FAILURE);
 		}
 		SLIST_INIT(&free);
 		if ((oldl2 & PTE_RWX) != 0)
 			(void)pmap_remove_l2(pmap, l2, va,
 			    pmap_load(pmap_l1(pmap, va)), &free, lockp);
 		else
 			for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) {
 				l3 = pmap_l2_to_l3(l2, sva);
 				if ((pmap_load(l3) & PTE_V) != 0 &&
 				    pmap_remove_l3(pmap, l3, sva, oldl2, &free,
 				    lockp) != 0)
 					break;
 			}
 		vm_page_free_pages_toq(&free, true);
 		if (va >= VM_MAXUSER_ADDRESS) {
 			mt = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
 			if (pmap_insert_pt_page(pmap, mt)) {
 				/*
 				 * XXX Currently, this can't happen bacuse
 				 * we do not perform pmap_enter(psind == 1)
 				 * on the kernel pmap.
 				 */
 				panic("pmap_enter_l2: trie insert failed");
 			}
 		} else
 			KASSERT(pmap_load(l2) == 0,
 			    ("pmap_enter_l2: non-zero L2 entry %p", l2));
 	}
 
 	if ((new_l2 & PTE_SW_MANAGED) != 0) {
 		/*
 		 * Abort this mapping if its PV entry could not be created.
 		 */
 		if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) {
 			SLIST_INIT(&free);
 			if (pmap_unwire_ptp(pmap, va, l2pg, &free)) {
 				/*
 				 * Although "va" is not mapped, paging-structure
 				 * caches could nonetheless have entries that
 				 * refer to the freed page table pages.
 				 * Invalidate those entries.
 				 */
 				pmap_invalidate_page(pmap, va);
 				vm_page_free_pages_toq(&free, true);
 			}
 			CTR2(KTR_PMAP,
 			    "pmap_enter_l2: failure for va %#lx in pmap %p",
 			    va, pmap);
 			return (KERN_RESOURCE_SHORTAGE);
 		}
 		if ((new_l2 & PTE_W) != 0)
 			for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
 				vm_page_aflag_set(mt, PGA_WRITEABLE);
 	}
 
 	/*
 	 * Increment counters.
 	 */
 	if ((new_l2 & PTE_SW_WIRED) != 0)
 		pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE;
 	pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE;
 
 	/*
 	 * Map the superpage.
 	 */
 	pmap_store(l2, new_l2);
 
 	atomic_add_long(&pmap_l2_mappings, 1);
 	CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p",
 	    va, pmap);
 
 	return (KERN_SUCCESS);
 }
 
 /*
  * Maps a sequence of resident pages belonging to the same object.
  * The sequence begins with the given page m_start.  This page is
  * mapped at the given virtual address start.  Each subsequent page is
  * mapped at a virtual address that is offset from start by the same
  * amount as the page is offset from m_start within the object.  The
  * last page in the sequence is the page with the largest offset from
  * m_start that can be mapped at a virtual address less than the given
  * virtual address end.  Not every virtual page between start and end
  * is mapped; only those for which a resident page exists with the
  * corresponding offset from m_start are mapped.
  */
 void
 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
     vm_page_t m_start, vm_prot_t prot)
 {
 	struct rwlock *lock;
 	vm_offset_t va;
 	vm_page_t m, mpte;
 	vm_pindex_t diff, psize;
 
 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
 
 	psize = atop(end - start);
 	mpte = NULL;
 	m = m_start;
 	lock = NULL;
 	rw_rlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 		va = start + ptoa(diff);
 		if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end &&
 		    m->psind == 1 && pmap_ps_enabled(pmap) &&
 		    pmap_enter_2mpage(pmap, va, m, prot, &lock))
 			m = &m[L2_SIZE / PAGE_SIZE - 1];
 		else
 			mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte,
 			    &lock);
 		m = TAILQ_NEXT(m, listq);
 	}
 	if (lock != NULL)
 		rw_wunlock(lock);
 	rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * this code makes some *MAJOR* assumptions:
  * 1. Current pmap & pmap exists.
  * 2. Not wired.
  * 3. Read access.
  * 4. No page table pages.
  * but is *MUCH* faster than pmap_enter...
  */
 
 void
 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 	struct rwlock *lock;
 
 	lock = NULL;
 	rw_rlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
 	if (lock != NULL)
 		rw_wunlock(lock);
 	rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 static vm_page_t
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 {
 	struct spglist free;
 	vm_paddr_t phys;
 	pd_entry_t *l2;
 	pt_entry_t *l3, newl3;
 
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->oflags & VPO_UNMANAGED) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		vm_pindex_t l2pindex;
 
 		/*
 		 * Calculate pagetable page index
 		 */
 		l2pindex = pmap_l2_pindex(va);
 		if (mpte && (mpte->pindex == l2pindex)) {
 			mpte->wire_count++;
 		} else {
 			/*
 			 * Get the l2 entry
 			 */
 			l2 = pmap_l2(pmap, va);
 
 			/*
 			 * If the page table page is mapped, we just increment
 			 * the hold count, and activate it.  Otherwise, we
 			 * attempt to allocate a page table page.  If this
 			 * attempt fails, we don't retry.  Instead, we give up.
 			 */
 			if (l2 != NULL && pmap_load(l2) != 0) {
 				phys = PTE_TO_PHYS(pmap_load(l2));
 				mpte = PHYS_TO_VM_PAGE(phys);
 				mpte->wire_count++;
 			} else {
 				/*
 				 * Pass NULL instead of the PV list lock
 				 * pointer, because we don't intend to sleep.
 				 */
 				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
 				if (mpte == NULL)
 					return (mpte);
 			}
 		}
 		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
 		l3 = &l3[pmap_l3_index(va)];
 	} else {
 		mpte = NULL;
 		l3 = pmap_l3(kernel_pmap, va);
 	}
 	if (l3 == NULL)
 		panic("pmap_enter_quick_locked: No l3");
 	if (pmap_load(l3) != 0) {
 		if (mpte != NULL) {
 			mpte->wire_count--;
 			mpte = NULL;
 		}
 		return (mpte);
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
 		if (mpte != NULL) {
 			SLIST_INIT(&free);
 			if (pmap_unwire_ptp(pmap, va, mpte, &free)) {
 				pmap_invalidate_page(pmap, va);
 				vm_page_free_pages_toq(&free, false);
 			}
 			mpte = NULL;
 		}
 		return (mpte);
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap_resident_count_inc(pmap, 1);
 
 	newl3 = ((VM_PAGE_TO_PHYS(m) / PAGE_SIZE) << PTE_PPN0_S) |
 	    PTE_V | PTE_R;
 	if ((prot & VM_PROT_EXECUTE) != 0)
 		newl3 |= PTE_X;
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		newl3 |= PTE_SW_MANAGED;
 	if (va < VM_MAX_USER_ADDRESS)
 		newl3 |= PTE_U;
 
 	/*
 	 * Sync the i-cache on all harts before updating the PTE
 	 * if the new PTE is executable.
 	 */
 	if (prot & VM_PROT_EXECUTE)
 		pmap_sync_icache(pmap, va, PAGE_SIZE);
 
 	pmap_store(l3, newl3);
 
 	pmap_invalidate_page(pmap, va);
 	return (mpte);
 }
 
 /*
  * This code maps large physical mmap regions into the
  * processor address space.  Note that some shortcuts
  * are taken, but the code works.
  */
 void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("pmap_object_init_pt: non-device object"));
 }
 
 /*
  *	Clear the wired attribute from the mappings for the specified range of
  *	addresses in the given pmap.  Every valid mapping within that range
  *	must have the wired attribute set.  In contrast, invalid mappings
  *	cannot have the wired attribute set, so they are ignored.
  *
  *	The wired attribute of the page table entry is not a hardware feature,
  *	so there is no need to invalidate any TLB entries.
  */
 void
 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t va_next;
 	pd_entry_t *l1, *l2, l2e;
 	pt_entry_t *l3, l3e;
 	bool pv_lists_locked;
 
 	pv_lists_locked = false;
 retry:
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = va_next) {
 		l1 = pmap_l1(pmap, sva);
 		if (pmap_load(l1) == 0) {
 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < sva)
 			va_next = eva;
 
 		l2 = pmap_l1_to_l2(l1, sva);
 		if ((l2e = pmap_load(l2)) == 0)
 			continue;
 		if ((l2e & PTE_RWX) != 0) {
 			if (sva + L2_SIZE == va_next && eva >= va_next) {
 				if ((l2e & PTE_SW_WIRED) == 0)
 					panic("pmap_unwire: l2 %#jx is missing "
 					    "PTE_SW_WIRED", (uintmax_t)l2e);
 				pmap_clear_bits(l2, PTE_SW_WIRED);
 				continue;
 			} else {
 				if (!pv_lists_locked) {
 					pv_lists_locked = true;
 					if (!rw_try_rlock(&pvh_global_lock)) {
 						PMAP_UNLOCK(pmap);
 						rw_rlock(&pvh_global_lock);
 						/* Repeat sva. */
 						goto retry;
 					}
 				}
 				if (!pmap_demote_l2(pmap, l2, sva))
 					panic("pmap_unwire: demotion failed");
 			}
 		}
 
 		if (va_next > eva)
 			va_next = eva;
 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 		    sva += L3_SIZE) {
 			if ((l3e = pmap_load(l3)) == 0)
 				continue;
 			if ((l3e & PTE_SW_WIRED) == 0)
 				panic("pmap_unwire: l3 %#jx is missing "
 				    "PTE_SW_WIRED", (uintmax_t)l3e);
 
 			/*
 			 * PG_W must be cleared atomically.  Although the pmap
 			 * lock synchronizes access to PG_W, another processor
 			 * could be setting PG_M and/or PG_A concurrently.
 			 */
 			pmap_clear_bits(l3, PTE_SW_WIRED);
 			pmap->pm_stats.wired_count--;
 		}
 	}
 	if (pv_lists_locked)
 		rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything.
  */
 
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
     vm_offset_t src_addr)
 {
 
 }
 
 /*
  *	pmap_zero_page zeros the specified hardware page by mapping
  *	the page into KVM and using bzero to clear its contents.
  */
 void
 pmap_zero_page(vm_page_t m)
 {
 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 
 	pagezero((void *)va);
 }
 
 /*
  *	pmap_zero_page_area zeros the specified hardware page by mapping 
  *	the page into KVM and using bzero to clear its contents.
  *
  *	off and size may not cover an area beyond a single hardware page.
  */
 void
 pmap_zero_page_area(vm_page_t m, int off, int size)
 {
 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 
 	if (off == 0 && size == PAGE_SIZE)
 		pagezero((void *)va);
 	else
 		bzero((char *)va + off, size);
 }
 
 /*
  *	pmap_copy_page copies the specified (machine independent)
  *	page by mapping the page into virtual memory and using
  *	bcopy to copy the page, one machine dependent page at a
  *	time.
  */
 void
 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
 {
 	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
 	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
 
 	pagecopy((void *)src, (void *)dst);
 }
 
 int unmapped_buf_allowed = 1;
 
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)
 {
 	void *a_cp, *b_cp;
 	vm_page_t m_a, m_b;
 	vm_paddr_t p_a, p_b;
 	vm_offset_t a_pg_offset, b_pg_offset;
 	int cnt;
 
 	while (xfersize > 0) {
 		a_pg_offset = a_offset & PAGE_MASK;
 		m_a = ma[a_offset >> PAGE_SHIFT];
 		p_a = m_a->phys_addr;
 		b_pg_offset = b_offset & PAGE_MASK;
 		m_b = mb[b_offset >> PAGE_SHIFT];
 		p_b = m_b->phys_addr;
 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
 		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
 			panic("!DMAP a %lx", p_a);
 		} else {
 			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
 		}
 		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
 			panic("!DMAP b %lx", p_b);
 		} else {
 			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
 		}
 		bcopy(a_cp, b_cp, cnt);
 		a_offset += cnt;
 		b_offset += cnt;
 		xfersize -= cnt;
 	}
 }
 
 vm_offset_t
 pmap_quick_enter_page(vm_page_t m)
 {
 
 	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
 }
 
 void
 pmap_quick_remove_page(vm_offset_t addr)
 {
 }
 
 /*
  * Returns true if the pmap's pv is one of the first
  * 16 pvs linked to from this page.  This count may
  * be changed upwards or downwards in the future; it
  * is only necessary that true be returned for a small
  * subset of pmaps for proper page aging.
  */
 boolean_t
 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 {
 	struct md_page *pvh;
 	struct rwlock *lock;
 	pv_entry_t pv;
 	int loops = 0;
 	boolean_t rv;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_page_exists_quick: page %p is not managed", m));
 	rv = FALSE;
 	rw_rlock(&pvh_global_lock);
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_rlock(lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		if (PV_PMAP(pv) == pmap) {
 			rv = TRUE;
 			break;
 		}
 		loops++;
 		if (loops >= 16)
 			break;
 	}
 	if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 			if (PV_PMAP(pv) == pmap) {
 				rv = TRUE;
 				break;
 			}
 			loops++;
 			if (loops >= 16)
 				break;
 		}
 	}
 	rw_runlock(lock);
 	rw_runlock(&pvh_global_lock);
 	return (rv);
 }
 
 /*
  *	pmap_page_wired_mappings:
  *
  *	Return the number of managed mappings to the given physical page
  *	that are wired.
  */
 int
 pmap_page_wired_mappings(vm_page_t m)
 {
 	struct md_page *pvh;
 	struct rwlock *lock;
 	pmap_t pmap;
 	pd_entry_t *l2;
 	pt_entry_t *l3;
 	pv_entry_t pv;
 	int count, md_gen, pvh_gen;
 
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (0);
 	rw_rlock(&pvh_global_lock);
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_rlock(lock);
 restart:
 	count = 0;
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			md_gen = m->md.pv_gen;
 			rw_runlock(lock);
 			PMAP_LOCK(pmap);
 			rw_rlock(lock);
 			if (md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto restart;
 			}
 		}
 		l3 = pmap_l3(pmap, pv->pv_va);
 		if ((pmap_load(l3) & PTE_SW_WIRED) != 0)
 			count++;
 		PMAP_UNLOCK(pmap);
 	}
 	if ((m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 			pmap = PV_PMAP(pv);
 			if (!PMAP_TRYLOCK(pmap)) {
 				md_gen = m->md.pv_gen;
 				pvh_gen = pvh->pv_gen;
 				rw_runlock(lock);
 				PMAP_LOCK(pmap);
 				rw_rlock(lock);
 				if (md_gen != m->md.pv_gen ||
 				    pvh_gen != pvh->pv_gen) {
 					PMAP_UNLOCK(pmap);
 					goto restart;
 				}
 			}
 			l2 = pmap_l2(pmap, pv->pv_va);
 			if ((pmap_load(l2) & PTE_SW_WIRED) != 0)
 				count++;
 			PMAP_UNLOCK(pmap);
 		}
 	}
 	rw_runlock(lock);
 	rw_runlock(&pvh_global_lock);
 	return (count);
 }
 
 static void
 pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv,
     struct spglist *free, bool superpage)
 {
 	struct md_page *pvh;
 	vm_page_t mpte, mt;
 
 	if (superpage) {
 		pmap_resident_count_dec(pmap, Ln_ENTRIES);
 		pvh = pa_to_pvh(m->phys_addr);
 		TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 		pvh->pv_gen++;
 		if (TAILQ_EMPTY(&pvh->pv_list)) {
 			for (mt = m; mt < &m[Ln_ENTRIES]; mt++)
 				if (TAILQ_EMPTY(&mt->md.pv_list) &&
 				    (mt->aflags & PGA_WRITEABLE) != 0)
 					vm_page_aflag_clear(mt, PGA_WRITEABLE);
 		}
 		mpte = pmap_remove_pt_page(pmap, pv->pv_va);
 		if (mpte != NULL) {
 			pmap_resident_count_dec(pmap, 1);
 			KASSERT(mpte->wire_count == Ln_ENTRIES,
 			    ("pmap_remove_pages: pte page wire count error"));
 			mpte->wire_count = 0;
 			pmap_add_delayed_free_list(mpte, free, FALSE);
 		}
 	} else {
 		pmap_resident_count_dec(pmap, 1);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 		m->md.pv_gen++;
 		if (TAILQ_EMPTY(&m->md.pv_list) &&
 		    (m->aflags & PGA_WRITEABLE) != 0) {
 			pvh = pa_to_pvh(m->phys_addr);
 			if (TAILQ_EMPTY(&pvh->pv_list))
 				vm_page_aflag_clear(m, PGA_WRITEABLE);
 		}
 	}
 }
 
 /*
  * Destroy all managed, non-wired mappings in the given user-space
  * pmap.  This pmap cannot be active on any processor besides the
  * caller.
  *
  * This function cannot be applied to the kernel pmap.  Moreover, it
  * is not intended for general use.  It is only to be used during
  * process termination.  Consequently, it can be implemented in ways
  * that make it faster than pmap_remove().  First, it can more quickly
  * destroy mappings by iterating over the pmap's collection of PV
  * entries, rather than searching the page table.  Second, it doesn't
  * have to test and clear the page table entries atomically, because
  * no processor is currently accessing the user address space.  In
  * particular, a page table entry's dirty bit won't change state once
  * this function starts.
  */
 void
 pmap_remove_pages(pmap_t pmap)
 {
 	struct spglist free;
 	pd_entry_t ptepde;
 	pt_entry_t *pte, tpte;
 	vm_page_t m, mt;
 	pv_entry_t pv;
 	struct pv_chunk *pc, *npc;
 	struct rwlock *lock;
 	int64_t bit;
 	uint64_t inuse, bitmask;
 	int allfree, field, freed, idx;
 	bool superpage;
 
 	lock = NULL;
 
 	SLIST_INIT(&free);
 	rw_rlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 		allfree = 1;
 		freed = 0;
 		for (field = 0; field < _NPCM; field++) {
 			inuse = ~pc->pc_map[field] & pc_freemask[field];
 			while (inuse != 0) {
 				bit = ffsl(inuse) - 1;
 				bitmask = 1UL << bit;
 				idx = field * 64 + bit;
 				pv = &pc->pc_pventry[idx];
 				inuse &= ~bitmask;
 
 				pte = pmap_l1(pmap, pv->pv_va);
 				ptepde = pmap_load(pte);
 				pte = pmap_l1_to_l2(pte, pv->pv_va);
 				tpte = pmap_load(pte);
 				if ((tpte & PTE_RWX) != 0) {
 					superpage = true;
 				} else {
 					ptepde = tpte;
 					pte = pmap_l2_to_l3(pte, pv->pv_va);
 					tpte = pmap_load(pte);
 					superpage = false;
 				}
 
 				/*
 				 * We cannot remove wired pages from a
 				 * process' mapping at this time.
 				 */
 				if (tpte & PTE_SW_WIRED) {
 					allfree = 0;
 					continue;
 				}
 
 				m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(tpte));
 				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
 				    m < &vm_page_array[vm_page_array_size],
 				    ("pmap_remove_pages: bad pte %#jx",
 				    (uintmax_t)tpte));
 
 				pmap_clear(pte);
 
 				/*
 				 * Update the vm_page_t clean/reference bits.
 				 */
 				if ((tpte & (PTE_D | PTE_W)) ==
 				    (PTE_D | PTE_W)) {
 					if (superpage)
 						for (mt = m;
 						    mt < &m[Ln_ENTRIES]; mt++)
 							vm_page_dirty(mt);
 					else
 						vm_page_dirty(m);
 				}
 
 				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 
 				/* Mark free */
 				pc->pc_map[field] |= bitmask;
 
 				pmap_remove_pages_pv(pmap, m, pv, &free,
 				    superpage);
 				pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free);
 				freed++;
 			}
 		}
 		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
 		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
 		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
 		if (allfree) {
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			free_pv_chunk(pc);
 		}
 	}
 	if (lock != NULL)
 		rw_wunlock(lock);
 	pmap_invalidate_all(pmap);
 	rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, false);
 }
 
 static bool
 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
 {
 	struct md_page *pvh;
 	struct rwlock *lock;
 	pd_entry_t *l2;
 	pt_entry_t *l3, mask;
 	pv_entry_t pv;
 	pmap_t pmap;
 	int md_gen, pvh_gen;
 	bool rv;
 
 	mask = 0;
 	if (modified)
 		mask |= PTE_D;
 	if (accessed)
 		mask |= PTE_A;
 
 	rv = FALSE;
 	rw_rlock(&pvh_global_lock);
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_rlock(lock);
 restart:
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			md_gen = m->md.pv_gen;
 			rw_runlock(lock);
 			PMAP_LOCK(pmap);
 			rw_rlock(lock);
 			if (md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto restart;
 			}
 		}
 		l3 = pmap_l3(pmap, pv->pv_va);
 		rv = (pmap_load(l3) & mask) == mask;
 		PMAP_UNLOCK(pmap);
 		if (rv)
 			goto out;
 	}
 	if ((m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 			pmap = PV_PMAP(pv);
 			if (!PMAP_TRYLOCK(pmap)) {
 				md_gen = m->md.pv_gen;
 				pvh_gen = pvh->pv_gen;
 				rw_runlock(lock);
 				PMAP_LOCK(pmap);
 				rw_rlock(lock);
 				if (md_gen != m->md.pv_gen ||
 				    pvh_gen != pvh->pv_gen) {
 					PMAP_UNLOCK(pmap);
 					goto restart;
 				}
 			}
 			l2 = pmap_l2(pmap, pv->pv_va);
 			rv = (pmap_load(l2) & mask) == mask;
 			PMAP_UNLOCK(pmap);
 			if (rv)
 				goto out;
 		}
 	}
 out:
 	rw_runlock(lock);
 	rw_runlock(&pvh_global_lock);
 	return (rv);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Return whether or not the specified physical page was modified
  *	in any physical maps.
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_is_modified: page %p is not managed", m));
 
 	/*
 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 	 * is clear, no PTEs can have PG_M set.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (FALSE);
 	return (pmap_page_test_mappings(m, FALSE, TRUE));
 }
 
 /*
  *	pmap_is_prefaultable:
  *
  *	Return whether or not the specified virtual address is eligible
  *	for prefault.
  */
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
 	pt_entry_t *l3;
 	boolean_t rv;
 
 	rv = FALSE;
 	PMAP_LOCK(pmap);
 	l3 = pmap_l3(pmap, addr);
 	if (l3 != NULL && pmap_load(l3) != 0) {
 		rv = TRUE;
 	}
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
 
 /*
  *	pmap_is_referenced:
  *
  *	Return whether or not the specified physical page was referenced
  *	in any physical maps.
  */
 boolean_t
 pmap_is_referenced(vm_page_t m)
 {
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_is_referenced: page %p is not managed", m));
 	return (pmap_page_test_mappings(m, TRUE, FALSE));
 }
 
 /*
  * Clear the write and modified bits in each of the given page's mappings.
  */
 void
 pmap_remove_write(vm_page_t m)
 {
 	struct md_page *pvh;
 	struct rwlock *lock;
 	pmap_t pmap;
 	pd_entry_t *l2;
 	pt_entry_t *l3, oldl3, newl3;
 	pv_entry_t next_pv, pv;
 	vm_offset_t va;
 	int md_gen, pvh_gen;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_remove_write: page %p is not managed", m));
 
 	/*
 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 	 * set by another thread while the object is locked.  Thus,
 	 * if PGA_WRITEABLE is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	rw_rlock(&pvh_global_lock);
 retry_pv_loop:
 	rw_wlock(lock);
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen) {
 				PMAP_UNLOCK(pmap);
 				rw_wunlock(lock);
 				goto retry_pv_loop;
 			}
 		}
 		va = pv->pv_va;
 		l2 = pmap_l2(pmap, va);
 		if ((pmap_load(l2) & PTE_W) != 0)
 			(void)pmap_demote_l2_locked(pmap, l2, va, &lock);
 		KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
 		    ("inconsistent pv lock %p %p for page %p",
 		    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
 		PMAP_UNLOCK(pmap);
 	}
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			md_gen = m->md.pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				rw_wunlock(lock);
 				goto retry_pv_loop;
 			}
 		}
 		l3 = pmap_l3(pmap, pv->pv_va);
 		oldl3 = pmap_load(l3);
 retry:
 		if ((oldl3 & PTE_W) != 0) {
 			newl3 = oldl3 & ~(PTE_D | PTE_W);
 			if (!atomic_fcmpset_long(l3, &oldl3, newl3))
 				goto retry;
 			if ((oldl3 & PTE_D) != 0)
 				vm_page_dirty(m);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	rw_wunlock(lock);
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	rw_runlock(&pvh_global_lock);
 }
 
 /*
  *	pmap_ts_referenced:
  *
  *	Return a count of reference bits for a page, clearing those bits.
  *	It is not necessary for every reference bit to be cleared, but it
  *	is necessary that 0 only be returned when there are truly no
  *	reference bits set.
  *
  *	As an optimization, update the page's dirty field if a modified bit is
  *	found while counting reference bits.  This opportunistic update can be
  *	performed at low cost and can eliminate the need for some future calls
  *	to pmap_is_modified().  However, since this function stops after
  *	finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
  *	dirty pages.  Those dirty pages will only be detected by a future call
  *	to pmap_is_modified().
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	struct spglist free;
 	struct md_page *pvh;
 	struct rwlock *lock;
 	pv_entry_t pv, pvf;
 	pmap_t pmap;
 	pd_entry_t *l2, l2e;
 	pt_entry_t *l3, l3e;
 	vm_paddr_t pa;
 	vm_offset_t va;
 	int md_gen, pvh_gen, ret;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_ts_referenced: page %p is not managed", m));
 	SLIST_INIT(&free);
 	ret = 0;
 	pa = VM_PAGE_TO_PHYS(m);
 	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
 
 	lock = PHYS_TO_PV_LIST_LOCK(pa);
 	rw_rlock(&pvh_global_lock);
 	rw_wlock(lock);
 retry:
 	if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
 		goto small_mappings;
 	pv = pvf;
 	do {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto retry;
 			}
 		}
 		va = pv->pv_va;
 		l2 = pmap_l2(pmap, va);
 		l2e = pmap_load(l2);
 		if ((l2e & (PTE_W | PTE_D)) == (PTE_W | PTE_D)) {
 			/*
 			 * Although l2e is mapping a 2MB page, because
 			 * this function is called at a 4KB page granularity,
 			 * we only update the 4KB page under test.
 			 */
 			vm_page_dirty(m);
 		}
 		if ((l2e & PTE_A) != 0) {
 			/*
 			 * Since this reference bit is shared by 512 4KB
 			 * pages, it should not be cleared every time it is
 			 * tested.  Apply a simple "hash" function on the
 			 * physical page number, the virtual superpage number,
 			 * and the pmap address to select one 4KB page out of
 			 * the 512 on which testing the reference bit will
 			 * result in clearing that reference bit.  This
 			 * function is designed to avoid the selection of the
 			 * same 4KB page for every 2MB page mapping.
 			 *
 			 * On demotion, a mapping that hasn't been referenced
 			 * is simply destroyed.  To avoid the possibility of a
 			 * subsequent page fault on a demoted wired mapping,
 			 * always leave its reference bit set.  Moreover,
 			 * since the superpage is wired, the current state of
 			 * its reference bit won't affect page replacement.
 			 */
 			if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^
 			    (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
 			    (l2e & PTE_SW_WIRED) == 0) {
 				pmap_clear_bits(l2, PTE_A);
 				pmap_invalidate_page(pmap, va);
 			}
 			ret++;
 		}
 		PMAP_UNLOCK(pmap);
 		/* Rotate the PV list if it has more than one entry. */
 		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 			TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 			pvh->pv_gen++;
 		}
 		if (ret >= PMAP_TS_REFERENCED_MAX)
 			goto out;
 	} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
 small_mappings:
 	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
 		goto out;
 	pv = pvf;
 	do {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			md_gen = m->md.pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto retry;
 			}
 		}
 		l2 = pmap_l2(pmap, pv->pv_va);
 
 		KASSERT((pmap_load(l2) & PTE_RX) == 0,
 		    ("pmap_ts_referenced: found an invalid l2 table"));
 
 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
 		l3e = pmap_load(l3);
 		if ((l3e & PTE_D) != 0)
 			vm_page_dirty(m);
 		if ((l3e & PTE_A) != 0) {
 			if ((l3e & PTE_SW_WIRED) == 0) {
 				/*
 				 * Wired pages cannot be paged out so
 				 * doing accessed bit emulation for
 				 * them is wasted effort. We do the
 				 * hard work for unwired pages only.
 				 */
 				pmap_clear_bits(l3, PTE_A);
 				pmap_invalidate_page(pmap, pv->pv_va);
 			}
 			ret++;
 		}
 		PMAP_UNLOCK(pmap);
 		/* Rotate the PV list if it has more than one entry. */
 		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 			m->md.pv_gen++;
 		}
 	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && ret <
 	    PMAP_TS_REFERENCED_MAX);
 out:
 	rw_wunlock(lock);
 	rw_runlock(&pvh_global_lock);
 	vm_page_free_pages_toq(&free, false);
 	return (ret);
 }
 
 /*
  *	Apply the given advice to the specified range of addresses within the
  *	given pmap.  Depending on the advice, clear the referenced and/or
  *	modified flags in each mapping and set the mapped page's dirty field.
  */
 void
 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
 {
 }
 
 /*
  *	Clear the modify bits on the specified physical page.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 	struct md_page *pvh;
 	struct rwlock *lock;
 	pmap_t pmap;
 	pv_entry_t next_pv, pv;
 	pd_entry_t *l2, oldl2;
 	pt_entry_t *l3, oldl3;
 	vm_offset_t va;
 	int md_gen, pvh_gen;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	KASSERT(!vm_page_xbusied(m),
 	    ("pmap_clear_modify: page %p is exclusive busied", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
 	 * If the object containing the page is locked and the page is not
 	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_rlock(&pvh_global_lock);
 	rw_wlock(lock);
 restart:
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto restart;
 			}
 		}
 		va = pv->pv_va;
 		l2 = pmap_l2(pmap, va);
 		oldl2 = pmap_load(l2);
 		if ((oldl2 & PTE_W) != 0) {
 			if (pmap_demote_l2_locked(pmap, l2, va, &lock)) {
 				if ((oldl2 & PTE_SW_WIRED) == 0) {
 					/*
 					 * Write protect the mapping to a
 					 * single page so that a subsequent
 					 * write access may repromote.
 					 */
 					va += VM_PAGE_TO_PHYS(m) -
 					    PTE_TO_PHYS(oldl2);
 					l3 = pmap_l2_to_l3(l2, va);
 					oldl3 = pmap_load(l3);
 					if ((oldl3 & PTE_V) != 0) {
 						while (!atomic_fcmpset_long(l3,
 						    &oldl3, oldl3 & ~(PTE_D |
 						    PTE_W)))
 							cpu_spinwait();
 						vm_page_dirty(m);
 						pmap_invalidate_page(pmap, va);
 					}
 				}
 			}
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			md_gen = m->md.pv_gen;
 			pvh_gen = pvh->pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto restart;
 			}
 		}
 		l2 = pmap_l2(pmap, pv->pv_va);
 		KASSERT((pmap_load(l2) & PTE_RWX) == 0,
 		    ("pmap_clear_modify: found a 2mpage in page %p's pv list",
 		    m));
 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
 		if ((pmap_load(l3) & (PTE_D | PTE_W)) == (PTE_D | PTE_W)) {
 			pmap_clear_bits(l3, PTE_D);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	rw_wunlock(lock);
 	rw_runlock(&pvh_global_lock);
 }
 
 void *
 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 {
 
         return ((void *)PHYS_TO_DMAP(pa));
 }
 
 void
 pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
 {
 }
 
 /*
  * Sets the memory attribute for the specified page.
  */
 void
 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 {
 
 	m->md.pv_memattr = ma;
 
 	/*
 	 * RISCVTODO: Implement the below (from the amd64 pmap)
 	 * If "m" is a normal page, update its direct mapping.  This update
 	 * can be relied upon to perform any cache operations that are
 	 * required for data coherence.
 	 */
 	if ((m->flags & PG_FICTITIOUS) == 0 &&
 	    PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m)))
 		panic("RISCVTODO: pmap_page_set_memattr");
 }
 
 /*
  * perform the pmap work for mincore
  */
 int
 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
 {
 	pt_entry_t *l2, *l3, tpte;
 	vm_paddr_t pa;
 	int val;
 	bool managed;
 
 	PMAP_LOCK(pmap);
 retry:
 	managed = false;
 	val = 0;
 
 	l2 = pmap_l2(pmap, addr);
 	if (l2 != NULL && ((tpte = pmap_load(l2)) & PTE_V) != 0) {
 		if ((tpte & PTE_RWX) != 0) {
 			pa = PTE_TO_PHYS(tpte) | (addr & L2_OFFSET);
 			val = MINCORE_INCORE | MINCORE_SUPER;
 		} else {
 			l3 = pmap_l2_to_l3(l2, addr);
 			tpte = pmap_load(l3);
 			if ((tpte & PTE_V) == 0)
 				goto done;
 			pa = PTE_TO_PHYS(tpte) | (addr & L3_OFFSET);
 			val = MINCORE_INCORE;
 		}
 
 		if ((tpte & PTE_D) != 0)
 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 		if ((tpte & PTE_A) != 0)
 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 		managed = (tpte & PTE_SW_MANAGED) == PTE_SW_MANAGED;
 	}
 
 done:
 	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
 	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
 		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
 		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
 			goto retry;
 	} else
 		PA_UNLOCK_COND(*locked_pa);
 	PMAP_UNLOCK(pmap);
 	return (val);
 }
 
 void
-pmap_activate(struct thread *td)
+pmap_activate_sw(struct thread *td)
 {
-	pmap_t pmap;
-	uint64_t reg;
+	pmap_t oldpmap, pmap;
+	u_int cpu;
 
-	critical_enter();
+	oldpmap = PCPU_GET(curpmap);
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
-	td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1);
+	if (pmap == oldpmap)
+		return;
+	load_satp(pmap->pm_satp);
 
-	reg = SATP_MODE_SV39;
-	reg |= (td->td_pcb->pcb_l1addr >> PAGE_SHIFT);
-	load_satp(reg);
+	cpu = PCPU_GET(cpuid);
+#ifdef SMP
+	CPU_SET_ATOMIC(cpu, &pmap->pm_active);
+	CPU_CLR_ATOMIC(cpu, &oldpmap->pm_active);
+#else
+	CPU_SET(cpu, &pmap->pm_active);
+	CPU_CLR(cpu, &oldpmap->pm_active);
+#endif
+	PCPU_SET(curpmap, pmap);
 
-	pmap_invalidate_all(pmap);
+	sfence_vma();
+}
+
+void
+pmap_activate(struct thread *td)
+{
+
+	critical_enter();
+	pmap_activate_sw(td);
 	critical_exit();
 }
 
 void
-pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
+pmap_activate_boot(pmap_t pmap)
 {
+	u_int cpu;
+
+	cpu = PCPU_GET(cpuid);
+#ifdef SMP
+	CPU_SET_ATOMIC(cpu, &pmap->pm_active);
+#else
+	CPU_SET(cpu, &pmap->pm_active);
+#endif
+	PCPU_SET(curpmap, pmap);
+}
+
+void
+pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
+{
 	cpuset_t mask;
 
 	/*
 	 * From the RISC-V User-Level ISA V2.2:
 	 *
 	 * "To make a store to instruction memory visible to all
 	 * RISC-V harts, the writing hart has to execute a data FENCE
 	 * before requesting that all remote RISC-V harts execute a
 	 * FENCE.I."
 	 */
 	sched_pin();
 	mask = all_cpus;
 	CPU_CLR(PCPU_GET(cpuid), &mask);
 	fence();
-	sbi_remote_fence_i(mask.__bits);
+	if (!CPU_EMPTY(&mask) && smp_started)
+		sbi_remote_fence_i(mask.__bits);
 	sched_unpin();
 }
 
 /*
  *	Increase the starting virtual address of the given mapping if a
  *	different alignment might result in more superpage mappings.
  */
 void
 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t size)
 {
 	vm_offset_t superpage_offset;
 
 	if (size < L2_SIZE)
 		return;
 	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 		offset += ptoa(object->pg_color);
 	superpage_offset = offset & L2_OFFSET;
 	if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
 	    (*addr & L2_OFFSET) == superpage_offset)
 		return;
 	if ((*addr & L2_OFFSET) < superpage_offset)
 		*addr = (*addr & ~L2_OFFSET) + superpage_offset;
 	else
 		*addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
 }
 
 /**
  * Get the kernel virtual address of a set of physical pages. If there are
  * physical addresses not covered by the DMAP perform a transient mapping
  * that will be removed when calling pmap_unmap_io_transient.
  *
  * \param page        The pages the caller wishes to obtain the virtual
  *                    address on the kernel memory map.
  * \param vaddr       On return contains the kernel virtual memory address
  *                    of the pages passed in the page parameter.
  * \param count       Number of pages passed in.
  * \param can_fault   TRUE if the thread using the mapped pages can take
  *                    page faults, FALSE otherwise.
  *
  * \returns TRUE if the caller must call pmap_unmap_io_transient when
  *          finished or FALSE otherwise.
  *
  */
 boolean_t
 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
     boolean_t can_fault)
 {
 	vm_paddr_t paddr;
 	boolean_t needs_mapping;
 	int error, i;
 
 	/*
 	 * Allocate any KVA space that we need, this is done in a separate
 	 * loop to prevent calling vmem_alloc while pinned.
 	 */
 	needs_mapping = FALSE;
 	for (i = 0; i < count; i++) {
 		paddr = VM_PAGE_TO_PHYS(page[i]);
 		if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) {
 			error = vmem_alloc(kernel_arena, PAGE_SIZE,
 			    M_BESTFIT | M_WAITOK, &vaddr[i]);
 			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
 			needs_mapping = TRUE;
 		} else {
 			vaddr[i] = PHYS_TO_DMAP(paddr);
 		}
 	}
 
 	/* Exit early if everything is covered by the DMAP */
 	if (!needs_mapping)
 		return (FALSE);
 
 	if (!can_fault)
 		sched_pin();
 	for (i = 0; i < count; i++) {
 		paddr = VM_PAGE_TO_PHYS(page[i]);
 		if (paddr >= DMAP_MAX_PHYSADDR) {
 			panic(
 			   "pmap_map_io_transient: TODO: Map out of DMAP data");
 		}
 	}
 
 	return (needs_mapping);
 }
 
 void
 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
     boolean_t can_fault)
 {
 	vm_paddr_t paddr;
 	int i;
 
 	if (!can_fault)
 		sched_unpin();
 	for (i = 0; i < count; i++) {
 		paddr = VM_PAGE_TO_PHYS(page[i]);
 		if (paddr >= DMAP_MAX_PHYSADDR) {
 			panic("RISCVTODO: pmap_unmap_io_transient: Unmap data");
 		}
 	}
 }
 
 boolean_t
 pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
 {
 
 	return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_BACK);
 }
Index: head/sys/riscv/riscv/swtch.S
===================================================================
--- head/sys/riscv/riscv/swtch.S	(revision 344107)
+++ head/sys/riscv/riscv/swtch.S	(revision 344108)
@@ -1,494 +1,476 @@
 /*-
  * Copyright (c) 2015-2017 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Portions of this software were developed by the University of Cambridge
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "assym.inc"
 #include "opt_sched.h"
 
 #include <machine/param.h>
 #include <machine/asm.h>
 #include <machine/riscvreg.h>
 #include <machine/pte.h>
 
 __FBSDID("$FreeBSD$");
 
 #ifdef FPE
 .macro __fpe_state_save p
 	/*
 	 * Enable FPE usage in supervisor mode,
 	 * so we can access registers.
 	 */
 	li	t0, SSTATUS_FS_INITIAL
 	csrs	sstatus, t0
 
 	/* Store registers */
 	frcsr	t0
 	sd	t0, (PCB_FCSR)(\p)
 	fsd	f0, (PCB_X + 0 * 16)(\p)
 	fsd	f1, (PCB_X + 1 * 16)(\p)
 	fsd	f2, (PCB_X + 2 * 16)(\p)
 	fsd	f3, (PCB_X + 3 * 16)(\p)
 	fsd	f4, (PCB_X + 4 * 16)(\p)
 	fsd	f5, (PCB_X + 5 * 16)(\p)
 	fsd	f6, (PCB_X + 6 * 16)(\p)
 	fsd	f7, (PCB_X + 7 * 16)(\p)
 	fsd	f8, (PCB_X + 8 * 16)(\p)
 	fsd	f9, (PCB_X + 9 * 16)(\p)
 	fsd	f10, (PCB_X + 10 * 16)(\p)
 	fsd	f11, (PCB_X + 11 * 16)(\p)
 	fsd	f12, (PCB_X + 12 * 16)(\p)
 	fsd	f13, (PCB_X + 13 * 16)(\p)
 	fsd	f14, (PCB_X + 14 * 16)(\p)
 	fsd	f15, (PCB_X + 15 * 16)(\p)
 	fsd	f16, (PCB_X + 16 * 16)(\p)
 	fsd	f17, (PCB_X + 17 * 16)(\p)
 	fsd	f18, (PCB_X + 18 * 16)(\p)
 	fsd	f19, (PCB_X + 19 * 16)(\p)
 	fsd	f20, (PCB_X + 20 * 16)(\p)
 	fsd	f21, (PCB_X + 21 * 16)(\p)
 	fsd	f22, (PCB_X + 22 * 16)(\p)
 	fsd	f23, (PCB_X + 23 * 16)(\p)
 	fsd	f24, (PCB_X + 24 * 16)(\p)
 	fsd	f25, (PCB_X + 25 * 16)(\p)
 	fsd	f26, (PCB_X + 26 * 16)(\p)
 	fsd	f27, (PCB_X + 27 * 16)(\p)
 	fsd	f28, (PCB_X + 28 * 16)(\p)
 	fsd	f29, (PCB_X + 29 * 16)(\p)
 	fsd	f30, (PCB_X + 30 * 16)(\p)
 	fsd	f31, (PCB_X + 31 * 16)(\p)
 
 	/* Disable FPE usage in supervisor mode. */
 	li	t0, SSTATUS_FS_MASK
 	csrc	sstatus, t0
 .endm
 
 .macro __fpe_state_load p
 	/*
 	 * Enable FPE usage in supervisor mode,
 	 * so we can access registers.
 	 */
 	li	t0, SSTATUS_FS_INITIAL
 	csrs	sstatus, t0
 
 	/* Restore registers */
 	ld	t0, (PCB_FCSR)(\p)
 	fscsr	t0
 	fld	f0, (PCB_X + 0 * 16)(\p)
 	fld	f1, (PCB_X + 1 * 16)(\p)
 	fld	f2, (PCB_X + 2 * 16)(\p)
 	fld	f3, (PCB_X + 3 * 16)(\p)
 	fld	f4, (PCB_X + 4 * 16)(\p)
 	fld	f5, (PCB_X + 5 * 16)(\p)
 	fld	f6, (PCB_X + 6 * 16)(\p)
 	fld	f7, (PCB_X + 7 * 16)(\p)
 	fld	f8, (PCB_X + 8 * 16)(\p)
 	fld	f9, (PCB_X + 9 * 16)(\p)
 	fld	f10, (PCB_X + 10 * 16)(\p)
 	fld	f11, (PCB_X + 11 * 16)(\p)
 	fld	f12, (PCB_X + 12 * 16)(\p)
 	fld	f13, (PCB_X + 13 * 16)(\p)
 	fld	f14, (PCB_X + 14 * 16)(\p)
 	fld	f15, (PCB_X + 15 * 16)(\p)
 	fld	f16, (PCB_X + 16 * 16)(\p)
 	fld	f17, (PCB_X + 17 * 16)(\p)
 	fld	f18, (PCB_X + 18 * 16)(\p)
 	fld	f19, (PCB_X + 19 * 16)(\p)
 	fld	f20, (PCB_X + 20 * 16)(\p)
 	fld	f21, (PCB_X + 21 * 16)(\p)
 	fld	f22, (PCB_X + 22 * 16)(\p)
 	fld	f23, (PCB_X + 23 * 16)(\p)
 	fld	f24, (PCB_X + 24 * 16)(\p)
 	fld	f25, (PCB_X + 25 * 16)(\p)
 	fld	f26, (PCB_X + 26 * 16)(\p)
 	fld	f27, (PCB_X + 27 * 16)(\p)
 	fld	f28, (PCB_X + 28 * 16)(\p)
 	fld	f29, (PCB_X + 29 * 16)(\p)
 	fld	f30, (PCB_X + 30 * 16)(\p)
 	fld	f31, (PCB_X + 31 * 16)(\p)
 
 	/* Disable FPE usage in supervisor mode. */
 	li	t0, SSTATUS_FS_MASK
 	csrc	sstatus, t0
 .endm
 
 /*
  * void
  * fpe_state_save(struct thread *td)
  */
 ENTRY(fpe_state_save)
 	/* Get pointer to PCB */
 	ld	a0, TD_PCB(a0)
 	__fpe_state_save a0
 	ret
 END(fpe_state_save)
 #endif /* FPE */
 
 /*
  * void
  * fpe_state_clear(void)
  */
 ENTRY(fpe_state_clear)
 	/*
 	 * Enable FPE usage in supervisor mode,
 	 * so we can access registers.
 	 */
 	li	t0, SSTATUS_FS_INITIAL
 	csrs	sstatus, t0
 
 	fscsr	zero
 	fcvt.d.l f0, zero
 	fcvt.d.l f1, zero
 	fcvt.d.l f2, zero
 	fcvt.d.l f3, zero
 	fcvt.d.l f4, zero
 	fcvt.d.l f5, zero
 	fcvt.d.l f6, zero
 	fcvt.d.l f7, zero
 	fcvt.d.l f8, zero
 	fcvt.d.l f9, zero
 	fcvt.d.l f10, zero
 	fcvt.d.l f11, zero
 	fcvt.d.l f12, zero
 	fcvt.d.l f13, zero
 	fcvt.d.l f14, zero
 	fcvt.d.l f15, zero
 	fcvt.d.l f16, zero
 	fcvt.d.l f17, zero
 	fcvt.d.l f18, zero
 	fcvt.d.l f19, zero
 	fcvt.d.l f20, zero
 	fcvt.d.l f21, zero
 	fcvt.d.l f22, zero
 	fcvt.d.l f23, zero
 	fcvt.d.l f24, zero
 	fcvt.d.l f25, zero
 	fcvt.d.l f26, zero
 	fcvt.d.l f27, zero
 	fcvt.d.l f28, zero
 	fcvt.d.l f29, zero
 	fcvt.d.l f30, zero
 	fcvt.d.l f31, zero
 
 	/* Disable FPE usage in supervisor mode. */
 	li	t0, SSTATUS_FS_MASK
 	csrc	sstatus, t0
 
 	ret
 END(fpe_state_clear)
 	
 /*
- * void cpu_throw(struct thread *old, struct thread *new)
+ * void cpu_throw(struct thread *old __unused, struct thread *new)
  */
 ENTRY(cpu_throw)
+	/* Activate the new thread's pmap. */
+	mv	s0, a1
+	mv	a0, a1
+	call	_C_LABEL(pmap_activate_sw)
+	mv	a0, s0
+
 	/* Store the new curthread */
-	sd	a1, PC_CURTHREAD(gp)
+	sd	a0, PC_CURTHREAD(gp)
 	/* And the new pcb */
-	ld	x13, TD_PCB(a1)
+	ld	x13, TD_PCB(a0)
 	sd	x13, PC_CURPCB(gp)
 
-	sfence.vma
-
-	/* Switch to the new pmap */
-	ld	t0, PCB_L1ADDR(x13)
-	srli	t0, t0, PAGE_SHIFT
-	li	t1, SATP_MODE_SV39
-	or	t0, t0, t1
-	csrw	satp, t0
-
-	/* TODO: Invalidate the TLB */
-
-	sfence.vma
-
 	/* Load registers */
 	ld	ra, (PCB_RA)(x13)
 	ld	sp, (PCB_SP)(x13)
 	ld	tp, (PCB_TP)(x13)
 
 	/* s[0-11] */
 	ld	s0, (PCB_S + 0 * 8)(x13)
 	ld	s1, (PCB_S + 1 * 8)(x13)
 	ld	s2, (PCB_S + 2 * 8)(x13)
 	ld	s3, (PCB_S + 3 * 8)(x13)
 	ld	s4, (PCB_S + 4 * 8)(x13)
 	ld	s5, (PCB_S + 5 * 8)(x13)
 	ld	s6, (PCB_S + 6 * 8)(x13)
 	ld	s7, (PCB_S + 7 * 8)(x13)
 	ld	s8, (PCB_S + 8 * 8)(x13)
 	ld	s9, (PCB_S + 9 * 8)(x13)
 	ld	s10, (PCB_S + 10 * 8)(x13)
 	ld	s11, (PCB_S + 11 * 8)(x13)
 
 #ifdef FPE
 	/* Is FPE enabled for new thread? */
-	ld	t0, TD_FRAME(a1)
+	ld	t0, TD_FRAME(a0)
 	ld	t1, (TF_SSTATUS)(t0)
 	li	t2, SSTATUS_FS_MASK
 	and	t3, t1, t2
 	beqz	t3, 1f		/* No, skip. */
 
 	/* Restore registers. */
 	__fpe_state_load x13
 1:
 #endif
 
 	ret
 END(cpu_throw)
 
 /*
  * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx)
  *
  * a0 = old
  * a1 = new
  * a2 = mtx
  * x3 to x7, x16 and x17 are caller saved
  */
 ENTRY(cpu_switch)
 	/* Store the new curthread */
 	sd	a1, PC_CURTHREAD(gp)
 	/* And the new pcb */
 	ld	x13, TD_PCB(a1)
 	sd	x13, PC_CURPCB(gp)
 
 	/* Save the old context. */
 	ld	x13, TD_PCB(a0)
 
 	/* Store ra, sp and the callee-saved registers */
 	sd	ra, (PCB_RA)(x13)
 	sd	sp, (PCB_SP)(x13)
 	sd	tp, (PCB_TP)(x13)
 
 	/* s[0-11] */
 	sd	s0, (PCB_S + 0 * 8)(x13)
 	sd	s1, (PCB_S + 1 * 8)(x13)
 	sd	s2, (PCB_S + 2 * 8)(x13)
 	sd	s3, (PCB_S + 3 * 8)(x13)
 	sd	s4, (PCB_S + 4 * 8)(x13)
 	sd	s5, (PCB_S + 5 * 8)(x13)
 	sd	s6, (PCB_S + 6 * 8)(x13)
 	sd	s7, (PCB_S + 7 * 8)(x13)
 	sd	s8, (PCB_S + 8 * 8)(x13)
 	sd	s9, (PCB_S + 9 * 8)(x13)
 	sd	s10, (PCB_S + 10 * 8)(x13)
 	sd	s11, (PCB_S + 11 * 8)(x13)
 
 #ifdef FPE
 	/*
 	 * Is FPE enabled and is it in dirty state
 	 * for the old thread?
 	 */
 	ld	t0, TD_FRAME(a0)
 	ld	t1, (TF_SSTATUS)(t0)
 	li	t2, SSTATUS_FS_MASK
 	and	t3, t1, t2
 	li	t2, SSTATUS_FS_DIRTY
 	bne	t3, t2, 1f		/* No, skip. */
 
 	/* Yes, mark FPE state clean and save registers. */
 	li	t2, ~SSTATUS_FS_MASK
 	and	t3, t1, t2
 	li	t2, SSTATUS_FS_CLEAN
 	or	t3, t3, t2
 	sd	t3, (TF_SSTATUS)(t0)
 
 	__fpe_state_save x13
 1:
 #endif
 
-	/*
-	 * Restore the saved context.
-	 */
-	ld	x13, TD_PCB(a1)
+	/* Activate the new thread's pmap */
+	mv	s0, a0
+	mv	s1, a1
+	mv	s2, a2
+	mv	a0, a1
+	call	_C_LABEL(pmap_activate_sw)
+	mv	a1, s1
 
-	/*
-	 * TODO: We may need to flush the cache here if switching
-	 * to a user process.
-	 */
-
-	sfence.vma
-
-	/* Switch to the new pmap */
-	ld	t0, PCB_L1ADDR(x13)
-	srli	t0, t0, PAGE_SHIFT
-	li	t1, SATP_MODE_SV39
-	or	t0, t0, t1
-	csrw	satp, t0
-
-	/* TODO: Invalidate the TLB */
-
-	sfence.vma
-
 	/* Release the old thread */
-	sd	a2, TD_LOCK(a0)
+	sd	s2, TD_LOCK(s0)
 #if defined(SCHED_ULE) && defined(SMP)
 	/* Spin if TD_LOCK points to a blocked_lock */
-	la	a2, _C_LABEL(blocked_lock)
+	la	s2, _C_LABEL(blocked_lock)
 1:
 	ld	t0, TD_LOCK(a1)
-	beq	t0, a2, 1b
+	beq	t0, s2, 1b
 #endif
+	/*
+	 * Restore the saved context.
+	 */
+	ld	x13, TD_PCB(a1)
 
 	/* Restore the registers */
 	ld	tp, (PCB_TP)(x13)
 	ld	ra, (PCB_RA)(x13)
 	ld	sp, (PCB_SP)(x13)
 
 	/* s[0-11] */
 	ld	s0, (PCB_S + 0 * 8)(x13)
 	ld	s1, (PCB_S + 1 * 8)(x13)
 	ld	s2, (PCB_S + 2 * 8)(x13)
 	ld	s3, (PCB_S + 3 * 8)(x13)
 	ld	s4, (PCB_S + 4 * 8)(x13)
 	ld	s5, (PCB_S + 5 * 8)(x13)
 	ld	s6, (PCB_S + 6 * 8)(x13)
 	ld	s7, (PCB_S + 7 * 8)(x13)
 	ld	s8, (PCB_S + 8 * 8)(x13)
 	ld	s9, (PCB_S + 9 * 8)(x13)
 	ld	s10, (PCB_S + 10 * 8)(x13)
 	ld	s11, (PCB_S + 11 * 8)(x13)
 
 #ifdef FPE
 	/* Is FPE enabled for new thread? */
 	ld	t0, TD_FRAME(a1)
 	ld	t1, (TF_SSTATUS)(t0)
 	li	t2, SSTATUS_FS_MASK
 	and	t3, t1, t2
 	beqz	t3, 1f		/* No, skip. */
 
 	/* Restore registers. */
 	__fpe_state_load x13
 1:
 #endif
 
 	ret
 .Lcpu_switch_panic_str:
 	.asciz "cpu_switch: %p\0"
 END(cpu_switch)
 
 /*
  * fork_exit(void (*callout)(void *, struct trapframe *), void *arg,
  *  struct trapframe *frame)
  */
 
 ENTRY(fork_trampoline)
 	mv	a0, s0
 	mv	a1, s1
 	mv	a2, sp
 	call	_C_LABEL(fork_exit)
 
 	/* Restore sstatus */
 	ld	t0, (TF_SSTATUS)(sp)
 	/* Ensure interrupts disabled */
 	li	t1, ~SSTATUS_SIE
 	and	t0, t0, t1
 	csrw	sstatus, t0
 
 	/* Restore exception program counter */
 	ld	t0, (TF_SEPC)(sp)
 	csrw	sepc, t0
 
 	/* Restore the registers */
 	ld	t0, (TF_T + 0 * 8)(sp)
 	ld	t1, (TF_T + 1 * 8)(sp)
 	ld	t2, (TF_T + 2 * 8)(sp)
 	ld	t3, (TF_T + 3 * 8)(sp)
 	ld	t4, (TF_T + 4 * 8)(sp)
 	ld	t5, (TF_T + 5 * 8)(sp)
 	ld	t6, (TF_T + 6 * 8)(sp)
 
 	ld	s0, (TF_S + 0 * 8)(sp)
 	ld	s1, (TF_S + 1 * 8)(sp)
 	ld	s2, (TF_S + 2 * 8)(sp)
 	ld	s3, (TF_S + 3 * 8)(sp)
 	ld	s4, (TF_S + 4 * 8)(sp)
 	ld	s5, (TF_S + 5 * 8)(sp)
 	ld	s6, (TF_S + 6 * 8)(sp)
 	ld	s7, (TF_S + 7 * 8)(sp)
 	ld	s8, (TF_S + 8 * 8)(sp)
 	ld	s9, (TF_S + 9 * 8)(sp)
 	ld	s10, (TF_S + 10 * 8)(sp)
 	ld	s11, (TF_S + 11 * 8)(sp)
 
 	ld	a0, (TF_A + 0 * 8)(sp)
 	ld	a1, (TF_A + 1 * 8)(sp)
 	ld	a2, (TF_A + 2 * 8)(sp)
 	ld	a3, (TF_A + 3 * 8)(sp)
 	ld	a4, (TF_A + 4 * 8)(sp)
 	ld	a5, (TF_A + 5 * 8)(sp)
 	ld	a6, (TF_A + 6 * 8)(sp)
 	ld	a7, (TF_A + 7 * 8)(sp)
 
 	/* Load user ra and sp */
 	ld	ra, (TF_RA)(sp)
 
 	/*
 	 * Store our pcpup on stack, we will load it back
 	 * on kernel mode trap.
 	 */
 	sd	gp, (TF_SIZE)(sp)
 	ld	gp, (TF_GP)(sp)
 
 	/* Save kernel stack so we can use it doing a user trap */
 	addi	sp, sp, TF_SIZE
 	csrw	sscratch, sp
 
 	/* Load user stack */
 	ld	sp, (TF_SP - TF_SIZE)(sp)
 
 	sret
 END(fork_trampoline)
 
 ENTRY(savectx)
 	/* Store ra, sp and the callee-saved registers */
 	sd	ra, (PCB_RA)(a0)
 	sd	sp, (PCB_SP)(a0)
 	sd	tp, (PCB_TP)(a0)
 
 	/* s[0-11] */
 	sd	s0, (PCB_S + 0 * 8)(a0)
 	sd	s1, (PCB_S + 1 * 8)(a0)
 	sd	s2, (PCB_S + 2 * 8)(a0)
 	sd	s3, (PCB_S + 3 * 8)(a0)
 	sd	s4, (PCB_S + 4 * 8)(a0)
 	sd	s5, (PCB_S + 5 * 8)(a0)
 	sd	s6, (PCB_S + 6 * 8)(a0)
 	sd	s7, (PCB_S + 7 * 8)(a0)
 	sd	s8, (PCB_S + 8 * 8)(a0)
 	sd	s9, (PCB_S + 9 * 8)(a0)
 	sd	s10, (PCB_S + 10 * 8)(a0)
 	sd	s11, (PCB_S + 11 * 8)(a0)
 
 #ifdef FPE
 	__fpe_state_save a0
 #endif
 	ret
 END(savectx)
Index: head/sys/riscv/riscv/vm_machdep.c
===================================================================
--- head/sys/riscv/riscv/vm_machdep.c	(revision 344107)
+++ head/sys/riscv/riscv/vm_machdep.c	(revision 344108)
@@ -1,275 +1,272 @@
 /*-
  * Copyright (c) 2015-2018 Ruslan Bukin <br@bsdpad.com>
  * All rights reserved.
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Portions of this software were developed by the University of Cambridge
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/limits.h>
 #include <sys/proc.h>
 #include <sys/sf_buf.h>
 #include <sys/signal.h>
 #include <sys/unistd.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 
 #include <machine/riscvreg.h>
 #include <machine/cpu.h>
 #include <machine/pcb.h>
 #include <machine/frame.h>
 #include <machine/sbi.h>
 
 #if __riscv_xlen == 64
 #define	TP_OFFSET	16	/* sizeof(struct tcb) */
 #endif
 
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
  * ready to run and return to user mode.
  */
 void
 cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 {
 	struct pcb *pcb2;
 	struct trapframe *tf;
 	register_t val;
 
 	if ((flags & RFPROC) == 0)
 		return;
 
 	if (td1 == curthread) {
 		/*
 		 * Save the tp. These normally happen in cpu_switch,
 		 * but if userland changes this then forks this may
 		 * not have happened.
 		 */
 		__asm __volatile("mv %0, tp" : "=&r"(val));
 		td1->td_pcb->pcb_tp = val;
 
 		/* RISCVTODO: save the FPU state here */
 	}
 
 	pcb2 = (struct pcb *)(td2->td_kstack +
 	    td2->td_kstack_pages * PAGE_SIZE) - 1;
 
 	td2->td_pcb = pcb2;
 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
 
-	td2->td_pcb->pcb_l1addr =
-	    vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1);
-
 	tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1);
 	bcopy(td1->td_frame, tf, sizeof(*tf));
 
 	/* Clear syscall error flag */
 	tf->tf_t[0] = 0;
 
 	/* Arguments for child */
 	tf->tf_a[0] = 0;
 	tf->tf_a[1] = 0;
 	tf->tf_sstatus |= (SSTATUS_SPIE); /* Enable interrupts. */
 	tf->tf_sstatus &= ~(SSTATUS_SPP); /* User mode. */
 
 	td2->td_frame = tf;
 
 	/* Set the return value registers for fork() */
 	td2->td_pcb->pcb_s[0] = (uintptr_t)fork_return;
 	td2->td_pcb->pcb_s[1] = (uintptr_t)td2;
 	td2->td_pcb->pcb_ra = (uintptr_t)fork_trampoline;
 	td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame;
 
 	/* Setup to release spin count in fork_exit(). */
 	td2->td_md.md_spinlock_count = 1;
 	td2->td_md.md_saved_sstatus_ie = (SSTATUS_SIE);
 }
 
 void
 cpu_reset(void)
 {
 
 	sbi_shutdown();
 
 	while(1);
 }
 
 void
 cpu_thread_swapin(struct thread *td)
 {
 }
 
 void
 cpu_thread_swapout(struct thread *td)
 {
 }
 
 void
 cpu_set_syscall_retval(struct thread *td, int error)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 
 	switch (error) {
 	case 0:
 		frame->tf_a[0] = td->td_retval[0];
 		frame->tf_a[1] = td->td_retval[1];
 		frame->tf_t[0] = 0;		/* syscall succeeded */
 		break;
 	case ERESTART:
 		frame->tf_sepc -= 4;		/* prev instruction */
 		break;
 	case EJUSTRETURN:
 		break;
 	default:
 		frame->tf_a[0] = error;
 		frame->tf_t[0] = 1;		/* syscall error */
 		break;
 	}
 }
 
 /*
  * Initialize machine state, mostly pcb and trap frame for a new
  * thread, about to return to userspace.  Put enough state in the new
  * thread's PCB to get it to go back to the fork_return(), which
  * finalizes the thread state and handles peculiarities of the first
  * return to userspace for the new thread.
  */
 void
 cpu_copy_thread(struct thread *td, struct thread *td0)
 {
 
 	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
 	bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
 
 	td->td_pcb->pcb_s[0] = (uintptr_t)fork_return;
 	td->td_pcb->pcb_s[1] = (uintptr_t)td;
 	td->td_pcb->pcb_ra = (uintptr_t)fork_trampoline;
 	td->td_pcb->pcb_sp = (uintptr_t)td->td_frame;
 
 	/* Setup to release spin count in fork_exit(). */
 	td->td_md.md_spinlock_count = 1;
 	td->td_md.md_saved_sstatus_ie = (SSTATUS_SIE);
 }
 
 /*
  * Set that machine state for performing an upcall that starts
  * the entry function with the given argument.
  */
 void
 cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg,
 	stack_t *stack)
 {
 	struct trapframe *tf;
 
 	tf = td->td_frame;
 
 	tf->tf_sp = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size);
 	tf->tf_sepc = (register_t)entry;
 	tf->tf_a[0] = (register_t)arg;
 }
 
 int
 cpu_set_user_tls(struct thread *td, void *tls_base)
 {
 	struct pcb *pcb;
 
 	if ((uintptr_t)tls_base >= VM_MAXUSER_ADDRESS)
 		return (EINVAL);
 
 	pcb = td->td_pcb;
 	pcb->pcb_tp = (register_t)tls_base + TP_OFFSET;
 	if (td == curthread)
 		__asm __volatile("mv tp, %0" :: "r"(pcb->pcb_tp));
 
 	return (0);
 }
 
 void
 cpu_thread_exit(struct thread *td)
 {
 }
 
 void
 cpu_thread_alloc(struct thread *td)
 {
 
 	td->td_pcb = (struct pcb *)(td->td_kstack +
 	    td->td_kstack_pages * PAGE_SIZE) - 1;
 	td->td_frame = (struct trapframe *)STACKALIGN(
 	    (caddr_t)td->td_pcb - 8 - sizeof(struct trapframe));
 }
 
 void
 cpu_thread_free(struct thread *td)
 {
 }
 
 void
 cpu_thread_clean(struct thread *td)
 {
 }
 
 /*
  * Intercept the return address from a freshly forked process that has NOT
  * been scheduled yet.
  *
  * This is needed to make kernel threads stay in kernel mode.
  */
 void
 cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg)
 {
 
 	td->td_pcb->pcb_s[0] = (uintptr_t)func;
 	td->td_pcb->pcb_s[1] = (uintptr_t)arg;
 	td->td_pcb->pcb_ra = (uintptr_t)fork_trampoline;
 	td->td_pcb->pcb_sp = (uintptr_t)td->td_frame;
 }
 
 void
 cpu_exit(struct thread *td)
 {
 }
 
 void
 swi_vm(void *v)
 {
 
 	/* Nothing to do here - busdma bounce buffers are not implemented. */
 }