Index: head/sys/riscv/include/pcb.h =================================================================== --- head/sys/riscv/include/pcb.h (revision 344107) +++ head/sys/riscv/include/pcb.h (revision 344108) @@ -1,69 +1,68 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCB_H_ #define _MACHINE_PCB_H_ #ifndef LOCORE struct trapframe; struct pcb { uint64_t pcb_ra; /* Return address */ uint64_t pcb_sp; /* Stack pointer */ uint64_t pcb_gp; /* Global pointer */ uint64_t pcb_tp; /* Thread pointer */ uint64_t pcb_t[7]; /* Temporary registers */ uint64_t pcb_s[12]; /* Saved registers */ uint64_t pcb_a[8]; /* Argument registers */ uint64_t pcb_x[32][2]; /* Floating point registers */ uint64_t pcb_fcsr; /* Floating point control reg */ uint64_t pcb_fpflags; /* Floating point flags */ #define PCB_FP_STARTED 0x1 #define PCB_FP_USERMASK 0x1 uint64_t pcb_sepc; /* Supervisor exception pc */ - vm_offset_t pcb_l1addr; /* L1 page tables base address */ vm_offset_t pcb_onfault; /* Copyinout fault handler */ }; #ifdef _KERNEL void makectx(struct trapframe *tf, struct pcb *pcb); int savectx(struct pcb *pcb) __returns_twice; #endif #endif /* !LOCORE */ #endif /* !_MACHINE_PCB_H_ */ Index: head/sys/riscv/include/pcpu.h =================================================================== --- head/sys/riscv/include/pcpu.h (revision 344107) +++ head/sys/riscv/include/pcpu.h (revision 344108) @@ -1,87 +1,88 @@ /*- * Copyright (c) 1999 Luoqi Chen * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: FreeBSD: src/sys/i386/include/globaldata.h,v 1.27 2001/04/27 * $FreeBSD$ */ #ifndef _MACHINE_PCPU_H_ #define _MACHINE_PCPU_H_ #include #include #define ALT_STACK_SIZE 128 #define PCPU_MD_FIELDS \ + struct pmap *pc_curpmap; /* Currently active pmap */ \ uint32_t pc_pending_ipis; /* IPIs pending to this CPU */ \ char __pad[61] #ifdef _KERNEL struct pcb; struct pcpu; extern struct pcpu *pcpup; static inline struct pcpu * get_pcpu(void) { struct pcpu *pcpu; __asm __volatile("mv %0, gp" : "=&r"(pcpu)); return (pcpu); } static inline struct thread * get_curthread(void) { struct thread *td; __asm __volatile("ld %0, 0(gp)" : "=&r"(td)); return (td); } #define curthread get_curthread() #define PCPU_GET(member) (get_pcpu()->pc_ ## member) #define PCPU_ADD(member, value) (get_pcpu()->pc_ ## member += (value)) #define PCPU_INC(member) PCPU_ADD(member, 1) #define PCPU_PTR(member) (&get_pcpu()->pc_ ## member) #define PCPU_SET(member,value) (get_pcpu()->pc_ ## member = (value)) #endif /* _KERNEL */ #endif /* !_MACHINE_PCPU_H_ */ Index: head/sys/riscv/include/pmap.h =================================================================== --- head/sys/riscv/include/pmap.h (revision 344107) +++ head/sys/riscv/include/pmap.h (revision 344108) @@ -1,166 +1,173 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ #include #ifndef LOCORE #include +#include #include #include #include #ifdef _KERNEL #define vtophys(va) pmap_kextract((vm_offset_t)(va)) #endif #define pmap_page_get_memattr(m) ((m)->md.pv_memattr) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); /* * Pmap stuff */ struct md_page { TAILQ_HEAD(,pv_entry) pv_list; int pv_gen; vm_memattr_t pv_memattr; }; /* * This structure is used to hold a virtual<->physical address * association and is used mostly by bootstrap code */ struct pv_addr { SLIST_ENTRY(pv_addr) pv_list; vm_offset_t pv_va; vm_paddr_t pv_pa; }; struct pmap { struct mtx pm_mtx; struct pmap_statistics pm_stats; /* pmap statictics */ pd_entry_t *pm_l1; + u_long pm_satp; /* value for SATP register */ + cpuset_t pm_active; /* active on cpus */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ struct vm_radix pm_root; }; typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* * pv_entries are allocated in chunks per-process. This avoids the * need to track per-pmap assignments. */ #define _NPCM 3 #define _NPCPV 168 struct pv_chunk { struct pmap * pc_pmap; TAILQ_ENTRY(pv_chunk) pc_list; uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */ TAILQ_ENTRY(pv_chunk) pc_lru; struct pv_entry pc_pventry[_NPCPV]; }; typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #define pmap_kernel() kernel_pmap #define PMAP_ASSERT_LOCKED(pmap) \ mtx_assert(&(pmap)->pm_mtx, MA_OWNED) #define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) #define PMAP_LOCK_ASSERT(pmap, type) \ mtx_assert(&(pmap)->pm_mtx, (type)) #define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) #define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ NULL, MTX_DEF | MTX_DUPOK) #define PMAP_OWNED(pmap) mtx_owned(&(pmap)->pm_mtx) #define PMAP_MTX(pmap) (&(pmap)->pm_mtx) #define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) #define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) #define PHYS_AVAIL_SIZE 10 extern vm_paddr_t phys_avail[]; extern vm_paddr_t dump_avail[]; extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; /* * Macros to test if a mapping is mappable with an L1 Section mapping * or an L2 Large Page mapping. */ #define L1_MAPPABLE_P(va, pa, size) \ ((((va) | (pa)) & L1_OFFSET) == 0 && (size) >= L1_SIZE) +struct thread; + +void pmap_activate_boot(pmap_t); +void pmap_activate_sw(struct thread *); void pmap_bootstrap(vm_offset_t, vm_paddr_t, vm_size_t); void pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t); vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); void pmap_kremove_device(vm_offset_t, vm_size_t); bool pmap_ps_enabled(pmap_t); void *pmap_mapdev(vm_offset_t, vm_size_t); void *pmap_mapbios(vm_paddr_t, vm_size_t); void pmap_unmapdev(vm_offset_t, vm_size_t); void pmap_unmapbios(vm_offset_t, vm_size_t); boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); bool pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t **, pd_entry_t **, pt_entry_t **); #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) int pmap_fault_fixup(pmap_t, vm_offset_t, vm_prot_t); #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ Index: head/sys/riscv/riscv/genassym.c =================================================================== --- head/sys/riscv/riscv/genassym.c (revision 344107) +++ head/sys/riscv/riscv/genassym.c (revision 344108) @@ -1,101 +1,100 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include ASSYM(KERNBASE, KERNBASE); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); -ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); ASSYM(PCB_GP, offsetof(struct pcb, pcb_gp)); ASSYM(PCB_TP, offsetof(struct pcb, pcb_tp)); ASSYM(PCB_T, offsetof(struct pcb, pcb_t)); ASSYM(PCB_S, offsetof(struct pcb, pcb_s)); ASSYM(PCB_A, offsetof(struct pcb, pcb_a)); ASSYM(PCB_X, offsetof(struct pcb, pcb_x)); ASSYM(PCB_FCSR, offsetof(struct pcb, pcb_fcsr)); ASSYM(SF_UC, offsetof(struct sigframe, sf_uc)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TF_SIZE, sizeof(struct trapframe)); ASSYM(TF_RA, offsetof(struct trapframe, tf_ra)); ASSYM(TF_SP, offsetof(struct trapframe, tf_sp)); ASSYM(TF_GP, offsetof(struct trapframe, tf_gp)); ASSYM(TF_TP, offsetof(struct trapframe, tf_tp)); ASSYM(TF_T, offsetof(struct trapframe, tf_t)); ASSYM(TF_S, offsetof(struct trapframe, tf_s)); ASSYM(TF_A, offsetof(struct trapframe, tf_a)); ASSYM(TF_SEPC, offsetof(struct trapframe, tf_sepc)); ASSYM(TF_STVAL, offsetof(struct trapframe, tf_stval)); ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause)); ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus)); Index: head/sys/riscv/riscv/machdep.c =================================================================== --- head/sys/riscv/riscv/machdep.c (revision 344107) +++ head/sys/riscv/riscv/machdep.c (revision 344108) @@ -1,895 +1,891 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2015-2017 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FPE #include #endif #ifdef FDT #include #include #endif struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define DTB_SIZE_MAX (1024 * 1024) #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ extern int *end; extern int *initstack_end; struct pcpu *pcpup; uintptr_t mcall_trap(uintptr_t mcause, uintptr_t* regs); uintptr_t mcall_trap(uintptr_t mcause, uintptr_t* regs) { return (0); } static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sepc = frame->tf_sepc; regs->sstatus = frame->tf_sstatus; regs->ra = frame->tf_ra; regs->sp = frame->tf_sp; regs->gp = frame->tf_gp; regs->tp = frame->tf_tp; memcpy(regs->t, frame->tf_t, sizeof(regs->t)); memcpy(regs->s, frame->tf_s, sizeof(regs->s)); memcpy(regs->a, frame->tf_a, sizeof(regs->a)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sepc = regs->sepc; frame->tf_ra = regs->ra; frame->tf_sp = regs->sp; frame->tf_gp = regs->gp; frame->tf_tp = regs->tp; memcpy(frame->tf_t, regs->t, sizeof(frame->tf_t)); memcpy(frame->tf_s, regs->s, sizeof(frame->tf_s)); memcpy(frame->tf_a, regs->a, sizeof(frame->tf_a)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef FPE struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running FPE instructions we will * need to save the state to memcpy it below. */ if (td == curthread) fpe_state_save(td); memcpy(regs->fp_x, pcb->pcb_x, sizeof(regs->fp_x)); regs->fp_fcsr = pcb->pcb_fcsr; } else #endif memset(regs, 0, sizeof(*regs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef FPE struct trapframe *frame; struct pcb *pcb; frame = td->td_frame; pcb = td->td_pcb; memcpy(pcb->pcb_x, regs->fp_x, sizeof(regs->fp_x)); pcb->pcb_fcsr = regs->fp_fcsr; pcb->pcb_fpflags |= PCB_FP_STARTED; frame->tf_sstatus &= ~SSTATUS_FS_MASK; frame->tf_sstatus |= SSTATUS_FS_CLEAN; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_sepc = addr; return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (EOPNOTSUPP); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (EOPNOTSUPP); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; struct pcb *pcb; tf = td->td_frame; pcb = td->td_pcb; memset(tf, 0, sizeof(struct trapframe)); tf->tf_a[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_ra = imgp->entry_addr; tf->tf_sepc = imgp->entry_addr; pcb->pcb_fpflags &= ~PCB_FP_STARTED; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct gpregs *)0)->gp_a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct gpregs *)0)->gp_s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct gpregs *)0)->gp_t); CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct reg *)0)->a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct reg *)0)->s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct reg *)0)->t); /* Support for FDT configurations only. */ CTASSERT(FDT); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; memcpy(mcp->mc_gpregs.gp_t, tf->tf_t, sizeof(mcp->mc_gpregs.gp_t)); memcpy(mcp->mc_gpregs.gp_s, tf->tf_s, sizeof(mcp->mc_gpregs.gp_s)); memcpy(mcp->mc_gpregs.gp_a, tf->tf_a, sizeof(mcp->mc_gpregs.gp_a)); if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_a[0] = 0; mcp->mc_gpregs.gp_t[0] = 0; /* clear syscall error */ } mcp->mc_gpregs.gp_ra = tf->tf_ra; mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_gp = tf->tf_gp; mcp->mc_gpregs.gp_tp = tf->tf_tp; mcp->mc_gpregs.gp_sepc = tf->tf_sepc; mcp->mc_gpregs.gp_sstatus = tf->tf_sstatus; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf; tf = td->td_frame; memcpy(tf->tf_t, mcp->mc_gpregs.gp_t, sizeof(tf->tf_t)); memcpy(tf->tf_s, mcp->mc_gpregs.gp_s, sizeof(tf->tf_s)); memcpy(tf->tf_a, mcp->mc_gpregs.gp_a, sizeof(tf->tf_a)); tf->tf_ra = mcp->mc_gpregs.gp_ra; tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_gp = mcp->mc_gpregs.gp_gp; tf->tf_sepc = mcp->mc_gpregs.gp_sepc; tf->tf_sstatus = mcp->mc_gpregs.gp_sstatus; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef FPE struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; KASSERT(td->td_pcb == curpcb, ("Invalid fpe pcb")); if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running FPE instructions we will * need to save the state to memcpy it below. */ fpe_state_save(td); KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Non-userspace FPE flags set in get_fpcontext")); memcpy(mcp->mc_fpregs.fp_x, curpcb->pcb_x, sizeof(mcp->mc_fpregs)); mcp->mc_fpregs.fp_fcsr = curpcb->pcb_fcsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef FPE struct pcb *curpcb; critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* FPE usage is enabled, override registers. */ memcpy(curpcb->pcb_x, mcp->mc_fpregs.fp_x, sizeof(mcp->mc_fpregs)); curpcb->pcb_fcsr = mcp->mc_fpregs.fp_fcsr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK; } critical_exit(); #endif } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "fence \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { intr_disable(); for (;;) __asm __volatile("wfi"); } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) { td->td_md.md_spinlock_count = 1; td->td_md.md_saved_sstatus_ie = intr_disable(); } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t sstatus_ie; td = curthread; critical_exit(); sstatus_ie = td->td_md.md_saved_sstatus_ie; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(sstatus_ie); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { uint64_t sstatus; ucontext_t uc; int error; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. * Supervisor interrupts in user mode are always enabled. */ sstatus = uc.uc_mcontext.mc_gpregs.gp_sstatus; if ((sstatus & SSTATUS_SPP) != 0) return (EINVAL); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { memcpy(pcb->pcb_t, tf->tf_t, sizeof(tf->tf_t)); memcpy(pcb->pcb_s, tf->tf_s, sizeof(tf->tf_s)); memcpy(pcb->pcb_a, tf->tf_a, sizeof(tf->tf_a)); pcb->pcb_ra = tf->tf_ra; pcb->pcb_sp = tf->tf_sp; pcb->pcb_gp = tf->tf_gp; pcb->pcb_tp = tf->tf_tp; pcb->pcb_sepc = tf->tf_sepc; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe *fp, frame; struct sysentvec *sysent; struct trapframe *tf; struct sigacts *psp; struct thread *td; struct proc *p; int onstack; int sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ bzero(&frame, sizeof(frame)); get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack = td->td_sigstk; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ? (onstack ? SS_ONSTACK : 0) : SS_DISABLE; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_a[0] = sig; tf->tf_a[1] = (register_t)&fp->sf_si; tf->tf_a[2] = (register_t)&fp->sf_uc; tf->tf_sepc = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_ra = (register_t)sysent->sv_sigcode_base; else tf->tf_ra = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_sepc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_pcb->pcb_fpflags = 0; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; printf("physmap[%d] = 0x%016lx\n", insert_idx, base); printf("physmap[%d] = 0x%016lx\n", insert_idx + 1, base + length); return (1); } #ifdef FDT static void try_load_dtb(caddr_t kmdp, vm_offset_t dtbp) { #if defined(FDT_DTB_STATIC) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif static void cache_setup(void) { /* TODO */ } /* * Fake up a boot descriptor table. * RISCVTODO: This needs to be done via loader (when it's available). */ vm_offset_t fake_preload_metadata(struct riscv_bootparams *rvbp __unused) { static uint32_t fake_preload[35]; #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i; i = 0; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i++], "kernel"); i += 1; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf64 kernel") + 1; strcpy((char*)&fake_preload[i++], "elf64 kernel"); i += 3; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); *(vm_offset_t *)&fake_preload[i++] = (vm_offset_t)(KERNBASE + KERNENTRY); i += 1; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = (vm_offset_t)&end - (vm_offset_t)(KERNBASE + KERNENTRY); i += 1; #ifdef DDB #if 0 /* RISCVTODO */ if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); db_fetch_ksymtab(zstart, zend); } else #endif #endif lastaddr = (vm_offset_t)&end; fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; return (lastaddr); } void initriscv(struct riscv_bootparams *rvbp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_offset_t rstart, rend; vm_offset_t s, e; int mem_regions_sz; vm_offset_t lastaddr; vm_size_t kernlen; caddr_t kmdp; int i; /* Set the module data location */ lastaddr = fake_preload_metadata(rvbp); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = RB_VERBOSE | RB_SINGLE; boothowto = RB_VERBOSE; kern_envp = NULL; #ifdef FDT try_load_dtb(kmdp, rvbp->dtbp_virt); #endif /* Load the physical memory ranges */ physmap_idx = 0; #ifdef FDT /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); s = rvbp->dtbp_phys; e = s + DTB_SIZE_MAX; for (i = 0; i < mem_regions_sz; i++) { rstart = mem_regions[i].mr_start; rend = (mem_regions[i].mr_start + mem_regions[i].mr_size); if ((rstart < s) && (rend > e)) { /* Exclude DTB region. */ add_physmap_entry(rstart, (s - rstart), physmap, &physmap_idx); add_physmap_entry(e, (rend - e), physmap, &physmap_idx); } else { add_physmap_entry(mem_regions[i].mr_start, mem_regions[i].mr_size, physmap, &physmap_idx); } } #endif /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* Set the pcpu pointer */ __asm __volatile("mv gp, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ kernlen = (lastaddr - KERNBASE); pmap_bootstrap(rvbp->kern_l1pt, mem_regions[0].mr_start, kernlen); cninit(); init_proc0(rvbp->kern_stack); - /* set page table base register for thread0 */ - thread0.td_pcb->pcb_l1addr = \ - (rvbp->kern_l1pt - KERNBASE + rvbp->kern_phys); - msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); kdb_init(); early_boot = 0; } #undef bzero void bzero(void *buf, size_t len) { uint8_t *p; p = buf; while(len-- > 0) *p++ = 0; } Index: head/sys/riscv/riscv/mp_machdep.c =================================================================== --- head/sys/riscv/riscv/mp_machdep.c (revision 344107) +++ head/sys/riscv/riscv/mp_machdep.c (revision 344108) @@ -1,456 +1,460 @@ /*- * Copyright (c) 2015 The FreeBSD Foundation * Copyright (c) 2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_kstack_pages.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #ifdef FDT #include #include #endif boolean_t ofw_cpu_reg(phandle_t node, u_int, cell_t *); extern struct pcpu __pcpu[]; uint32_t __riscv_boot_ap[MAXCPU]; static enum { CPUS_UNKNOWN, #ifdef FDT CPUS_FDT, #endif } cpu_enum_method; static device_identify_t riscv64_cpu_identify; static device_probe_t riscv64_cpu_probe; static device_attach_t riscv64_cpu_attach; static int ipi_handler(void *); struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; #ifdef INVARIANTS static uint32_t cpu_reg[MAXCPU][2]; #endif static device_t cpu_list[MAXCPU]; void mpentry(unsigned long cpuid); void init_secondary(uint64_t); uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16); /* Set to 1 once we're ready to let the APs out of the pen. */ volatile int aps_ready = 0; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; static device_method_t riscv64_cpu_methods[] = { /* Device interface */ DEVMETHOD(device_identify, riscv64_cpu_identify), DEVMETHOD(device_probe, riscv64_cpu_probe), DEVMETHOD(device_attach, riscv64_cpu_attach), DEVMETHOD_END }; static devclass_t riscv64_cpu_devclass; static driver_t riscv64_cpu_driver = { "riscv64_cpu", riscv64_cpu_methods, 0 }; DRIVER_MODULE(riscv64_cpu, cpu, riscv64_cpu_driver, riscv64_cpu_devclass, 0, 0); static void riscv64_cpu_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "riscv64_cpu", -1) != NULL) return; if (BUS_ADD_CHILD(parent, 0, "riscv64_cpu", -1) == NULL) device_printf(parent, "add child failed\n"); } static int riscv64_cpu_probe(device_t dev) { u_int cpuid; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); device_quiet(dev); return (0); } static int riscv64_cpu_attach(device_t dev) { const uint32_t *reg; size_t reg_size; u_int cpuid; int i; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); KASSERT(cpu_list[cpuid] == NULL, ("Already have cpu %u", cpuid)); reg = cpu_get_cpuid(dev, ®_size); if (reg == NULL) return (EINVAL); if (bootverbose) { device_printf(dev, "register <"); for (i = 0; i < reg_size; i++) printf("%s%x", (i == 0) ? "" : " ", reg[i]); printf(">\n"); } /* Set the device to start it later */ cpu_list[cpuid] = dev; return (0); } static void release_aps(void *dummy __unused) { u_long mask; int cpu, i; if (mp_ncpus == 1) return; /* Setup the IPI handler */ riscv_setup_ipihandler(ipi_handler); atomic_store_rel_int(&aps_ready, 1); /* Wake up the other CPUs */ mask = 0; for (i = 1; i < mp_ncpus; i++) mask |= (1 << i); sbi_send_ipi(&mask); printf("Release APs\n"); for (i = 0; i < 2000; i++) { if (smp_started) { for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) continue; } return; } DELAY(1000); } printf("APs not started\n"); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); void init_secondary(uint64_t cpu) { struct pcpu *pcpup; /* Setup the pcpu pointer */ pcpup = &__pcpu[cpu]; __asm __volatile("mv gp, %0" :: "r"(pcpup)); /* Workaround: make sure wfi doesn't halt the hart */ csr_set(sie, SIE_SSIE); csr_set(sip, SIE_SSIE); /* Spin until the BSP releases the APs */ while (!aps_ready) __asm __volatile("wfi"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; /* * Identify current CPU. This is necessary to setup * affinity registers and to provide support for * runtime chip identification. */ identify_cpu(); /* Enable software interrupts */ riscv_unmask_ipi(); /* Start per-CPU event timers. */ cpu_initclocks_ap(); /* Enable external (PLIC) interrupts */ csr_set(sie, SIE_SEIE); + + /* Activate process 0's pmap. */ + pmap_activate_boot(vmspace_pmap(proc0.p_vmspace)); mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } mtx_unlock_spin(&ap_boot_mtx); /* Enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to init_secondary"); /* NOTREACHED */ } static int ipi_handler(void *arg) { u_int ipi_bitmap; u_int cpu, ipi; int bit; sbi_clear_ipi(); cpu = PCPU_GET(cpuid); mb(); ipi_bitmap = atomic_readandclear_int(PCPU_PTR(pending_ipis)); if (ipi_bitmap == 0) return (FILTER_HANDLED); while ((bit = ffs(ipi_bitmap))) { bit = (bit - 1); ipi = (1 << bit); ipi_bitmap &= ~ipi; mb(); switch (ipi) { case IPI_AST: CTR0(KTR_SMP, "IPI_AST"); break; case IPI_PREEMPT: CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); break; case IPI_RENDEZVOUS: CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); break; case IPI_STOP: case IPI_STOP_HARD: CTR0(KTR_SMP, (ipi == IPI_STOP) ? "IPI_STOP" : "IPI_STOP_HARD"); savectx(&stoppcbs[cpu]); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); CTR0(KTR_SMP, "IPI_STOP (restart)"); /* * The kernel debugger might have set a breakpoint, * so flush the instruction cache. */ fence_i(); break; case IPI_HARDCLOCK: CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); break; default: panic("Unknown IPI %#0x on cpu %d", ipi, curcpu); } } return (FILTER_HANDLED); } struct cpu_group * cpu_topo(void) { return (smp_topo_none()); } /* Determine if we running MP machine */ int cpu_mp_probe(void) { return (mp_ncpus > 1); } #ifdef FDT static boolean_t cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t target_cpu; struct pcpu *pcpup; /* Check we are able to start this cpu */ if (id > mp_maxid) return (0); KASSERT(id < MAXCPU, ("Too many CPUs")); KASSERT(addr_size == 1 || addr_size == 2, ("Invalid register size")); #ifdef INVARIANTS cpu_reg[id][0] = reg[0]; if (addr_size == 2) cpu_reg[id][1] = reg[1]; #endif target_cpu = reg[0]; if (addr_size == 2) { target_cpu <<= 32; target_cpu |= reg[1]; } pcpup = &__pcpu[id]; /* We are already running on cpu 0 */ if (id == 0) { return (1); } pcpu_init(pcpup, id, sizeof(struct pcpu)); dpcpu[id - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[id - 1], id); printf("Starting CPU %u (%lx)\n", id, target_cpu); __riscv_boot_ap[id] = 1; CPU_SET(id, &all_cpus); return (1); } #endif /* Initialize and fire up non-boot processors */ void cpu_mp_start(void) { mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); CPU_SET(0, &all_cpus); switch(cpu_enum_method) { #ifdef FDT case CPUS_FDT: ofw_cpu_early_foreach(cpu_init_fdt, true); break; #endif case CPUS_UNKNOWN: break; } } /* Introduce rest of cores to the world */ void cpu_mp_announce(void) { } void cpu_mp_setmaxid(void) { #ifdef FDT int cores; cores = ofw_cpu_early_foreach(NULL, false); if (cores > 0) { cores = MIN(cores, MAXCPU); if (bootverbose) printf("Found %d CPUs in the device tree\n", cores); mp_ncpus = cores; mp_maxid = cores - 1; cpu_enum_method = CPUS_FDT; return; } #endif if (bootverbose) printf("No CPU data, limiting to 1 core\n"); mp_ncpus = 1; mp_maxid = 0; } Index: head/sys/riscv/riscv/pmap.c =================================================================== --- head/sys/riscv/riscv/pmap.c (revision 344107) +++ head/sys/riscv/riscv/pmap.c (revision 344108) @@ -1,4398 +1,4440 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Portions of this software were developed by Andrew Turner under * sponsorship from The FreeBSD Foundation. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include +#include #include #include -#include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUL1E (Ln_ENTRIES * Ln_ENTRIES) #define NUL2E (Ln_ENTRIES * NUL1E) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pmap_l2_pindex(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) /* The list of all the user pmaps */ LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps = LIST_HEAD_INITIALIZER(); struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t kernel_vm_end = 0; vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ /* This code assumes all L1 DMAP entries will be used */ CTASSERT((DMAP_MIN_ADDRESS & ~L1_OFFSET) == DMAP_MIN_ADDRESS); CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_ADDRESS); static struct rwlock_padalign pvh_global_lock; static struct mtx_padalign allpmaps_lock; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int superpages_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, CTLFLAG_RDTUN, &superpages_enabled, 0, "Enable support for transparent superpages"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0, "2MB page mapping counters"); static u_long pmap_l2_demotions; SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD, &pmap_l2_demotions, 0, "2MB page demotions"); static u_long pmap_l2_mappings; SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD, &pmap_l2_mappings, 0, "2MB page mappings"); static u_long pmap_l2_p_failures; SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_l2_p_failures, 0, "2MB page promotion failures"); static u_long pmap_l2_promotions; SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, &pmap_l2_promotions, 0, "2MB page promotions"); /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static struct md_page *pv_table; static struct md_page pv_dummy; /* * Internal flags for pmap_enter()'s helper functions. */ #define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ #define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static bool pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va); static bool pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, struct rwlock **lockp); static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, vm_page_t m, struct rwlock **lockp); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); #define pmap_clear(pte) pmap_store(pte, 0) #define pmap_clear_bits(pte, bits) atomic_clear_64(pte, bits) #define pmap_load_store(pte, entry) atomic_swap_64(pte, entry) #define pmap_load_clear(pte) pmap_load_store(pte, 0) #define pmap_load(pte) atomic_load_64(pte) #define pmap_store(pte, entry) atomic_store_64(pte, entry) #define pmap_store_bits(pte, bits) atomic_set_64(pte, bits) /********************/ /* Inline functions */ /********************/ static __inline void pagecopy(void *s, void *d) { memcpy(d, s, PAGE_SIZE); } static __inline void pagezero(void *p) { bzero(p, PAGE_SIZE); } #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) #define PTE_TO_PHYS(pte) ((pte >> PTE_PPN0_S) * PAGE_SIZE) static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_l1[pmap_l1_index(va)]); } static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) { vm_paddr_t phys; pd_entry_t *l2; phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l2[pmap_l2_index(va)]); } static __inline pd_entry_t * pmap_l2(pmap_t pmap, vm_offset_t va) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); if ((pmap_load(l1) & PTE_V) == 0) return (NULL); if ((pmap_load(l1) & PTE_RX) != 0) return (NULL); return (pmap_l1_to_l2(l1, va)); } static __inline pt_entry_t * pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) { vm_paddr_t phys; pt_entry_t *l3; phys = PTE_TO_PHYS(pmap_load(l2)); l3 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l3[pmap_l3_index(va)]); } static __inline pt_entry_t * pmap_l3(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2; l2 = pmap_l2(pmap, va); if (l2 == NULL) return (NULL); if ((pmap_load(l2) & PTE_V) == 0) return (NULL); if ((pmap_load(l2) & PTE_RX) != 0) return (NULL); return (pmap_l2_to_l3(l2, va)); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } static void pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index, pt_entry_t entry) { struct pmap *user_pmap; pd_entry_t *l1; /* Distribute new kernel L1 entry to all the user pmaps */ if (pmap != kernel_pmap) return; mtx_lock(&allpmaps_lock); LIST_FOREACH(user_pmap, &allpmaps, pm_list) { l1 = &user_pmap->pm_l1[l1index]; pmap_store(l1, entry); } mtx_unlock(&allpmaps_lock); } static pt_entry_t * pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, u_int *l2_slot) { pt_entry_t *l2; pd_entry_t *l1; l1 = (pd_entry_t *)l1pt; *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; /* Check locore has used a table L1 map */ KASSERT((l1[*l1_slot] & PTE_RX) == 0, ("Invalid bootstrap L1 table")); /* Find the address of the L2 table */ l2 = (pt_entry_t *)init_pt_va; *l2_slot = pmap_l2_index(va); return (l2); } static vm_paddr_t pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) { u_int l1_slot, l2_slot; pt_entry_t *l2; u_int ret; l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); /* Check locore has used L2 superpages */ KASSERT((l2[l2_slot] & PTE_RX) != 0, ("Invalid bootstrap L2 table")); /* L2 is superpages */ ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT; ret += (va & L2_OFFSET); return (ret); } static void pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) { vm_offset_t va; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; pt_entry_t entry; pn_t pn; pa = dmap_phys_base = min_pa & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; l1 = (pd_entry_t *)kern_l1; l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); for (; va < DMAP_MAX_ADDRESS && pa < max_pa; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); /* superpages */ pn = (pa / PAGE_SIZE); entry = PTE_KERN; entry |= (pn << PTE_PPN0_S); pmap_store(&l1[l1_slot], entry); } /* Set the upper limit of the DMAP region */ dmap_phys_max = pa; dmap_max_addr = va; sfence_vma(); } static vm_offset_t pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) { vm_offset_t l3pt; pt_entry_t entry; pd_entry_t *l2; vm_paddr_t pa; u_int l2_slot; pn_t pn; KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); l2 = pmap_l2(kernel_pmap, va); l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1)); l2_slot = pmap_l2_index(va); l3pt = l3_start; for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); pa = pmap_early_vtophys(l1pt, l3pt); pn = (pa / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(&l2[l2_slot], entry); l3pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l3_start, 0, l3pt - l3_start); return (l3pt); } /* * Bootstrap the system enough to run with virtual memory. */ void pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot; vm_offset_t freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t end, max_pa, min_pa, pa, start; int i; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); printf("%lx\n", l1pt); printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; PMAP_LOCK_INIT(kernel_pmap); rw_init(&pvh_global_lock, "pmap pv global"); + CPU_FILL(&kernel_pmap->pm_active); + /* Assume the address we were loaded to is a valid physical address. */ min_pa = max_pa = kernstart; /* * Find the minimum physical address. physmap is sorted, * but may contain empty ranges. */ for (i = 0; i < physmap_idx * 2; i += 2) { if (physmap[i] == physmap[i + 1]) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; if (physmap[i + 1] > max_pa) max_pa = physmap[i + 1]; } printf("physmap_idx %lx\n", physmap_idx); printf("min_pa %lx\n", min_pa); printf("max_pa %lx\n", max_pa); /* Create a direct map region early so we can use it for pa -> va */ pmap_bootstrap_dmap(l1pt, min_pa, max_pa); /* * Read the page table to find out what is already mapped. * This assumes we have mapped a block of memory from KERNBASE * using a single L1 entry. */ (void)pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); /* Sanity check the index, KERNBASE should be the first VA */ KASSERT(l2_slot == 0, ("The L2 index is non-zero")); freemempos = roundup2(KERNBASE + kernlen, PAGE_SIZE); /* Create the l3 tables for the early devmap */ freemempos = pmap_bootstrap_l3(l1pt, VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); sfence_vma(); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); /* Allocate dynamic per-cpu area. */ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); msgbufp = (void *)msgbufpv; virtual_avail = roundup2(freemempos, L2_SIZE); virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; kernel_vm_end = virtual_avail; pa = pmap_early_vtophys(l1pt, freemempos); /* Initialize phys_avail. */ for (avail_slot = map_slot = physmem = 0; map_slot < physmap_idx * 2; map_slot += 2) { start = physmap[map_slot]; end = physmap[map_slot + 1]; if (start == end) continue; if (start >= kernstart && end <= pa) continue; if (start < kernstart && end > kernstart) end = kernstart; else if (start < pa && end > pa) start = pa; phys_avail[avail_slot] = start; phys_avail[avail_slot + 1] = end; physmem += (end - start) >> PAGE_SHIFT; avail_slot += 2; if (end != physmap[map_slot + 1] && end > pa) { phys_avail[avail_slot] = pa; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; physmem += (physmap[map_slot + 1] - pa) >> PAGE_SHIFT; avail_slot += 2; } } phys_avail[avail_slot] = 0; phys_avail[avail_slot + 1] = 0; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = atop(phys_avail[avail_slot - 1]); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { vm_size_t s; int i, pv_npg; /* * Initialize the pv chunk and pmap list mutexes. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); /* * Calculate the size of the pv head table for superpages. */ pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE); /* * Allocate memory for the pv head table for superpages. */ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); TAILQ_INIT(&pv_dummy.pv_list); if (superpages_enabled) pagesizes[1] = L2_SIZE; } #ifdef SMP /* * For SMP, these functions have to use IPIs for coherence. * * In general, the calling thread uses a plain fence to order the * writes to the page tables before invoking an SBI callback to invoke * sfence_vma() on remote CPUs. - * - * Since the riscv pmap does not yet have a pm_active field, IPIs are - * sent to all CPUs in the system. */ static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpuset_t mask; sched_pin(); - mask = all_cpus; + mask = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &mask); fence(); - sbi_remote_sfence_vma(mask.__bits, va, 1); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_sfence_vma(mask.__bits, va, 1); sfence_vma_page(va); sched_unpin(); } static void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t mask; sched_pin(); - mask = all_cpus; + mask = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &mask); fence(); - sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1); /* * Might consider a loop of sfence_vma_page() for a small * number of pages in the future. */ sfence_vma(); sched_unpin(); } static void pmap_invalidate_all(pmap_t pmap) { cpuset_t mask; sched_pin(); - mask = all_cpus; + mask = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &mask); - fence(); /* * XXX: The SBI doc doesn't detail how to specify x0 as the * address to perform a global fence. BBL currently treats * all sfence_vma requests as global however. */ - sbi_remote_sfence_vma(mask.__bits, 0, 0); + fence(); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_sfence_vma(mask.__bits, 0, 0); sfence_vma(); sched_unpin(); } #else /* * Normal, non-SMP, invalidation functions. * We inline these within pmap.c for speed. */ static __inline void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { sfence_vma_page(va); } static __inline void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { /* * Might consider a loop of sfence_vma_page() for a small * number of pages in the future. */ sfence_vma(); } static __inline void pmap_invalidate_all(pmap_t pmap) { sfence_vma(); } #endif /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2p, l2; pt_entry_t *l3p, l3; vm_paddr_t pa; pa = 0; PMAP_LOCK(pmap); /* * Start with the l2 tabel. We are unable to allocate * pages in the l1 table. */ l2p = pmap_l2(pmap, va); if (l2p != NULL) { l2 = pmap_load(l2p); if ((l2 & PTE_RX) == 0) { l3p = pmap_l2_to_l3(l2p, va); if (l3p != NULL) { l3 = pmap_load(l3p); pa = PTE_TO_PHYS(l3); pa |= (va & L3_OFFSET); } } else { /* L2 is superpages */ pa = (l2 >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *l3p, l3; vm_paddr_t phys; vm_paddr_t pa; vm_page_t m; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: l3p = pmap_l3(pmap, va); if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { if ((l3 & PTE_W) != 0 || (prot & VM_PROT_WRITE) == 0) { phys = PTE_TO_PHYS(l3); if (vm_page_pa_tryrelock(pmap, phys, &pa)) goto retry; m = PHYS_TO_VM_PAGE(phys); vm_page_hold(m); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { l2 = pmap_l2(kernel_pmap, va); if (l2 == NULL) panic("pmap_kextract: No l2"); if ((pmap_load(l2) & PTE_RX) != 0) { /* superpages */ pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); return (pa); } l3 = pmap_l2_to_l3(l2, va); if (l3 == NULL) panic("pmap_kextract: No l3..."); pa = PTE_TO_PHYS(pmap_load(l3)); pa |= (va & PAGE_MASK); } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ void pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) { pt_entry_t entry; pt_entry_t *l3; vm_offset_t va; pn_t pn; KASSERT((pa & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kenter_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); pn = (pa / PAGE_SIZE); entry = PTE_KERN; entry |= (pn << PTE_PPN0_S); pmap_store(l3, entry); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *l3; l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); pmap_clear(l3); sfence_vma(); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { pt_entry_t *l3; vm_offset_t va; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); pmap_clear(l3); va += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *l3, pa; vm_offset_t va; vm_page_t m; pt_entry_t entry; pn_t pn; int i; va = sva; for (i = 0; i < count; i++) { m = ma[i]; pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); l3 = pmap_l3(kernel_pmap, va); entry = PTE_KERN; entry |= (pn << PTE_PPN0_S); pmap_store(l3, entry); va += L3_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { pt_entry_t *l3; vm_offset_t va; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); for (va = sva; count-- > 0; va += PAGE_SIZE) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); pmap_clear(l3); } pmap_invalidate_range(kernel_pmap, sva, va); } bool pmap_ps_enabled(pmap_t pmap __unused) { return (superpages_enabled); } /*************************************************** * Page table page management routines..... ***************************************************/ /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static __inline int pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_insert(&pmap->pm_root, ml3)); } /* * Removes the page table page mapping the specified virtual address from the * specified pmap's collection of idle page table pages, and returns it. * Otherwise, returns NULL if there is no page table page corresponding to the * specified virtual address. */ static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va))); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_ptp(pmap, va, m, free); return (TRUE); } else { return (FALSE); } } static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { vm_paddr_t phys; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (m->pindex >= NUL1E) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_clear(l1); pmap_distribute_l1(pmap, pmap_l1_index(va), 0); } else { pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_clear(l2); } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUL1E) { pd_entry_t *l1; vm_page_t pdpg; l1 = pmap_l1(pmap, va); phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pmap_unwire_ptp(pmap, va, pdpg, free); } pmap_invalidate_page(pmap, va); vm_wire_sub(1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(ptepde)); return (pmap_unwire_ptp(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l1 = kernel_pmap->pm_l1; + pmap->pm_satp = SATP_MODE_SV39 | (vtophys(pmap->pm_l1) >> PAGE_SHIFT); + CPU_ZERO(&pmap->pm_active); + pmap_activate_boot(pmap); } int pmap_pinit(pmap_t pmap) { vm_paddr_t l1phys; vm_page_t l1pt; /* * allocate the l1 page */ while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) vm_wait(NULL); l1phys = VM_PAGE_TO_PHYS(l1pt); pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); + pmap->pm_satp = SATP_MODE_SV39 | (l1phys >> PAGE_SHIFT); if ((l1pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l1); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + CPU_ZERO(&pmap->pm_active); + /* Install kernel pagetables */ memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE); /* Add to the list of all user pmaps */ mtx_lock(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock(&allpmaps_lock); vm_radix_init(&pmap->pm_root); return (1); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, /*pdppg, */pdpg; pt_entry_t entry; vm_paddr_t phys; pn_t pn; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); rw_runlock(&pvh_global_lock); vm_wait(NULL); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= NUL1E) { pd_entry_t *l1; vm_pindex_t l1index; l1index = ptepindex - NUL1E; l1 = &pmap->pm_l1[l1index]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(l1, entry); pmap_distribute_l1(pmap, l1index, entry); } else { vm_pindex_t l1index; pd_entry_t *l1, *l2; l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); l1 = &pmap->pm_l1[l1index]; if (pmap_load(l1) == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL1E + l1index, lockp) == NULL) { vm_page_unwire_noq(m); vm_page_free_zero(m); return (NULL); } } else { phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pdpg->wire_count++; } phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); l2 = &l2[ptepindex & Ln_ADDR_MASK]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(l2, entry); } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_alloc_l2(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { pd_entry_t *l1; vm_page_t l2pg; vm_pindex_t l2pindex; retry: l1 = pmap_l1(pmap, va); if (l1 != NULL && (pmap_load(l1) & PTE_RWX) == 0) { /* Add a reference to the L2 page. */ l2pg = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l1))); l2pg->wire_count++; } else { /* Allocate a L2 page. */ l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT; l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp); if (l2pg == NULL && lockp != NULL) goto retry; } return (l2pg); } static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *l2; vm_paddr_t phys; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = pmap_l2_pindex(va); retry: /* * Get the page directory entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); m = PHYS_TO_VM_PAGE(phys); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ m = _pmap_alloc_l3(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); + KASSERT(CPU_EMPTY(&pmap->pm_active), + ("releasing active pmap %p", pmap)); mtx_lock(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock(&allpmaps_lock); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); vm_page_unwire_noq(m); vm_page_free(m); } #if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); #endif /* 0 */ /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *l1, *l2; pt_entry_t entry; pn_t pn; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); if (addr - 1 >= vm_map_max(kernel_map)) addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { l1 = pmap_l1(kernel_pmap, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pn = (paddr / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(l1, entry); pmap_distribute_l1(kernel_pmap, pmap_l1_index(kernel_vm_end), entry); continue; /* try again */ } l2 = pmap_l1_to_l2(l1, kernel_vm_end); if ((pmap_load(l2) & PTE_V) != 0 && (pmap_load(l2) & PTE_RWX) == 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { kernel_vm_end = vm_map_max(kernel_map); break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) { pmap_zero_page(nkpg); } paddr = VM_PAGE_TO_PHYS(nkpg); pn = (paddr / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(l2, entry); pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { kernel_vm_end = vm_map_max(kernel_map); break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif #endif /* 0 */ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { panic("RISCVTODO: reclaim_pv_chunk"); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. */ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); #if 0 /* TODO: For minidump */ dump_drop_page(m->phys_addr); #endif vm_page_unwire(m, PQ_NONE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffsl(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); #if 0 /* TODO: This is for minidump */ dump_add_page(m->phys_addr); #endif pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * Ensure that the number of spare PV entries in the specified pmap meets or * exceeds the given count, "needed". * * The given PV list lock may be released. */ static void reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) { struct pch new_tail; struct pv_chunk *pc; vm_page_t m; int avail, free; bool reclaimed; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); /* * Newly allocated PV chunks must be stored in a private list until * the required number of PV chunks have been allocated. Otherwise, * reclaim_pv_chunk() could recycle one of these chunks. In * contrast, these chunks must be added to the pmap upon allocation. */ TAILQ_INIT(&new_tail); retry: avail = 0; TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { bit_count((bitstr_t *)pc->pc_map, 0, sizeof(pc->pc_map) * NBBY, &free); if (free == 0) break; avail += free; if (avail >= needed) break; } for (reclaimed = false; avail < needed; avail += _NPCPV) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; reclaimed = true; } /* XXX PV STATS */ #if 0 dump_add_page(m->phys_addr); #endif pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0; pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); /* * The reclaim might have freed a chunk from the current pmap. * If that chunk contained available entries, we need to * re-count the number of available entries. */ if (reclaimed) goto retry; } if (!TAILQ_EMPTY(&new_tail)) { mtx_lock(&pv_chunks_mutex); TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); mtx_unlock(&pv_chunks_mutex); } } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. */ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found for %#lx", va)); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * After demotion from a 2MB page mapping to 512 4KB page mappings, * destroy the pv entry for the 2MB page mapping and reinstantiate the pv * entries for each of the 4KB page mappings. */ static void __unused pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; struct pv_chunk *pc; pv_entry_t pv; vm_page_t m; vm_offset_t va_last; int bit, field; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the 2mpage's pv entry for this mapping to the first * page's pv list. Once this transfer begins, the pv list lock * must not be released until the last pv entry is reinstantiated. */ pvh = pa_to_pvh(pa); va &= ~L2_OFFSET; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; /* Instantiate the remaining 511 pv entries. */ va_last = va + L2_SIZE - PAGE_SIZE; for (;;) { pc = TAILQ_FIRST(&pmap->pm_pvchunk); KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); for (field = 0; field < _NPCM; field++) { while (pc->pc_map[field] != 0) { bit = ffsl(pc->pc_map[field]) - 1; pc->pc_map[field] &= ~(1ul << bit); pv = &pc->pc_pventry[field * 64 + bit]; va += PAGE_SIZE; pv->pv_va = va; m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_l2: page %p is not managed", m)); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (va == va_last) goto out; } } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } out: if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } /* XXX PV stats */ } #if VM_NRESERVLEVEL > 0 static void pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; vm_page_t m; vm_offset_t va_last; rw_assert(&pvh_global_lock, RA_LOCKED); KASSERT((va & L2_OFFSET) == 0, ("pmap_pv_promote_l2: misaligned va %#lx", va)); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); m = PHYS_TO_VM_PAGE(pa); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv for %#lx not found", va)); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; va_last = va + L2_SIZE - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } #endif /* VM_NRESERVLEVEL > 0 */ /* * Create the PV entry for a 2MB page mapping. Always returns true unless the * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns * false if the PV entry cannot be allocated without resorting to reclamation. */ static bool pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; vm_paddr_t pa; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ? NULL : lockp)) == NULL) return (false); pv->pv_va = va; pa = PTE_TO_PHYS(l2e); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; return (true); } static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) { pt_entry_t newl2, oldl2; vm_page_t ml3; vm_paddr_t ml3pa; KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va)); KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); ml3 = pmap_remove_pt_page(pmap, va); if (ml3 == NULL) panic("pmap_remove_kernel_l2: Missing pt page"); ml3pa = VM_PAGE_TO_PHYS(ml3); newl2 = ml3pa | PTE_V; /* * Initialize the page table page. */ pagezero((void *)PHYS_TO_DMAP(ml3pa)); /* * Demote the mapping. */ oldl2 = pmap_load_store(l2, newl2); KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx", __func__, l2, oldl2)); } /* * pmap_remove_l2: Do the things to unmap a level 2 superpage. */ static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t oldl2; vm_offset_t eva, va; vm_page_t m, ml3; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned")); oldl2 = pmap_load_clear(l2); KASSERT((oldl2 & PTE_RWX) != 0, ("pmap_remove_l2: L2e %lx is not a superpage mapping", oldl2)); /* * The sfence.vma documentation states that it is sufficient to specify * a single address within a superpage mapping. However, since we do * not perform any invalidation upon promotion, TLBs may still be * caching 4KB mappings within the superpage, so we must invalidate the * entire range. */ pmap_invalidate_range(pmap, sva, sva + L2_SIZE); if ((oldl2 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE; pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); if ((oldl2 & PTE_SW_MANAGED) != 0) { CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(oldl2)); pvh = pa_to_pvh(PTE_TO_PHYS(oldl2)); pmap_pvh_free(pvh, pmap, sva); eva = sva + L2_SIZE; for (va = sva, m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(oldl2)); va < eva; va += PAGE_SIZE, m++) { if ((oldl2 & PTE_D) != 0) vm_page_dirty(m); if ((oldl2 & PTE_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } if (pmap == kernel_pmap) { pmap_remove_kernel_l2(pmap, l2, sva); } else { ml3 = pmap_remove_pt_page(pmap, sva); if (ml3 != NULL) { pmap_resident_count_dec(pmap, 1); KASSERT(ml3->wire_count == Ln_ENTRIES, ("pmap_remove_l2: l3 page wire count error")); ml3->wire_count = 1; vm_page_unwire_noq(ml3); pmap_add_delayed_free_list(ml3, free, FALSE); } } return (pmap_unuse_pt(pmap, sva, l1e, free)); } /* * pmap_remove_l3: do the things to unmap a page in a process */ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { pt_entry_t old_l3; vm_paddr_t phys; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); old_l3 = pmap_load_clear(l3); pmap_invalidate_page(pmap, va); if (old_l3 & PTE_SW_WIRED) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (old_l3 & PTE_SW_MANAGED) { phys = PTE_TO_PHYS(old_l3); m = PHYS_TO_VM_PAGE(phys); if ((old_l3 & PTE_D) != 0) vm_page_dirty(m); if (old_l3 & PTE_A) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); } return (pmap_unuse_pt(pmap, va, l2e, free)); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct spglist free; struct rwlock *lock; vm_offset_t va, va_next; pd_entry_t *l1, *l2, l2e; pt_entry_t *l3; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; if ((l2e = pmap_load(l2)) == 0) continue; if ((l2e & PTE_RWX) != 0) { if (sva + L2_SIZE == va_next && eva >= va_next) { (void)pmap_remove_l2(pmap, l2, sva, pmap_load(l1), &free, &lock); continue; } else if (!pmap_demote_l2_locked(pmap, l2, sva, &lock)) { /* * The large page mapping was destroyed. */ continue; } l2e = pmap_load(l2); } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (va_next > eva) va_next = eva; va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if (va == va_next) va = sva; if (pmap_remove_l3(pmap, l3, sva, l2e, &free, &lock)) { sva += L3_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, false); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { struct spglist free; struct md_page *pvh; pmap_t pmap; pt_entry_t *l3, l3e; pd_entry_t *l2, l2e; pv_entry_t pv; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; l2 = pmap_l2(pmap, va); (void)pmap_demote_l2(pmap, l2, va); PMAP_UNLOCK(pmap); } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); l2 = pmap_l2(pmap, pv->pv_va); KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); l2e = pmap_load(l2); KASSERT((l2e & PTE_RX) == 0, ("pmap_remove_all: found a superpage in %p's pv list", m)); l3 = pmap_l2_to_l3(l2, pv->pv_va); l3e = pmap_load_clear(l3); pmap_invalidate_page(pmap, pv->pv_va); if (l3e & PTE_SW_WIRED) pmap->pm_stats.wired_count--; if ((l3e & PTE_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((l3e & PTE_D) != 0) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, pmap_load(l2), &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); vm_page_free_pages_toq(&free, false); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { pd_entry_t *l1, *l2, l2e; pt_entry_t *l3, l3e, mask; vm_page_t m; vm_paddr_t pa; vm_offset_t va, va_next; bool anychanged, pv_lists_locked; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE)) return; anychanged = false; pv_lists_locked = false; mask = 0; if ((prot & VM_PROT_WRITE) == 0) mask |= PTE_W | PTE_D; if ((prot & VM_PROT_EXECUTE) == 0) mask |= PTE_X; resume: PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL || (l2e = pmap_load(l2)) == 0) continue; if ((l2e & PTE_RWX) != 0) { if (sva + L2_SIZE == va_next && eva >= va_next) { retryl2: if ((l2e & (PTE_SW_MANAGED | PTE_D)) == (PTE_SW_MANAGED | PTE_D)) { pa = PTE_TO_PHYS(l2e); for (va = sva, m = PHYS_TO_VM_PAGE(pa); va < va_next; m++, va += PAGE_SIZE) vm_page_dirty(m); } if (!atomic_fcmpset_long(l2, &l2e, l2e & ~mask)) goto retryl2; anychanged = true; } else { if (!pv_lists_locked) { pv_lists_locked = true; if (!rw_try_rlock(&pvh_global_lock)) { if (anychanged) pmap_invalidate_all( pmap); PMAP_UNLOCK(pmap); rw_rlock(&pvh_global_lock); goto resume; } } if (!pmap_demote_l2(pmap, l2, sva)) { /* * The large page mapping was destroyed. */ continue; } } } if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { l3e = pmap_load(l3); retryl3: if ((l3e & PTE_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0 && (l3e & (PTE_SW_MANAGED | PTE_D)) == (PTE_SW_MANAGED | PTE_D)) { m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(l3e)); vm_page_dirty(m); } if (!atomic_fcmpset_long(l3, &l3e, l3e & ~mask)) goto retryl3; anychanged = true; } } if (anychanged) pmap_invalidate_all(pmap); if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } int pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype) { pd_entry_t *l2, l2e; pt_entry_t bits, *pte, oldpte; int rv; rv = 0; PMAP_LOCK(pmap); l2 = pmap_l2(pmap, va); if (l2 == NULL || ((l2e = pmap_load(l2)) & PTE_V) == 0) goto done; if ((l2e & PTE_RWX) == 0) { pte = pmap_l2_to_l3(l2, va); if (pte == NULL || ((oldpte = pmap_load(pte) & PTE_V)) == 0) goto done; } else { pte = l2; oldpte = l2e; } if ((pmap != kernel_pmap && (oldpte & PTE_U) == 0) || (ftype == VM_PROT_WRITE && (oldpte & PTE_W) == 0) || (ftype == VM_PROT_EXECUTE && (oldpte & PTE_X) == 0) || (ftype == VM_PROT_READ && (oldpte & PTE_R) == 0)) goto done; bits = PTE_A; if (ftype == VM_PROT_WRITE) bits |= PTE_D; /* * Spurious faults can occur if the implementation caches invalid * entries in the TLB, or if simultaneous accesses on multiple CPUs * race with each other. */ if ((oldpte & bits) != bits) pmap_store_bits(pte, bits); sfence_vma(); rv = 1; done: PMAP_UNLOCK(pmap); return (rv); } static bool pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va) { struct rwlock *lock; bool rv; lock = NULL; rv = pmap_demote_l2_locked(pmap, l2, va, &lock); if (lock != NULL) rw_wunlock(lock); return (rv); } /* * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page * mapping is invalidated. */ static bool pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, struct rwlock **lockp) { struct spglist free; vm_page_t mpte; pd_entry_t newl2, oldl2; pt_entry_t *firstl3, newl3; vm_paddr_t mptepa; int i; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldl2 = pmap_load(l2); KASSERT((oldl2 & PTE_RWX) != 0, ("pmap_demote_l2_locked: oldl2 is not a leaf entry")); if ((oldl2 & PTE_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == NULL) { if ((oldl2 & PTE_A) == 0 || (mpte = vm_page_alloc(NULL, pmap_l2_pindex(va), (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); (void)pmap_remove_l2(pmap, l2, va & ~L2_OFFSET, pmap_load(pmap_l1(pmap, va)), &free, lockp); vm_page_free_pages_toq(&free, true); CTR2(KTR_PMAP, "pmap_demote_l2_locked: " "failure for va %#lx in pmap %p", va, pmap); return (false); } if (va < VM_MAXUSER_ADDRESS) pmap_resident_count_inc(pmap, 1); } mptepa = VM_PAGE_TO_PHYS(mpte); firstl3 = (pt_entry_t *)PHYS_TO_DMAP(mptepa); newl2 = ((mptepa / PAGE_SIZE) << PTE_PPN0_S) | PTE_V; KASSERT((oldl2 & PTE_A) != 0, ("pmap_demote_l2_locked: oldl2 is missing PTE_A")); KASSERT((oldl2 & (PTE_D | PTE_W)) != PTE_W, ("pmap_demote_l2_locked: oldl2 is missing PTE_D")); newl3 = oldl2; /* * If the page table page is new, initialize it. */ if (mpte->wire_count == 1) { mpte->wire_count = Ln_ENTRIES; for (i = 0; i < Ln_ENTRIES; i++) pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S)); } KASSERT(PTE_TO_PHYS(pmap_load(firstl3)) == PTE_TO_PHYS(newl3), ("pmap_demote_l2_locked: firstl3 and newl3 map different physical " "addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((pmap_load(firstl3) & PTE_PROMOTE) != (newl3 & PTE_PROMOTE)) for (i = 0; i < Ln_ENTRIES; i++) pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S)); /* * The spare PV entries must be reserved prior to demoting the * mapping, that is, prior to changing the L2 entry. Otherwise, the * state of the L2 entry and the PV lists will be inconsistent, which * can result in reclaim_pv_chunk() attempting to remove a PV entry from * the wrong PV list and pmap_pv_demote_l2() failing to find the * expected PV entry for the 2MB page mapping that is being demoted. */ if ((oldl2 & PTE_SW_MANAGED) != 0) reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp); /* * Demote the mapping. */ pmap_store(l2, newl2); /* * Demote the PV entry. */ if ((oldl2 & PTE_SW_MANAGED) != 0) pmap_pv_demote_l2(pmap, va, PTE_TO_PHYS(oldl2), lockp); atomic_add_long(&pmap_l2_demotions, 1); CTR2(KTR_PMAP, "pmap_demote_l2_locked: success for va %#lx in pmap %p", va, pmap); return (true); } #if VM_NRESERVLEVEL > 0 static void pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, struct rwlock **lockp) { pt_entry_t *firstl3, *l3; vm_paddr_t pa; vm_page_t ml3; PMAP_LOCK_ASSERT(pmap, MA_OWNED); va &= ~L2_OFFSET; KASSERT((pmap_load(l2) & PTE_RWX) == 0, ("pmap_promote_l2: invalid l2 entry %p", l2)); firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2))); pa = PTE_TO_PHYS(pmap_load(firstl3)); if ((pa & L2_OFFSET) != 0) { CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", va, pmap); atomic_add_long(&pmap_l2_p_failures, 1); return; } pa += PAGE_SIZE; for (l3 = firstl3 + 1; l3 < firstl3 + Ln_ENTRIES; l3++) { if (PTE_TO_PHYS(pmap_load(l3)) != pa) { CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", va, pmap); atomic_add_long(&pmap_l2_p_failures, 1); return; } if ((pmap_load(l3) & PTE_PROMOTE) != (pmap_load(firstl3) & PTE_PROMOTE)) { CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", va, pmap); atomic_add_long(&pmap_l2_p_failures, 1); return; } pa += PAGE_SIZE; } ml3 = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); KASSERT(ml3->pindex == pmap_l2_pindex(va), ("pmap_promote_l2: page table page's pindex is wrong")); if (pmap_insert_pt_page(pmap, ml3)) { CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", va, pmap); atomic_add_long(&pmap_l2_p_failures, 1); return; } if ((pmap_load(firstl3) & PTE_SW_MANAGED) != 0) pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(pmap_load(firstl3)), lockp); pmap_store(l2, pmap_load(firstl3)); atomic_add_long(&pmap_l2_promotions, 1); CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, pmap); } #endif /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { struct rwlock *lock; pd_entry_t *l1, *l2, l2e; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l2_pa, l3_pa; vm_page_t mpte, om, l2_m, l3_m; pt_entry_t entry; pn_t l2_pn, l3_pn, pn; int rv; bool nosleep; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); new_l3 = PTE_V | PTE_R | PTE_A; if (prot & VM_PROT_EXECUTE) new_l3 |= PTE_X; if (flags & VM_PROT_WRITE) new_l3 |= PTE_D; if (prot & VM_PROT_WRITE) new_l3 |= PTE_W; if (va < VM_MAX_USER_ADDRESS) new_l3 |= PTE_U; new_l3 |= (pn << PTE_PPN0_S); if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= PTE_SW_WIRED; /* * Set modified bit gratuitously for writeable mappings if * the page is unmanaged. We do not want to take a fault * to do the dirty bit accounting for these mappings. */ if ((m->oflags & VPO_UNMANAGED) != 0) { if (prot & VM_PROT_WRITE) new_l3 |= PTE_D; } else new_l3 |= PTE_SW_MANAGED; CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); lock = NULL; mpte = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); if (psind == 1) { /* Assert the required virtual and physical alignment. */ KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va %#lx unaligned", va)); KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); rv = pmap_enter_l2(pmap, va, new_l3, flags, m, &lock); goto out; } l2 = pmap_l2(pmap, va); if (l2 != NULL && ((l2e = pmap_load(l2)) & PTE_V) != 0 && ((l2e & PTE_RWX) == 0 || pmap_demote_l2_locked(pmap, l2, va, &lock))) { l3 = pmap_l2_to_l3(l2, va); if (va < VM_MAXUSER_ADDRESS) { mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); mpte->wire_count++; } } else if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); if (mpte == NULL && nosleep) { CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } l3 = pmap_l3(pmap, va); } else { l3 = pmap_l3(pmap, va); /* TODO: This is not optimal, but should mostly work */ if (l3 == NULL) { if (l2 == NULL) { l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l2_m == NULL) panic("pmap_enter: l2 pte_m == NULL"); if ((l2_m->flags & PG_ZERO) == 0) pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); l2_pn = (l2_pa / PAGE_SIZE); l1 = pmap_l1(pmap, va); entry = (PTE_V); entry |= (l2_pn << PTE_PPN0_S); pmap_store(l1, entry); pmap_distribute_l1(pmap, pmap_l1_index(va), entry); l2 = pmap_l1_to_l2(l1, va); } l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) panic("pmap_enter: l3 pte_m == NULL"); if ((l3_m->flags & PG_ZERO) == 0) pmap_zero_page(l3_m); l3_pa = VM_PAGE_TO_PHYS(l3_m); l3_pn = (l3_pa / PAGE_SIZE); entry = (PTE_V); entry |= (l3_pn << PTE_PPN0_S); pmap_store(l2, entry); l3 = pmap_l2_to_l3(l2, va); } pmap_invalidate_page(pmap, va); } orig_l3 = pmap_load(l3); opa = PTE_TO_PHYS(orig_l3); pv = NULL; /* * Is the specified virtual address already mapped? */ if ((orig_l3 & PTE_V) != 0) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((flags & PMAP_ENTER_WIRED) != 0 && (orig_l3 & PTE_SW_WIRED) == 0) pmap->pm_stats.wired_count++; else if ((flags & PMAP_ENTER_WIRED) == 0 && (orig_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((orig_l3 & PTE_SW_MANAGED) != 0 && (new_l3 & PTE_W) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); goto validate; } /* * The physical page has changed. Temporarily invalidate * the mapping. This ensures that all threads sharing the * pmap keep a consistent view of the mapping, which is * necessary for the correct handling of COW faults. It * also permits reuse of the old mapping's PV entry, * avoiding an allocation. * * For consistency, handle unmanaged mappings the same way. */ orig_l3 = pmap_load_clear(l3); KASSERT(PTE_TO_PHYS(orig_l3) == opa, ("pmap_enter: unexpected pa update for %#lx", va)); if ((orig_l3 & PTE_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); /* * The pmap lock is sufficient to synchronize with * concurrent calls to pmap_page_test_mappings() and * pmap_ts_referenced(). */ if ((orig_l3 & PTE_D) != 0) vm_page_dirty(om); if ((orig_l3 & PTE_A) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pv = pmap_pvh_remove(&om->md, pmap, va); KASSERT(pv != NULL, ("pmap_enter: no PV entry for %#lx", va)); if ((new_l3 & PTE_SW_MANAGED) == 0) free_pv_entry(pmap, pv); if ((om->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list)) vm_page_aflag_clear(om, PGA_WRITEABLE); } pmap_invalidate_page(pmap, va); orig_l3 = 0; } else { /* * Increment the counters. */ if ((new_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. */ if ((new_l3 & PTE_SW_MANAGED) != 0) { if (pv == NULL) { pv = get_pv_entry(pmap, &lock); pv->pv_va = va; } CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((new_l3 & PTE_W) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } validate: /* * Sync the i-cache on all harts before updating the PTE * if the new PTE is executable. */ if (prot & VM_PROT_EXECUTE) pmap_sync_icache(pmap, va, PAGE_SIZE); /* * Update the L3 entry. */ if (orig_l3 != 0) { orig_l3 = pmap_load_store(l3, new_l3); pmap_invalidate_page(pmap, va); KASSERT(PTE_TO_PHYS(orig_l3) == pa, ("pmap_enter: invalid update")); if ((orig_l3 & (PTE_D | PTE_SW_MANAGED)) == (PTE_D | PTE_SW_MANAGED)) vm_page_dirty(m); } else { pmap_store(l3, new_l3); } #if VM_NRESERVLEVEL > 0 if (mpte != NULL && mpte->wire_count == Ln_ENTRIES && pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_l2(pmap, l2, va, &lock); #endif rv = KERN_SUCCESS; out: if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (rv); } /* * Tries to create a read- and/or execute-only 2MB page mapping. Returns true * if successful. Returns false if (1) a page table page cannot be allocated * without sleeping, (2) a mapping already exists at the specified virtual * address, or (3) a PV entry cannot be allocated without reclaiming another * PV entry. */ static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp) { pd_entry_t new_l2; pn_t pn; PMAP_LOCK_ASSERT(pmap, MA_OWNED); pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE; new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V); if ((m->oflags & VPO_UNMANAGED) == 0) new_l2 |= PTE_SW_MANAGED; if ((prot & VM_PROT_EXECUTE) != 0) new_l2 |= PTE_X; if (va < VM_MAXUSER_ADDRESS) new_l2 |= PTE_U; return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP | PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == KERN_SUCCESS); } /* * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and * a mapping already exists at the specified virtual address. Returns * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. * * The parameter "m" is only used when creating a managed, writeable mapping. */ static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, vm_page_t m, struct rwlock **lockp) { struct spglist free; pd_entry_t *l2, *l3, oldl2; vm_offset_t sva; vm_page_t l2pg, mt; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((l2pg = pmap_alloc_l2(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp)) == NULL) { CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", va, pmap); return (KERN_RESOURCE_SHORTAGE); } l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg)); l2 = &l2[pmap_l2_index(va)]; if ((oldl2 = pmap_load(l2)) != 0) { KASSERT(l2pg->wire_count > 1, ("pmap_enter_l2: l2pg's wire count is too low")); if ((flags & PMAP_ENTER_NOREPLACE) != 0) { l2pg->wire_count--; CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", va, pmap); return (KERN_FAILURE); } SLIST_INIT(&free); if ((oldl2 & PTE_RWX) != 0) (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free, lockp); else for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) { l3 = pmap_l2_to_l3(l2, sva); if ((pmap_load(l3) & PTE_V) != 0 && pmap_remove_l3(pmap, l3, sva, oldl2, &free, lockp) != 0) break; } vm_page_free_pages_toq(&free, true); if (va >= VM_MAXUSER_ADDRESS) { mt = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); if (pmap_insert_pt_page(pmap, mt)) { /* * XXX Currently, this can't happen bacuse * we do not perform pmap_enter(psind == 1) * on the kernel pmap. */ panic("pmap_enter_l2: trie insert failed"); } } else KASSERT(pmap_load(l2) == 0, ("pmap_enter_l2: non-zero L2 entry %p", l2)); } if ((new_l2 & PTE_SW_MANAGED) != 0) { /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, l2pg, &free)) { /* * Although "va" is not mapped, paging-structure * caches could nonetheless have entries that * refer to the freed page table pages. * Invalidate those entries. */ pmap_invalidate_page(pmap, va); vm_page_free_pages_toq(&free, true); } CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", va, pmap); return (KERN_RESOURCE_SHORTAGE); } if ((new_l2 & PTE_W) != 0) for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) vm_page_aflag_set(mt, PGA_WRITEABLE); } /* * Increment counters. */ if ((new_l2 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE; pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE; /* * Map the superpage. */ pmap_store(l2, new_l2); atomic_add_long(&pmap_l2_mappings, 1); CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p", va, pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && m->psind == 1 && pmap_ps_enabled(pmap) && pmap_enter_2mpage(pmap, va, m, prot, &lock)) m = &m[L2_SIZE / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; vm_paddr_t phys; pd_entry_t *l2; pt_entry_t *l3, newl3; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t l2pindex; /* * Calculate pagetable page index */ l2pindex = pmap_l2_pindex(va); if (mpte && (mpte->pindex == l2pindex)) { mpte->wire_count++; } else { /* * Get the l2 entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment * the hold count, and activate it. Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); mpte = PHYS_TO_VM_PAGE(phys); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); if (mpte == NULL) return (mpte); } } l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; l3 = pmap_l3(kernel_pmap, va); } if (l3 == NULL) panic("pmap_enter_quick_locked: No l3"); if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); vm_page_free_pages_toq(&free, false); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); newl3 = ((VM_PAGE_TO_PHYS(m) / PAGE_SIZE) << PTE_PPN0_S) | PTE_V | PTE_R; if ((prot & VM_PROT_EXECUTE) != 0) newl3 |= PTE_X; if ((m->oflags & VPO_UNMANAGED) == 0) newl3 |= PTE_SW_MANAGED; if (va < VM_MAX_USER_ADDRESS) newl3 |= PTE_U; /* * Sync the i-cache on all harts before updating the PTE * if the new PTE is executable. */ if (prot & VM_PROT_EXECUTE) pmap_sync_icache(pmap, va, PAGE_SIZE); pmap_store(l3, newl3); pmap_invalidate_page(pmap, va); return (mpte); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pd_entry_t *l1, *l2, l2e; pt_entry_t *l3, l3e; bool pv_lists_locked; pv_lists_locked = false; retry: PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if ((l2e = pmap_load(l2)) == 0) continue; if ((l2e & PTE_RWX) != 0) { if (sva + L2_SIZE == va_next && eva >= va_next) { if ((l2e & PTE_SW_WIRED) == 0) panic("pmap_unwire: l2 %#jx is missing " "PTE_SW_WIRED", (uintmax_t)l2e); pmap_clear_bits(l2, PTE_SW_WIRED); continue; } else { if (!pv_lists_locked) { pv_lists_locked = true; if (!rw_try_rlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); rw_rlock(&pvh_global_lock); /* Repeat sva. */ goto retry; } } if (!pmap_demote_l2(pmap, l2, sva)) panic("pmap_unwire: demotion failed"); } } if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if ((l3e = pmap_load(l3)) == 0) continue; if ((l3e & PTE_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " "PTE_SW_WIRED", (uintmax_t)l3e); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ pmap_clear_bits(l3, PTE_SW_WIRED); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t m_a, m_b; vm_paddr_t p_a, p_b; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; b_pg_offset = b_offset & PAGE_MASK; m_b = mb[b_offset >> PAGE_SHIFT]; p_b = m_b->phys_addr; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); if (__predict_false(!PHYS_IN_DMAP(p_a))) { panic("!DMAP a %lx", p_a); } else { a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; } if (__predict_false(!PHYS_IN_DMAP(p_b))) { panic("!DMAP b %lx", p_b); } else { b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; } bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } void pmap_quick_remove_page(vm_offset_t addr) { } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pmap_t pmap; pd_entry_t *l2; pt_entry_t *l3; pv_entry_t pv; int count, md_gen, pvh_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); if ((pmap_load(l3) & PTE_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l2 = pmap_l2(pmap, pv->pv_va); if ((pmap_load(l2) & PTE_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } static void pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv, struct spglist *free, bool superpage) { struct md_page *pvh; vm_page_t mpte, mt; if (superpage) { pmap_resident_count_dec(pmap, Ln_ENTRIES); pvh = pa_to_pvh(m->phys_addr); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[Ln_ENTRIES]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list) && (mt->aflags & PGA_WRITEABLE) != 0) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_remove_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == Ln_ENTRIES, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); } } else { pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (TAILQ_EMPTY(&m->md.pv_list) && (m->aflags & PGA_WRITEABLE) != 0) { pvh = pa_to_pvh(m->phys_addr); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { struct spglist free; pd_entry_t ptepde; pt_entry_t *pte, tpte; vm_page_t m, mt; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; bool superpage; lock = NULL; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffsl(inuse) - 1; bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_l1(pmap, pv->pv_va); ptepde = pmap_load(pte); pte = pmap_l1_to_l2(pte, pv->pv_va); tpte = pmap_load(pte); if ((tpte & PTE_RWX) != 0) { superpage = true; } else { ptepde = tpte; pte = pmap_l2_to_l3(pte, pv->pv_va); tpte = pmap_load(pte); superpage = false; } /* * We cannot remove wired pages from a * process' mapping at this time. */ if (tpte & PTE_SW_WIRED) { allfree = 0; continue; } m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad pte %#jx", (uintmax_t)tpte)); pmap_clear(pte); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & (PTE_D | PTE_W)) == (PTE_D | PTE_W)) { if (superpage) for (mt = m; mt < &m[Ln_ENTRIES]; mt++) vm_page_dirty(mt); else vm_page_dirty(m); } CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; pmap_remove_pages_pv(pmap, m, pv, &free, superpage); pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } if (lock != NULL) rw_wunlock(lock); pmap_invalidate_all(pmap); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, false); } static bool pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct md_page *pvh; struct rwlock *lock; pd_entry_t *l2; pt_entry_t *l3, mask; pv_entry_t pv; pmap_t pmap; int md_gen, pvh_gen; bool rv; mask = 0; if (modified) mask |= PTE_D; if (accessed) mask |= PTE_A; rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); rv = (pmap_load(l3) & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l2 = pmap_l2(pmap, pv->pv_va); rv = (pmap_load(l2) & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt_entry_t *l3; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); l3 = pmap_l3(pmap, addr); if (l3 != NULL && pmap_load(l3) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pmap_t pmap; pd_entry_t *l2; pt_entry_t *l3, oldl3, newl3; pv_entry_t next_pv, pv; vm_offset_t va; int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); rw_rlock(&pvh_global_lock); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } va = pv->pv_va; l2 = pmap_l2(pmap, va); if ((pmap_load(l2) & PTE_W) != 0) (void)pmap_demote_l2_locked(pmap, l2, va, &lock); KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } l3 = pmap_l3(pmap, pv->pv_va); oldl3 = pmap_load(l3); retry: if ((oldl3 & PTE_W) != 0) { newl3 = oldl3 & ~(PTE_D | PTE_W); if (!atomic_fcmpset_long(l3, &oldl3, newl3)) goto retry; if ((oldl3 & PTE_D) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); rw_runlock(&pvh_global_lock); } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * As an optimization, update the page's dirty field if a modified bit is * found while counting reference bits. This opportunistic update can be * performed at low cost and can eliminate the need for some future calls * to pmap_is_modified(). However, since this function stops after * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some * dirty pages. Those dirty pages will only be detected by a future call * to pmap_is_modified(). */ int pmap_ts_referenced(vm_page_t m) { struct spglist free; struct md_page *pvh; struct rwlock *lock; pv_entry_t pv, pvf; pmap_t pmap; pd_entry_t *l2, l2e; pt_entry_t *l3, l3e; vm_paddr_t pa; vm_offset_t va; int md_gen, pvh_gen, ret; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); ret = 0; pa = VM_PAGE_TO_PHYS(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } va = pv->pv_va; l2 = pmap_l2(pmap, va); l2e = pmap_load(l2); if ((l2e & (PTE_W | PTE_D)) == (PTE_W | PTE_D)) { /* * Although l2e is mapping a 2MB page, because * this function is called at a 4KB page granularity, * we only update the 4KB page under test. */ vm_page_dirty(m); } if ((l2e & PTE_A) != 0) { /* * Since this reference bit is shared by 512 4KB * pages, it should not be cleared every time it is * tested. Apply a simple "hash" function on the * physical page number, the virtual superpage number, * and the pmap address to select one 4KB page out of * the 512 on which testing the reference bit will * result in clearing that reference bit. This * function is designed to avoid the selection of the * same 4KB page for every 2MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the superpage is wired, the current state of * its reference bit won't affect page replacement. */ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^ (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 && (l2e & PTE_SW_WIRED) == 0) { pmap_clear_bits(l2, PTE_A); pmap_invalidate_page(pmap, va); } ret++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; } if (ret >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } l2 = pmap_l2(pmap, pv->pv_va); KASSERT((pmap_load(l2) & PTE_RX) == 0, ("pmap_ts_referenced: found an invalid l2 table")); l3 = pmap_l2_to_l3(l2, pv->pv_va); l3e = pmap_load(l3); if ((l3e & PTE_D) != 0) vm_page_dirty(m); if ((l3e & PTE_A) != 0) { if ((l3e & PTE_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ pmap_clear_bits(l3, PTE_A); pmap_invalidate_page(pmap, pv->pv_va); } ret++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && ret < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); vm_page_free_pages_toq(&free, false); return (ret); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pmap_t pmap; pv_entry_t next_pv, pv; pd_entry_t *l2, oldl2; pt_entry_t *l3, oldl3; vm_offset_t va; int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(&pvh_global_lock); rw_wlock(lock); restart: TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } va = pv->pv_va; l2 = pmap_l2(pmap, va); oldl2 = pmap_load(l2); if ((oldl2 & PTE_W) != 0) { if (pmap_demote_l2_locked(pmap, l2, va, &lock)) { if ((oldl2 & PTE_SW_WIRED) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - PTE_TO_PHYS(oldl2); l3 = pmap_l2_to_l3(l2, va); oldl3 = pmap_load(l3); if ((oldl3 & PTE_V) != 0) { while (!atomic_fcmpset_long(l3, &oldl3, oldl3 & ~(PTE_D | PTE_W))) cpu_spinwait(); vm_page_dirty(m); pmap_invalidate_page(pmap, va); } } } } PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l2 = pmap_l2(pmap, pv->pv_va); KASSERT((pmap_load(l2) & PTE_RWX) == 0, ("pmap_clear_modify: found a 2mpage in page %p's pv list", m)); l3 = pmap_l2_to_l3(l2, pv->pv_va); if ((pmap_load(l3) & (PTE_D | PTE_W)) == (PTE_D | PTE_W)) { pmap_clear_bits(l3, PTE_D); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); rw_runlock(&pvh_global_lock); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return ((void *)PHYS_TO_DMAP(pa)); } void pmap_unmapbios(vm_paddr_t pa, vm_size_t size) { } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pv_memattr = ma; /* * RISCVTODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) panic("RISCVTODO: pmap_page_set_memattr"); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pt_entry_t *l2, *l3, tpte; vm_paddr_t pa; int val; bool managed; PMAP_LOCK(pmap); retry: managed = false; val = 0; l2 = pmap_l2(pmap, addr); if (l2 != NULL && ((tpte = pmap_load(l2)) & PTE_V) != 0) { if ((tpte & PTE_RWX) != 0) { pa = PTE_TO_PHYS(tpte) | (addr & L2_OFFSET); val = MINCORE_INCORE | MINCORE_SUPER; } else { l3 = pmap_l2_to_l3(l2, addr); tpte = pmap_load(l3); if ((tpte & PTE_V) == 0) goto done; pa = PTE_TO_PHYS(tpte) | (addr & L3_OFFSET); val = MINCORE_INCORE; } if ((tpte & PTE_D) != 0) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((tpte & PTE_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; managed = (tpte & PTE_SW_MANAGED) == PTE_SW_MANAGED; } done: if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void -pmap_activate(struct thread *td) +pmap_activate_sw(struct thread *td) { - pmap_t pmap; - uint64_t reg; + pmap_t oldpmap, pmap; + u_int cpu; - critical_enter(); + oldpmap = PCPU_GET(curpmap); pmap = vmspace_pmap(td->td_proc->p_vmspace); - td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); + if (pmap == oldpmap) + return; + load_satp(pmap->pm_satp); - reg = SATP_MODE_SV39; - reg |= (td->td_pcb->pcb_l1addr >> PAGE_SHIFT); - load_satp(reg); + cpu = PCPU_GET(cpuid); +#ifdef SMP + CPU_SET_ATOMIC(cpu, &pmap->pm_active); + CPU_CLR_ATOMIC(cpu, &oldpmap->pm_active); +#else + CPU_SET(cpu, &pmap->pm_active); + CPU_CLR(cpu, &oldpmap->pm_active); +#endif + PCPU_SET(curpmap, pmap); - pmap_invalidate_all(pmap); + sfence_vma(); +} + +void +pmap_activate(struct thread *td) +{ + + critical_enter(); + pmap_activate_sw(td); critical_exit(); } void -pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) +pmap_activate_boot(pmap_t pmap) { + u_int cpu; + + cpu = PCPU_GET(cpuid); +#ifdef SMP + CPU_SET_ATOMIC(cpu, &pmap->pm_active); +#else + CPU_SET(cpu, &pmap->pm_active); +#endif + PCPU_SET(curpmap, pmap); +} + +void +pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) +{ cpuset_t mask; /* * From the RISC-V User-Level ISA V2.2: * * "To make a store to instruction memory visible to all * RISC-V harts, the writing hart has to execute a data FENCE * before requesting that all remote RISC-V harts execute a * FENCE.I." */ sched_pin(); mask = all_cpus; CPU_CLR(PCPU_GET(cpuid), &mask); fence(); - sbi_remote_fence_i(mask.__bits); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_fence_i(mask.__bits); sched_unpin(); } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < L2_SIZE) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & L2_OFFSET; if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || (*addr & L2_OFFSET) == superpage_offset) return; if ((*addr & L2_OFFSET) < superpage_offset) *addr = (*addr & ~L2_OFFSET) + superpage_offset; else *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient. * * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; int error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic( "pmap_map_io_transient: TODO: Map out of DMAP data"); } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic("RISCVTODO: pmap_unmap_io_transient: Unmap data"); } } } boolean_t pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode) { return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_BACK); } Index: head/sys/riscv/riscv/swtch.S =================================================================== --- head/sys/riscv/riscv/swtch.S (revision 344107) +++ head/sys/riscv/riscv/swtch.S (revision 344108) @@ -1,494 +1,476 @@ /*- * Copyright (c) 2015-2017 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "assym.inc" #include "opt_sched.h" #include #include #include #include __FBSDID("$FreeBSD$"); #ifdef FPE .macro __fpe_state_save p /* * Enable FPE usage in supervisor mode, * so we can access registers. */ li t0, SSTATUS_FS_INITIAL csrs sstatus, t0 /* Store registers */ frcsr t0 sd t0, (PCB_FCSR)(\p) fsd f0, (PCB_X + 0 * 16)(\p) fsd f1, (PCB_X + 1 * 16)(\p) fsd f2, (PCB_X + 2 * 16)(\p) fsd f3, (PCB_X + 3 * 16)(\p) fsd f4, (PCB_X + 4 * 16)(\p) fsd f5, (PCB_X + 5 * 16)(\p) fsd f6, (PCB_X + 6 * 16)(\p) fsd f7, (PCB_X + 7 * 16)(\p) fsd f8, (PCB_X + 8 * 16)(\p) fsd f9, (PCB_X + 9 * 16)(\p) fsd f10, (PCB_X + 10 * 16)(\p) fsd f11, (PCB_X + 11 * 16)(\p) fsd f12, (PCB_X + 12 * 16)(\p) fsd f13, (PCB_X + 13 * 16)(\p) fsd f14, (PCB_X + 14 * 16)(\p) fsd f15, (PCB_X + 15 * 16)(\p) fsd f16, (PCB_X + 16 * 16)(\p) fsd f17, (PCB_X + 17 * 16)(\p) fsd f18, (PCB_X + 18 * 16)(\p) fsd f19, (PCB_X + 19 * 16)(\p) fsd f20, (PCB_X + 20 * 16)(\p) fsd f21, (PCB_X + 21 * 16)(\p) fsd f22, (PCB_X + 22 * 16)(\p) fsd f23, (PCB_X + 23 * 16)(\p) fsd f24, (PCB_X + 24 * 16)(\p) fsd f25, (PCB_X + 25 * 16)(\p) fsd f26, (PCB_X + 26 * 16)(\p) fsd f27, (PCB_X + 27 * 16)(\p) fsd f28, (PCB_X + 28 * 16)(\p) fsd f29, (PCB_X + 29 * 16)(\p) fsd f30, (PCB_X + 30 * 16)(\p) fsd f31, (PCB_X + 31 * 16)(\p) /* Disable FPE usage in supervisor mode. */ li t0, SSTATUS_FS_MASK csrc sstatus, t0 .endm .macro __fpe_state_load p /* * Enable FPE usage in supervisor mode, * so we can access registers. */ li t0, SSTATUS_FS_INITIAL csrs sstatus, t0 /* Restore registers */ ld t0, (PCB_FCSR)(\p) fscsr t0 fld f0, (PCB_X + 0 * 16)(\p) fld f1, (PCB_X + 1 * 16)(\p) fld f2, (PCB_X + 2 * 16)(\p) fld f3, (PCB_X + 3 * 16)(\p) fld f4, (PCB_X + 4 * 16)(\p) fld f5, (PCB_X + 5 * 16)(\p) fld f6, (PCB_X + 6 * 16)(\p) fld f7, (PCB_X + 7 * 16)(\p) fld f8, (PCB_X + 8 * 16)(\p) fld f9, (PCB_X + 9 * 16)(\p) fld f10, (PCB_X + 10 * 16)(\p) fld f11, (PCB_X + 11 * 16)(\p) fld f12, (PCB_X + 12 * 16)(\p) fld f13, (PCB_X + 13 * 16)(\p) fld f14, (PCB_X + 14 * 16)(\p) fld f15, (PCB_X + 15 * 16)(\p) fld f16, (PCB_X + 16 * 16)(\p) fld f17, (PCB_X + 17 * 16)(\p) fld f18, (PCB_X + 18 * 16)(\p) fld f19, (PCB_X + 19 * 16)(\p) fld f20, (PCB_X + 20 * 16)(\p) fld f21, (PCB_X + 21 * 16)(\p) fld f22, (PCB_X + 22 * 16)(\p) fld f23, (PCB_X + 23 * 16)(\p) fld f24, (PCB_X + 24 * 16)(\p) fld f25, (PCB_X + 25 * 16)(\p) fld f26, (PCB_X + 26 * 16)(\p) fld f27, (PCB_X + 27 * 16)(\p) fld f28, (PCB_X + 28 * 16)(\p) fld f29, (PCB_X + 29 * 16)(\p) fld f30, (PCB_X + 30 * 16)(\p) fld f31, (PCB_X + 31 * 16)(\p) /* Disable FPE usage in supervisor mode. */ li t0, SSTATUS_FS_MASK csrc sstatus, t0 .endm /* * void * fpe_state_save(struct thread *td) */ ENTRY(fpe_state_save) /* Get pointer to PCB */ ld a0, TD_PCB(a0) __fpe_state_save a0 ret END(fpe_state_save) #endif /* FPE */ /* * void * fpe_state_clear(void) */ ENTRY(fpe_state_clear) /* * Enable FPE usage in supervisor mode, * so we can access registers. */ li t0, SSTATUS_FS_INITIAL csrs sstatus, t0 fscsr zero fcvt.d.l f0, zero fcvt.d.l f1, zero fcvt.d.l f2, zero fcvt.d.l f3, zero fcvt.d.l f4, zero fcvt.d.l f5, zero fcvt.d.l f6, zero fcvt.d.l f7, zero fcvt.d.l f8, zero fcvt.d.l f9, zero fcvt.d.l f10, zero fcvt.d.l f11, zero fcvt.d.l f12, zero fcvt.d.l f13, zero fcvt.d.l f14, zero fcvt.d.l f15, zero fcvt.d.l f16, zero fcvt.d.l f17, zero fcvt.d.l f18, zero fcvt.d.l f19, zero fcvt.d.l f20, zero fcvt.d.l f21, zero fcvt.d.l f22, zero fcvt.d.l f23, zero fcvt.d.l f24, zero fcvt.d.l f25, zero fcvt.d.l f26, zero fcvt.d.l f27, zero fcvt.d.l f28, zero fcvt.d.l f29, zero fcvt.d.l f30, zero fcvt.d.l f31, zero /* Disable FPE usage in supervisor mode. */ li t0, SSTATUS_FS_MASK csrc sstatus, t0 ret END(fpe_state_clear) /* - * void cpu_throw(struct thread *old, struct thread *new) + * void cpu_throw(struct thread *old __unused, struct thread *new) */ ENTRY(cpu_throw) + /* Activate the new thread's pmap. */ + mv s0, a1 + mv a0, a1 + call _C_LABEL(pmap_activate_sw) + mv a0, s0 + /* Store the new curthread */ - sd a1, PC_CURTHREAD(gp) + sd a0, PC_CURTHREAD(gp) /* And the new pcb */ - ld x13, TD_PCB(a1) + ld x13, TD_PCB(a0) sd x13, PC_CURPCB(gp) - sfence.vma - - /* Switch to the new pmap */ - ld t0, PCB_L1ADDR(x13) - srli t0, t0, PAGE_SHIFT - li t1, SATP_MODE_SV39 - or t0, t0, t1 - csrw satp, t0 - - /* TODO: Invalidate the TLB */ - - sfence.vma - /* Load registers */ ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) ld tp, (PCB_TP)(x13) /* s[0-11] */ ld s0, (PCB_S + 0 * 8)(x13) ld s1, (PCB_S + 1 * 8)(x13) ld s2, (PCB_S + 2 * 8)(x13) ld s3, (PCB_S + 3 * 8)(x13) ld s4, (PCB_S + 4 * 8)(x13) ld s5, (PCB_S + 5 * 8)(x13) ld s6, (PCB_S + 6 * 8)(x13) ld s7, (PCB_S + 7 * 8)(x13) ld s8, (PCB_S + 8 * 8)(x13) ld s9, (PCB_S + 9 * 8)(x13) ld s10, (PCB_S + 10 * 8)(x13) ld s11, (PCB_S + 11 * 8)(x13) #ifdef FPE /* Is FPE enabled for new thread? */ - ld t0, TD_FRAME(a1) + ld t0, TD_FRAME(a0) ld t1, (TF_SSTATUS)(t0) li t2, SSTATUS_FS_MASK and t3, t1, t2 beqz t3, 1f /* No, skip. */ /* Restore registers. */ __fpe_state_load x13 1: #endif ret END(cpu_throw) /* * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx) * * a0 = old * a1 = new * a2 = mtx * x3 to x7, x16 and x17 are caller saved */ ENTRY(cpu_switch) /* Store the new curthread */ sd a1, PC_CURTHREAD(gp) /* And the new pcb */ ld x13, TD_PCB(a1) sd x13, PC_CURPCB(gp) /* Save the old context. */ ld x13, TD_PCB(a0) /* Store ra, sp and the callee-saved registers */ sd ra, (PCB_RA)(x13) sd sp, (PCB_SP)(x13) sd tp, (PCB_TP)(x13) /* s[0-11] */ sd s0, (PCB_S + 0 * 8)(x13) sd s1, (PCB_S + 1 * 8)(x13) sd s2, (PCB_S + 2 * 8)(x13) sd s3, (PCB_S + 3 * 8)(x13) sd s4, (PCB_S + 4 * 8)(x13) sd s5, (PCB_S + 5 * 8)(x13) sd s6, (PCB_S + 6 * 8)(x13) sd s7, (PCB_S + 7 * 8)(x13) sd s8, (PCB_S + 8 * 8)(x13) sd s9, (PCB_S + 9 * 8)(x13) sd s10, (PCB_S + 10 * 8)(x13) sd s11, (PCB_S + 11 * 8)(x13) #ifdef FPE /* * Is FPE enabled and is it in dirty state * for the old thread? */ ld t0, TD_FRAME(a0) ld t1, (TF_SSTATUS)(t0) li t2, SSTATUS_FS_MASK and t3, t1, t2 li t2, SSTATUS_FS_DIRTY bne t3, t2, 1f /* No, skip. */ /* Yes, mark FPE state clean and save registers. */ li t2, ~SSTATUS_FS_MASK and t3, t1, t2 li t2, SSTATUS_FS_CLEAN or t3, t3, t2 sd t3, (TF_SSTATUS)(t0) __fpe_state_save x13 1: #endif - /* - * Restore the saved context. - */ - ld x13, TD_PCB(a1) + /* Activate the new thread's pmap */ + mv s0, a0 + mv s1, a1 + mv s2, a2 + mv a0, a1 + call _C_LABEL(pmap_activate_sw) + mv a1, s1 - /* - * TODO: We may need to flush the cache here if switching - * to a user process. - */ - - sfence.vma - - /* Switch to the new pmap */ - ld t0, PCB_L1ADDR(x13) - srli t0, t0, PAGE_SHIFT - li t1, SATP_MODE_SV39 - or t0, t0, t1 - csrw satp, t0 - - /* TODO: Invalidate the TLB */ - - sfence.vma - /* Release the old thread */ - sd a2, TD_LOCK(a0) + sd s2, TD_LOCK(s0) #if defined(SCHED_ULE) && defined(SMP) /* Spin if TD_LOCK points to a blocked_lock */ - la a2, _C_LABEL(blocked_lock) + la s2, _C_LABEL(blocked_lock) 1: ld t0, TD_LOCK(a1) - beq t0, a2, 1b + beq t0, s2, 1b #endif + /* + * Restore the saved context. + */ + ld x13, TD_PCB(a1) /* Restore the registers */ ld tp, (PCB_TP)(x13) ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) /* s[0-11] */ ld s0, (PCB_S + 0 * 8)(x13) ld s1, (PCB_S + 1 * 8)(x13) ld s2, (PCB_S + 2 * 8)(x13) ld s3, (PCB_S + 3 * 8)(x13) ld s4, (PCB_S + 4 * 8)(x13) ld s5, (PCB_S + 5 * 8)(x13) ld s6, (PCB_S + 6 * 8)(x13) ld s7, (PCB_S + 7 * 8)(x13) ld s8, (PCB_S + 8 * 8)(x13) ld s9, (PCB_S + 9 * 8)(x13) ld s10, (PCB_S + 10 * 8)(x13) ld s11, (PCB_S + 11 * 8)(x13) #ifdef FPE /* Is FPE enabled for new thread? */ ld t0, TD_FRAME(a1) ld t1, (TF_SSTATUS)(t0) li t2, SSTATUS_FS_MASK and t3, t1, t2 beqz t3, 1f /* No, skip. */ /* Restore registers. */ __fpe_state_load x13 1: #endif ret .Lcpu_switch_panic_str: .asciz "cpu_switch: %p\0" END(cpu_switch) /* * fork_exit(void (*callout)(void *, struct trapframe *), void *arg, * struct trapframe *frame) */ ENTRY(fork_trampoline) mv a0, s0 mv a1, s1 mv a2, sp call _C_LABEL(fork_exit) /* Restore sstatus */ ld t0, (TF_SSTATUS)(sp) /* Ensure interrupts disabled */ li t1, ~SSTATUS_SIE and t0, t0, t1 csrw sstatus, t0 /* Restore exception program counter */ ld t0, (TF_SEPC)(sp) csrw sepc, t0 /* Restore the registers */ ld t0, (TF_T + 0 * 8)(sp) ld t1, (TF_T + 1 * 8)(sp) ld t2, (TF_T + 2 * 8)(sp) ld t3, (TF_T + 3 * 8)(sp) ld t4, (TF_T + 4 * 8)(sp) ld t5, (TF_T + 5 * 8)(sp) ld t6, (TF_T + 6 * 8)(sp) ld s0, (TF_S + 0 * 8)(sp) ld s1, (TF_S + 1 * 8)(sp) ld s2, (TF_S + 2 * 8)(sp) ld s3, (TF_S + 3 * 8)(sp) ld s4, (TF_S + 4 * 8)(sp) ld s5, (TF_S + 5 * 8)(sp) ld s6, (TF_S + 6 * 8)(sp) ld s7, (TF_S + 7 * 8)(sp) ld s8, (TF_S + 8 * 8)(sp) ld s9, (TF_S + 9 * 8)(sp) ld s10, (TF_S + 10 * 8)(sp) ld s11, (TF_S + 11 * 8)(sp) ld a0, (TF_A + 0 * 8)(sp) ld a1, (TF_A + 1 * 8)(sp) ld a2, (TF_A + 2 * 8)(sp) ld a3, (TF_A + 3 * 8)(sp) ld a4, (TF_A + 4 * 8)(sp) ld a5, (TF_A + 5 * 8)(sp) ld a6, (TF_A + 6 * 8)(sp) ld a7, (TF_A + 7 * 8)(sp) /* Load user ra and sp */ ld ra, (TF_RA)(sp) /* * Store our pcpup on stack, we will load it back * on kernel mode trap. */ sd gp, (TF_SIZE)(sp) ld gp, (TF_GP)(sp) /* Save kernel stack so we can use it doing a user trap */ addi sp, sp, TF_SIZE csrw sscratch, sp /* Load user stack */ ld sp, (TF_SP - TF_SIZE)(sp) sret END(fork_trampoline) ENTRY(savectx) /* Store ra, sp and the callee-saved registers */ sd ra, (PCB_RA)(a0) sd sp, (PCB_SP)(a0) sd tp, (PCB_TP)(a0) /* s[0-11] */ sd s0, (PCB_S + 0 * 8)(a0) sd s1, (PCB_S + 1 * 8)(a0) sd s2, (PCB_S + 2 * 8)(a0) sd s3, (PCB_S + 3 * 8)(a0) sd s4, (PCB_S + 4 * 8)(a0) sd s5, (PCB_S + 5 * 8)(a0) sd s6, (PCB_S + 6 * 8)(a0) sd s7, (PCB_S + 7 * 8)(a0) sd s8, (PCB_S + 8 * 8)(a0) sd s9, (PCB_S + 9 * 8)(a0) sd s10, (PCB_S + 10 * 8)(a0) sd s11, (PCB_S + 11 * 8)(a0) #ifdef FPE __fpe_state_save a0 #endif ret END(savectx) Index: head/sys/riscv/riscv/vm_machdep.c =================================================================== --- head/sys/riscv/riscv/vm_machdep.c (revision 344107) +++ head/sys/riscv/riscv/vm_machdep.c (revision 344108) @@ -1,275 +1,272 @@ /*- * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __riscv_xlen == 64 #define TP_OFFSET 16 /* sizeof(struct tcb) */ #endif /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; register_t val; if ((flags & RFPROC) == 0) return; if (td1 == curthread) { /* * Save the tp. These normally happen in cpu_switch, * but if userland changes this then forks this may * not have happened. */ __asm __volatile("mv %0, tp" : "=&r"(val)); td1->td_pcb->pcb_tp = val; /* RISCVTODO: save the FPU state here */ } pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); - td2->td_pcb->pcb_l1addr = - vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1); - tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); /* Clear syscall error flag */ tf->tf_t[0] = 0; /* Arguments for child */ tf->tf_a[0] = 0; tf->tf_a[1] = 0; tf->tf_sstatus |= (SSTATUS_SPIE); /* Enable interrupts. */ tf->tf_sstatus &= ~(SSTATUS_SPP); /* User mode. */ td2->td_frame = tf; /* Set the return value registers for fork() */ td2->td_pcb->pcb_s[0] = (uintptr_t)fork_return; td2->td_pcb->pcb_s[1] = (uintptr_t)td2; td2->td_pcb->pcb_ra = (uintptr_t)fork_trampoline; td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_sstatus_ie = (SSTATUS_SIE); } void cpu_reset(void) { sbi_shutdown(); while(1); } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_a[0] = td->td_retval[0]; frame->tf_a[1] = td->td_retval[1]; frame->tf_t[0] = 0; /* syscall succeeded */ break; case ERESTART: frame->tf_sepc -= 4; /* prev instruction */ break; case EJUSTRETURN: break; default: frame->tf_a[0] = error; frame->tf_t[0] = 1; /* syscall error */ break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_s[0] = (uintptr_t)fork_return; td->td_pcb->pcb_s[1] = (uintptr_t)td; td->td_pcb->pcb_ra = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_sstatus_ie = (SSTATUS_SIE); } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf; tf = td->td_frame; tf->tf_sp = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size); tf->tf_sepc = (register_t)entry; tf->tf_a[0] = (register_t)arg; } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct pcb *pcb; if ((uintptr_t)tls_base >= VM_MAXUSER_ADDRESS) return (EINVAL); pcb = td->td_pcb; pcb->pcb_tp = (register_t)tls_base + TP_OFFSET; if (td == curthread) __asm __volatile("mv tp, %0" :: "r"(pcb->pcb_tp)); return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)STACKALIGN( (caddr_t)td->td_pcb - 8 - sizeof(struct trapframe)); } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_s[0] = (uintptr_t)func; td->td_pcb->pcb_s[1] = (uintptr_t)arg; td->td_pcb->pcb_ra = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; } void cpu_exit(struct thread *td) { } void swi_vm(void *v) { /* Nothing to do here - busdma bounce buffers are not implemented. */ }