Index: head/sys/arm/arm/genassym.c =================================================================== --- head/sys/arm/arm/genassym.c (revision 280711) +++ head/sys/arm/arm/genassym.c (revision 280712) @@ -1,154 +1,166 @@ /*- * Copyright (c) 2004 Olivier Houchard * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include ASSYM(KERNBASE, KERNBASE); ASSYM(PCB_NOALIGNFLT, PCB_NOALIGNFLT); +#ifdef ARM_NEW_PMAP +ASSYM(CPU_ASID_KERNEL,CPU_ASID_KERNEL); +#endif ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); +#ifndef ARM_NEW_PMAP ASSYM(PCB_DACR, offsetof(struct pcb, pcb_dacr)); +#endif ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(PCB_PAGEDIR, offsetof(struct pcb, pcb_pagedir)); +#ifndef ARM_NEW_PMAP ASSYM(PCB_L1VEC, offsetof(struct pcb, pcb_l1vec)); ASSYM(PCB_PL1VEC, offsetof(struct pcb, pcb_pl1vec)); +#endif ASSYM(PCB_R4, offsetof(struct pcb, pcb_regs.sf_r4)); ASSYM(PCB_R5, offsetof(struct pcb, pcb_regs.sf_r5)); ASSYM(PCB_R6, offsetof(struct pcb, pcb_regs.sf_r6)); ASSYM(PCB_R7, offsetof(struct pcb, pcb_regs.sf_r7)); ASSYM(PCB_R8, offsetof(struct pcb, pcb_regs.sf_r8)); ASSYM(PCB_R9, offsetof(struct pcb, pcb_regs.sf_r9)); ASSYM(PCB_R10, offsetof(struct pcb, pcb_regs.sf_r10)); ASSYM(PCB_R11, offsetof(struct pcb, pcb_regs.sf_r11)); ASSYM(PCB_R12, offsetof(struct pcb, pcb_regs.sf_r12)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_regs.sf_sp)); ASSYM(PCB_LR, offsetof(struct pcb, pcb_regs.sf_lr)); ASSYM(PCB_PC, offsetof(struct pcb, pcb_regs.sf_pc)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(M_LEN, offsetof(struct mbuf, m_len)); ASSYM(M_DATA, offsetof(struct mbuf, m_data)); ASSYM(M_NEXT, offsetof(struct mbuf, m_next)); ASSYM(IP_SRC, offsetof(struct ip, ip_src)); ASSYM(IP_DST, offsetof(struct ip, ip_dst)); ASSYM(CF_SETTTB, offsetof(struct cpu_functions, cf_setttb)); ASSYM(CF_CONTROL, offsetof(struct cpu_functions, cf_control)); ASSYM(CF_CONTEXT_SWITCH, offsetof(struct cpu_functions, cf_context_switch)); 
ASSYM(CF_DCACHE_WB_RANGE, offsetof(struct cpu_functions, cf_dcache_wb_range)); ASSYM(CF_L2CACHE_WB_RANGE, offsetof(struct cpu_functions, cf_l2cache_wb_range)); ASSYM(CF_IDCACHE_WBINV_ALL, offsetof(struct cpu_functions, cf_idcache_wbinv_all)); ASSYM(CF_L2CACHE_WBINV_ALL, offsetof(struct cpu_functions, cf_l2cache_wbinv_all)); ASSYM(CF_TLB_FLUSHID_SE, offsetof(struct cpu_functions, cf_tlb_flushID_SE)); ASSYM(CF_ICACHE_SYNC, offsetof(struct cpu_functions, cf_icache_sync_all)); ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); ASSYM(V_SOFT, offsetof(struct vmmeter, v_soft)); ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(MD_TP, offsetof(struct mdthread, md_tp)); ASSYM(MD_RAS_START, offsetof(struct mdthread, md_ras_start)); ASSYM(MD_RAS_END, offsetof(struct mdthread, md_ras_end)); ASSYM(TF_R0, offsetof(struct trapframe, tf_r0)); ASSYM(TF_R1, offsetof(struct trapframe, tf_r1)); ASSYM(TF_PC, offsetof(struct trapframe, tf_pc)); ASSYM(P_PID, offsetof(struct proc, p_pid)); ASSYM(P_FLAG, offsetof(struct proc, p_flag)); ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); #ifdef ARM_TP_ADDRESS ASSYM(ARM_TP_ADDRESS, ARM_TP_ADDRESS); ASSYM(ARM_RAS_START, ARM_RAS_START); ASSYM(ARM_RAS_END, ARM_RAS_END); #endif #ifdef VFP ASSYM(PCB_VFPSTATE, offsetof(struct pcb, pcb_vfpstate)); ASSYM(PC_CPU, offsetof(struct pcpu, pc_cpu)); ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); #endif ASSYM(PAGE_SIZE, PAGE_SIZE); -ASSYM(PDESIZE, PDESIZE); ASSYM(PMAP_DOMAIN_KERNEL, PMAP_DOMAIN_KERNEL); #ifdef PMAP_INCLUDE_PTE_SYNC ASSYM(PMAP_INCLUDE_PTE_SYNC, 1); #endif ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(P_TRACED, P_TRACED); ASSYM(P_SIGEVENT, P_SIGEVENT); 
ASSYM(P_PROFIL, P_PROFIL); ASSYM(TRAPFRAMESIZE, sizeof(struct trapframe)); ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(MAXCPU, MAXCPU); +ASSYM(_NCPUWORDS, _NCPUWORDS); ASSYM(NIRQ, NIRQ); ASSYM(PCPU_SIZE, sizeof(struct pcpu)); +ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); +ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); +ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); +ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); ASSYM(DCACHE_LINE_SIZE, offsetof(struct cpuinfo, dcache_line_size)); ASSYM(DCACHE_LINE_MASK, offsetof(struct cpuinfo, dcache_line_mask)); ASSYM(ICACHE_LINE_SIZE, offsetof(struct cpuinfo, icache_line_size)); ASSYM(ICACHE_LINE_MASK, offsetof(struct cpuinfo, icache_line_mask)); Index: head/sys/arm/arm/machdep.c =================================================================== --- head/sys/arm/arm/machdep.c (revision 280711) +++ head/sys/arm/arm/machdep.c (revision 280712) @@ -1,1319 +1,1519 @@ /* $NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $ */ /*- * Copyright (c) 2004 Olivier Houchard * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe * for the NetBSD Project. * 4. 
The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Machine dependant functions for kernel setup * * Created : 17/09/94 * Updated : 18/04/01 updated for new wscons */ #include "opt_compat.h" #include "opt_ddb.h" #include "opt_platform.h" #include "opt_sched.h" #include "opt_timer.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #ifdef DDB #include #endif #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) 
#endif struct pcpu __pcpu[MAXCPU]; struct pcpu *pcpup = &__pcpu[0]; static struct trapframe proc0_tf; uint32_t cpu_reset_address = 0; int cold = 1; vm_offset_t vector_page; int (*_arm_memcpy)(void *, void *, int, int) = NULL; int (*_arm_bzero)(void *, int, int) = NULL; int _min_memcpy_size = 0; int _min_bzero_size = 0; extern int *end; #ifdef FDT +vm_paddr_t pmap_pa; + +#ifdef ARM_NEW_PMAP +vm_offset_t systempage; +vm_offset_t irqstack; +vm_offset_t undstack; +vm_offset_t abtstack; +#else /* * This is the number of L2 page tables required for covering max * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf, * stacks etc.), uprounded to be divisible by 4. */ #define KERNEL_PT_MAX 78 static struct pv_addr kernel_pt_table[KERNEL_PT_MAX]; -vm_paddr_t pmap_pa; - struct pv_addr systempage; static struct pv_addr msgbufpv; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; static struct pv_addr kernelstack; - #endif +#endif #if defined(LINUX_BOOT_ABI) #define LBABI_MAX_BANKS 10 uint32_t board_id; struct arm_lbabi_tag *atag_list; char linux_command_line[LBABI_MAX_COMMAND_LINE + 1]; char atags[LBABI_MAX_COMMAND_LINE * 2]; uint32_t memstart[LBABI_MAX_BANKS]; uint32_t memsize[LBABI_MAX_BANKS]; uint32_t membanks; #endif static uint32_t board_revision; /* hex representation of uint64_t */ static char board_serial[32]; SYSCTL_NODE(_hw, OID_AUTO, board, CTLFLAG_RD, 0, "Board attributes"); SYSCTL_UINT(_hw_board, OID_AUTO, revision, CTLFLAG_RD, &board_revision, 0, "Board revision"); SYSCTL_STRING(_hw_board, OID_AUTO, serial, CTLFLAG_RD, board_serial, 0, "Board serial"); int vfp_exists; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &vfp_exists, 0, "Floating point support enabled"); void board_set_serial(uint64_t serial) { snprintf(board_serial, sizeof(board_serial)-1, "%016jx", serial); } void board_set_revision(uint32_t revision) { board_revision = revision; } void sendsig(catcher, ksi, mask) sig_t catcher; ksiginfo_t *ksi; 
sigset_t *mask; { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; int onstack; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_usr_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct sigframe *)td->td_frame->tf_usr_sp; /* make room on the stack */ fp--; /* make the stack aligned */ fp = (struct sigframe *)STACKALIGN(fp); /* Populate the siginfo frame. */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* * Build context to run handler in. We invoke the handler * directly, only returning via the trampoline. Note the * trampoline version numbers are coordinated with machine- * dependent code in libc. 
*/ tf->tf_r0 = sig; tf->tf_r1 = (register_t)&fp->sf_si; tf->tf_r2 = (register_t)&fp->sf_uc; /* the trampoline uses r5 as the uc address */ tf->tf_r5 = (register_t)&fp->sf_uc; tf->tf_pc = (register_t)catcher; tf->tf_usr_sp = (register_t)fp; tf->tf_usr_lr = (register_t)(PS_STRINGS - *(p->p_sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr, tf->tf_usr_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } struct kva_md_info kmi; /* * arm32_vector_init: * * Initialize the vector page, and select whether or not to * relocate the vectors. * * NOTE: We expect the vector page to be mapped at its expected * destination. */ extern unsigned int page0[], page0_data[]; void arm_vector_init(vm_offset_t va, int which) { unsigned int *vectors = (int *) va; unsigned int *vectors_data = vectors + (page0_data - page0); int vec; /* * Loop through the vectors we're taking over, and copy the * vector's insn and data word. */ for (vec = 0; vec < ARM_NVEC; vec++) { if ((which & (1 << vec)) == 0) { /* Don't want to take over this vector. */ continue; } vectors[vec] = page0[vec]; vectors_data[vec] = page0_data[vec]; } /* Now sync the vectors. */ cpu_icache_sync_range(va, (ARM_NVEC * 2) * sizeof(u_int)); vector_page = va; if (va == ARM_VECTORS_HIGH) { /* * Assume the MD caller knows what it's doing here, and * really does want the vector page relocated. * * Note: This has to be done here (and not just in * cpu_setup()) because the vector page needs to be * accessible *before* cpu_startup() is called. * Think ddb(9) ... * * NOTE: If the CPU control register is not readable, * this will totally fail! We'll just assume that * any system that has high vector support has a * readable CPU control register, for now. If we * ever encounter one that does not, we'll have to * rethink this. 
*/ cpu_control(CPU_CONTROL_VECRELOC, CPU_CONTROL_VECRELOC); } } static void cpu_startup(void *dummy) { struct pcb *pcb = thread0.td_pcb; const unsigned int mbyte = 1024 * 1024; #ifdef ARM_TP_ADDRESS #ifndef ARM_CACHE_LOCK_ENABLE vm_page_t m; #endif #endif identify_arm_cpu(); vm_ksubmap_init(&kmi); /* * Display the RAM layout. */ printf("real memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(realmem), (uintmax_t)arm32_ptob(realmem) / mbyte); printf("avail memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(vm_cnt.v_free_count), (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte); if (bootverbose) { arm_physmem_print_tables(); arm_devmap_print_table(); } bufinit(); vm_pager_bufferinit(); pcb->pcb_regs.sf_sp = (u_int)thread0.td_kstack + USPACE_SVC_STACK_TOP; - vector_page_setprot(VM_PROT_READ); pmap_set_pcb_pagedir(pmap_kernel(), pcb); +#ifndef ARM_NEW_PMAP + vector_page_setprot(VM_PROT_READ); pmap_postinit(); +#endif #ifdef ARM_TP_ADDRESS #ifdef ARM_CACHE_LOCK_ENABLE pmap_kenter_user(ARM_TP_ADDRESS, ARM_TP_ADDRESS); arm_lock_cache_line(ARM_TP_ADDRESS); #else m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_ZERO); pmap_kenter_user(ARM_TP_ADDRESS, VM_PAGE_TO_PHYS(m)); #endif *(uint32_t *)ARM_RAS_START = 0; *(uint32_t *)ARM_RAS_END = 0xffffffff; #endif } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { cpu_dcache_wb_range((uintptr_t)ptr, len); #ifdef ARM_L2_PIPT cpu_l2cache_wb_range((uintptr_t)vtophys(ptr), len); #else cpu_l2cache_wb_range((uintptr_t)ptr, len); #endif } /* Get current clock frequency for the given cpu id. 
*/ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } void cpu_idle(int busy) { CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); spinlock_enter(); #ifndef NO_EVENTTIMERS if (!busy) cpu_idleclock(); #endif if (!sched_runnable()) cpu_sleep(0); #ifndef NO_EVENTTIMERS if (!busy) cpu_activeclock(); #endif spinlock_exit(); CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { return (0); } /* * Most ARM platforms don't need to do anything special to init their clocks * (they get intialized during normal device attachment), and by not defining a * cpu_initclocks() function they get this generic one. Any platform that needs * to do something special can just provide their own implementation, which will * override this one due to the weak linkage. */ void arm_generic_initclocks(void) { #ifndef NO_EVENTTIMERS #ifdef SMP if (PCPU_GET(cpuid) == 0) cpu_initclocks_bsp(); else cpu_initclocks_ap(); #else cpu_initclocks_bsp(); #endif #endif } __weak_reference(arm_generic_initclocks, cpu_initclocks); int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(&tf->tf_r0, regs->r, sizeof(regs->r)); regs->r_sp = tf->tf_usr_sp; regs->r_lr = tf->tf_usr_lr; regs->r_pc = tf->tf_pc; regs->r_cpsr = tf->tf_spsr; return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { bzero(regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(regs->r, &tf->tf_r0, sizeof(regs->r)); tf->tf_usr_sp = regs->r_sp; tf->tf_usr_lr = regs->r_lr; tf->tf_pc = regs->r_pc; tf->tf_spsr &= ~PSR_FLAGS; tf->tf_spsr |= regs->r_cpsr & PSR_FLAGS; return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { return (0); } int set_dbregs(struct thread *td, struct dbreg *regs) { return (0); } static int ptrace_read_int(struct thread *td, vm_offset_t addr, u_int32_t 
*v) { struct iovec iov; struct uio uio; PROC_LOCK_ASSERT(td->td_proc, MA_NOTOWNED); iov.iov_base = (caddr_t) v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; return proc_rwmem(td->td_proc, &uio); } static int ptrace_write_int(struct thread *td, vm_offset_t addr, u_int32_t v) { struct iovec iov; struct uio uio; PROC_LOCK_ASSERT(td->td_proc, MA_NOTOWNED); iov.iov_base = (caddr_t) &v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_td = td; return proc_rwmem(td->td_proc, &uio); } int ptrace_single_step(struct thread *td) { struct proc *p; int error; KASSERT(td->td_md.md_ptrace_instr == 0, ("Didn't clear single step")); p = td->td_proc; PROC_UNLOCK(p); error = ptrace_read_int(td, td->td_frame->tf_pc + 4, &td->td_md.md_ptrace_instr); if (error) goto out; error = ptrace_write_int(td, td->td_frame->tf_pc + 4, PTRACE_BREAKPOINT); if (error) td->td_md.md_ptrace_instr = 0; td->td_md.md_ptrace_addr = td->td_frame->tf_pc + 4; out: PROC_LOCK(p); return (error); } int ptrace_clear_single_step(struct thread *td) { struct proc *p; if (td->td_md.md_ptrace_instr) { p = td->td_proc; PROC_UNLOCK(p); ptrace_write_int(td, td->td_md.md_ptrace_addr, td->td_md.md_ptrace_instr); PROC_LOCK(p); td->td_md.md_ptrace_instr = 0; } return (0); } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_pc = addr; return (0); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; register_t cspr; td = curthread; if (td->td_md.md_spinlock_count == 0) { cspr = disable_interrupts(PSR_I | PSR_F); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = cspr; } else td->td_md.md_spinlock_count++; critical_enter(); 
} void spinlock_exit(void) { struct thread *td; register_t cspr; td = curthread; critical_exit(); cspr = td->td_md.md_saved_cspr; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) restore_interrupts(cspr); } /* * Clear registers on exec */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(*tf)); tf->tf_usr_sp = stack; tf->tf_usr_lr = imgp->entry_addr; tf->tf_svc_lr = 0x77777777; tf->tf_pc = imgp->entry_addr; tf->tf_spsr = PSR_USR32_MODE; } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; __greg_t *gr = mcp->__gregs; if (clear_ret & GET_MC_CLEAR_RET) gr[_REG_R0] = 0; else gr[_REG_R0] = tf->tf_r0; gr[_REG_R1] = tf->tf_r1; gr[_REG_R2] = tf->tf_r2; gr[_REG_R3] = tf->tf_r3; gr[_REG_R4] = tf->tf_r4; gr[_REG_R5] = tf->tf_r5; gr[_REG_R6] = tf->tf_r6; gr[_REG_R7] = tf->tf_r7; gr[_REG_R8] = tf->tf_r8; gr[_REG_R9] = tf->tf_r9; gr[_REG_R10] = tf->tf_r10; gr[_REG_R11] = tf->tf_r11; gr[_REG_R12] = tf->tf_r12; gr[_REG_SP] = tf->tf_usr_sp; gr[_REG_LR] = tf->tf_usr_lr; gr[_REG_PC] = tf->tf_pc; gr[_REG_CPSR] = tf->tf_spsr; return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
*/ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; const __greg_t *gr = mcp->__gregs; tf->tf_r0 = gr[_REG_R0]; tf->tf_r1 = gr[_REG_R1]; tf->tf_r2 = gr[_REG_R2]; tf->tf_r3 = gr[_REG_R3]; tf->tf_r4 = gr[_REG_R4]; tf->tf_r5 = gr[_REG_R5]; tf->tf_r6 = gr[_REG_R6]; tf->tf_r7 = gr[_REG_R7]; tf->tf_r8 = gr[_REG_R8]; tf->tf_r9 = gr[_REG_R9]; tf->tf_r10 = gr[_REG_R10]; tf->tf_r11 = gr[_REG_R11]; tf->tf_r12 = gr[_REG_R12]; tf->tf_usr_sp = gr[_REG_SP]; tf->tf_usr_lr = gr[_REG_LR]; tf->tf_pc = gr[_REG_PC]; tf->tf_spsr = gr[_REG_CPSR]; return (0); } /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; int spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. */ spsr = uc.uc_mcontext.__gregs[_REG_CPSR]; if ((spsr & PSR_MODE) != PSR_USR32_MODE || (spsr & (PSR_I | PSR_F)) != 0) return (EINVAL); /* Restore register context. */ set_mcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_regs.sf_r4 = tf->tf_r4; pcb->pcb_regs.sf_r5 = tf->tf_r5; pcb->pcb_regs.sf_r6 = tf->tf_r6; pcb->pcb_regs.sf_r7 = tf->tf_r7; pcb->pcb_regs.sf_r8 = tf->tf_r8; pcb->pcb_regs.sf_r9 = tf->tf_r9; pcb->pcb_regs.sf_r10 = tf->tf_r10; pcb->pcb_regs.sf_r11 = tf->tf_r11; pcb->pcb_regs.sf_r12 = tf->tf_r12; pcb->pcb_regs.sf_pc = tf->tf_pc; pcb->pcb_regs.sf_lr = tf->tf_usr_lr; pcb->pcb_regs.sf_sp = tf->tf_usr_sp; } /* * Fake up a boot descriptor table */ vm_offset_t fake_preload_metadata(struct arm_boot_params *abp __unused) { #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i = 0; static uint32_t fake_preload[35]; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i++], "kernel"); i += 1; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf kernel") + 1; strcpy((char*)&fake_preload[i++], "elf kernel"); i += 2; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = KERNVIRTADDR; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(uint32_t); fake_preload[i++] = (uint32_t)&end - KERNVIRTADDR; #ifdef DDB if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); db_fetch_ksymtab(zstart, zend); } else #endif lastaddr = (vm_offset_t)&end; fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; return (lastaddr); } void pcpu0_init(void) { #if ARM_ARCH_6 || ARM_ARCH_7A || defined(CPU_MV_PJ4B) set_curthread(&thread0); #endif pcpu_init(pcpup, 0, sizeof(struct pcpu)); 
PCPU_SET(curthread, &thread0); #ifdef VFP PCPU_SET(cpu, 0); #endif } #if defined(LINUX_BOOT_ABI) vm_offset_t linux_parse_boot_param(struct arm_boot_params *abp) { struct arm_lbabi_tag *walker; uint32_t revision; uint64_t serial; /* * Linux boot ABI: r0 = 0, r1 is the board type (!= 0) and r2 * is atags or dtb pointer. If all of these aren't satisfied, * then punt. */ if (!(abp->abp_r0 == 0 && abp->abp_r1 != 0 && abp->abp_r2 != 0)) return 0; board_id = abp->abp_r1; walker = (struct arm_lbabi_tag *) (abp->abp_r2 + KERNVIRTADDR - abp->abp_physaddr); /* xxx - Need to also look for binary device tree */ if (ATAG_TAG(walker) != ATAG_CORE) return 0; atag_list = walker; while (ATAG_TAG(walker) != ATAG_NONE) { switch (ATAG_TAG(walker)) { case ATAG_CORE: break; case ATAG_MEM: arm_physmem_hardware_region(walker->u.tag_mem.start, walker->u.tag_mem.size); break; case ATAG_INITRD2: break; case ATAG_SERIAL: serial = walker->u.tag_sn.low | ((uint64_t)walker->u.tag_sn.high << 32); board_set_serial(serial); break; case ATAG_REVISION: revision = walker->u.tag_rev.rev; board_set_revision(revision); break; case ATAG_CMDLINE: /* XXX open question: Parse this for boothowto? */ bcopy(walker->u.tag_cmd.command, linux_command_line, ATAG_SIZE(walker)); break; default: break; } walker = ATAG_NEXT(walker); } /* Save a copy for later */ bcopy(atag_list, atags, (char *)walker - (char *)atag_list + ATAG_SIZE(walker)); return fake_preload_metadata(abp); } #endif #if defined(FREEBSD_BOOT_LOADER) vm_offset_t freebsd_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr = 0; void *mdp; void *kmdp; #ifdef DDB vm_offset_t ksym_start; vm_offset_t ksym_end; #endif /* * Mask metadata pointer: it is supposed to be on page boundary. If * the first argument (mdp) doesn't point to a valid address the * bootloader must have passed us something else than the metadata * ptr, so we give up. Also give up if we cannot find metadta section * the loader creates that we get all this data out of. 
*/ if ((mdp = (void *)(abp->abp_r0 & ~PAGE_MASK)) == NULL) return 0; preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) return 0; boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif preload_addr_relocate = KERNVIRTADDR - abp->abp_physaddr; return lastaddr; } #endif vm_offset_t default_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr; #if defined(LINUX_BOOT_ABI) if ((lastaddr = linux_parse_boot_param(abp)) != 0) return lastaddr; #endif #if defined(FREEBSD_BOOT_LOADER) if ((lastaddr = freebsd_parse_boot_param(abp)) != 0) return lastaddr; #endif /* Fall back to hardcoded metadata. */ lastaddr = fake_preload_metadata(abp); return lastaddr; } /* * Stub version of the boot parameter parsing routine. We are * called early in initarm, before even VM has been initialized. * This routine needs to preserve any data that the boot loader * has passed in before the kernel starts to grow past the end * of the BSS, traditionally the place boot-loaders put this data. * * Since this is called so early, things that depend on the vm system * being setup (including access to some SoC's serial ports), about * all that can be done in this routine is to copy the arguments. * * This is the default boot parameter parsing routine. Individual * kernels/boards can override this weak function with one of their * own. We just fake metadata... 
*/ __weak_reference(default_parse_boot_param, parse_boot_param); /* * Initialize proc0 */ void init_proc0(vm_offset_t kstack) { proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_vfpcpu = -1; thread0.td_pcb->pcb_vfpstate.fpscr = VFPSCR_DN | VFPSCR_FZ; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } +#ifdef ARM_NEW_PMAP void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, + irqstack + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); + set_stackptr(PSR_ABT32_MODE, + abtstack + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); + set_stackptr(PSR_UND32_MODE, + undstack + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); +} +#else +void +set_stackptrs(int cpu) +{ + + set_stackptr(PSR_IRQ32_MODE, irqstack.pv_va + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack.pv_va + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack.pv_va + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } +#endif #ifdef FDT static char * kenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return (cp); } static void print_kenv(void) { int len; char *cp; debugf("loader passed (static) kenv:\n"); if (kern_envp == NULL) { debugf(" no env, null ptr\n"); return; } debugf(" kern_envp = 0x%08x\n", (uint32_t)kern_envp); len = 0; for (cp = kern_envp; cp != NULL; cp = kenv_next(cp)) debugf(" %x %s\n", (uint32_t)cp, cp); } +#ifndef ARM_NEW_PMAP void * initarm(struct arm_boot_params *abp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; struct pv_addr kernel_l1pt; struct pv_addr dpcpu; vm_offset_t dtbp, freemempos, l2_start, lastaddr; uint32_t memsize, l2size; char *env; void *kmdp; u_int l1pagetable; int i, j, err_devmap, mem_regions_sz; lastaddr = parse_boot_param(abp); arm_physmem_kernaddr = abp->abp_physaddr; memsize = 0; cpuinfo_init(); 
set_cpufuncs(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); else dtbp = (vm_offset_t)NULL; #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0) panic("Cannot get physical memory regions"); arm_physmem_hardware_regions(mem_regions, mem_regions_sz); /* Grab reserved memory regions information from device tree. */ if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) arm_physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); /* Platform-specific initialisation */ platform_probe_and_attach(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* Calculate number of L2 tables needed for mapping vm_page_array */ l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page); l2size = (l2size >> L1_S_SHIFT) + 1; /* * Add one table for end of kernel map, one for stacks, msgbuf and * L1 and L2 tables map and one for vectors map. 
*/ l2size += 3; /* Make it divisible by 4 */ l2size = (l2size + 3) & ~3; freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_va, (np)); \ (var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); for (i = 0, j = 0; i < l2size; ++i) { if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[i], L2_TABLE_SIZE / PAGE_SIZE); j = i; } else { kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va + L2_TABLE_SIZE_REAL * (i - j); kernel_pt_table[i].pv_pa = kernel_pt_table[i].pv_va - KERNVIRTADDR + abp->abp_physaddr; } } /* * Allocate a page for the system page mapped to 0x00000000 * or 0xffff0000. This page will just contain the system vectors * and can be shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. * This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_va; /* * Try to map as much as possible of kernel text and data using * 1MB section mapping and for the rest of initial kernel address * space use L2 coarse tables. 
* * Link L2 tables for mapping remainder of kernel (modulo 1MB) * and kernel structures */ l2_start = lastaddr & ~(L1_S_OFFSET); for (i = 0 ; i < l2size - 1; i++) pmap_link_l2pt(l1pagetable, l2_start + i * L1_S_SIZE, &kernel_pt_table[i]); pmap_curmaxkvaddr = l2_start + (l2size - 1) * L1_S_SIZE; /* Map kernel code and data */ pmap_map_chunk(l1pagetable, KERNVIRTADDR, abp->abp_physaddr, (((uint32_t)(lastaddr) - KERNVIRTADDR) + PAGE_MASK) & ~PAGE_MASK, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map L1 directory and allocated L2 page tables */ pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); pmap_map_chunk(l1pagetable, kernel_pt_table[0].pv_va, kernel_pt_table[0].pv_pa, L2_TABLE_SIZE_REAL * l2size, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); /* Map allocated DPCPU, stacks and msgbuf */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, freemempos - dpcpu.pv_va, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Link and map the vector page */ pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH, &kernel_pt_table[l2size - 1]); pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, PTE_CACHE); /* Establish static device mappings. */ err_devmap = platform_devmap_init(); arm_devmap_bootstrap(l1pagetable, NULL); vm_max_kernel_address = platform_lastaddr(); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); pmap_pa = kernel_l1pt.pv_pa; setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. */ cpu_setup(""); /* * Only after the SOC registers block is mapped we can perform device * tree fixups, as they may attempt to read parameters from hardware. 
*/ OF_interpret("perform-fixup", 0); platform_gpio_init(); cninit(); debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); print_kenv(); env = kern_getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); platform_late_init(); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE); set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ cpu_idcache_wbinv_all(); undefined_init(); init_proc0(kernelstack.pv_va); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); pmap_bootstrap(freemempos, &kernel_l1pt); msgbufp = (void *)msgbufpv.pv_va; msgbufinit(msgbufp, msgbufsize); mutex_init(); /* * Exclude the kernel (and all the things we allocated which immediately * follow the kernel) from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. 
*/ arm_physmem_exclude_region(abp->abp_physaddr, (virtual_avail - KERNVIRTADDR), EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } +#else /* !ARM_NEW_PMAP */ +void * +initarm(struct arm_boot_params *abp) +{ + struct mem_region mem_regions[FDT_MEM_REGIONS]; + vm_paddr_t lastaddr; + vm_offset_t dtbp, kernelstack, dpcpu; + uint32_t memsize; + char *env; + void *kmdp; + int err_devmap, mem_regions_sz; + + /* get last allocated physical address */ + arm_physmem_kernaddr = abp->abp_physaddr; + lastaddr = parse_boot_param(abp) - KERNVIRTADDR + arm_physmem_kernaddr; + + memsize = 0; + set_cpufuncs(); + cpuinfo_init(); + + /* + * Find the dtb passed in by the boot loader. + */ + kmdp = preload_search_by_type("elf kernel"); + if (kmdp != NULL) + dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); + else + dtbp = (vm_offset_t)NULL; +#if defined(FDT_DTB_STATIC) + /* + * In case the device tree blob was not retrieved (from metadata) try + * to use the statically embedded one. + */ + if (dtbp == (vm_offset_t)NULL) + dtbp = (vm_offset_t)&fdt_static_dtb; #endif + + if (OF_install(OFW_FDT, 0) == FALSE) + panic("Cannot install FDT"); + + if (OF_init((void *)dtbp) != 0) + panic("OF_init failed with the found device tree"); + + /* Grab physical memory regions information from device tree. */ + if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0) + panic("Cannot get physical memory regions"); + arm_physmem_hardware_regions(mem_regions, mem_regions_sz); + + /* Grab reserved memory regions information from device tree. */ + if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) + arm_physmem_exclude_regions(mem_regions, mem_regions_sz, + EXFLAG_NODUMP | EXFLAG_NOALLOC); + + /* + * Set TEX remapping registers. + * Setup kernel page tables and switch to kernel L1 page table. 
+ */ + pmap_set_tex(); + pmap_bootstrap_prepare(lastaddr); + + /* + * Now that proper page tables are installed, call cpu_setup() to enable + * instruction and data caches and other chip-specific features. + */ + cpu_setup(""); + + /* Platform-specific initialisation */ + platform_probe_and_attach(); + pcpu0_init(); + + /* Do basic tuning, hz etc */ + init_param1(); + + /* + * Allocate a page for the system page mapped to 0xffff0000 + * This page will just contain the system vectors and can be + * shared by all processes. + */ + systempage = pmap_preboot_get_pages(1); + + /* Map the vector page. */ + pmap_preboot_map_pages(systempage, ARM_VECTORS_HIGH, 1); + if (virtual_end >= ARM_VECTORS_HIGH) + virtual_end = ARM_VECTORS_HIGH - 1; + + /* Allocate dynamic per-cpu area. */ + dpcpu = pmap_preboot_get_vpages(DPCPU_SIZE / PAGE_SIZE); + dpcpu_init((void *)dpcpu, 0); + + /* Allocate stacks for all modes */ + irqstack = pmap_preboot_get_vpages(IRQ_STACK_SIZE * MAXCPU); + abtstack = pmap_preboot_get_vpages(ABT_STACK_SIZE * MAXCPU); + undstack = pmap_preboot_get_vpages(UND_STACK_SIZE * MAXCPU ); + kernelstack = pmap_preboot_get_vpages(KSTACK_PAGES * MAXCPU); + + /* Allocate message buffer. */ + msgbufp = (void *)pmap_preboot_get_vpages( + round_page(msgbufsize) / PAGE_SIZE); + + /* + * Pages were allocated during the secondary bootstrap for the + * stacks for different CPU modes. + * We must now set the r13 registers in the different CPU modes to + * point to these stacks. + * Since the ARM stacks use STMFD etc. we must set r13 to the top end + * of the stack memory. + */ + set_stackptrs(0); + mutex_init(); + + /* Establish static device mappings. */ + err_devmap = platform_devmap_init(); + arm_devmap_bootstrap(0, NULL); + vm_max_kernel_address = platform_lastaddr(); + + /* + * Only after the SOC registers block is mapped we can perform device + * tree fixups, as they may attempt to read parameters from hardware. 
+ */ + OF_interpret("perform-fixup", 0); + platform_gpio_init(); + cninit(); + + debugf("initarm: console initialized\n"); + debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); + debugf(" boothowto = 0x%08x\n", boothowto); + debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); + debugf(" lastaddr1: 0x%08x\n", lastaddr); + print_kenv(); + + env = kern_getenv("kernelname"); + if (env != NULL) + strlcpy(kernelname, env, sizeof(kernelname)); + + if (err_devmap != 0) + printf("WARNING: could not fully configure devmap, error=%d\n", + err_devmap); + + platform_late_init(); + + /* + * We must now clean the cache again.... + * Cleaning may be done by reading new data to displace any + * dirty data in the cache. This will have happened in setttb() + * but since we are boot strapping the addresses used for the read + * may have just been remapped and thus the cache could be out + * of sync. A re-clean after the switch will cure this. + * After booting there are no gross relocations of the kernel thus + * this problem will not occur after initarm(). + */ + /* Set stack for exception handlers */ + undefined_init(); + init_proc0(kernelstack); + arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); + enable_interrupts(PSR_A); + pmap_bootstrap(0); + + /* Exclude the kernel (and all the things we allocated which immediately + * follow the kernel) from the VM allocation pool but not from crash + * dumps. virtual_avail is a global variable which tracks the kva we've + * "allocated" while setting up pmaps. + * + * Prepare the list of physical memory available to the vm subsystem. + */ + arm_physmem_exclude_region(abp->abp_physaddr, + pmap_preboot_get_pages(0) - abp->abp_physaddr, EXFLAG_NOALLOC); + arm_physmem_init_kernel_globals(); + + init_param2(physmem); + /* Init message buffer. 
*/ + msgbufinit(msgbufp, msgbufsize); + kdb_init(); + return ((void *)STACKALIGN(thread0.td_pcb)); + +} + +#endif /* !ARM_NEW_PMAP */ +#endif /* FDT */ Index: head/sys/arm/arm/mem.c =================================================================== --- head/sys/arm/arm/mem.c (revision 280711) +++ head/sys/arm/arm/mem.c (revision 280712) @@ -1,167 +1,170 @@ /*- * Copyright (c) 1988 University of Utah. * Copyright (c) 1982, 1986, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and code derived from software contributed to * Berkeley by William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: mem.c 1.13 89/10/08$ * from: @(#)mem.c 7.2 (Berkeley) 5/9/91 */ #include __FBSDID("$FreeBSD$"); /* * Memory special file */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Used in /dev/mem drivers and elsewhere */ MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors"); struct mem_range_softc mem_range_softc; static struct sx tmppt_lock; SX_SYSINIT(tmppt, &tmppt_lock, "mem4map"); /* ARGSUSED */ int memrw(struct cdev *dev, struct uio *uio, int flags) { int o; u_int c = 0, v; struct iovec *iov; int error = 0; vm_offset_t addr, eaddr; while (uio->uio_resid > 0 && error == 0) { iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iov++; uio->uio_iovcnt--; if (uio->uio_iovcnt < 0) panic("memrw"); continue; } if (dev2unit(dev) == CDEV_MINOR_MEM) { int i; int address_valid = 0; v = uio->uio_offset; v &= ~PAGE_MASK; for (i = 0; dump_avail[i] || dump_avail[i + 1]; i += 2) { if (v >= dump_avail[i] && v < dump_avail[i + 1]) { address_valid = 1; break; } } if (!address_valid) return (EINVAL); sx_xlock(&tmppt_lock); pmap_kenter((vm_offset_t)_tmppt, v); +#ifdef ARM_NEW_PMAP + pmap_tlb_flush(kernel_pmap, (vm_offset_t)_tmppt); +#endif o = (int)uio->uio_offset & PAGE_MASK; c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK)); c = min(c, (u_int)(PAGE_SIZE - o)); c = min(c, (u_int)iov->iov_len); error = 
uiomove((caddr_t)&_tmppt[o], (int)c, uio); pmap_qremove((vm_offset_t)_tmppt, 1); sx_xunlock(&tmppt_lock); continue; } else if (dev2unit(dev) == CDEV_MINOR_KMEM) { c = iov->iov_len; /* * Make sure that all of the pages are currently * resident so that we don't create any zero-fill * pages. */ addr = trunc_page(uio->uio_offset); eaddr = round_page(uio->uio_offset + c); for (; addr < eaddr; addr += PAGE_SIZE) if (pmap_extract(kernel_pmap, addr) == 0) return (EFAULT); if (!kernacc((caddr_t)(int)uio->uio_offset, c, uio->uio_rw == UIO_READ ? VM_PROT_READ : VM_PROT_WRITE)) return (EFAULT); error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio); continue; } /* else panic! */ } return (error); } /* * allow user processes to MMAP some memory sections * instead of going through read/write */ /* ARGSUSED */ int memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int prot __unused, vm_memattr_t *memattr __unused) { if (dev2unit(dev) == CDEV_MINOR_MEM) *paddr = offset; else if (dev2unit(dev) == CDEV_MINOR_KMEM) *paddr = vtophys(offset); /* else panic! */ return (0); } Index: head/sys/arm/arm/minidump_machdep.c =================================================================== --- head/sys/arm/arm/minidump_machdep.c (revision 280711) +++ head/sys/arm/arm/minidump_machdep.c (revision 280712) @@ -1,498 +1,513 @@ /*- * Copyright (c) 2008 Semihalf, Grzegorz Bernacki * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: FreeBSD: src/sys/i386/i386/minidump_machdep.c,v 1.6 2008/08/17 23:27:27 */ #include __FBSDID("$FreeBSD$"); #include "opt_watchdog.h" #include #include #include #include #include #include #include #ifdef SW_WATCHDOG #include #endif #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct kerneldumpheader) == 512); /* * Don't touch the first SIZEOF_METADATA bytes on the dump device. This * is to protect us from metadata and to protect metadata from us. */ #define SIZEOF_METADATA (64*1024) uint32_t *vm_page_dump; int vm_page_dump_size; +#ifndef ARM_NEW_PMAP + static struct kerneldumpheader kdh; + static off_t dumplo; /* Handle chunked writes. 
*/ static size_t fragsz, offset; static void *dump_va; static uint64_t counter, progress; CTASSERT(sizeof(*vm_page_dump) == 4); static int is_dumpable(vm_paddr_t pa) { int i; for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) return (1); } return (0); } #define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) static int blk_flush(struct dumperinfo *di) { int error; if (fragsz == 0) return (0); error = dump_write(di, (char*)dump_va + offset, 0, dumplo, fragsz - offset); dumplo += (fragsz - offset); fragsz = 0; offset = 0; return (error); } static int blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) { size_t len; int error, i, c; u_int maxdumpsz; maxdumpsz = di->maxiosize; if (maxdumpsz == 0) /* seatbelt */ maxdumpsz = PAGE_SIZE; error = 0; if (ptr != NULL && pa != 0) { printf("cant have both va and pa!\n"); return (EINVAL); } if (ptr != NULL) { /* If we're doing a virtual dump, flush any pre-existing pa pages */ error = blk_flush(di); if (error) return (error); } while (sz) { if (fragsz == 0) { offset = pa & PAGE_MASK; fragsz += offset; } len = maxdumpsz - fragsz; if (len > sz) len = sz; counter += len; progress -= len; if (counter >> 22) { printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); counter &= (1<<22) - 1; } #ifdef SW_WATCHDOG wdog_kern_pat(WD_LASTVAL); #endif if (ptr) { error = dump_write(di, ptr, 0, dumplo, len); if (error) return (error); dumplo += len; ptr += len; sz -= len; } else { for (i = 0; i < len; i += PAGE_SIZE) dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); fragsz += len; pa += len; sz -= len; if (fragsz == maxdumpsz) { error = blk_flush(di); if (error) return (error); } } /* Check for user abort. 
*/ c = cncheckc(); if (c == 0x03) return (ECANCELED); if (c != -1) printf(" (CTRL-C to abort) "); } return (0); } static int blk_write_cont(struct dumperinfo *di, vm_paddr_t pa, size_t sz) { int error; error = blk_write(di, 0, pa, sz); if (error) return (error); error = blk_flush(di); if (error) return (error); return (0); } /* A fake page table page, to avoid having to handle both 4K and 2M pages */ static pt_entry_t fakept[NPTEPG]; int minidumpsys(struct dumperinfo *di) { struct minidumphdr mdhdr; uint64_t dumpsize; uint32_t ptesize; uint32_t bits; uint32_t pa, prev_pa = 0, count = 0; vm_offset_t va; pd_entry_t *pdp; pt_entry_t *pt, *ptp; int i, k, bit, error; char *addr; /* * Flush caches. Note that in the SMP case this operates only on the * current CPU's L1 cache. Before we reach this point, code in either * the system shutdown or kernel debugger has called stop_cpus() to stop * all cores other than this one. Part of the ARM handling of * stop_cpus() is to call wbinv_all() on that core's local L1 cache. So * by time we get to here, all that remains is to flush the L1 for the * current CPU, then the L2. */ cpu_idcache_wbinv_all(); cpu_l2cache_wbinv_all(); counter = 0; /* Walk page table pages, set bits in vm_page_dump */ ptesize = 0; for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { /* * We always write a page, even if it is zero. Each * page written corresponds to 2MB of space */ ptesize += L2_TABLE_SIZE_REAL; pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp); if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) { /* This is a section mapping 1M page. 
*/ pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK); for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) { if (is_dumpable(pa)) dump_add_page(pa); pa += PAGE_SIZE; } continue; } if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) { /* Set bit for each valid page in this 1MB block */ addr = pmap_kenter_temporary(*pdp & L1_C_ADDR_MASK, 0); pt = (pt_entry_t*)(addr + (((uint32_t)*pdp & L1_C_ADDR_MASK) & PAGE_MASK)); for (k = 0; k < 256; k++) { if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_L) { pa = (pt[k] & L2_L_FRAME) | (va & L2_L_OFFSET); for (i = 0; i < 16; i++) { if (is_dumpable(pa)) dump_add_page(pa); k++; pa += PAGE_SIZE; } } else if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_S) { pa = (pt[k] & L2_S_FRAME) | (va & L2_S_OFFSET); if (is_dumpable(pa)) dump_add_page(pa); } } } else { /* Nothing, we're going to dump a null page */ } } /* Calculate dump size. */ dumpsize = ptesize; dumpsize += round_page(msgbufp->msg_size); dumpsize += round_page(vm_page_dump_size); for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { bits = vm_page_dump[i]; while (bits) { bit = ffs(bits) - 1; pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; /* Clear out undumpable pages now if needed */ if (is_dumpable(pa)) dumpsize += PAGE_SIZE; else dump_drop_page(pa); bits &= ~(1ul << bit); } } dumpsize += PAGE_SIZE; /* Determine dump offset on device. 
*/ if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { error = ENOSPC; goto fail; } dumplo = di->mediaoffset + di->mediasize - dumpsize; dumplo -= sizeof(kdh) * 2; progress = dumpsize; /* Initialize mdhdr */ bzero(&mdhdr, sizeof(mdhdr)); strcpy(mdhdr.magic, MINIDUMP_MAGIC); mdhdr.version = MINIDUMP_VERSION; mdhdr.msgbufsize = msgbufp->msg_size; mdhdr.bitmapsize = vm_page_dump_size; mdhdr.ptesize = ptesize; mdhdr.kernbase = KERNBASE; mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARM_VERSION, dumpsize, di->blocksize); printf("Physical memory: %u MB\n", ptoa((uintmax_t)physmem) / 1048576); printf("Dumping %llu MB:", (long long)dumpsize >> 20); /* Dump leader */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Dump my header */ bzero(&fakept, sizeof(fakept)); bcopy(&mdhdr, &fakept, sizeof(mdhdr)); error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); if (error) goto fail; /* Dump msgbuf up front */ error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); if (error) goto fail; /* Dump bitmap */ error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); if (error) goto fail; /* Dump kernel page table pages */ for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { /* We always write a page, even if it is zero */ pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp); if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) { if (count) { error = blk_write_cont(di, prev_pa, count * L2_TABLE_SIZE_REAL); if (error) goto fail; count = 0; prev_pa = 0; } /* This is a single 2M block. 
Generate a fake PTP */ pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK); for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) { fakept[k] = L2_S_PROTO | (pa + (k * PAGE_SIZE)) | L2_S_PROT(PTE_KERNEL, VM_PROT_READ | VM_PROT_WRITE); } error = blk_write(di, (char *)&fakept, 0, L2_TABLE_SIZE_REAL); if (error) goto fail; /* Flush, in case we reuse fakept in the same block */ error = blk_flush(di); if (error) goto fail; continue; } if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) { pa = *pdp & L1_C_ADDR_MASK; if (!count) { prev_pa = pa; count++; } else { if (pa == (prev_pa + count * L2_TABLE_SIZE_REAL)) count++; else { error = blk_write_cont(di, prev_pa, count * L2_TABLE_SIZE_REAL); if (error) goto fail; count = 1; prev_pa = pa; } } } else { if (count) { error = blk_write_cont(di, prev_pa, count * L2_TABLE_SIZE_REAL); if (error) goto fail; count = 0; prev_pa = 0; } bzero(fakept, sizeof(fakept)); error = blk_write(di, (char *)&fakept, 0, L2_TABLE_SIZE_REAL); if (error) goto fail; /* Flush, in case we reuse fakept in the same block */ error = blk_flush(di); if (error) goto fail; } } if (count) { error = blk_write_cont(di, prev_pa, count * L2_TABLE_SIZE_REAL); if (error) goto fail; count = 0; prev_pa = 0; } /* Dump memory chunks */ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { bits = vm_page_dump[i]; while (bits) { bit = ffs(bits) - 1; pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; if (!count) { prev_pa = pa; count++; } else { if (pa == (prev_pa + count * PAGE_SIZE)) count++; else { error = blk_write_cont(di, prev_pa, count * PAGE_SIZE); if (error) goto fail; count = 1; prev_pa = pa; } } bits &= ~(1ul << bit); } } if (count) { error = blk_write_cont(di, prev_pa, count * PAGE_SIZE); if (error) goto fail; count = 0; prev_pa = 0; } /* Dump trailer */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Signal completion, signoff and exit stage left. 
*/ dump_write(di, NULL, 0, 0, 0); printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; if (error == ECANCELED) printf("\nDump aborted\n"); else if (error == ENOSPC) printf("\nDump failed. Partition too small.\n"); else printf("\n** DUMP FAILED (ERROR %d) **\n", error); return (error); + return (0); } + +#else /* ARM_NEW_PMAP */ + +int +minidumpsys(struct dumperinfo *di) +{ + + return (0); +} + +#endif void dump_add_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 5; /* 2^5 = 32 */ bit = pa & 31; atomic_set_int(&vm_page_dump[idx], 1ul << bit); } void dump_drop_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 5; /* 2^5 = 32 */ bit = pa & 31; atomic_clear_int(&vm_page_dump[idx], 1ul << bit); } Index: head/sys/arm/arm/mp_machdep.c =================================================================== --- head/sys/arm/arm/mp_machdep.c (revision 280711) +++ head/sys/arm/arm/mp_machdep.c (revision 280712) @@ -1,410 +1,427 @@ /*- * Copyright (c) 2011 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #ifdef VFP #include #endif #ifdef CPU_MV_PJ4B #include #include #endif #include "opt_smp.h" extern struct pcpu __pcpu[]; /* used to hold the AP's until we are ready to release them */ struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; /* # of Applications processors */ volatile int mp_naps; /* Set to 1 once we're ready to let the APs out of the pen. 
*/ volatile int aps_ready = 0; static int ipi_handler(void *arg); void set_stackptrs(int cpu); /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; /* Determine if we running MP machine */ int cpu_mp_probe(void) { CPU_SETOF(0, &all_cpus); return (platform_mp_probe()); } /* Start Application Processor via platform specific function */ static int check_ap(void) { uint32_t ms; for (ms = 0; ms < 2000; ++ms) { if ((mp_naps + 1) == mp_ncpus) return (0); /* success */ else DELAY(1000); } return (-2); } extern unsigned char _end[]; /* Initialize and fire up non-boot processors */ void cpu_mp_start(void) { int error, i; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* Reserve memory for application processors */ for(i = 0; i < (mp_ncpus - 1); i++) dpcpu[i] = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); cpu_idcache_wbinv_all(); cpu_l2cache_wbinv_all(); cpu_idcache_wbinv_all(); /* Initialize boot code and start up processors */ platform_mp_start_ap(); /* Check if ap's started properly */ error = check_ap(); if (error) printf("WARNING: Some AP's failed to start\n"); else for (i = 1; i < mp_ncpus; i++) CPU_SET(i, &all_cpus); } /* Introduce rest of cores to the world */ void cpu_mp_announce(void) { } extern vm_paddr_t pmap_pa; void init_secondary(int cpu) { struct pcpu *pc; uint32_t loop_counter; int start = 0, end = 0; +#ifdef ARM_NEW_PMAP + pmap_set_tex(); + reinit_mmu(pmap_kern_ttb, (1<<6) | (1<< 0), (1<<6) | (1<< 0)); + cpu_setup(""); + + /* Provide stack pointers for other processor modes. 
*/ + set_stackptrs(cpu); + + enable_interrupts(PSR_A); +#else /* ARM_NEW_PMAP */ cpu_setup(NULL); setttb(pmap_pa); cpu_tlb_flushID(); - +#endif /* ARM_NEW_PMAP */ pc = &__pcpu[cpu]; /* * pcpu_init() updates queue, so it should not be executed in parallel * on several cores */ while(mp_naps < (cpu - 1)) ; pcpu_init(pc, cpu, sizeof(struct pcpu)); dpcpu_init(dpcpu[cpu - 1], cpu); - +#ifndef ARM_NEW_PMAP /* Provide stack pointers for other processor modes. */ set_stackptrs(cpu); - +#endif /* Signal our startup to BSP */ atomic_add_rel_32(&mp_naps, 1); /* Spin until the BSP releases the APs */ while (!aps_ready) ; /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pc->pc_curthread = pc->pc_idlethread; pc->pc_curpcb = pc->pc_idlethread->td_pcb; set_curthread(pc->pc_idlethread); #ifdef VFP pc->pc_cpu = cpu; vfp_init(); #endif mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } mtx_unlock_spin(&ap_boot_mtx); /* Enable ipi */ #ifdef IPI_IRQ_START start = IPI_IRQ_START; #ifdef IPI_IRQ_END end = IPI_IRQ_END; #else end = IPI_IRQ_START; #endif #endif for (int i = start; i <= end; i++) arm_unmask_irq(i); enable_interrupts(PSR_I); loop_counter = 0; while (smp_started == 0) { DELAY(100); loop_counter++; if (loop_counter == 1000) CTR0(KTR_SMP, "AP still wait for smp_started"); } /* Start per-CPU event timers. 
*/ cpu_initclocks_ap(); CTR0(KTR_SMP, "go into scheduler"); platform_mp_init_secondary(); /* Enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } static int ipi_handler(void *arg) { u_int cpu, ipi; cpu = PCPU_GET(cpuid); ipi = pic_ipi_read((int)arg); while ((ipi != 0x3ff)) { switch (ipi) { case IPI_RENDEZVOUS: CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); break; case IPI_AST: CTR0(KTR_SMP, "IPI_AST"); break; case IPI_STOP: /* * IPI_STOP_HARD is mapped to IPI_STOP so it is not * necessary to add it in the switch. */ CTR0(KTR_SMP, "IPI_STOP or IPI_STOP_HARD"); savectx(&stoppcbs[cpu]); /* * CPUs are stopped when entering the debugger and at * system shutdown, both events which can precede a * panic dump. For the dump to be correct, all caches * must be flushed and invalidated, but on ARM there's * no way to broadcast a wbinv_all to other cores. * Instead, we have each core do the local wbinv_all as * part of stopping the core. The core requesting the * stop will do the l2 cache flush after all other cores * have done their l1 flushes and stopped. 
*/ cpu_idcache_wbinv_all(); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); CTR0(KTR_SMP, "IPI_STOP (restart)"); break; case IPI_PREEMPT: CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); break; case IPI_HARDCLOCK: CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); break; case IPI_TLB: CTR1(KTR_SMP, "%s: IPI_TLB", __func__); cpufuncs.cf_tlb_flushID(); break; +#ifdef ARM_NEW_PMAP + case IPI_LAZYPMAP: + CTR1(KTR_SMP, "%s: IPI_LAZYPMAP", __func__); + pmap_lazyfix_action(); + break; +#endif default: panic("Unknown IPI 0x%0x on cpu %d", ipi, curcpu); } pic_ipi_clear(ipi); ipi = pic_ipi_read(-1); } return (FILTER_HANDLED); } static void release_aps(void *dummy __unused) { uint32_t loop_counter; int start = 0, end = 0; if (mp_ncpus == 1) return; #ifdef IPI_IRQ_START start = IPI_IRQ_START; #ifdef IPI_IRQ_END end = IPI_IRQ_END; #else end = IPI_IRQ_START; #endif #endif for (int i = start; i <= end; i++) { /* * IPI handler */ /* * Use 0xdeadbeef as the argument value for irq 0, * if we used 0, the intr code will give the trap frame * pointer instead. 
*/ arm_setup_irqhandler("ipi", ipi_handler, NULL, (void *)i, i, INTR_TYPE_MISC | INTR_EXCL, NULL); /* Enable ipi */ arm_unmask_irq(i); } atomic_store_rel_int(&aps_ready, 1); printf("Release APs\n"); for (loop_counter = 0; loop_counter < 2000; loop_counter++) { if (smp_started) return; DELAY(1000); } printf("AP's not started\n"); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); struct cpu_group * cpu_topo(void) { return (smp_topo_1level(CG_SHARE_L2, mp_ncpus, 0)); } void cpu_mp_setmaxid(void) { platform_mp_setmaxid(); } /* Sending IPI */ void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); platform_ipi_send(other_cpus, ipi); } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi); platform_ipi_send(cpus, ipi); } void ipi_selected(cpuset_t cpus, u_int ipi) { CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); platform_ipi_send(cpus, ipi); } void tlb_broadcast(int ipi) { if (smp_started) ipi_all_but_self(ipi); } Index: head/sys/arm/arm/pmap-v6-new.c =================================================================== --- head/sys/arm/arm/pmap-v6-new.c (nonexistent) +++ head/sys/arm/arm/pmap-v6-new.c (revision 280712) @@ -0,0 +1,6723 @@ +/*- + * Copyright (c) 1991 Regents of the University of California. + * Copyright (c) 1994 John S. Dyson + * Copyright (c) 1994 David Greenman + * Copyright (c) 2005-2010 Alan L. Cox + * Copyright (c) 2014 Svatopluk Kraus + * Copyright (c) 2014 Michal Meloun + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 + */ +/*- + * Copyright (c) 2003 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Jake Burkholder, + * Safeport Network Services, and Network Associates Laboratories, the + * Security Research Division of Network Associates, Inc. under + * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA + * CHATS research program. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +/* + * Manages physical address maps. + * + * Since the information managed by this module is + * also stored by the logical address mapping module, + * this module may throw away valid virtual-to-physical + * mappings at almost any time. However, invalidations + * of virtual-to-physical mappings must be done as + * requested. + * + * In order to cope with hardware architectures which + * make virtual-to-physical map invalidates expensive, + * this module may delay invalidate or reduced protection + * operations until such time as they are actually + * necessary. 
This module is given full information as + * to which processors are currently using which maps, + * and to when physical maps must be made correct. + */ + +#include "opt_vm.h" +#include "opt_pmap.h" +#include "opt_ddb.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef SMP +#include +#else +#include +#endif + +#ifdef DDB +#include +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#ifdef SMP +#include +#endif + +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 +#endif + +#ifndef DIAGNOSTIC +#define PMAP_INLINE __inline +#else +#define PMAP_INLINE +#endif + +#ifdef PMAP_DEBUG +static void pmap_zero_page_check(vm_page_t m); +void pmap_debug(int level); +int pmap_pid_dump(int pid); +void pmap_pvdump(vm_paddr_t pa); + +#define PDEBUG(_lev_,_stat_) \ + if (pmap_debug_level >= (_lev_)) \ + ((_stat_)) +#define dprintf printf +int pmap_debug_level = 1; +#else /* PMAP_DEBUG */ +#define PDEBUG(_lev_,_stat_) /* Nothing */ +#define dprintf(x, arg...) +#endif /* PMAP_DEBUG */ + +/* + * Level 2 page tables map definion ('max' is excluded). 
+ */ + +#define PT2V_MIN_ADDRESS ((vm_offset_t)PT2MAP) +#define PT2V_MAX_ADDRESS ((vm_offset_t)PT2MAP + PT2MAP_SIZE) + +#define UPT2V_MIN_ADDRESS ((vm_offset_t)PT2MAP) +#define UPT2V_MAX_ADDRESS \ + ((vm_offset_t)(PT2MAP + (KERNBASE >> PT2MAP_SHIFT))) + +/* + * Promotion to a 1MB (PTE1) page mapping requires that the corresponding + * 4KB (PTE2) page mappings have identical settings for the following fields: + */ +#define PTE2_PROMOTE (PTE2_V | PTE2_A | PTE2_NM | PTE2_S | PTE2_NG | \ + PTE2_NX | PTE2_RO | PTE2_U | PTE2_W | \ + PTE2_ATTR_MASK) + +#define PTE1_PROMOTE (PTE1_V | PTE1_A | PTE1_NM | PTE1_S | PTE1_NG | \ + PTE1_NX | PTE1_RO | PTE1_U | PTE1_W | \ + PTE1_ATTR_MASK) + +#define ATTR_TO_L1(l2_attr) ((((l2_attr) & L2_TEX0) ? L1_S_TEX0 : 0) | \ + (((l2_attr) & L2_C) ? L1_S_C : 0) | \ + (((l2_attr) & L2_B) ? L1_S_B : 0) | \ + (((l2_attr) & PTE2_A) ? PTE1_A : 0) | \ + (((l2_attr) & PTE2_NM) ? PTE1_NM : 0) | \ + (((l2_attr) & PTE2_S) ? PTE1_S : 0) | \ + (((l2_attr) & PTE2_NG) ? PTE1_NG : 0) | \ + (((l2_attr) & PTE2_NX) ? PTE1_NX : 0) | \ + (((l2_attr) & PTE2_RO) ? PTE1_RO : 0) | \ + (((l2_attr) & PTE2_U) ? PTE1_U : 0) | \ + (((l2_attr) & PTE2_W) ? PTE1_W : 0)) + +#define ATTR_TO_L2(l1_attr) ((((l1_attr) & L1_S_TEX0) ? L2_TEX0 : 0) | \ + (((l1_attr) & L1_S_C) ? L2_C : 0) | \ + (((l1_attr) & L1_S_B) ? L2_B : 0) | \ + (((l1_attr) & PTE1_A) ? PTE2_A : 0) | \ + (((l1_attr) & PTE1_NM) ? PTE2_NM : 0) | \ + (((l1_attr) & PTE1_S) ? PTE2_S : 0) | \ + (((l1_attr) & PTE1_NG) ? PTE2_NG : 0) | \ + (((l1_attr) & PTE1_NX) ? PTE2_NX : 0) | \ + (((l1_attr) & PTE1_RO) ? PTE2_RO : 0) | \ + (((l1_attr) & PTE1_U) ? PTE2_U : 0) | \ + (((l1_attr) & PTE1_W) ? PTE2_W : 0)) + +/* + * PTE2 descriptors creation macros. 
+ */ +#define PTE2_KPT(pa) PTE2_KERN(pa, PTE2_AP_KRW, pt_memattr) +#define PTE2_KPT_NG(pa) PTE2_KERN_NG(pa, PTE2_AP_KRW, pt_memattr) + +#define PTE2_KRW(pa) PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_NORMAL) +#define PTE2_KRO(pa) PTE2_KERN(pa, PTE2_AP_KR, PTE2_ATTR_NORMAL) + +#define PV_STATS +#ifdef PV_STATS +#define PV_STAT(x) do { x ; } while (0) +#else +#define PV_STAT(x) do { } while (0) +#endif + +/* + * The boot_pt1 is used temporary in very early boot stage as L1 page table. + * We can init many things with no memory allocation thanks to its static + * allocation and this brings two main advantages: + * (1) other cores can be started very simply, + * (2) various boot loaders can be supported as its arguments can be processed + * in virtual address space and can be moved to safe location before + * first allocation happened. + * Only disadvantage is that boot_pt1 is used only in very early boot stage. + * However, the table is uninitialized and so lays in bss. Therefore kernel + * image size is not influenced. + * + * QQQ: In the future, maybe, boot_pt1 can be used for soft reset and + * CPU suspend/resume game. 
+ */ +extern pt1_entry_t boot_pt1[]; + +vm_paddr_t base_pt1; +pt1_entry_t *kern_pt1; +pt2_entry_t *kern_pt2tab; +pt2_entry_t *PT2MAP; + +static uint32_t ttb_flags; +static vm_memattr_t pt_memattr; +ttb_entry_t pmap_kern_ttb; + +/* XXX use converion function*/ +#define PTE2_ATTR_NORMAL VM_MEMATTR_DEFAULT +#define PTE1_ATTR_NORMAL ATTR_TO_L1(PTE2_ATTR_NORMAL) + +struct pmap kernel_pmap_store; +LIST_HEAD(pmaplist, pmap); +static struct pmaplist allpmaps; +static struct mtx allpmaps_lock; + +vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ +vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ + +static vm_offset_t kernel_vm_end_new; +vm_offset_t kernel_vm_end = KERNBASE + NKPT2PG * NPT2_IN_PG * PTE1_SIZE; +vm_offset_t vm_max_kernel_address; +vm_paddr_t kernel_l1pa; + +static struct rwlock __aligned(CACHE_LINE_SIZE) pvh_global_lock; + +/* + * Data for the pv entry allocation mechanism + */ +static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); +static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; +static struct md_page *pv_table; /* XXX: Is it used only the list in md_page? */ +static int shpgperproc = PMAP_SHPGPERPROC; + +struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ +int pv_maxchunks; /* How many chunks we have KVA for */ +vm_offset_t pv_vafree; /* freelist stored in the PTE */ + +vm_paddr_t first_managed_pa; +#define pa_to_pvh(pa) (&pv_table[pte1_index(pa - first_managed_pa)]) + +/* + * All those kernel PT submaps that BSD is so fond of + */ +struct sysmaps { + struct mtx lock; + pt2_entry_t *CMAP1; + pt2_entry_t *CMAP2; + pt2_entry_t *CMAP3; + caddr_t CADDR1; + caddr_t CADDR2; + caddr_t CADDR3; +}; +static struct sysmaps sysmaps_pcpu[MAXCPU]; +static pt2_entry_t *CMAP3; +static caddr_t CADDR3; +caddr_t _tmppt = 0; + +struct msgbuf *msgbufp = 0; /* XXX move it to machdep.c */ + +/* + * Crashdump maps. 
+ */ +static caddr_t crashdumpmap; + +static pt2_entry_t *PMAP1 = 0, *PMAP2; +static pt2_entry_t *PADDR1 = 0, *PADDR2; +#ifdef DDB +static pt2_entry_t *PMAP3; +static pt2_entry_t *PADDR3; +static int PMAP3cpu __unused; /* for SMP only */ +#endif +#ifdef SMP +static int PMAP1cpu; +static int PMAP1changedcpu; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, + &PMAP1changedcpu, 0, + "Number of times pmap_pte2_quick changed CPU with same PMAP1"); +#endif +static int PMAP1changed; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, + &PMAP1changed, 0, + "Number of times pmap_pte2_quick changed PMAP1"); +static int PMAP1unchanged; +SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, + &PMAP1unchanged, 0, + "Number of times pmap_pte2_quick didn't change PMAP1"); +static struct mtx PMAP2mutex; + +static __inline void pt2_wirecount_init(vm_page_t m); +static boolean_t pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p, + vm_offset_t va); +void cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size); + +/* + * Function to set the debug level of the pmap code. + */ +#ifdef PMAP_DEBUG +void +pmap_debug(int level) +{ + + pmap_debug_level = level; + dprintf("pmap_debug: level=%d\n", pmap_debug_level); +} +#endif /* PMAP_DEBUG */ + +/* + * This table must corespond with memory attribute configuration in vm.h. + * First entry is used for normal system mapping. + * + * Device memory is always marked as shared. + * Normal memory is shared only in SMP . + * Not outer shareable bits are not used yet. + * Class 6 cannot be used on ARM11. 
+ */ +#define TEXDEF_TYPE_SHIFT 0 +#define TEXDEF_TYPE_MASK 0x3 +#define TEXDEF_INNER_SHIFT 2 +#define TEXDEF_INNER_MASK 0x3 +#define TEXDEF_OUTER_SHIFT 4 +#define TEXDEF_OUTER_MASK 0x3 +#define TEXDEF_NOS_SHIFT 6 +#define TEXDEF_NOS_MASK 0x1 + +#define TEX(t, i, o, s) \ + ((t) << TEXDEF_TYPE_SHIFT) | \ + ((i) << TEXDEF_INNER_SHIFT) | \ + ((o) << TEXDEF_OUTER_SHIFT | \ + ((s) << TEXDEF_NOS_SHIFT)) + +static uint32_t tex_class[8] = { +/* type inner cache outer cache */ + TEX(PRRR_MEM, NMRR_WB_WA, NMRR_WB_WA, 0), /* 0 - ATTR_WB_WA */ + TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 1 - ATTR_NOCACHE */ + TEX(PRRR_DEV, NMRR_NC, NMRR_NC, 0), /* 2 - ATTR_DEVICE */ + TEX(PRRR_SO, NMRR_NC, NMRR_NC, 0), /* 3 - ATTR_SO */ + TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 4 - NOT USED YET */ + TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 5 - NOT USED YET */ + TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 6 - NOT USED YET */ + TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 7 - NOT USED YET */ +}; +#undef TEX + +/* + * Convert TEX definition entry to TTB flags. + */ +static uint32_t +encode_ttb_flags(int idx) +{ + uint32_t inner, outer, nos, reg; + + inner = (tex_class[idx] >> TEXDEF_INNER_SHIFT) & + TEXDEF_INNER_MASK; + outer = (tex_class[idx] >> TEXDEF_OUTER_SHIFT) & + TEXDEF_OUTER_MASK; + nos = (tex_class[idx] >> TEXDEF_NOS_SHIFT) & + TEXDEF_NOS_MASK; + + reg = nos << 5; + reg |= outer << 3; + if (cpuinfo.coherent_walk) + reg |= (inner & 0x1) << 6; + reg |= (inner & 0x2) >> 1; +#ifdef SMP + reg |= 1 << 1; +#endif + return reg; +} + +/* + * Set TEX remapping registers in current CPU. 
 */
void
pmap_set_tex(void)
{
	uint32_t prrr, nmrr;
	uint32_t type, inner, outer, nos;
	int i;

#ifdef PMAP_PTE_NOCACHE
	/* XXX fixme */
	if (cpuinfo.coherent_walk) {
		pt_memattr = VM_MEMATTR_WB_WA;
		ttb_flags = encode_ttb_flags(0);
	}
	else {
		pt_memattr = VM_MEMATTR_NOCACHE;
		ttb_flags = encode_ttb_flags(1);
	}
#else
	pt_memattr = VM_MEMATTR_WB_WA;
	ttb_flags = encode_ttb_flags(0);
#endif

	prrr = 0;
	nmrr = 0;

	/* Build remapping register from TEX classes. */
	for (i = 0; i < 8; i++) {
		type = (tex_class[i] >> TEXDEF_TYPE_SHIFT) &
		    TEXDEF_TYPE_MASK;
		inner = (tex_class[i] >> TEXDEF_INNER_SHIFT) &
		    TEXDEF_INNER_MASK;
		outer = (tex_class[i] >> TEXDEF_OUTER_SHIFT) &
		    TEXDEF_OUTER_MASK;
		nos = (tex_class[i] >> TEXDEF_NOS_SHIFT) &
		    TEXDEF_NOS_MASK;

		/* Two type bits per class in PRRR; per-class NOS at bit 24+i. */
		prrr |= type << (i * 2);
		prrr |= nos << (i + 24);
		/* Inner modes in NMRR low half, outer modes in the high half. */
		nmrr |= inner << (i * 2);
		nmrr |= outer << (i * 2 + 16);
	}
	/* Add shareable bits for device memory. */
	prrr |= PRRR_DS0 | PRRR_DS1;

	/* Add shareable bits for normal memory in SMP case. */
#ifdef SMP
	prrr |= PRRR_NS1;
#endif
	/* Program CP15 TEX remap registers for this CPU. */
	cp15_prrr_set(prrr);
	cp15_nmrr_set(nmrr);

	/* Caches are disabled, so full TLB flush should be enough. */
	tlb_flush_all_local();
}

/*
 * KERNBASE must be multiple of NPT2_IN_PG * PTE1_SIZE. In other words,
 * KERNBASE is mapped by first L2 page table in L2 page table page. It
 * meets same constraint due to PT2MAP being placed just under KERNBASE.
 */
CTASSERT((KERNBASE & (NPT2_IN_PG * PTE1_SIZE - 1)) == 0);
CTASSERT((KERNBASE - VM_MAXUSER_ADDRESS) >= PT2MAP_SIZE);

/*
 * In crazy dreams, PAGE_SIZE could be a multiple of PTE2_SIZE in general.
 * For now, anyhow, the following check must be fulfilled.
 */
CTASSERT(PAGE_SIZE == PTE2_SIZE);
/*
 * We don't want to mess up MI code with all MMU and PMAP definitions,
 * so some things, which depend on other ones, are defined independently.
 * Now, it is time to check that we don't screw up something.
+ */ +CTASSERT(PDRSHIFT == PTE1_SHIFT); +/* + * Check L1 and L2 page table entries definitions consistency. + */ +CTASSERT(NB_IN_PT1 == (sizeof(pt1_entry_t) * NPTE1_IN_PT1)); +CTASSERT(NB_IN_PT2 == (sizeof(pt2_entry_t) * NPTE2_IN_PT2)); +/* + * Check L2 page tables page consistency. + */ +CTASSERT(PAGE_SIZE == (NPT2_IN_PG * NB_IN_PT2)); +CTASSERT((1 << PT2PG_SHIFT) == NPT2_IN_PG); +/* + * Check PT2TAB consistency. + * PT2TAB_ENTRIES is defined as a division of NPTE1_IN_PT1 by NPT2_IN_PG. + * This should be done without remainder. + */ +CTASSERT(NPTE1_IN_PT1 == (PT2TAB_ENTRIES * NPT2_IN_PG)); + +/* + * A PT2MAP magic. + * + * All level 2 page tables (PT2s) are mapped continuously and accordingly + * into PT2MAP address space. As PT2 size is less than PAGE_SIZE, this can + * be done only if PAGE_SIZE is a multiple of PT2 size. All PT2s in one page + * must be used together, but not necessary at once. The first PT2 in a page + * must map things on correctly aligned address and the others must follow + * in right order. + */ +#define NB_IN_PT2TAB (PT2TAB_ENTRIES * sizeof(pt2_entry_t)) +#define NPT2_IN_PT2TAB (NB_IN_PT2TAB / NB_IN_PT2) +#define NPG_IN_PT2TAB (NB_IN_PT2TAB / PAGE_SIZE) + +/* + * Check PT2TAB consistency. + * NPT2_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by NB_IN_PT2. + * NPG_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by PAGE_SIZE. + * The both should be done without remainder. + */ +CTASSERT(NB_IN_PT2TAB == (NPT2_IN_PT2TAB * NB_IN_PT2)); +CTASSERT(NB_IN_PT2TAB == (NPG_IN_PT2TAB * PAGE_SIZE)); +/* + * The implementation was made general, however, with the assumption + * bellow in mind. In case of another value of NPG_IN_PT2TAB, + * the code should be once more rechecked. + */ +CTASSERT(NPG_IN_PT2TAB == 1); + +/* + * Get offset of PT2 in a page + * associated with given PT1 index. 
 */
static __inline u_int
page_pt2off(u_int pt1_idx)
{

	/* Each PT2 is NB_IN_PT2 bytes; PT2PG_MASK selects the slot in page. */
	return ((pt1_idx & PT2PG_MASK) * NB_IN_PT2);
}

/*
 * Get physical address of PT2
 * associated with given PT2s page and PT1 index.
 */
static __inline vm_paddr_t
page_pt2pa(vm_paddr_t pgpa, u_int pt1_idx)
{

	return (pgpa + page_pt2off(pt1_idx));
}

/*
 * Get first entry of PT2
 * associated with given PT2s page and PT1 index.
 */
static __inline pt2_entry_t *
page_pt2(vm_offset_t pgva, u_int pt1_idx)
{

	return ((pt2_entry_t *)(pgva + page_pt2off(pt1_idx)));
}

/*
 * Get virtual address of PT2s page (mapped in PT2MAP)
 * which holds PT2 which holds entry which maps given virtual address.
 */
static __inline vm_offset_t
pt2map_pt2pg(vm_offset_t va)
{

	/* Round va down to the VA range covered by one page of PT2s. */
	va &= ~(NPT2_IN_PG * PTE1_SIZE - 1);
	return ((vm_offset_t)pt2map_entry(va));
}

/*****************************************************************************
 *
 * THREE pmap initialization milestones exist:
 *
 * locore.S
 *	-> fundamental init (including MMU) in ASM
 *
 * initarm()
 *	-> fundamental init continues in C
 *	-> first available physical address is known
 *
 *   pmap_bootstrap_prepare() -> FIRST PMAP MILESTONE (first epoch begins)
 *	-> basic (safe) interface for physical address allocation is made
 *	-> basic (safe) interface for virtual mapping is made
 *	-> limited not SMP coherent work is possible
 *
 * -> more fundamental init continues in C
 *	-> locks and some more things are available
 *	-> all fundamental allocations and mappings are done
 *
 *   pmap_bootstrap() -> SECOND PMAP MILESTONE (second epoch begins)
 *	-> phys_avail[] and virtual_avail is set
 *	-> control is passed to vm subsystem
 *	-> physical and virtual address allocation are off limit
 *	-> low level mapping functions, some SMP coherent,
 *	   are available, which cannot be used before vm subsystem
 *	   is being inited
 *
 * mi_startup()
 *	-> vm subsystem is being inited
 *
 *   pmap_init() -> THIRD PMAP
MILESTONE (third epoch begins) + * -> pmap is fully inited + * + *****************************************************************************/ + +/***************************************************************************** + * + * PMAP first stage initialization and utility functions + * for pre-bootstrap epoch. + * + * After pmap_bootstrap_prepare() is called, the following functions + * can be used: + * + * (1) strictly only for this stage functions for physical page allocations, + * virtual space allocations, and mappings: + * + * vm_paddr_t pmap_preboot_get_pages(u_int num); + * void pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num); + * vm_offset_t pmap_preboot_reserve_pages(u_int num); + * vm_offset_t pmap_preboot_get_vpages(u_int num); + * void pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size, + * int prot, int attr); + * + * (2) for all stages: + * + * vm_paddr_t pmap_kextract(vm_offset_t va); + * + * NOTE: This is not SMP coherent stage. + * + *****************************************************************************/ + +#define KERNEL_P2V(pa) \ + ((vm_offset_t)((pa) - arm_physmem_kernaddr + KERNVIRTADDR)) +#define KERNEL_V2P(va) \ + ((vm_paddr_t)((va) - KERNVIRTADDR + arm_physmem_kernaddr)) + +static vm_paddr_t last_paddr; + +/* + * Pre-bootstrap epoch page allocator. + */ +vm_paddr_t +pmap_preboot_get_pages(u_int num) +{ + vm_paddr_t ret; + + ret = last_paddr; + last_paddr += num * PAGE_SIZE; + + return (ret); +} + +/* + * The fundamental initalization of PMAP stuff. + * + * Some things already happened in locore.S and some things could happen + * before pmap_bootstrap_prepare() is called, so let's recall what is done: + * 1. Caches are disabled. + * 2. We are running on virtual addresses already with 'boot_pt1' + * as L1 page table. + * 3. So far, all virtual addresses can be converted to physical ones and + * vice versa by the following macros: + * KERNEL_P2V(pa) .... 
physical to virtual ones, + * KERNEL_V2P(va) .... virtual to physical ones. + * + * What is done herein: + * 1. The 'boot_pt1' is replaced by real kernel L1 page table 'kern_pt1'. + * 2. PT2MAP magic is brought to live. + * 3. Basic preboot functions for page allocations and mappings can be used. + * 4. Everything is prepared for L1 cache enabling. + * + * Variations: + * 1. To use second TTB register, so kernel and users page tables will be + * separated. This way process forking - pmap_pinit() - could be faster, + * it saves physical pages and KVA per a process, and it's simple change. + * However, it will lead, due to hardware matter, to the following: + * (a) 2G space for kernel and 2G space for users. + * (b) 1G space for kernel in low addresses and 3G for users above it. + * A question is: Is the case (b) really an option? Note that case (b) + * does save neither physical memory and KVA. + */ +void +pmap_bootstrap_prepare(vm_paddr_t last) +{ + vm_paddr_t pt2pg_pa, pt2tab_pa, pa, size; + vm_offset_t pt2pg_va; + pt1_entry_t *pte1p; + pt2_entry_t *pte2p; + u_int i; + + /* + * Now, we are going to make real kernel mapping. Note that we are + * already running on some mapping made in locore.S and we expect + * that it's large enough to ensure nofault access to physical memory + * allocated herein before switch. + * + * As kernel image and everything needed before are and will be mapped + * by section mappings, we align last physical address to PTE1_SIZE. + */ + last_paddr = pte1_roundup(last); + + /* + * Allocate and zero page(s) for kernel L1 page table. + * + * Note that it's first allocation on space which was PTE1_SIZE + * aligned and as such base_pt1 is aligned to NB_IN_PT1 too. + */ + base_pt1 = pmap_preboot_get_pages(NPG_IN_PT1); + kern_pt1 = (pt1_entry_t *)KERNEL_P2V(base_pt1); + bzero((void*)kern_pt1, NB_IN_PT1); + pte1_sync_range(kern_pt1, NB_IN_PT1); + + /* Allocate and zero page(s) for kernel PT2TAB. 
*/ + pt2tab_pa = pmap_preboot_get_pages(NPG_IN_PT2TAB); + kern_pt2tab = (pt2_entry_t *)KERNEL_P2V(pt2tab_pa); + bzero(kern_pt2tab, NB_IN_PT2TAB); + pte2_sync_range(kern_pt2tab, NB_IN_PT2TAB); + + /* Allocate and zero page(s) for kernel L2 page tables. */ + pt2pg_pa = pmap_preboot_get_pages(NKPT2PG); + pt2pg_va = KERNEL_P2V(pt2pg_pa); + size = NKPT2PG * PAGE_SIZE; + bzero((void*)pt2pg_va, size); + pte2_sync_range((pt2_entry_t *)pt2pg_va, size); + + /* + * Add a physical memory segment (vm_phys_seg) corresponding to the + * preallocated pages for kernel L2 page tables so that vm_page + * structures representing these pages will be created. The vm_page + * structures are required for promotion of the corresponding kernel + * virtual addresses to section mappings. + */ + vm_phys_add_seg(pt2tab_pa, pmap_preboot_get_pages(0)); + + /* + * Insert allocated L2 page table pages to PT2TAB and make + * link to all PT2s in L1 page table. See how kernel_vm_end + * is initialized. + * + * We play simple and safe. So every KVA will have underlaying + * L2 page table, even kernel image mapped by sections. + */ + pte2p = kern_pt2tab_entry(KERNBASE); + for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += PTE2_SIZE) + pt2tab_store(pte2p++, PTE2_KPT(pa)); + + pte1p = kern_pte1(KERNBASE); + for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += NB_IN_PT2) + pte1_store(pte1p++, PTE1_LINK(pa)); + + /* Make section mappings for kernel. */ + pte1p = kern_pte1(KERNBASE); + for (pa = KERNEL_V2P(KERNBASE); pa < last; pa += PTE1_SIZE) + pte1_store(pte1p++, PTE1_KERN(pa, PTE1_AP_KRW, + ATTR_TO_L1(PTE2_ATTR_WB_WA))); + + /* + * Get free and aligned space for PT2MAP and make L1 page table links + * to L2 page tables held in PT2TAB. + * + * Note that pages holding PT2s are stored in PT2TAB as pt2_entry_t + * descriptors and PT2TAB page(s) itself is(are) used as PT2s. Thus + * each entry in PT2TAB maps all PT2s in a page. This implies that + * virtual address of PT2MAP must be aligned to NPT2_IN_PG * PTE1_SIZE. 
+ */ + PT2MAP = (pt2_entry_t *)(KERNBASE - PT2MAP_SIZE); + pte1p = kern_pte1((vm_offset_t)PT2MAP); + for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) { + pte1_store(pte1p++, PTE1_LINK(pa)); + } + + /* + * Store PT2TAB in PT2TAB itself, i.e. self reference mapping. + * Each pmap will hold own PT2TAB, so the mapping should be not global. + */ + pte2p = kern_pt2tab_entry((vm_offset_t)PT2MAP); + for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) { + pt2tab_store(pte2p++, PTE2_KPT_NG(pa)); + } + + /* + * Choose correct L2 page table and make mappings for allocations + * made herein which replaces temporary locore.S mappings after a while. + * Note that PT2MAP cannot be used until we switch to kern_pt1. + * + * Note, that these allocations started aligned on 1M section and + * kernel PT1 was allocated first. Making of mappings must follow + * order of physical allocations as we've used KERNEL_P2V() macro + * for virtual addresses resolution. + */ + pte2p = kern_pt2tab_entry((vm_offset_t)kern_pt1); + pt2pg_va = KERNEL_P2V(pte2_pa(pte2_load(pte2p))); + + pte2p = page_pt2(pt2pg_va, pte1_index((vm_offset_t)kern_pt1)); + + /* Make mapping for kernel L1 page table. */ + for (pa = base_pt1, i = 0; i < NPG_IN_PT1; i++, pa += PTE2_SIZE) + pte2_store(pte2p++, PTE2_KPT(pa)); + + /* Make mapping for kernel PT2TAB. */ + for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) + pte2_store(pte2p++, PTE2_KPT(pa)); + + /* Finally, switch from 'boot_pt1' to 'kern_pt1'. */ + pmap_kern_ttb = base_pt1 | ttb_flags; + reinit_mmu(pmap_kern_ttb, (1 << 6) | (1 << 0), (1 << 6) | (1 << 0)); + + /* + * Initialize the first available KVA. As kernel image is mapped by + * sections, we are leaving some gap behind. + */ + virtual_avail = (vm_offset_t)kern_pt2tab + NPG_IN_PT2TAB * PAGE_SIZE; +} + +/* + * Setup L2 page table page for given KVA. + * Used in pre-bootstrap epoch. 
 *
 * Note that we have allocated NKPT2PG pages for L2 page tables in advance
 * and used them for mapping KVA starting from KERNBASE. However, this is not
 * enough. Vectors and devices need L2 page tables too. Note that they are
 * even above VM_MAX_KERNEL_ADDRESS.
 */
static __inline vm_paddr_t
pmap_preboot_pt2pg_setup(vm_offset_t va)
{
	pt2_entry_t *pte2p, pte2;
	vm_paddr_t pt2pg_pa;

	/* Get associated entry in PT2TAB. */
	pte2p = kern_pt2tab_entry(va);

	/* Just return, if PT2s page exists already. */
	pte2 = pt2tab_load(pte2p);
	if (pte2_is_valid(pte2))
		return (pte2_pa(pte2));

	/* Only VAs above the preallocated NKPT2PG range may get here. */
	KASSERT(va >= VM_MAX_KERNEL_ADDRESS,
	    ("%s: NKPT2PG too small", __func__));

	/*
	 * Allocate page for PT2s and insert it to PT2TAB.
	 * In other words, map it into PT2MAP space.
	 * NOTE(review): pmap_preboot_get_pages() cannot fail; it only bumps
	 * last_paddr, so no error handling is needed here.
	 */
	pt2pg_pa = pmap_preboot_get_pages(1);
	pt2tab_store(pte2p, PTE2_KPT(pt2pg_pa));

	/* Zero all PT2s in allocated page. */
	bzero((void*)pt2map_pt2pg(va), PAGE_SIZE);
	pte2_sync_range((pt2_entry_t *)pt2map_pt2pg(va), PAGE_SIZE);

	return (pt2pg_pa);
}

/*
 * Setup L2 page table for given KVA.
 * Used in pre-bootstrap epoch.
 */
static void
pmap_preboot_pt2_setup(vm_offset_t va)
{
	pt1_entry_t *pte1p;
	vm_paddr_t pt2pg_pa, pt2_pa;

	/* Setup PT2's page. */
	pt2pg_pa = pmap_preboot_pt2pg_setup(va);
	pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(va));

	/* Insert PT2 to PT1. */
	pte1p = kern_pte1(va);
	pte1_store(pte1p, PTE1_LINK(pt2_pa));
}

/*
 * Get L2 page entry associated with given KVA.
 * Used in pre-bootstrap epoch.
 */
static __inline pt2_entry_t*
pmap_preboot_vtopte2(vm_offset_t va)
{
	pt1_entry_t *pte1p;

	/* Setup PT2 if needed. */
	pte1p = kern_pte1(va);
	if (!pte1_is_valid(pte1_load(pte1p)))	/* XXX - sections ?! */
		pmap_preboot_pt2_setup(va);

	return (pt2map_entry(va));
}

/*
 * Pre-bootstrap epoch page(s) mapping(s).
 */
void
pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num)
{
        u_int i;
        pt2_entry_t *pte2p;

        /*
         * Map all the pages one by one; pmap_preboot_vtopte2() allocates
         * and links the backing L2 page table on demand.
         */
        for (i = 0; i < num; i++) {
                pte2p = pmap_preboot_vtopte2(va);
                pte2_store(pte2p, PTE2_KRW(pa));
                va += PAGE_SIZE;
                pa += PAGE_SIZE;
        }
}

/*
 * Pre-bootstrap epoch virtual space allocator.
 *
 * NOTE(review): virtual_avail is advanced without any locking; presumably
 * the pre-bootstrap epoch is strictly single-threaded — confirm.
 */
vm_offset_t
pmap_preboot_reserve_pages(u_int num)
{
        u_int i;
        vm_offset_t start, va;
        pt2_entry_t *pte2p;

        /* Allocate virtual space. */
        start = va = virtual_avail;
        virtual_avail += num * PAGE_SIZE;

        /* Zero the mapping, so the range is reserved but not mapped. */
        for (i = 0; i < num; i++) {
                pte2p = pmap_preboot_vtopte2(va);
                pte2_store(pte2p, 0);
                va += PAGE_SIZE;
        }

        return (start);
}

/*
 * Pre-bootstrap epoch page(s) allocation and mapping(s).
 * Returns the first virtual address of the zeroed, mapped range.
 */
vm_offset_t
pmap_preboot_get_vpages(u_int num)
{
        vm_paddr_t pa;
        vm_offset_t va;

        /* Allocate physical page(s). */
        pa = pmap_preboot_get_pages(num);

        /* Allocate virtual space. */
        va = virtual_avail;
        virtual_avail += num * PAGE_SIZE;

        /* Map and zero all. */
        pmap_preboot_map_pages(pa, va, num);
        bzero((void *)va, num * PAGE_SIZE);

        return (va);
}

/*
 * Pre-bootstrap epoch page mapping(s) with attributes.
 * Uses 1MB section (PTE1) mappings whenever 'va', 'pa' and the remaining
 * size are all section-aligned; falls back to 4KB page (PTE2) mappings
 * otherwise.
 */
void
pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size, int prot,
    int attr)
{
        u_int num;
        u_int l1_attr, l1_prot;
        pt1_entry_t *pte1p;
        pt2_entry_t *pte2p;

        l1_prot = ATTR_TO_L1(prot);
        l1_attr = ATTR_TO_L1(attr);

        /* Map all the pages.
*/ + num = round_page(size); + while (num > 0) { + if ((((va | pa) & PTE1_OFFSET) == 0) && (num >= PTE1_SIZE)) { + pte1p = kern_pte1(va); + pte1_store(pte1p, PTE1_KERN(pa, l1_prot, l1_attr)); + va += PTE1_SIZE; + pa += PTE1_SIZE; + num -= PTE1_SIZE; + } else { + pte2p = pmap_preboot_vtopte2(va); + pte2_store(pte2p, PTE2_KERN(pa, prot, attr)); + va += PAGE_SIZE; + pa += PAGE_SIZE; + num -= PAGE_SIZE; + } + } + +} + +/* + * Extract from the kernel page table the physical address + * that is mapped by the given virtual address "va". + */ +vm_paddr_t +pmap_kextract(vm_offset_t va) +{ + vm_paddr_t pa; + pt1_entry_t pte1; + pt2_entry_t pte2; + + pte1 = pte1_load(kern_pte1(va)); + if (pte1_is_section(pte1)) { + pa = pte1_pa(pte1) | (va & PTE1_OFFSET); + } else if (pte1_is_link(pte1)) { + /* + * We should beware of concurrent promotion that changes + * pte1 at this point. However, it's not a problem as PT2 + * page is preserved by promotion in PT2TAB. So even if + * it happens, using of PT2MAP is still safe. + * + * QQQ: However, concurrent removing is a problem which + * ends in abort on PT2MAP space. Locking must be used + * to deal with this. + */ + pte2 = pte2_load(pt2map_entry(va)); + pa = pte2_pa(pte2) | (va & PTE2_OFFSET); + } + else { + panic("%s: va %#x pte1 %#x", __func__, va, pte1); + } + return (pa); +} + +/***************************************************************************** + * + * PMAP second stage initialization and utility functions + * for bootstrap epoch. + * + * After pmap_bootstrap() is called, the following functions for + * mappings can be used: + * + * void pmap_kenter(vm_offset_t va, vm_paddr_t pa); + * void pmap_kremove(vm_offset_t va); + * vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, + * int prot); + * + * NOTE: This is not SMP coherent stage. And physical page allocation is not + * allowed during this stage. 
+ * + *****************************************************************************/ + +/* + * Initialize kernel PMAP locks and lists, kernel_pmap itself, and + * reserve various virtual spaces for temporary mappings. + */ +void +pmap_bootstrap(vm_offset_t firstaddr) +{ + pt2_entry_t *unused __unused; + struct sysmaps *sysmaps; + u_int i; + + /* + * Initialize the kernel pmap (which is statically allocated). + */ + PMAP_LOCK_INIT(kernel_pmap); + kernel_l1pa = (vm_paddr_t)kern_pt1; /* for libkvm */ + kernel_pmap->pm_pt1 = kern_pt1; + kernel_pmap->pm_pt2tab = kern_pt2tab; + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ + TAILQ_INIT(&kernel_pmap->pm_pvchunk); + + /* + * Initialize the global pv list lock. + */ + rw_init(&pvh_global_lock, "pmap pv global"); + + LIST_INIT(&allpmaps); + + /* + * Request a spin mutex so that changes to allpmaps cannot be + * preempted by smp_rendezvous_cpus(). + */ + mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); + mtx_lock_spin(&allpmaps_lock); + LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); + mtx_unlock_spin(&allpmaps_lock); + + /* + * Reserve some special page table entries/VA space for temporary + * mapping of pages. + */ +#define SYSMAP(c, p, v, n) do { \ + v = (c)pmap_preboot_reserve_pages(1); \ + p = pt2map_entry((vm_offset_t)v); \ + } while (0) + + /* + * Local CMAP1/CMAP2 are used for zeroing and copying pages. + * Local CMAP3 is used for data cache cleaning. + * Global CMAP3 is used for the idle process page zeroing. + */ + for (i = 0; i < MAXCPU; i++) { + sysmaps = &sysmaps_pcpu[i]; + mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); + SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1); + SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1); + SYSMAP(caddr_t, sysmaps->CMAP3, sysmaps->CADDR3, 1); + } + SYSMAP(caddr_t, CMAP3, CADDR3, 1); + + /* + * Crashdump maps. + */ + SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS); + + /* + * _tmppt is used for reading arbitrary physical pages via /dev/mem. 
+ */ + SYSMAP(caddr_t, unused, _tmppt, 1); + + /* + * PADDR1 and PADDR2 are used by pmap_pte2_quick() and pmap_pte2(), + * respectively. PADDR3 is used by pmap_pte2_ddb(). + */ + SYSMAP(pt2_entry_t *, PMAP1, PADDR1, 1); + SYSMAP(pt2_entry_t *, PMAP2, PADDR2, 1); +#ifdef DDB + SYSMAP(pt2_entry_t *, PMAP3, PADDR3, 1); +#endif + mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); + + /* + * Note that in very short time in initarm(), we are going to + * initialize phys_avail[] array and no futher page allocation + * can happen after that until vm subsystem will be initialized. + */ + kernel_vm_end_new = kernel_vm_end; + virtual_end = vm_max_kernel_address; +} + +/* + * The function can already be use in second initialization stage. + * As such, the function DOES NOT call pmap_growkernel() where PT2 + * allocation can happen. So if used, be sure that PT2 for given + * virtual address is allocated already! + * + * Add a wired page to the kva. + * Note: not SMP coherent. + */ +static __inline void +pmap_kenter_prot_attr(vm_offset_t va, vm_paddr_t pa, uint32_t prot, + uint32_t attr) +{ + pt1_entry_t *pte1p; + pt2_entry_t *pte2p; + + pte1p = kern_pte1(va); + if (!pte1_is_valid(pte1_load(pte1p))) { /* XXX - sections ?! */ + /* + * This is a very low level function, so PT2 and particularly + * PT2PG associated with given virtual address must be already + * allocated. It's a pain mainly during pmap initialization + * stage. However, called after pmap initialization with + * virtual address not under kernel_vm_end will lead to + * the same misery. 
+ */ + if (!pte2_is_valid(pte2_load(kern_pt2tab_entry(va)))) + panic("%s: kernel PT2 not allocated!", __func__); + } + + pte2p = pt2map_entry(va); + pte2_store(pte2p, PTE2_KERN(pa, prot, attr)); +} + +static __inline void +pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr) +{ + + pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, attr); +} + +PMAP_INLINE void +pmap_kenter(vm_offset_t va, vm_paddr_t pa) +{ + + pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, PTE2_ATTR_NORMAL); +} + +/* + * Remove a page from the kernel pagetables. + * Note: not SMP coherent. + */ +PMAP_INLINE void +pmap_kremove(vm_offset_t va) +{ + pt2_entry_t *pte2p; + + pte2p = pt2map_entry(va); + pte2_clear(pte2p); +} + +/* + * Share new kernel PT2PG with all pmaps. + * The caller is responsible for maintaining TLB consistency. + */ +static void +pmap_kenter_pt2tab(vm_offset_t va, pt2_entry_t npte2) +{ + pmap_t pmap; + pt2_entry_t *pte2p; + + mtx_lock_spin(&allpmaps_lock); + LIST_FOREACH(pmap, &allpmaps, pm_list) { + pte2p = pmap_pt2tab_entry(pmap, va); + pt2tab_store(pte2p, npte2); + } + mtx_unlock_spin(&allpmaps_lock); +} + +/* + * Share new kernel PTE1 with all pmaps. + * The caller is responsible for maintaining TLB consistency. + */ +static void +pmap_kenter_pte1(vm_offset_t va, pt1_entry_t npte1) +{ + pmap_t pmap; + pt1_entry_t *pte1p; + + mtx_lock_spin(&allpmaps_lock); + LIST_FOREACH(pmap, &allpmaps, pm_list) { + pte1p = pmap_pte1(pmap, va); + pte1_store(pte1p, npte1); + } + mtx_unlock_spin(&allpmaps_lock); +} + +/* + * Used to map a range of physical addresses into kernel + * virtual address space. + * + * The value passed in '*virt' is a suggested virtual address for + * the mapping. Architectures which can support a direct-mapped + * physical to virtual region can return the appropriate address + * within that region, leaving '*virt' unchanged. Other + * architectures should map the pages starting at '*virt' and + * update '*virt' with the first usable address after the mapped + * region. 
 *
 * NOTE: Read the comments above pmap_kenter_prot_attr() as
 *       the function is used herein!
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
        vm_offset_t va, sva;
        vm_paddr_t pte1_offset;
        pt1_entry_t npte1;
        u_int l1prot, l2prot;

        PDEBUG(1, printf("%s: virt = %#x, start = %#x, end = %#x (size = %#x),"
            " prot = %d\n", __func__, *virt, start, end, end - start, prot));

        /*
         * NOTE(review): PTE1_AP_KR is used to build an L2 (pte2) protection
         * here; PTE2_AP_KR looks intended.  Verify whether the two defines
         * happen to share the same value before relying on this.
         */
        l2prot = (prot & VM_PROT_WRITE) ? PTE2_AP_KRW : PTE1_AP_KR;
        l2prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX;
        l1prot = ATTR_TO_L1(l2prot);

        va = *virt;
        /*
         * Does the physical address range's size and alignment permit at
         * least one section mapping to be created?
         */
        pte1_offset = start & PTE1_OFFSET;
        if ((end - start) - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) >=
            PTE1_SIZE) {
                /*
                 * Increase the starting virtual address so that its alignment
                 * does not preclude the use of section mappings.
                 */
                if ((va & PTE1_OFFSET) < pte1_offset)
                        va = pte1_trunc(va) + pte1_offset;
                else if ((va & PTE1_OFFSET) > pte1_offset)
                        va = pte1_roundup(va) + pte1_offset;
        }
        sva = va;
        while (start < end) {
                if ((start & PTE1_OFFSET) == 0 && end - start >= PTE1_SIZE) {
                        KASSERT((va & PTE1_OFFSET) == 0,
                            ("%s: misaligned va %#x", __func__, va));
                        npte1 = PTE1_KERN(start, l1prot, PTE1_ATTR_NORMAL);
                        pmap_kenter_pte1(va, npte1);
                        va += PTE1_SIZE;
                        start += PTE1_SIZE;
                } else {
                        pmap_kenter_prot_attr(va, start, l2prot,
                            PTE2_ATTR_NORMAL);
                        va += PAGE_SIZE;
                        start += PAGE_SIZE;
                }
        }
        /* One ranged flush covers both section and page entries made above. */
        tlb_flush_range(sva, va - sva);
        *virt = va;
        return (sva);
}

/*
 * Make a temporary mapping for a physical address.
 * This is only intended to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
        vm_offset_t va;

        /* QQQ: 'i' should be less or equal to MAXDUMPPGS.
 */

        va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
        pmap_kenter(va, pa);
        tlb_flush_local(va);
        return ((void *)crashdumpmap);
}


/*************************************
 *
 * TLB & cache maintenance routines.
 *
 *************************************/

/*
 * We inline these within pmap.c for speed.
 *
 * The flushes below are skipped when the pmap is neither the kernel pmap
 * nor active on any CPU, since such a pmap can have no live TLB entries.
 */
PMAP_INLINE void
pmap_tlb_flush(pmap_t pmap, vm_offset_t va)
{

        if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
                tlb_flush(va);
}

PMAP_INLINE void
pmap_tlb_flush_range(pmap_t pmap, vm_offset_t sva, vm_size_t size)
{

        if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
                tlb_flush_range(sva, size);
}

PMAP_INLINE void
pmap_tlb_flush_ng(pmap_t pmap)
{

        if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
                tlb_flush_all_ng();
}

/*
 * Abuse the pte2 nodes for unmapped kva to thread a kva freelist through.
 * Requirements:
 *  - Must deal with pages in order to ensure that none of the PTE2_* bits
 *    are ever set, PTE2_V in particular.
 *  - Assumes we can write to pte2s without pte2_store() atomic ops.
 *  - Assumes nothing will ever test these addresses for 0 to indicate
 *    no mapping instead of correctly checking PTE2_V.
 *  - Assumes a vm_offset_t will fit in a pte2 (true for arm).
 * Because PTE2_V is never set, there can be no mappings to invalidate.
 */
static vm_offset_t
pmap_pte2list_alloc(vm_offset_t *head)
{
        pt2_entry_t *pte2p;
        vm_offset_t va;

        va = *head;
        if (va == 0)
                /*
                 * NOTE(review): message still says "pmap_ptelist_alloc"
                 * (the i386 name); consider __func__ for consistency with
                 * the other panics in this file.
                 */
                panic("pmap_ptelist_alloc: exhausted ptelist KVA");
        pte2p = pt2map_entry(va);
        /* The pte2 of a free va holds the next free va (link in freelist). */
        *head = *pte2p;
        if (*head & PTE2_V)
                panic("%s: va with PTE2_V set!", __func__);
        *pte2p = 0;
        return (va);
}

static void
pmap_pte2list_free(vm_offset_t *head, vm_offset_t va)
{
        pt2_entry_t *pte2p;

        if (va & PTE2_V)
                panic("%s: freeing va with PTE2_V set!", __func__);
        pte2p = pt2map_entry(va);
        *pte2p = *head;         /* virtual!
PTE2_V is 0 though */ + *head = va; +} + +static void +pmap_pte2list_init(vm_offset_t *head, void *base, int npages) +{ + int i; + vm_offset_t va; + + *head = 0; + for (i = npages - 1; i >= 0; i--) { + va = (vm_offset_t)base + i * PAGE_SIZE; + pmap_pte2list_free(head, va); + } +} + +/***************************************************************************** + * + * PMAP third and final stage initialization. + * + * After pmap_init() is called, PMAP subsystem is fully initialized. + * + *****************************************************************************/ + +SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); + +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, + "Max number of PV entries"); +SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, + "Page share factor per proc"); + +static u_long nkpt2pg = NKPT2PG; +SYSCTL_ULONG(_vm_pmap, OID_AUTO, nkpt2pg, CTLFLAG_RD, + &nkpt2pg, 0, "Pre-allocated pages for kernel PT2s"); + +static int sp_enabled = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &sp_enabled, 0, "Are large page mappings enabled?"); + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, pte1, CTLFLAG_RD, 0, + "1MB page mapping counters"); + +static u_long pmap_pte1_demotions; +SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, demotions, CTLFLAG_RD, + &pmap_pte1_demotions, 0, "1MB page demotions"); + +static u_long pmap_pte1_mappings; +SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_pte1_mappings, 0, "1MB page mappings"); + +static u_long pmap_pte1_p_failures; +SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, p_failures, CTLFLAG_RD, + &pmap_pte1_p_failures, 0, "1MB page promotion failures"); + +static u_long pmap_pte1_promotions; +SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, promotions, CTLFLAG_RD, + &pmap_pte1_promotions, 0, "1MB page promotions"); + +static __inline ttb_entry_t +pmap_ttb_get(pmap_t pmap) +{ + + return (vtophys(pmap->pm_pt1) | ttb_flags); +} + +/* + * 
Initialize a vm_page's machine-dependent fields. + * + * Variations: + * 1. Pages for L2 page tables are always not managed. So, pv_list and + * pt2_wirecount can share same physical space. However, proper + * initialization on a page alloc for page tables and reinitialization + * on the page free must be ensured. + */ +void +pmap_page_init(vm_page_t m) +{ + + TAILQ_INIT(&m->md.pv_list); + pt2_wirecount_init(m); + m->md.pat_mode = PTE2_ATTR_NORMAL; +} + +/* + * Virtualization for faster way how to zero whole page. + */ +static __inline void +pagezero(void *page) +{ + + bzero(page, PAGE_SIZE); +} + +/* + * Zero L2 page table page. + * Use same KVA as in pmap_zero_page(). + */ +static __inline vm_paddr_t +pmap_pt2pg_zero(vm_page_t m) +{ + vm_paddr_t pa; + struct sysmaps *sysmaps; + + pa = VM_PAGE_TO_PHYS(m); + + /* + * XXX: For now, we map whole page even if it's already zero, + * to sync it even if the sync is only DSB. + */ + sched_pin(); + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (pte2_load(sysmaps->CMAP2) != 0) + panic("%s: CMAP2 busy", __func__); + pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(pa, PTE2_AP_KRW, + m->md.pat_mode)); + tlb_flush_local((vm_offset_t)sysmaps->CADDR2); + /* Even VM_ALLOC_ZERO request is only advisory. */ + if ((m->flags & PG_ZERO) == 0) + pagezero(sysmaps->CADDR2); + pte2_sync_range((pt2_entry_t *)sysmaps->CADDR2, PAGE_SIZE); + pte2_clear(sysmaps->CMAP2); + sched_unpin(); + mtx_unlock(&sysmaps->lock); + + return (pa); +} + +/* + * Init just allocated page as L2 page table(s) holder + * and return its physical address. + */ +static __inline vm_paddr_t +pmap_pt2pg_init(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + vm_paddr_t pa; + pt2_entry_t *pte2p; + + /* Check page attributes. */ + if (pmap_page_get_memattr(m) != pt_memattr) + pmap_page_set_memattr(m, pt_memattr); + + /* Zero page and init wire counts. 
*/ + pa = pmap_pt2pg_zero(m); + pt2_wirecount_init(m); + + /* + * Map page to PT2MAP address space for given pmap. + * Note that PT2MAP space is shared with all pmaps. + */ + if (pmap == kernel_pmap) + pmap_kenter_pt2tab(va, PTE2_KPT(pa)); + else { + pte2p = pmap_pt2tab_entry(pmap, va); + pt2tab_store(pte2p, PTE2_KPT_NG(pa)); + } + + return (pa); +} + +/* + * Initialize the pmap module. + * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. + */ +void +pmap_init(void) +{ + vm_size_t s; + pt2_entry_t *pte2p, pte2; + u_int i, pte1_idx, pv_npg; + + PDEBUG(1, printf("%s: phys_start = %#x\n", __func__, PHYSADDR)); + + /* + * Initialize the vm page array entries for kernel pmap's + * L2 page table pages allocated in advance. + */ + pte1_idx = pte1_index(KERNBASE - PT2MAP_SIZE); + pte2p = kern_pt2tab_entry(KERNBASE - PT2MAP_SIZE); + for (i = 0; i < nkpt2pg + NPG_IN_PT2TAB; i++, pte2p++) { + vm_paddr_t pa; + vm_page_t m; + + pte2 = pte2_load(pte2p); + KASSERT(pte2_is_valid(pte2), ("%s: no valid entry", __func__)); + + pa = pte2_pa(pte2); + m = PHYS_TO_VM_PAGE(pa); + KASSERT(m >= vm_page_array && + m < &vm_page_array[vm_page_array_size], + ("%s: L2 page table page is out of range", __func__)); + + m->pindex = pte1_idx; + m->phys_addr = pa; + pte1_idx += NPT2_IN_PG; + } + + /* + * Initialize the address space (zone) for the pv entries. Set a + * high water mark so that the system can recover from excessive + * numbers of pv entries. + */ + TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); + pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; + TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); + pv_entry_max = roundup(pv_entry_max, _NPCPV); + pv_entry_high_water = 9 * (pv_entry_max / 10); + + /* + * Are large page mappings enabled? 
+ */ + TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled); + if (sp_enabled) { + KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, + ("%s: can't assign to pagesizes[1]", __func__)); + pagesizes[1] = PTE1_SIZE; + } + + /* + * Calculate the size of the pv head table for sections. + * Handle the possibility that "vm_phys_segs[...].end" is zero. + * Note that the table is only for sections which could be promoted. + */ + first_managed_pa = pte1_trunc(vm_phys_segs[0].start); + pv_npg = (pte1_trunc(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE) + - first_managed_pa) / PTE1_SIZE + 1; + + /* + * Allocate memory for the pv head table for sections. + */ + s = (vm_size_t)(pv_npg * sizeof(struct md_page)); + s = round_page(s); + pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, + M_WAITOK | M_ZERO); + for (i = 0; i < pv_npg; i++) + TAILQ_INIT(&pv_table[i].pv_list); + + pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); + pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); + if (pv_chunkbase == NULL) + panic("%s: not enough kvm for pv chunks", __func__); + pmap_pte2list_init(&pv_vafree, pv_chunkbase, pv_maxchunks); +} + +/* + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. + * Note: SMP coherent. Uses a ranged shootdown IPI. 
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
        u_int anychanged;       /* count of entries actually rewritten */
        pt2_entry_t *epte2p, *pte2p, pte2;
        vm_page_t m;
        vm_paddr_t pa;

        anychanged = 0;
        pte2p = pt2map_entry(sva);
        epte2p = pte2p + count;
        while (pte2p < epte2p) {
                m = *ma++;
                pa = VM_PAGE_TO_PHYS(m);
                pte2 = pte2_load(pte2p);
                /* Rewrite only entries whose pa or attributes changed. */
                if ((pte2_pa(pte2) != pa) ||
                    (pte2_attr(pte2) != m->md.pat_mode)) {
                        anychanged++;
                        pte2_store(pte2p, PTE2_KERN(pa, PTE2_AP_KRW,
                            m->md.pat_mode));
                }
                pte2p++;
        }
        /* Flush the whole range once, and only if something changed. */
        if (__predict_false(anychanged))
                tlb_flush_range(sva, count * PAGE_SIZE);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
        vm_offset_t va;

        va = sva;
        while (count-- > 0) {
                pmap_kremove(va);
                va += PAGE_SIZE;
        }
        tlb_flush_range(sva, va - sva);
}

/*
 * Are we current address space or kernel?
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

        return (pmap == kernel_pmap ||
            (pmap == vmspace_pmap(curthread->td_proc->p_vmspace)));
}

/*
 * If the given pmap is not the current or kernel pmap, the returned
 * pte2 must be released by passing it to pmap_pte2_release().
 *
 * Panics if 'va' is mapped by a 1MB section; returns NULL if no L2
 * page table is linked for 'va'.
 */
static pt2_entry_t *
pmap_pte2(pmap_t pmap, vm_offset_t va)
{
        pt1_entry_t pte1;
        vm_paddr_t pt2pg_pa;

        pte1 = pte1_load(pmap_pte1(pmap, va));
        if (pte1_is_section(pte1))
                panic("%s: attempt to map PTE1", __func__);
        if (pte1_is_link(pte1)) {
                /* Are we current address space or kernel? */
                if (pmap_is_current(pmap))
                        return (pt2map_entry(va));
                /* Note that L2 page table size is not equal to PAGE_SIZE.
*/ + pt2pg_pa = trunc_page(pte1_link_pa(pte1)); + mtx_lock(&PMAP2mutex); + if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) { + pte2_store(PMAP2, PTE2_KPT(pt2pg_pa)); + tlb_flush((vm_offset_t)PADDR2); + } + return (PADDR2 + (arm32_btop(va) & (NPTE2_IN_PG - 1))); + } + return (NULL); +} + +/* + * Releases a pte2 that was obtained from pmap_pte2(). + * Be prepared for the pte2p being NULL. + */ +static __inline void +pmap_pte2_release(pt2_entry_t *pte2p) +{ + + if ((pt2_entry_t *)(trunc_page((vm_offset_t)pte2p)) == PADDR2) { + mtx_unlock(&PMAP2mutex); + } +} + +/* + * Super fast pmap_pte2 routine best used when scanning + * the pv lists. This eliminates many coarse-grained + * invltlb calls. Note that many of the pv list + * scans are across different pmaps. It is very wasteful + * to do an entire tlb flush for checking a single mapping. + * + * If the given pmap is not the current pmap, pvh_global_lock + * must be held and curthread pinned to a CPU. + */ +static pt2_entry_t * +pmap_pte2_quick(pmap_t pmap, vm_offset_t va) +{ + pt1_entry_t pte1; + vm_paddr_t pt2pg_pa; + + pte1 = pte1_load(pmap_pte1(pmap, va)); + if (pte1_is_section(pte1)) + panic("%s: attempt to map PTE1", __func__); + if (pte1_is_link(pte1)) { + /* Are we current address space or kernel? */ + if (pmap_is_current(pmap)) + return (pt2map_entry(va)); + rw_assert(&pvh_global_lock, RA_WLOCKED); + KASSERT(curthread->td_pinned > 0, + ("%s: curthread not pinned", __func__)); + /* Note that L2 page table size is not equal to PAGE_SIZE. 
*/ + pt2pg_pa = trunc_page(pte1_link_pa(pte1)); + if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) { + pte2_store(PMAP1, PTE2_KPT(pt2pg_pa)); +#ifdef SMP + PMAP1cpu = PCPU_GET(cpuid); +#endif + tlb_flush_local((vm_offset_t)PADDR1); + PMAP1changed++; + } else +#ifdef SMP + if (PMAP1cpu != PCPU_GET(cpuid)) { + PMAP1cpu = PCPU_GET(cpuid); + tlb_flush_local((vm_offset_t)PADDR1); + PMAP1changedcpu++; + } else +#endif + PMAP1unchanged++; + return (PADDR1 + (arm32_btop(va) & (NPTE2_IN_PG - 1))); + } + return (NULL); +} + +/* + * Routine: pmap_extract + * Function: + * Extract the physical page address associated + * with the given map/virtual_address pair. + */ +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t pa; + pt1_entry_t pte1; + pt2_entry_t *pte2p; + + PMAP_LOCK(pmap); + pte1 = pte1_load(pmap_pte1(pmap, va)); + if (pte1_is_section(pte1)) + pa = pte1_pa(pte1) | (va & PTE1_OFFSET); + else if (pte1_is_link(pte1)) { + pte2p = pmap_pte2(pmap, va); + pa = pte2_pa(pte2_load(pte2p)) | (va & PTE2_OFFSET); + pmap_pte2_release(pte2p); + } else + pa = 0; + PMAP_UNLOCK(pmap); + return (pa); +} + +/* + * Routine: pmap_extract_and_hold + * Function: + * Atomically extract and hold the physical page + * with the given pmap and virtual address pair + * if that mapping permits the given protection. 
+ */ +vm_page_t +pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +{ + vm_paddr_t pa, lockpa; + pt1_entry_t pte1; + pt2_entry_t pte2, *pte2p; + vm_page_t m; + + lockpa = 0; + m = NULL; + PMAP_LOCK(pmap); +retry: + pte1 = pte1_load(pmap_pte1(pmap, va)); + if (pte1_is_section(pte1)) { + if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) { + pa = pte1_pa(pte1) | (va & PTE1_OFFSET); + if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) + goto retry; + m = PHYS_TO_VM_PAGE(pa); + vm_page_hold(m); + } + } else if (pte1_is_link(pte1)) { + pte2p = pmap_pte2(pmap, va); + pte2 = pte2_load(pte2p); + pmap_pte2_release(pte2p); + if (pte2_is_valid(pte2) && + (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) { + pa = pte2_pa(pte2); + if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) + goto retry; + m = PHYS_TO_VM_PAGE(pa); + vm_page_hold(m); + } + } + PA_UNLOCK_COND(lockpa); + PMAP_UNLOCK(pmap); + return (m); +} + +/* + * Grow the number of kernel L2 page table entries, if needed. + */ +void +pmap_growkernel(vm_offset_t addr) +{ + vm_page_t m; + vm_paddr_t pt2pg_pa, pt2_pa; + pt1_entry_t pte1; + pt2_entry_t pte2; + + PDEBUG(1, printf("%s: addr = %#x\n", __func__, addr)); + /* + * All the time kernel_vm_end is first KVA for which underlying + * L2 page table is either not allocated or linked from L1 page table + * (not considering sections). Except for two possible cases: + * + * (1) in the very beginning as long as pmap_growkernel() was + * not called, it could be first unused KVA (which is not + * rounded up to PTE1_SIZE), + * + * (2) when all KVA space is mapped and kernel_map->max_offset + * address is not rounded up to PTE1_SIZE. (For example, + * it could be 0xFFFFFFFF.) 
+ */ + kernel_vm_end = pte1_roundup(kernel_vm_end); + mtx_assert(&kernel_map->system_mtx, MA_OWNED); + addr = roundup2(addr, PTE1_SIZE); + if (addr - 1 >= kernel_map->max_offset) + addr = kernel_map->max_offset; + while (kernel_vm_end < addr) { + pte1 = pte1_load(kern_pte1(kernel_vm_end)); + if (pte1_is_valid(pte1)) { + kernel_vm_end += PTE1_SIZE; + if (kernel_vm_end - 1 >= kernel_map->max_offset) { + kernel_vm_end = kernel_map->max_offset; + break; + } + continue; + } + + /* + * kernel_vm_end_new is used in pmap_pinit() when kernel + * mappings are entered to new pmap all at once to avoid race + * between pmap_kenter_pte1() and kernel_vm_end increase. + * The same aplies to pmap_kenter_pt2tab(). + */ + kernel_vm_end_new = kernel_vm_end + PTE1_SIZE; + + pte2 = pt2tab_load(kern_pt2tab_entry(kernel_vm_end)); + if (!pte2_is_valid(pte2)) { + /* + * Install new PT2s page into kernel PT2TAB. + */ + m = vm_page_alloc(NULL, + pte1_index(kernel_vm_end) & ~PT2PG_MASK, + VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED | VM_ALLOC_ZERO); + if (m == NULL) + panic("%s: no memory to grow kernel", __func__); + /* + * QQQ: To link all new L2 page tables from L1 page + * table now and so pmap_kenter_pte1() them + * at once together with pmap_kenter_pt2tab() + * could be nice speed up. However, + * pmap_growkernel() does not happen so often... + * QQQ: The other TTBR is another option. 
+ */ + pt2pg_pa = pmap_pt2pg_init(kernel_pmap, kernel_vm_end, + m); + } else + pt2pg_pa = pte2_pa(pte2); + + pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(kernel_vm_end)); + pmap_kenter_pte1(kernel_vm_end, PTE1_LINK(pt2_pa)); + + kernel_vm_end = kernel_vm_end_new; + if (kernel_vm_end - 1 >= kernel_map->max_offset) { + kernel_vm_end = kernel_map->max_offset; + break; + } + } +} + +static int +kvm_size(SYSCTL_HANDLER_ARGS) +{ + unsigned long ksize = vm_max_kernel_address - KERNBASE; + + return (sysctl_handle_long(oidp, &ksize, 0, req)); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, + 0, 0, kvm_size, "IU", "Size of KVM"); + +static int +kvm_free(SYSCTL_HANDLER_ARGS) +{ + unsigned long kfree = vm_max_kernel_address - kernel_vm_end; + + return (sysctl_handle_long(oidp, &kfree, 0, req)); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, + 0, 0, kvm_free, "IU", "Amount of KVM free"); + +/*********************************************** + * + * Pmap allocation/deallocation routines. + * + ***********************************************/ + +/* + * Initialize the pmap for the swapper process. + */ +void +pmap_pinit0(pmap_t pmap) +{ + PDEBUG(1, printf("%s: pmap = %p\n", __func__, pmap)); + + PMAP_LOCK_INIT(pmap); + + /* + * Kernel page table directory and pmap stuff around is already + * initialized, we are using it right now and here. So, finish + * only PMAP structures initialization for process0 ... + * + * Since the L1 page table and PT2TAB is shared with the kernel pmap, + * which is already included in the list "allpmaps", this pmap does + * not need to be inserted into that list. 
+ */
+ pmap->pm_pt1 = kern_pt1;
+ pmap->pm_pt2tab = kern_pt2tab;
+ CPU_ZERO(&pmap->pm_active);
+ PCPU_SET(curpmap, pmap);
+ TAILQ_INIT(&pmap->pm_pvchunk);
+ bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+ CPU_SET(0, &pmap->pm_active);
+}
+
+/* Copy kernel L1 (pte1) entries covering <sva, eva> into dpte1p; no sync. */
+static __inline void
+pte1_copy_nosync(pt1_entry_t *spte1p, pt1_entry_t *dpte1p, vm_offset_t sva,
+ vm_offset_t eva)
+{
+ u_int idx, count;
+
+ idx = pte1_index(sva);
+ count = (pte1_index(eva) - idx + 1) * sizeof(pt1_entry_t);
+ bcopy(spte1p + idx, dpte1p + idx, count);
+}
+
+/* Copy kernel PT2TAB entries covering <sva, eva> into dpte2p; no sync. */
+static __inline void
+pt2tab_copy_nosync(pt2_entry_t *spte2p, pt2_entry_t *dpte2p, vm_offset_t sva,
+ vm_offset_t eva)
+{
+ u_int idx, count;
+
+ idx = pt2tab_index(sva);
+ count = (pt2tab_index(eva) - idx + 1) * sizeof(pt2_entry_t);
+ bcopy(spte2p + idx, dpte2p + idx, count);
+}
+
+/*
+ * Initialize a preallocated and zeroed pmap structure,
+ * such as one in a vmspace structure.
+ */
+int
+pmap_pinit(pmap_t pmap)
+{
+ pt1_entry_t *pte1p;
+ pt2_entry_t *pte2p;
+ vm_paddr_t pa, pt2tab_pa;
+ u_int i;
+
+ PDEBUG(6, printf("%s: pmap = %p, pm_pt1 = %p\n", __func__, pmap,
+ pmap->pm_pt1));
+
+ /*
+ * No need to allocate L2 page table space yet but we do need
+ * a valid L1 page table and PT2TAB table.
+ *
+ * Install shared kernel mappings to these tables. It's a little
+ * tricky as some parts of KVA are reserved for vectors, devices,
+ * and whatever else. These parts are supposed to be above
+ * vm_max_kernel_address. Thus two regions should be installed:
+ *
+ * (1) <KERNBASE, kernel_vm_end),
+ * (2) <vm_max_kernel_address, 0xFFFFFFFF>.
+ *
+ * QQQ: The second region should be stable enough to be installed
+ * only once in time when the tables are allocated.
+ * QQQ: Maybe copy of both regions at once could be faster ...
+ * QQQ: Maybe the other TTBR is an option.
+ *
+ * Finally, install own PT2TAB table to these tables.
+ */
+
+ if (pmap->pm_pt1 == NULL) {
+ pmap->pm_pt1 = (pt1_entry_t *)kmem_alloc_contig(kernel_arena,
+ NB_IN_PT1, M_NOWAIT | M_ZERO, 0, -1UL, NB_IN_PT1, 0,
+ pt_memattr);
+ if (pmap->pm_pt1 == NULL)
+ return (0);
+ }
+ if (pmap->pm_pt2tab == NULL) {
+ /*
+ * QQQ: (1) PT2TAB must be contiguous. If PT2TAB is one page
+ * only, what should be the only size for 32 bit systems,
+ * then we could allocate it with vm_page_alloc() and all
+ * the stuff needed as other L2 page table pages.
+ * (2) Note that a process PT2TAB is special L2 page table
+ * page. Its mapping in kernel_arena is permanent and can
+ * be used no matter which process is current. Its mapping
+ * in PT2MAP can be used only for current process.
+ */
+ pmap->pm_pt2tab = (pt2_entry_t *)kmem_alloc_attr(kernel_arena,
+ NB_IN_PT2TAB, M_NOWAIT | M_ZERO, 0, -1UL, pt_memattr);
+ if (pmap->pm_pt2tab == NULL) {
+ /*
+ * QQQ: As struct pmap is allocated from UMA with
+ * UMA_ZONE_NOFREE flag, it's important to leave
+ * no allocation in pmap if initialization failed.
+ */
+ kmem_free(kernel_arena, (vm_offset_t)pmap->pm_pt1,
+ NB_IN_PT1);
+ pmap->pm_pt1 = NULL;
+ return (0);
+ }
+ /*
+ * QQQ: Each L2 page table page vm_page_t has pindex set to
+ * pte1 index of virtual address mapped by this page.
+ * It's not valid for non kernel PT2TABs themselves.
+ * The pindex of these pages can not be altered because
+ * of the way how they are allocated now. However, it
+ * should not be a problem.
+ */
+ }
+
+ mtx_lock_spin(&allpmaps_lock);
+ /*
+ * To avoid race with pmap_kenter_pte1() and pmap_kenter_pt2tab(),
+ * kernel_vm_end_new is used here instead of kernel_vm_end.
+ */
+ pte1_copy_nosync(kern_pt1, pmap->pm_pt1, KERNBASE,
+ kernel_vm_end_new - 1);
+ pte1_copy_nosync(kern_pt1, pmap->pm_pt1, vm_max_kernel_address,
+ 0xFFFFFFFF);
+ pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, KERNBASE,
+ kernel_vm_end_new - 1);
+ pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, vm_max_kernel_address,
+ 0xFFFFFFFF);
+ LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
+ mtx_unlock_spin(&allpmaps_lock);
+
+ /*
+ * Store PT2MAP PT2 pages (a.k.a. PT2TAB) in PT2TAB itself.
+ * I.e. self reference mapping. The PT2TAB is private, however mapped
+ * into shared PT2MAP space, so the mapping should be not global.
+ */
+ pt2tab_pa = vtophys(pmap->pm_pt2tab);
+ pte2p = pmap_pt2tab_entry(pmap, (vm_offset_t)PT2MAP);
+ for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) {
+ pt2tab_store(pte2p++, PTE2_KPT_NG(pa));
+ }
+
+ /* Insert PT2MAP PT2s into pmap PT1. */
+ pte1p = pmap_pte1(pmap, (vm_offset_t)PT2MAP);
+ for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) {
+ pte1_store(pte1p++, PTE1_LINK(pa));
+ }
+
+ /*
+ * Now synchronize new mapping which was made above.
+ */
+ pte1_sync_range(pmap->pm_pt1, NB_IN_PT1);
+ pte2_sync_range(pmap->pm_pt2tab, NB_IN_PT2TAB);
+
+ CPU_ZERO(&pmap->pm_active);
+ TAILQ_INIT(&pmap->pm_pvchunk);
+ bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+
+ return (1);
+}
+
+#ifdef SMP
+/*
+ * Deal with a SMP shootdown of other users of the pmap that we are
+ * trying to dispose of. This can be a bit hairy.
+ */
+/*
+ * Handshake state for the lazy TTB fix-up: written by pmap_lazyfix() under
+ * smp_ipi_mtx and read by the IPI handler. lazywait is the completion flag.
+ */
+static cpuset_t *lazymask;
+static ttb_entry_t lazyttb;
+static volatile u_int lazywait;
+
+/* IPI handler: switch this CPU away from the dying pmap's TTB if it uses it. */
+void
+pmap_lazyfix_action(void)
+{
+
+#ifdef COUNT_IPIS
+ (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
+#endif
+ spinlock_enter();
+ if (cp15_ttbr_get() == lazyttb) {
+ cp15_ttbr_set(curthread->td_pcb->pcb_pagedir);
+ }
+ CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
+ atomic_store_rel_int(&lazywait, 1);
+ spinlock_exit();
+
+}
+
+/* Same fix-up as the IPI handler, run directly on the current CPU. */
+static void
+pmap_lazyfix_self(u_int cpuid)
+{
+
+ spinlock_enter();
+ if (cp15_ttbr_get() == lazyttb) {
+ cp15_ttbr_set(curthread->td_pcb->pcb_pagedir);
+ }
+ CPU_CLR_ATOMIC(cpuid, lazymask);
+ spinlock_exit();
+}
+
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+ cpuset_t mymask, mask;
+ u_int cpuid, spins;
+ int lsb;
+
+ mask = pmap->pm_active;
+ while (!CPU_EMPTY(&mask)) {
+ spins = 50000000;
+
+ /* Find least significant set bit. */
+ lsb = CPU_FFS(&mask);
+ MPASS(lsb != 0);
+ lsb--;
+ CPU_SETOF(lsb, &mask);
+ mtx_lock_spin(&smp_ipi_mtx);
+
+ lazyttb = pmap_ttb_get(pmap);
+ cpuid = PCPU_GET(cpuid);
+
+ /* Use a cpuset just for having an easy check. */
+ CPU_SETOF(cpuid, &mymask);
+ if (!CPU_CMP(&mask, &mymask)) {
+ lazymask = &pmap->pm_active;
+ pmap_lazyfix_self(cpuid);
+ } else {
+ atomic_store_rel_int((u_int *)&lazymask,
+ (u_int)&pmap->pm_active);
+ atomic_store_rel_int(&lazywait, 0);
+ ipi_selected(mask, IPI_LAZYPMAP);
+ while (lazywait == 0) {
+ if (--spins == 0)
+ break;
+ }
+ }
+ mtx_unlock_spin(&smp_ipi_mtx);
+ if (spins == 0)
+ printf("%s: spun for 50000000\n", __func__);
+ mask = pmap->pm_active;
+ }
+}
+#else /* SMP */
+/*
+ * Cleaning up on uniprocessor is easy. For various reasons, we're
+ * unlikely to have to even execute this code, including the fact
+ * that the cleanup is deferred until the parent does a wait(2), which
+ * means that another userland process has run.
+ */
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+
+ if (!CPU_EMPTY(&pmap->pm_active)) {
+ cp15_ttbr_set(curthread->td_pcb->pcb_pagedir);
+ CPU_ZERO(&pmap->pm_active);
+ }
+}
+#endif /* SMP */
+
+#ifdef INVARIANTS
+/* Return TRUE if no user-range entry of the given PT2TAB is valid. */
+static boolean_t
+pt2tab_user_is_empty(pt2_entry_t *tab)
+{
+ u_int i, end;
+
+ end = pt2tab_index(VM_MAXUSER_ADDRESS);
+ for (i = 0; i < end; i++)
+ if (tab[i] != 0) return (FALSE);
+ return (TRUE);
+}
+#endif
+/*
+ * Release any resources held by the given physical map.
+ * Called when a pmap initialized by pmap_pinit is being released.
+ * Should only be called if the map contains no valid mappings.
+ */
+void
+pmap_release(pmap_t pmap)
+{
+#ifdef INVARIANTS
+ vm_offset_t start, end;
+#endif
+ KASSERT(pmap->pm_stats.resident_count == 0,
+ ("%s: pmap resident count %ld != 0", __func__,
+ pmap->pm_stats.resident_count));
+ KASSERT(pt2tab_user_is_empty(pmap->pm_pt2tab),
+ ("%s: has allocated user PT2(s)", __func__));
+
+ pmap_lazyfix(pmap);
+ mtx_lock_spin(&allpmaps_lock);
+ LIST_REMOVE(pmap, pm_list);
+ mtx_unlock_spin(&allpmaps_lock);
+
+#ifdef INVARIANTS
+ start = pte1_index(KERNBASE) * sizeof(pt1_entry_t);
+ end = (pte1_index(0xFFFFFFFF) + 1) * sizeof(pt1_entry_t);
+ bzero((char *)pmap->pm_pt1 + start, end - start);
+
+ start = pt2tab_index(KERNBASE) * sizeof(pt2_entry_t);
+ end = (pt2tab_index(0xFFFFFFFF) + 1) * sizeof(pt2_entry_t);
+ bzero((char *)pmap->pm_pt2tab + start, end - start);
+#endif
+ /*
+ * We are leaving PT1 and PT2TAB allocated on released pmap,
+ * so hopefully UMA vmspace_zone will always be inited with
+ * UMA_ZONE_NOFREE flag.
+ */
+}
+
+/*********************************************************
+ *
+ * L2 table pages and their pages management routines.
+ *
+ *********************************************************/
+
+/*
+ * Virtual interface for L2 page table wire counting.
+ *
+ * Each L2 page table in a page has own counter which counts a number of
+ * valid mappings in a table.
Global page counter counts mappings in all
+ * tables in a page plus a single itself mapping in PT2TAB.
+ *
+ * During a promotion we leave the associated L2 page table counter
+ * untouched, so the table (strictly speaking a page which holds it)
+ * is never freed if promoted.
+ *
+ * If a page m->wire_count == 1 then no valid mappings exist in any L2 page
+ * table in the page and the page itself is only mapped in PT2TAB.
+ */
+
+static __inline void
+pt2_wirecount_init(vm_page_t m)
+{
+ u_int i;
+
+ /*
+ * Note: A page m is allocated with VM_ALLOC_WIRED flag and
+ * m->wire_count should be already set correctly.
+ * So, there is no need to set it again herein.
+ */
+ for (i = 0; i < NPT2_IN_PG; i++)
+ m->md.pt2_wirecount[i] = 0;
+}
+
+/* Add one valid-mapping reference to the L2 page table at pte1_idx. */
+static __inline void
+pt2_wirecount_inc(vm_page_t m, uint32_t pte1_idx)
+{
+
+ /*
+ * Note: A just modified pte2 (i.e. already allocated)
+ * is acquiring one extra reference which must be
+ * explicitly cleared. It influences the KASSERTs herein.
+ * All L2 page tables in a page always belong to the same
+ * pmap, so we allow only one extra reference for the page.
+ */
+ KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] < (NPTE2_IN_PT2 + 1),
+ ("%s: PT2 is overflowing ...", __func__));
+ KASSERT(m->wire_count <= (NPTE2_IN_PG + 1),
+ ("%s: PT2PG is overflowing ...", __func__));
+
+ m->wire_count++;
+ m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]++;
+}
+
+/* Drop one valid-mapping reference from the L2 page table at pte1_idx. */
+static __inline void
+pt2_wirecount_dec(vm_page_t m, uint32_t pte1_idx)
+{
+
+ KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] != 0,
+ ("%s: PT2 is underflowing ...", __func__));
+ KASSERT(m->wire_count > 1,
+ ("%s: PT2PG is underflowing ...", __func__));
+
+ m->wire_count--;
+ m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]--;
+}
+
+/*
+ * Set the wire count of the L2 page table at pte1_idx to count,
+ * adjusting the page's global wire count accordingly.
+ */
+static __inline void
+pt2_wirecount_set(vm_page_t m, uint32_t pte1_idx, uint16_t count)
+{
+
+ KASSERT(count <= NPTE2_IN_PT2,
+ ("%s: invalid count %u", __func__, count));
+ KASSERT(m->wire_count > m->md.pt2_wirecount[pte1_idx & PT2PG_MASK],
+ ("%s: PT2PG corrupting (%u, %u) ...", __func__, m->wire_count,
+ m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]));
+
+ m->wire_count -= m->md.pt2_wirecount[pte1_idx & PT2PG_MASK];
+ m->wire_count += count;
+ m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] = count;
+
+ KASSERT(m->wire_count <= (NPTE2_IN_PG + 1),
+ ("%s: PT2PG is overflowed (%u) ...", __func__, m->wire_count));
+}
+
+/* Return the number of valid mappings in the L2 page table at pte1_idx. */
+static __inline uint32_t
+pt2_wirecount_get(vm_page_t m, uint32_t pte1_idx)
+{
+
+ return (m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]);
+}
+
+/* TRUE if the L2 page table mapping va holds no valid mappings. */
+static __inline boolean_t
+pt2_is_empty(vm_page_t m, vm_offset_t va)
+{
+
+ return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] == 0);
+}
+
+/* TRUE if every entry of the L2 page table mapping va is valid. */
+static __inline boolean_t
+pt2_is_full(vm_page_t m, vm_offset_t va)
+{
+
+ return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] ==
+ NPTE2_IN_PT2);
+}
+
+/* TRUE if the PT2 page holds no valid mappings (only its PT2TAB self-ref). */
+static __inline boolean_t
+pt2pg_is_empty(vm_page_t m)
+{
+
+ return (m->wire_count == 1);
+}
+
+/*
+ * This routine is called if the L2 page table
+ * is not mapped correctly.
+ */
+static vm_page_t
+_pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
+{
+ uint32_t pte1_idx;
+ pt1_entry_t *pte1p;
+ pt2_entry_t pte2;
+ vm_page_t m;
+ vm_paddr_t pt2pg_pa, pt2_pa;
+
+ pte1_idx = pte1_index(va);
+ pte1p = pmap->pm_pt1 + pte1_idx;
+
+ KASSERT(pte1_load(pte1p) == 0,
+ ("%s: pm_pt1[%#x] is not zero: %#x", __func__, pte1_idx,
+ pte1_load(pte1p)));
+
+ pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, va));
+ if (!pte2_is_valid(pte2)) {
+ /*
+ * Install new PT2s page into pmap PT2TAB.
+ */
+ m = vm_page_alloc(NULL, pte1_idx & ~PT2PG_MASK,
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+ if (m == NULL) {
+ if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
+ PMAP_UNLOCK(pmap);
+ rw_wunlock(&pvh_global_lock);
+ VM_WAIT;
+ rw_wlock(&pvh_global_lock);
+ PMAP_LOCK(pmap);
+ }
+
+ /*
+ * Indicate the need to retry. While waiting,
+ * the L2 page table page may have been allocated.
+ */
+ return (NULL);
+ }
+ pmap->pm_stats.resident_count++;
+ pt2pg_pa = pmap_pt2pg_init(pmap, va, m);
+ } else {
+ pt2pg_pa = pte2_pa(pte2);
+ m = PHYS_TO_VM_PAGE(pt2pg_pa);
+ }
+
+ pt2_wirecount_inc(m, pte1_idx);
+ pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx);
+ pte1_store(pte1p, PTE1_LINK(pt2_pa));
+
+ return (m);
+}
+
+/*
+ * Return the page holding the L2 page table which maps va, allocating
+ * (or demoting a section) as needed. Returns NULL only if allocation
+ * failed and PMAP_ENTER_NOSLEEP was set.
+ */
+static vm_page_t
+pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
+{
+ u_int pte1_idx;
+ pt1_entry_t *pte1p, pte1;
+ vm_page_t m;
+
+ pte1_idx = pte1_index(va);
+retry:
+ pte1p = pmap->pm_pt1 + pte1_idx;
+ pte1 = pte1_load(pte1p);
+
+ /*
+ * This supports switching from a 1MB page to a
+ * normal 4K page.
+ */
+ if (pte1_is_section(pte1)) {
+ (void)pmap_demote_pte1(pmap, pte1p, va);
+ /*
+ * Reload pte1 after demotion.
+ *
+ * Note: Demotion can even fail as either PT2 is not found for
+ * the virtual address or PT2PG can not be allocated.
+ */
+ pte1 = pte1_load(pte1p);
+ }
+
+ /*
+ * If the L2 page table page is mapped, we just increment the
+ * hold count, and activate it.
+ */ + if (pte1_is_link(pte1)) { + m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); + pt2_wirecount_inc(m, pte1_idx); + } else { + /* + * Here if the PT2 isn't mapped, or if it has + * been deallocated. + */ + m = _pmap_allocpte2(pmap, va, flags); + if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0) + goto retry; + } + + return (m); +} + +static __inline void +pmap_free_zero_pages(struct spglist *free) +{ + vm_page_t m; + + while ((m = SLIST_FIRST(free)) != NULL) { + SLIST_REMOVE_HEAD(free, plinks.s.ss); + /* Preserve the page's PG_ZERO setting. */ + vm_page_free_toq(m); + } +} + +/* + * Schedule the specified unused L2 page table page to be freed. Specifically, + * add the page to the specified list of pages that will be released to the + * physical memory manager after the TLB has been updated. + */ +static __inline void +pmap_add_delayed_free_list(vm_page_t m, struct spglist *free) +{ + + /* + * Put page on a list so that it is released after + * *ALL* TLB shootdown is done + */ +#ifdef PMAP_DEBUG + pmap_zero_page_check(m); +#endif + m->flags |= PG_ZERO; + SLIST_INSERT_HEAD(free, m, plinks.s.ss); +} + +/* + * Unwire L2 page tables page. + */ +static void +pmap_unwire_pt2pg(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pt1_entry_t *pte1p, opte1 __unused; + pt2_entry_t *pte2p; + uint32_t i; + + KASSERT(pt2pg_is_empty(m), + ("%s: pmap %p PT2PG %p wired", __func__, pmap, m)); + + /* + * Unmap all L2 page tables in the page from L1 page table. + * + * QQQ: Individual L2 page tables (except the last one) can be unmapped + * earlier. However, we are doing that this way. 
+ */ + KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK), + ("%s: pmap %p va %#x PT2PG %p bad index", __func__, pmap, va, m)); + pte1p = pmap->pm_pt1 + m->pindex; + for (i = 0; i < NPT2_IN_PG; i++, pte1p++) { + KASSERT(m->md.pt2_wirecount[i] == 0, + ("%s: pmap %p PT2 %u (PG %p) wired", __func__, pmap, i, m)); + opte1 = pte1_load(pte1p); + if (pte1_is_link(opte1)) + pte1_clear(pte1p); +#ifdef INVARIANTS + else + KASSERT((opte1 == 0) || pte1_is_section(opte1), + ("%s: pmap %p va %#x bad pte1 %x at %u", __func__, + pmap, va, opte1, i)); +#endif + } + + /* + * Unmap the page from PT2TAB. + */ + pte2p = pmap_pt2tab_entry(pmap, va); + (void)pt2tab_load_clear(pte2p); + pmap_tlb_flush(pmap, pt2map_pt2pg(va)); + + m->wire_count = 0; + pmap->pm_stats.resident_count--; + + /* + * This is a release store so that the ordinary store unmapping + * the L2 page table page is globally performed before TLB shoot- + * down is begun. + */ + atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); +} + +/* + * Decrements a L2 page table page's wire count, which is used to record the + * number of valid page table entries within the page. If the wire count + * drops to zero, then the page table page is unmapped. Returns TRUE if the + * page table page was unmapped and FALSE otherwise. + */ +static __inline boolean_t +pmap_unwire_pt2(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) +{ + pt2_wirecount_dec(m, pte1_index(va)); + if (pt2pg_is_empty(m)) { + /* + * QQQ: Wire count is zero, so whole page should be zero and + * we can set PG_ZERO flag to it. + * Note that when promotion is enabled, it takes some + * more efforts. See pmap_unwire_pt2_all() below. + */ + pmap_unwire_pt2pg(pmap, va, m); + pmap_add_delayed_free_list(m, free); + return (TRUE); + } else + return (FALSE); +} + +/* + * Drop a L2 page table page's wire count at once, which is used to record + * the number of valid L2 page table entries within the page. 
If the wire
+ * count drops to zero, then the L2 page table page is unmapped.
+ */
+static __inline void
+pmap_unwire_pt2_all(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ struct spglist *free)
+{
+ u_int pte1_idx = pte1_index(va);
+
+ KASSERT(m->pindex == (pte1_idx & ~PT2PG_MASK),
+ ("%s: PT2 page's pindex is wrong", __func__));
+ KASSERT(m->wire_count > pt2_wirecount_get(m, pte1_idx),
+ ("%s: bad pt2 wire count %u > %u", __func__, m->wire_count,
+ pt2_wirecount_get(m, pte1_idx)));
+
+ /*
+ * It's possible that the L2 page table was never used.
+ * It happens when a section was created without promotion.
+ */
+ if (pt2_is_full(m, va)) {
+ pt2_wirecount_set(m, pte1_idx, 0);
+
+ /*
+ * QQQ: We clear L2 page table now, so when L2 page table page
+ * is going to be freed, we can set it PG_ZERO flag ...
+ * This function is called only on section mappings, so
+ * hopefully it's not too big an overhead.
+ *
+ * XXX: If pmap is current, existing PT2MAP mapping could be
+ * used for zeroing.
+ */
+ pmap_zero_page_area(m, page_pt2off(pte1_idx), NB_IN_PT2);
+ }
+#ifdef INVARIANTS
+ else
+ KASSERT(pt2_is_empty(m, va), ("%s: PT2 is not empty (%u)",
+ __func__, pt2_wirecount_get(m, pte1_idx)));
+#endif
+ if (pt2pg_is_empty(m)) {
+ pmap_unwire_pt2pg(pmap, va, m);
+ pmap_add_delayed_free_list(m, free);
+ }
+}
+
+/*
+ * After removing a L2 page table entry, this routine is used to
+ * conditionally free the page, and manage the hold/wire counts.
+ */
+static boolean_t
+pmap_unuse_pt2(pmap_t pmap, vm_offset_t va, struct spglist *free)
+{
+ pt1_entry_t pte1;
+ vm_page_t mpte;
+
+ if (va >= VM_MAXUSER_ADDRESS)
+ return (FALSE);
+ pte1 = pte1_load(pmap_pte1(pmap, va));
+ mpte = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
+ return (pmap_unwire_pt2(pmap, va, mpte, free));
+}
+
+/*************************************
+ *
+ * Page management routines.
+ * + *************************************/ + +CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); +CTASSERT(_NPCM == 11); +CTASSERT(_NPCPV == 336); + +static __inline struct pv_chunk * +pv_to_chunk(pv_entry_t pv) +{ + + return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); +} + +#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) + +#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ +#define PC_FREE10 0x0000fffful /* Free values for index 10 */ + +static const uint32_t pc_freemask[_NPCM] = { + PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, + PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, + PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, + PC_FREE0_9, PC_FREE10 +}; + +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, + "Current number of pv entries"); + +#ifdef PV_STATS +static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; + +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, + "Current number of pv entry chunks"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, + "Current number of pv entry chunks allocated"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, + "Current number of pv entry chunks frees"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, + 0, "Number of times tried to get a chunk page but failed."); + +static long pv_entry_frees, pv_entry_allocs; +static int pv_entry_spare; + +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, + "Current number of pv entry frees"); +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, + 0, "Current number of pv entry allocs"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, + "Current number of spare pv entries"); +#endif + +/* + * Is given page managed? 
+ */ +static __inline boolean_t +is_managed(vm_paddr_t pa) +{ + vm_offset_t pgnum; + vm_page_t m; + + pgnum = atop(pa); + if (pgnum >= first_page) { + m = PHYS_TO_VM_PAGE(pa); + if (m == NULL) + return (FALSE); + if ((m->oflags & VPO_UNMANAGED) == 0) + return (TRUE); + } + return (FALSE); +} + +static __inline boolean_t +pte1_is_managed(pt1_entry_t pte1) +{ + + return (is_managed(pte1_pa(pte1))); +} + +static __inline boolean_t +pte2_is_managed(pt2_entry_t pte2) +{ + + return (is_managed(pte2_pa(pte2))); +} + +/* + * We are in a serious low memory condition. Resort to + * drastic measures to free some pages so we can allocate + * another pv entry chunk. + */ +static vm_page_t +pmap_pv_reclaim(pmap_t locked_pmap) +{ + struct pch newtail; + struct pv_chunk *pc; + struct md_page *pvh; + pt1_entry_t *pte1p; + pmap_t pmap; + pt2_entry_t *pte2p, tpte2; + pv_entry_t pv; + vm_offset_t va; + vm_page_t m, m_pc; + struct spglist free; + uint32_t inuse; + int bit, field, freed; + + PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); + pmap = NULL; + m_pc = NULL; + SLIST_INIT(&free); + TAILQ_INIT(&newtail); + while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || + SLIST_EMPTY(&free))) { + TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + if (pmap != pc->pc_pmap) { + if (pmap != NULL) { + pmap_tlb_flush_ng(pmap); + if (pmap != locked_pmap) + PMAP_UNLOCK(pmap); + } + pmap = pc->pc_pmap; + /* Avoid deadlock and lock recursion. */ + if (pmap > locked_pmap) + PMAP_LOCK(pmap); + else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { + pmap = NULL; + TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); + continue; + } + } + + /* + * Destroy every non-wired, 4 KB page mapping in the chunk. 
+ */ + freed = 0; + for (field = 0; field < _NPCM; field++) { + for (inuse = ~pc->pc_map[field] & pc_freemask[field]; + inuse != 0; inuse &= ~(1UL << bit)) { + bit = ffs(inuse) - 1; + pv = &pc->pc_pventry[field * 32 + bit]; + va = pv->pv_va; + pte1p = pmap_pte1(pmap, va); + if (pte1_is_section(pte1_load(pte1p))) + continue; + pte2p = pmap_pte2(pmap, va); + tpte2 = pte2_load(pte2p); + if ((tpte2 & PTE2_W) == 0) + tpte2 = pte2_load_clear(pte2p); + pmap_pte2_release(pte2p); + if ((tpte2 & PTE2_W) != 0) + continue; + KASSERT(tpte2 != 0, + ("pmap_pv_reclaim: pmap %p va %#x zero pte", + pmap, va)); + if (pte2_is_global(tpte2)) + tlb_flush(va); + m = PHYS_TO_VM_PAGE(pte2_pa(tpte2)); + if (pte2_is_dirty(tpte2)) + vm_page_dirty(m); + if ((tpte2 & PTE2_A) != 0) + vm_page_aflag_set(m, PGA_REFERENCED); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + if (TAILQ_EMPTY(&pvh->pv_list)) { + vm_page_aflag_clear(m, + PGA_WRITEABLE); + } + } + pc->pc_map[field] |= 1UL << bit; + pmap_unuse_pt2(pmap, va, &free); + freed++; + } + } + if (freed == 0) { + TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); + continue; + } + /* Every freed mapping is for a 4 KB page. */ + pmap->pm_stats.resident_count -= freed; + PV_STAT(pv_entry_frees += freed); + PV_STAT(pv_entry_spare += freed); + pv_entry_count -= freed; + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + for (field = 0; field < _NPCM; field++) + if (pc->pc_map[field] != pc_freemask[field]) { + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, + pc_list); + TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); + + /* + * One freed pv entry in locked_pmap is + * sufficient. + */ + if (pmap == locked_pmap) + goto out; + break; + } + if (field == _NPCM) { + PV_STAT(pv_entry_spare -= _NPCPV); + PV_STAT(pc_chunk_count--); + PV_STAT(pc_chunk_frees++); + /* Entire chunk is free; return it. 
*/ + m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); + pmap_qremove((vm_offset_t)pc, 1); + pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); + break; + } + } +out: + TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); + if (pmap != NULL) { + pmap_tlb_flush_ng(pmap); + if (pmap != locked_pmap) + PMAP_UNLOCK(pmap); + } + if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) { + m_pc = SLIST_FIRST(&free); + SLIST_REMOVE_HEAD(&free, plinks.s.ss); + /* Recycle a freed page table page. */ + m_pc->wire_count = 1; + atomic_add_int(&vm_cnt.v_wire_count, 1); + } + pmap_free_zero_pages(&free); + return (m_pc); +} + +static void +free_pv_chunk(struct pv_chunk *pc) +{ + vm_page_t m; + + TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + PV_STAT(pv_entry_spare -= _NPCPV); + PV_STAT(pc_chunk_count--); + PV_STAT(pc_chunk_frees++); + /* entire chunk is free, return it */ + m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); + pmap_qremove((vm_offset_t)pc, 1); + vm_page_unwire(m, PQ_INACTIVE); + vm_page_free(m); + pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); +} + +/* + * Free the pv_entry back to the free list. + */ +static void +free_pv_entry(pmap_t pmap, pv_entry_t pv) +{ + struct pv_chunk *pc; + int idx, field, bit; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PV_STAT(pv_entry_frees++); + PV_STAT(pv_entry_spare++); + pv_entry_count--; + pc = pv_to_chunk(pv); + idx = pv - &pc->pc_pventry[0]; + field = idx / 32; + bit = idx % 32; + pc->pc_map[field] |= 1ul << bit; + for (idx = 0; idx < _NPCM; idx++) + if (pc->pc_map[idx] != pc_freemask[idx]) { + /* + * 98% of the time, pc is already at the head of the + * list. If it isn't already, move it to the head. 
+ */ + if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != + pc)) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, + pc_list); + } + return; + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + free_pv_chunk(pc); +} + +/* + * Get a new pv_entry, allocating a block from the system + * when needed. + */ +static pv_entry_t +get_pv_entry(pmap_t pmap, boolean_t try) +{ + static const struct timeval printinterval = { 60, 0 }; + static struct timeval lastprint; + int bit, field; + pv_entry_t pv; + struct pv_chunk *pc; + vm_page_t m; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PV_STAT(pv_entry_allocs++); + pv_entry_count++; + if (pv_entry_count > pv_entry_high_water) + if (ratecheck(&lastprint, &printinterval)) + printf("Approaching the limit on PV entries, consider " + "increasing either the vm.pmap.shpgperproc or the " + "vm.pmap.pv_entry_max tunable.\n"); +retry: + pc = TAILQ_FIRST(&pmap->pm_pvchunk); + if (pc != NULL) { + for (field = 0; field < _NPCM; field++) { + if (pc->pc_map[field]) { + bit = ffs(pc->pc_map[field]) - 1; + break; + } + } + if (field < _NPCM) { + pv = &pc->pc_pventry[field * 32 + bit]; + pc->pc_map[field] &= ~(1ul << bit); + /* If this was the last item, move it to tail */ + for (field = 0; field < _NPCM; field++) + if (pc->pc_map[field] != 0) { + PV_STAT(pv_entry_spare--); + return (pv); /* not full, return */ + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + PV_STAT(pv_entry_spare--); + return (pv); + } + } + /* + * Access to the pte2list "pv_vafree" is synchronized by the pvh + * global lock. If "pv_vafree" is currently non-empty, it will + * remain non-empty until pmap_pte2list_alloc() completes. 
+ */ + if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { + if (try) { + pv_entry_count--; + PV_STAT(pc_chunk_tryfail++); + return (NULL); + } + m = pmap_pv_reclaim(pmap); + if (m == NULL) + goto retry; + } + PV_STAT(pc_chunk_count++); + PV_STAT(pc_chunk_allocs++); + pc = (struct pv_chunk *)pmap_pte2list_alloc(&pv_vafree); + pmap_qenter((vm_offset_t)pc, &m, 1); + pc->pc_pmap = pmap; + pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ + for (field = 1; field < _NPCM; field++) + pc->pc_map[field] = pc_freemask[field]; + TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); + pv = &pc->pc_pventry[0]; + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + PV_STAT(pv_entry_spare += _NPCPV - 1); + return (pv); +} + +/* + * Create a pv entry for page at pa for + * (pmap, va). + */ +static void +pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pv_entry_t pv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + pv = get_pv_entry(pmap, FALSE); + pv->pv_va = va; + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); +} + +static __inline pv_entry_t +pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) +{ + pv_entry_t pv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + if (pmap == PV_PMAP(pv) && va == pv->pv_va) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + break; + } + } + return (pv); +} + +static void +pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) +{ + pv_entry_t pv; + + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); + free_pv_entry(pmap, pv); +} + +static void +pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) +{ + struct md_page *pvh; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + pmap_pvh_free(&m->md, pmap, va); + if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + if 
(TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } +} + +static void +pmap_pv_demote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_offset_t va_last; + vm_page_t m; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + KASSERT((pa & PTE1_OFFSET) == 0, + ("pmap_pv_demote_pte1: pa is not 1mpage aligned")); + + /* + * Transfer the 1mpage's pv entry for this mapping to the first + * page's pv list. + */ + pvh = pa_to_pvh(pa); + va = pte1_trunc(va); + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_demote_pte1: pv not found")); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + /* Instantiate the remaining NPTE2_IN_PT2 - 1 pv entries. */ + va_last = va + PTE1_SIZE - PAGE_SIZE; + do { + m++; + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_pv_demote_pte1: page %p is not managed", m)); + va += PAGE_SIZE; + pmap_insert_entry(pmap, va, m); + } while (va < va_last); +} + +static void +pmap_pv_promote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_offset_t va_last; + vm_page_t m; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + KASSERT((pa & PTE1_OFFSET) == 0, + ("pmap_pv_promote_pte1: pa is not 1mpage aligned")); + + /* + * Transfer the first page's pv entry for this mapping to the + * 1mpage's pv list. Aside from avoiding the cost of a call + * to get_pv_entry(), a transfer avoids the possibility that + * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim() + * removes one of the mappings that is being promoted. + */ + m = PHYS_TO_VM_PAGE(pa); + va = pte1_trunc(va); + pv = pmap_pvh_remove(&m->md, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_promote_pte1: pv not found")); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + /* Free the remaining NPTE2_IN_PT2 - 1 pv entries. 
*/ + va_last = va + PTE1_SIZE - PAGE_SIZE; + do { + m++; + va += PAGE_SIZE; + pmap_pvh_free(&m->md, pmap, va); + } while (va < va_last); +} + +/* + * Conditionally create a pv entry. + */ +static boolean_t +pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pv_entry_t pv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + if (pv_entry_count < pv_entry_high_water && + (pv = get_pv_entry(pmap, TRUE)) != NULL) { + pv->pv_va = va; + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + return (TRUE); + } else + return (FALSE); +} + +/* + * Create the pv entries for each of the pages within a section. + */ +static boolean_t +pmap_pv_insert_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) +{ + struct md_page *pvh; + pv_entry_t pv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + if (pv_entry_count < pv_entry_high_water && + (pv = get_pv_entry(pmap, TRUE)) != NULL) { + pv->pv_va = va; + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + return (TRUE); + } else + return (FALSE); +} + +/* + * Tries to promote the NPTE2_IN_PT2, contiguous 4KB page mappings that are + * within a single page table page (PT2) to a single 1MB page mapping. + * For promotion to occur, two conditions must be met: (1) the 4KB page + * mappings must map aligned, contiguous physical memory and (2) the 4KB page + * mappings must have identical characteristics. + * + * Managed (PG_MANAGED) mappings within the kernel address space are not + * promoted. The reason is that kernel PTE1s are replicated in each pmap but + * pmap_remove_write(), pmap_clear_modify(), and pmap_clear_reference() only + * read the PTE1 from the kernel pmap. 
+ */
+static void
+pmap_promote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
+{
+ pt1_entry_t npte1;
+ pt2_entry_t *fpte2p, fpte2, fpte2_fav;
+ pt2_entry_t *pte2p, pte2;
+ vm_offset_t pteva __unused;
+ vm_page_t m __unused;
+
+ PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__,
+ pmap, va, pte1_load(pte1p), pte1p));
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ /*
+ * Examine the first PTE2 in the specified PT2. Abort if this PTE2 is
+ * either invalid, unused, or does not map the first 4KB physical page
+ * within a 1MB page.
+ */
+ fpte2p = pmap_pte2_quick(pmap, pte1_trunc(va));
+setpte1:
+ fpte2 = pte2_load(fpte2p);
+ if ((fpte2 & ((PTE2_FRAME & PTE1_OFFSET) | PTE2_A | PTE2_V)) !=
+ (PTE2_A | PTE2_V)) {
+ pmap_pte1_p_failures++;
+ CTR3(KTR_PMAP, "%s: failure(1) for va %#x in pmap %p",
+ __func__, va, pmap);
+ return;
+ }
+ if (pte2_is_managed(fpte2) && pmap == kernel_pmap) {
+ pmap_pte1_p_failures++;
+ CTR3(KTR_PMAP, "%s: failure(2) for va %#x in pmap %p",
+ __func__, va, pmap);
+ return;
+ }
+ if ((fpte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) {
+ /*
+ * When page is not modified, PTE2_RO can be set without
+ * a TLB invalidation.
+ *
+ * Note: When modified bit is being set, then in hardware case,
+ * the TLB entry is re-read (updated) from PT2, and in
+ * software case (abort), the PTE2 is read from PT2 and
+ * TLB flushed if changed. The following cmpset() solves
+ * any race with setting this bit in both cases.
+ */
+ if (!pte2_cmpset(fpte2p, fpte2, fpte2 | PTE2_RO))
+ goto setpte1;
+ fpte2 |= PTE2_RO;
+ }
+
+ /*
+ * Examine each of the other PTE2s in the specified PT2. Abort if this
+ * PTE2 maps an unexpected 4KB physical page or does not have identical
+ * characteristics to the first PTE2.
+ */ + fpte2_fav = (fpte2 & (PTE2_FRAME | PTE2_A | PTE2_V)); + fpte2_fav += PTE1_SIZE - PTE2_SIZE; /* examine from the end */ + for (pte2p = fpte2p + NPTE2_IN_PT2 - 1; pte2p > fpte2p; pte2p--) { +setpte2: + pte2 = pte2_load(pte2p); + if ((pte2 & (PTE2_FRAME | PTE2_A | PTE2_V)) != fpte2_fav) { + pmap_pte1_p_failures++; + CTR3(KTR_PMAP, "%s: failure(3) for va %#x in pmap %p", + __func__, va, pmap); + return; + } + if ((pte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) { + /* + * When page is not modified, PTE2_RO can be set + * without a TLB invalidation. See note above. + */ + if (!pte2_cmpset(pte2p, pte2, pte2 | PTE2_RO)) + goto setpte2; + pte2 |= PTE2_RO; + pteva = pte1_trunc(va) | (pte2 & PTE1_OFFSET & + PTE2_FRAME); + CTR3(KTR_PMAP, "%s: protect for va %#x in pmap %p", + __func__, pteva, pmap); + } + if ((pte2 & PTE2_PROMOTE) != (fpte2 & PTE2_PROMOTE)) { + pmap_pte1_p_failures++; + CTR3(KTR_PMAP, "%s: failure(4) for va %#x in pmap %p", + __func__, va, pmap); + return; + } + + fpte2_fav -= PTE2_SIZE; + } + /* + * The page table page in its current state will stay in PT2TAB + * until the PTE1 mapping the section is demoted by pmap_demote_pte1() + * or destroyed by pmap_remove_pte1(). + * + * Note that L2 page table size is not equal to PAGE_SIZE. + */ + m = PHYS_TO_VM_PAGE(trunc_page(pte1_link_pa(pte1_load(pte1p)))); + KASSERT(m >= vm_page_array && m < &vm_page_array[vm_page_array_size], + ("%s: PT2 page is out of range", __func__)); + KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK), + ("%s: PT2 page's pindex is wrong", __func__)); + + /* + * Get pte1 from pte2 format. + */ + npte1 = (fpte2 & PTE1_FRAME) | ATTR_TO_L1(fpte2) | PTE1_V; + + /* + * Promote the pv entries. + */ + if (pte2_is_managed(fpte2)) + pmap_pv_promote_pte1(pmap, va, pte1_pa(npte1)); + + /* + * Map the section. + */ + if (pmap == kernel_pmap) + pmap_kenter_pte1(va, npte1); + else + pte1_store(pte1p, npte1); + /* + * Flush old small mappings. 
We call single pmap_tlb_flush() in + * pmap_demote_pte1() and pmap_remove_pte1(), so we must be sure that + * no small mappings survive. We assume that given pmap is current and + * don't play game with PTE2_NG. + */ + pmap_tlb_flush_range(pmap, pte1_trunc(va), PTE1_SIZE); + + pmap_pte1_promotions++; + CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p", + __func__, va, pmap); + + PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n", + __func__, pmap, va, npte1, pte1_load(pte1p), pte1p)); +} + +/* + * Zero L2 page table page. + */ +static __inline void +pmap_clear_pt2(pt2_entry_t *fpte2p) +{ + pt2_entry_t *pte2p; + + for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) + pte2_clear(pte2p); + +} + +/* + * Removes a 1MB page mapping from the kernel pmap. + */ +static void +pmap_remove_kernel_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va) +{ + vm_page_t m; + uint32_t pte1_idx; + pt2_entry_t *fpte2p; + vm_paddr_t pt2_pa; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + m = pmap_pt2_page(pmap, va); + if (m == NULL) + /* + * QQQ: Is this function called only on promoted pte1? + * We certainly do section mappings directly + * (without promotion) in kernel !!! + */ + panic("%s: missing pt2 page", __func__); + + pte1_idx = pte1_index(va); + + /* + * Initialize the L2 page table. + */ + fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx); + pmap_clear_pt2(fpte2p); + + /* + * Remove the mapping. + */ + pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(m), pte1_idx); + pmap_kenter_pte1(va, PTE1_LINK(pt2_pa)); + + /* + * QQQ: We do not need to invalidate PT2MAP mapping + * as we did not change it. I.e. the L2 page table page + * was and still is mapped the same way. 
+ */ +} + +/* + * Do the things to unmap a section in a process + */ +static void +pmap_remove_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva, + struct spglist *free) +{ + pt1_entry_t opte1; + struct md_page *pvh; + vm_offset_t eva, va; + vm_page_t m; + + PDEBUG(6, printf("%s(%p): va %#x pte1 %#x at %p\n", __func__, pmap, sva, + pte1_load(pte1p), pte1p)); + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((sva & PTE1_OFFSET) == 0, + ("%s: sva is not 1mpage aligned", __func__)); + + opte1 = pte1_load_clear(pte1p); + if (pte1_is_wired(opte1)) + pmap->pm_stats.wired_count -= PTE1_SIZE / PAGE_SIZE; + + /* + * If the mapping was global, invalidate it even if given pmap + * is not active (kernel_pmap is active always). The mapping should + * occupy one and only TLB entry. So, pmap_tlb_flush() called + * with aligned address should be sufficient. + */ + if (pte1_is_global(opte1)) + tlb_flush(sva); + pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE; + if (pte1_is_managed(opte1)) { + pvh = pa_to_pvh(pte1_pa(opte1)); + pmap_pvh_free(pvh, pmap, sva); + eva = sva + PTE1_SIZE; + for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1)); + va < eva; va += PAGE_SIZE, m++) { + if (pte1_is_dirty(opte1)) + vm_page_dirty(m); + if (opte1 & PTE1_A) + vm_page_aflag_set(m, PGA_REFERENCED); + if (TAILQ_EMPTY(&m->md.pv_list) && + TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } + if (pmap == kernel_pmap) { + /* + * L2 page table(s) can't be removed from kernel map as + * kernel counts on it (stuff around pmap_growkernel()). + */ + pmap_remove_kernel_pte1(pmap, pte1p, sva); + } else { + /* + * Get associated L2 page table page. + * It's possible that the page was never allocated. + */ + m = pmap_pt2_page(pmap, sva); + if (m != NULL) + pmap_unwire_pt2_all(pmap, sva, m, free); + } +} + +/* + * Fills L2 page table page with mappings to consecutive physical pages. 
+ */ +static __inline void +pmap_fill_pt2(pt2_entry_t *fpte2p, pt2_entry_t npte2) +{ + pt2_entry_t *pte2p; + + for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) { + pte2_store(pte2p, npte2); + npte2 += PTE2_SIZE; + } +} + +/* + * Tries to demote a 1MB page mapping. If demotion fails, the + * 1MB page mapping is invalidated. + */ +static boolean_t +pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va) +{ + pt1_entry_t opte1, npte1; + pt2_entry_t *fpte2p, npte2; + vm_paddr_t pt2pg_pa, pt2_pa; + vm_page_t m; + struct spglist free; + uint32_t pte1_idx, isnew = 0; + + PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__, + pmap, va, pte1_load(pte1p), pte1p)); + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + opte1 = pte1_load(pte1p); + KASSERT(pte1_is_section(opte1), ("%s: opte1 not a section", __func__)); + + if ((opte1 & PTE1_A) == 0 || (m = pmap_pt2_page(pmap, va)) == NULL) { + KASSERT(!pte1_is_wired(opte1), + ("%s: PT2 page for a wired mapping is missing", __func__)); + + /* + * Invalidate the 1MB page mapping and return + * "failure" if the mapping was never accessed or the + * allocation of the new page table page fails. + */ + if ((opte1 & PTE1_A) == 0 || (m = vm_page_alloc(NULL, + pte1_index(va) & ~PT2PG_MASK, VM_ALLOC_NOOBJ | + VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { + SLIST_INIT(&free); + pmap_remove_pte1(pmap, pte1p, pte1_trunc(va), &free); + pmap_tlb_flush(pmap, pte1_trunc(va)); + pmap_free_zero_pages(&free); + CTR3(KTR_PMAP, "%s: failure for va %#x in pmap %p", + __func__, va, pmap); + return (FALSE); + } + if (va < VM_MAXUSER_ADDRESS) + pmap->pm_stats.resident_count++; + + isnew = 1; + + /* + * We init all L2 page tables in the page even if + * we are going to change everything for one L2 page + * table in a while. + */ + pt2pg_pa = pmap_pt2pg_init(pmap, va, m); + } else { + if (va < VM_MAXUSER_ADDRESS) { + if (pt2_is_empty(m, va)) + isnew = 1; /* Demoting section w/o promotion. 
*/ +#ifdef INVARIANTS + else + KASSERT(pt2_is_full(m, va), ("%s: bad PT2 wire" + " count %u", __func__, + pt2_wirecount_get(m, pte1_index(va)))); +#endif + } + } + + pt2pg_pa = VM_PAGE_TO_PHYS(m); + pte1_idx = pte1_index(va); + /* + * If the pmap is current, then the PT2MAP can provide access to + * the page table page (promoted L2 page tables are not unmapped). + * Otherwise, temporarily map the L2 page table page (m) into + * the kernel's address space at either PADDR1 or PADDR2. + * + * Note that L2 page table size is not equal to PAGE_SIZE. + */ + if (pmap_is_current(pmap)) + fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx); + else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) { + if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) { + pte2_store(PMAP1, PTE2_KPT(pt2pg_pa)); +#ifdef SMP + PMAP1cpu = PCPU_GET(cpuid); +#endif + tlb_flush_local((vm_offset_t)PADDR1); + PMAP1changed++; + } else +#ifdef SMP + if (PMAP1cpu != PCPU_GET(cpuid)) { + PMAP1cpu = PCPU_GET(cpuid); + tlb_flush_local((vm_offset_t)PADDR1); + PMAP1changedcpu++; + } else +#endif + PMAP1unchanged++; + fpte2p = page_pt2((vm_offset_t)PADDR1, pte1_idx); + } else { + mtx_lock(&PMAP2mutex); + if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) { + pte2_store(PMAP2, PTE2_KPT(pt2pg_pa)); + tlb_flush((vm_offset_t)PADDR2); + } + fpte2p = page_pt2((vm_offset_t)PADDR2, pte1_idx); + } + pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx); + npte1 = PTE1_LINK(pt2_pa); + + KASSERT((opte1 & PTE1_A) != 0, + ("%s: opte1 is missing PTE1_A", __func__)); + KASSERT((opte1 & (PTE1_NM | PTE1_RO)) != PTE1_NM, + ("%s: opte1 has PTE1_NM", __func__)); + + /* + * Get pte2 from pte1 format. + */ + npte2 = pte1_pa(opte1) | ATTR_TO_L2(opte1) | PTE2_V; + + /* + * If the L2 page table page is new, initialize it. If the mapping + * has changed attributes, update the page table entries. 
+ */ + if (isnew != 0) { + pt2_wirecount_set(m, pte1_idx, NPTE2_IN_PT2); + pmap_fill_pt2(fpte2p, npte2); + } else if ((pte2_load(fpte2p) & PTE2_PROMOTE) != + (npte2 & PTE2_PROMOTE)) + pmap_fill_pt2(fpte2p, npte2); + + KASSERT(pte2_pa(pte2_load(fpte2p)) == pte2_pa(npte2), + ("%s: fpte2p and npte2 map different physical addresses", + __func__)); + + if (fpte2p == PADDR2) + mtx_unlock(&PMAP2mutex); + + /* + * Demote the mapping. This pmap is locked. The old PTE1 has + * PTE1_A set. If the old PTE1 has not PTE1_RO set, it also + * has not PTE1_NM set. Thus, there is no danger of a race with + * another processor changing the setting of PTE1_A and/or PTE1_NM + * between the read above and the store below. + */ + if (pmap == kernel_pmap) + pmap_kenter_pte1(va, npte1); + else + pte1_store(pte1p, npte1); + + /* + * Flush old big mapping. The mapping should occupy one and only + * TLB entry. So, pmap_tlb_flush() called with aligned address + * should be sufficient. + */ + pmap_tlb_flush(pmap, pte1_trunc(va)); + + /* + * Demote the pv entry. This depends on the earlier demotion + * of the mapping. Specifically, the (re)creation of a per- + * page pv entry might trigger the execution of pmap_pv_reclaim(), + * which might reclaim a newly (re)created per-page pv entry + * and destroy the associated mapping. In order to destroy + * the mapping, the PTE1 must have already changed from mapping + * the 1mpage to referencing the page table page. + */ + if (pte1_is_managed(opte1)) + pmap_pv_demote_pte1(pmap, va, pte1_pa(opte1)); + + pmap_pte1_demotions++; + CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p", + __func__, va, pmap); + + PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n", + __func__, pmap, va, npte1, pte1_load(pte1p), pte1p)); + return (TRUE); +} + +/* + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. 
+ * + * If specified, the page will be wired down, meaning + * that the related pte can not be reclaimed. + * + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. + */ +int +pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, + u_int flags, int8_t psind) +{ + pt1_entry_t *pte1p; + pt2_entry_t *pte2p; + pt2_entry_t npte2, opte2; + pv_entry_t pv; + vm_paddr_t opa, pa; + vm_page_t mpte2, om; + boolean_t wired; + + va = trunc_page(va); + mpte2 = NULL; + wired = (flags & PMAP_ENTER_WIRED) != 0; + + KASSERT(va <= vm_max_kernel_address, ("%s: toobig", __func__)); + KASSERT(va < UPT2V_MIN_ADDRESS || va >= UPT2V_MAX_ADDRESS, + ("%s: invalid to pmap_enter page table pages (va: 0x%x)", __func__, + va)); + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) + VM_OBJECT_ASSERT_LOCKED(m->object); + + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + sched_pin(); + + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < VM_MAXUSER_ADDRESS) { + mpte2 = pmap_allocpte2(pmap, va, flags); + if (mpte2 == NULL) { + KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0, + ("pmap_allocpte2 failed with sleep allowed")); + sched_unpin(); + rw_wunlock(&pvh_global_lock); + PMAP_UNLOCK(pmap); + return (KERN_RESOURCE_SHORTAGE); + } + } + pte1p = pmap_pte1(pmap, va); + if (pte1_is_section(pte1_load(pte1p))) + panic("%s: attempted on 1MB page", __func__); + pte2p = pmap_pte2_quick(pmap, va); + if (pte2p == NULL) + panic("%s: invalid L1 page table entry va=%#x", __func__, va); + + om = NULL; + pa = VM_PAGE_TO_PHYS(m); + opte2 = pte2_load(pte2p); + opa = pte2_pa(opte2); + /* + * Mapping has not changed, must be protection or wiring change. + */ + if (pte2_is_valid(opte2) && (opa == pa)) { + /* + * Wiring change, just update stats. 
We don't worry about + * wiring PT2 pages as they remain resident as long as there + * are valid mappings in them. Hence, if a user page is wired, + * the PT2 page will be also. + */ + if (wired && !pte2_is_wired(opte2)) + pmap->pm_stats.wired_count++; + else if (!wired && pte2_is_wired(opte2)) + pmap->pm_stats.wired_count--; + + /* + * Remove extra pte2 reference + */ + if (mpte2) + pt2_wirecount_dec(mpte2, pte1_index(va)); + if (pte2_is_managed(opte2)) + om = m; + goto validate; + } + + /* + * QQQ: We think that changing physical address on writeable mapping + * is not safe. Well, maybe on kernel address space with correct + * locking, it can make a sense. However, we have no idea why + * anyone should do that on user address space. Are we wrong? + */ + KASSERT((opa == 0) || (opa == pa) || + !pte2_is_valid(opte2) || ((opte2 & PTE2_RO) != 0), + ("%s: pmap %p va %#x(%#x) opa %#x pa %#x - gotcha %#x %#x!", + __func__, pmap, va, opte2, opa, pa, flags, prot)); + + pv = NULL; + + /* + * Mapping has changed, invalidate old range and fall through to + * handle validating new mapping. + */ + if (opa) { + if (pte2_is_wired(opte2)) + pmap->pm_stats.wired_count--; + if (pte2_is_managed(opte2)) { + om = PHYS_TO_VM_PAGE(opa); + pv = pmap_pvh_remove(&om->md, pmap, va); + } + /* + * Remove extra pte2 reference + */ + if (mpte2 != NULL) + pt2_wirecount_dec(mpte2, va >> PTE1_SHIFT); + } else + pmap->pm_stats.resident_count++; + + /* + * Enter on the PV list if part of our managed memory. + */ + if ((m->oflags & VPO_UNMANAGED) == 0) { + KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, + ("%s: managed mapping within the clean submap", __func__)); + if (pv == NULL) + pv = get_pv_entry(pmap, FALSE); + pv->pv_va = va; + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + } else if (pv != NULL) + free_pv_entry(pmap, pv); + + /* + * Increment counters + */ + if (wired) + pmap->pm_stats.wired_count++; + +validate: + /* + * Now validate mapping with desired protection/wiring. 
+ */ + npte2 = PTE2(pa, PTE2_NM, m->md.pat_mode); + if (prot & VM_PROT_WRITE) { + if (pte2_is_managed(npte2)) + vm_page_aflag_set(m, PGA_WRITEABLE); + } + else + npte2 |= PTE2_RO; + if ((prot & VM_PROT_EXECUTE) == 0) + npte2 |= PTE2_NX; + if (wired) + npte2 |= PTE2_W; + if (va < VM_MAXUSER_ADDRESS) + npte2 |= PTE2_U; + if (pmap != kernel_pmap) + npte2 |= PTE2_NG; + + /* + * If the mapping or permission bits are different, we need + * to update the pte2. + * + * QQQ: Think again and again what to do + * if the mapping is going to be changed! + */ + if ((opte2 & ~(PTE2_NM | PTE2_A)) != (npte2 & ~(PTE2_NM | PTE2_A))) { + /* + * Sync icache if exec permission and attribute PTE2_ATTR_WB_WA + * is set. Do it now, before the mapping is stored and made + * valid for hardware table walk. If done later, there is a race + * for other threads of current process in lazy loading case. + * + * QQQ: (1) Does it exist any better way where + * or how to sync icache? + * (2) Now, we do it on a page basis. + */ + if ((prot & VM_PROT_EXECUTE) && + (m->md.pat_mode == PTE2_ATTR_WB_WA) && + ((opa != pa) || (opte2 & PTE2_NX))) + cache_icache_sync_fresh(va, pa, PAGE_SIZE); + + npte2 |= PTE2_A; + if (flags & VM_PROT_WRITE) + npte2 &= ~PTE2_NM; + if (opte2 & PTE2_V) { + /* Change mapping with break-before-make approach. */ + opte2 = pte2_load_clear(pte2p); + pmap_tlb_flush(pmap, va); + pte2_store(pte2p, npte2); + if (opte2 & PTE2_A) { + if (pte2_is_managed(opte2)) + vm_page_aflag_set(om, PGA_REFERENCED); + } + if (pte2_is_dirty(opte2)) { + if (pte2_is_managed(opte2)) + vm_page_dirty(om); + } + if (pte2_is_managed(opte2) && + TAILQ_EMPTY(&om->md.pv_list) && + ((om->flags & PG_FICTITIOUS) != 0 || + TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) + vm_page_aflag_clear(om, PGA_WRITEABLE); + } else + pte2_store(pte2p, npte2); + } +#if 0 + else { + /* + * QQQ: In time when both access and not modified bits are + * emulated by software, this should not happen. 
Some + * analysis is needed, if this really happens. Missing + * tlb flush somewhere could be the reason. + */ + panic("%s: pmap %p va %#x opte2 %x npte2 %x !!", __func__, pmap, + va, opte2, npte2); + } +#endif + /* + * If both the L2 page table page and the reservation are fully + * populated, then attempt promotion. + */ + if ((mpte2 == NULL || pt2_is_full(mpte2, va)) && + sp_enabled && (m->flags & PG_FICTITIOUS) == 0 && + vm_reserv_level_iffullpop(m) == 0) + pmap_promote_pte1(pmap, pte1p, va); + sched_unpin(); + rw_wunlock(&pvh_global_lock); + PMAP_UNLOCK(pmap); + return (KERN_SUCCESS); +} + +/* + * Do the things to unmap a page in a process. + */ +static int +pmap_remove_pte2(pmap_t pmap, pt2_entry_t *pte2p, vm_offset_t va, + struct spglist *free) +{ + pt2_entry_t opte2; + vm_page_t m; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + opte2 = pte2_load_clear(pte2p); + KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %#x not link pte2 %#x", + __func__, pmap, va, opte2)); + if (opte2 & PTE2_W) + pmap->pm_stats.wired_count -= 1; + /* + * If the mapping was global, invalidate it even if given pmap + * is not active (kernel_pmap is active always). + */ + if (pte2_is_global(opte2)) + tlb_flush(va); + pmap->pm_stats.resident_count -= 1; + if (pte2_is_managed(opte2)) { + m = PHYS_TO_VM_PAGE(pte2_pa(opte2)); + if (pte2_is_dirty(opte2)) + vm_page_dirty(m); + if (opte2 & PTE2_A) + vm_page_aflag_set(m, PGA_REFERENCED); + pmap_remove_entry(pmap, m, va); + } + return (pmap_unuse_pt2(pmap, va, free)); +} + +/* + * Remove a single page from a process address space. 
+ */ +static void +pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free) +{ + pt2_entry_t *pte2p; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + KASSERT(curthread->td_pinned > 0, + ("%s: curthread not pinned", __func__)); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + if ((pte2p = pmap_pte2_quick(pmap, va)) == NULL || + !pte2_is_valid(pte2_load(pte2p))) + return; + pmap_remove_pte2(pmap, pte2p, va, free); + pmap_tlb_flush(pmap, va); +} + +/* + * Remove the given range of addresses from the specified map. + * + * It is assumed that the start and end are properly + * rounded to the page size. + */ +void +pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + vm_offset_t nextva; + pt1_entry_t *pte1p, pte1; + pt2_entry_t *pte2p, pte2; + struct spglist free; + int anyvalid; + + /* + * Perform an unsynchronized read. This is, however, safe. + */ + if (pmap->pm_stats.resident_count == 0) + return; + + anyvalid = 0; + SLIST_INIT(&free); + + rw_wlock(&pvh_global_lock); + sched_pin(); + PMAP_LOCK(pmap); + + /* + * Special handling of removing one page. A very common + * operation and easy to short circuit some code. + */ + if (sva + PAGE_SIZE == eva) { + pte1 = pte1_load(pmap_pte1(pmap, sva)); + if (pte1_is_link(pte1)) { + pmap_remove_page(pmap, sva, &free); + goto out; + } + } + + for (; sva < eva; sva = nextva) { + /* + * Calculate address for next L2 page table. + */ + nextva = pte1_trunc(sva + PTE1_SIZE); + if (nextva < sva) + nextva = eva; + if (pmap->pm_stats.resident_count == 0) + break; + + pte1p = pmap_pte1(pmap, sva); + pte1 = pte1_load(pte1p); + + /* + * Weed out invalid mappings. Note: we assume that the L1 page + * table is always allocated, and in kernel virtual. + */ + if (pte1 == 0) + continue; + + if (pte1_is_section(pte1)) { + /* + * Are we removing the entire large page? If not, + * demote the mapping and fall through. 
+ */ + if (sva + PTE1_SIZE == nextva && eva >= nextva) { + /* + * The TLB entry for global mapping is + * invalidated by pmap_remove_pte1(). + */ + if (!pte1_is_global(pte1)) + anyvalid = 1; + pmap_remove_pte1(pmap, pte1p, sva, &free); + continue; + } else if (!pmap_demote_pte1(pmap, pte1p, sva)) { + /* The large page mapping was destroyed. */ + continue; + } +#ifdef INVARIANTS + else { + /* Update pte1 after demotion. */ + pte1 = pte1_load(pte1p); + } +#endif + } + + KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p" + " is not link", __func__, pmap, sva, pte1, pte1p)); + + /* + * Limit our scan to either the end of the va represented + * by the current L2 page table page, or to the end of the + * range being removed. + */ + if (nextva > eva) + nextva = eva; + + for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; + pte2p++, sva += PAGE_SIZE) { + pte2 = pte2_load(pte2p); + if (!pte2_is_valid(pte2)) + continue; + + /* + * The TLB entry for global mapping is invalidated + * by pmap_remove_pte2(). + */ + if (!pte2_is_global(pte2)) + anyvalid = 1; + if (pmap_remove_pte2(pmap, pte2p, sva, &free)) + break; + } + } +out: + sched_unpin(); + if (anyvalid) + pmap_tlb_flush_ng(pmap); + rw_wunlock(&pvh_global_lock); + PMAP_UNLOCK(pmap); + pmap_free_zero_pages(&free); +} + +/* + * Routine: pmap_remove_all + * Function: + * Removes this physical page from + * all physical maps in which it resides. + * Reflects back modify bits to the pager. + * + * Notes: + * Original versions of this routine were very + * inefficient because they iteratively called + * pmap_remove (slow...) 
+ */ + +void +pmap_remove_all(vm_page_t m) +{ + struct md_page *pvh; + pv_entry_t pv; + pmap_t pmap; + pt2_entry_t *pte2p, opte2; + pt1_entry_t *pte1p; + vm_offset_t va; + struct spglist free; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("%s: page %p is not managed", __func__, m)); + SLIST_INIT(&free); + rw_wlock(&pvh_global_lock); + sched_pin(); + if ((m->flags & PG_FICTITIOUS) != 0) + goto small_mappings; + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { + va = pv->pv_va; + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1p = pmap_pte1(pmap, va); + (void)pmap_demote_pte1(pmap, pte1p, va); + PMAP_UNLOCK(pmap); + } +small_mappings: + while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pmap->pm_stats.resident_count--; + pte1p = pmap_pte1(pmap, pv->pv_va); + KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found " + "a 1mpage in page %p's pv list", __func__, m)); + pte2p = pmap_pte2_quick(pmap, pv->pv_va); + opte2 = pte2_load_clear(pte2p); + KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %x zero pte2", + __func__, pmap, pv->pv_va)); + if (pte2_is_wired(opte2)) + pmap->pm_stats.wired_count--; + if (opte2 & PTE2_A) + vm_page_aflag_set(m, PGA_REFERENCED); + + /* + * Update the vm_page_t clean and reference bits. + */ + if (pte2_is_dirty(opte2)) + vm_page_dirty(m); + pmap_unuse_pt2(pmap, pv->pv_va, &free); + pmap_tlb_flush(pmap, pv->pv_va); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + free_pv_entry(pmap, pv); + PMAP_UNLOCK(pmap); + } + vm_page_aflag_clear(m, PGA_WRITEABLE); + sched_unpin(); + rw_wunlock(&pvh_global_lock); + pmap_free_zero_pages(&free); +} + +/* + * Just subroutine for pmap_remove_pages() to reasonably satisfy + * good coding style, a.k.a. 80 character line width limit hell. 
+ */ +static __inline void +pmap_remove_pte1_quick(pmap_t pmap, pt1_entry_t pte1, pv_entry_t pv, + struct spglist *free) +{ + vm_paddr_t pa; + vm_page_t m, mt, mpt2pg; + struct md_page *pvh; + + pa = pte1_pa(pte1); + m = PHYS_TO_VM_PAGE(pa); + + KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x", + __func__, m, m->phys_addr, pa)); + KASSERT((m->flags & PG_FICTITIOUS) != 0 || + m < &vm_page_array[vm_page_array_size], + ("%s: bad pte1 %#x", __func__, pte1)); + + if (pte1_is_dirty(pte1)) { + for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++) + vm_page_dirty(mt); + } + + pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE; + pvh = pa_to_pvh(pa); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + if (TAILQ_EMPTY(&pvh->pv_list)) { + for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++) + if (TAILQ_EMPTY(&mt->md.pv_list)) + vm_page_aflag_clear(mt, PGA_WRITEABLE); + } + mpt2pg = pmap_pt2_page(pmap, pv->pv_va); + if (mpt2pg != NULL) + pmap_unwire_pt2_all(pmap, pv->pv_va, mpt2pg, free); +} + +/* + * Just subroutine for pmap_remove_pages() to reasonably satisfy + * good coding style, a.k.a. 80 character line width limit hell. 
+ */ +static __inline void +pmap_remove_pte2_quick(pmap_t pmap, pt2_entry_t pte2, pv_entry_t pv, + struct spglist *free) +{ + vm_paddr_t pa; + vm_page_t m; + struct md_page *pvh; + + pa = pte2_pa(pte2); + m = PHYS_TO_VM_PAGE(pa); + + KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x", + __func__, m, m->phys_addr, pa)); + KASSERT((m->flags & PG_FICTITIOUS) != 0 || + m < &vm_page_array[vm_page_array_size], + ("%s: bad pte2 %#x", __func__, pte2)); + + if (pte2_is_dirty(pte2)) + vm_page_dirty(m); + + pmap->pm_stats.resident_count--; + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(pa); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + pmap_unuse_pt2(pmap, pv->pv_va, free); +} + +/* + * Remove all pages from specified address space this aids process + * exit speeds. Also, this code is special cased for current process + * only, but can have the more generic (and slightly slower) mode enabled. + * This is much faster than pmap_remove in the case of running down + * an entire address space. 
+ */ +void +pmap_remove_pages(pmap_t pmap) +{ + pt1_entry_t *pte1p, pte1; + pt2_entry_t *pte2p, pte2; + pv_entry_t pv; + struct pv_chunk *pc, *npc; + struct spglist free; + int field, idx; + int32_t bit; + uint32_t inuse, bitmask; + boolean_t allfree; + + if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { + printf("warning: %s called with non-current pmap\n", __func__); + return; + } + SLIST_INIT(&free); + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + sched_pin(); + TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { + KASSERT(pc->pc_pmap == pmap, ("%s: wrong pmap %p %p", + __func__, pmap, pc->pc_pmap)); + allfree = TRUE; + for (field = 0; field < _NPCM; field++) { + inuse = (~(pc->pc_map[field])) & pc_freemask[field]; + while (inuse != 0) { + bit = ffs(inuse) - 1; + bitmask = 1UL << bit; + idx = field * 32 + bit; + pv = &pc->pc_pventry[idx]; + inuse &= ~bitmask; + + /* + * Note that we cannot remove wired pages + * from a process' mapping at this time + */ + pte1p = pmap_pte1(pmap, pv->pv_va); + pte1 = pte1_load(pte1p); + if (pte1_is_section(pte1)) { + if (pte1_is_wired(pte1)) { + allfree = FALSE; + continue; + } + pte1_clear(pte1p); + pmap_remove_pte1_quick(pmap, pte1, pv, + &free); + } + else if (pte1_is_link(pte1)) { + pte2p = pt2map_entry(pv->pv_va); + pte2 = pte2_load(pte2p); + + if (!pte2_is_valid(pte2)) { + printf("%s: pmap %p va %#x " + "pte2 %#x\n", __func__, + pmap, pv->pv_va, pte2); + panic("bad pte2"); + } + + if (pte2_is_wired(pte2)) { + allfree = FALSE; + continue; + } + pte2_clear(pte2p); + pmap_remove_pte2_quick(pmap, pte2, pv, + &free); + } else { + printf("%s: pmap %p va %#x pte1 %#x\n", + __func__, pmap, pv->pv_va, pte1); + panic("bad pte1"); + } + + /* Mark free */ + PV_STAT(pv_entry_frees++); + PV_STAT(pv_entry_spare++); + pv_entry_count--; + pc->pc_map[field] |= bitmask; + } + } + if (allfree) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + free_pv_chunk(pc); + } + } + sched_unpin(); + pmap_tlb_flush_ng(pmap); + 
rw_wunlock(&pvh_global_lock); + PMAP_UNLOCK(pmap); + pmap_free_zero_pages(&free); +} + +/* + * This code makes some *MAJOR* assumptions: + * 1. Current pmap & pmap exists. + * 2. Not wired. + * 3. Read access. + * 4. No L2 page table pages. + * but is *MUCH* faster than pmap_enter... + */ +static vm_page_t +pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, vm_page_t mpt2pg) +{ + pt2_entry_t *pte2p, pte2; + vm_paddr_t pa; + struct spglist free; + uint32_t l2prot; + + KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || + (m->oflags & VPO_UNMANAGED) != 0, + ("%s: managed mapping within the clean submap", __func__)); + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * In the case that a L2 page table page is not + * resident, we are creating it here. + */ + if (va < VM_MAXUSER_ADDRESS) { + u_int pte1_idx; + pt1_entry_t pte1, *pte1p; + vm_paddr_t pt2_pa; + + /* + * Get L1 page table things. + */ + pte1_idx = pte1_index(va); + pte1p = pmap_pte1(pmap, va); + pte1 = pte1_load(pte1p); + + if (mpt2pg && (mpt2pg->pindex == (pte1_idx & ~PT2PG_MASK))) { + /* + * Each of NPT2_IN_PG L2 page tables on the page can + * come here. Make sure that associated L1 page table + * link is established. + * + * QQQ: It comes that we don't establish all links to + * L2 page tables for newly allocated L2 page + * tables page. + */ + KASSERT(!pte1_is_section(pte1), + ("%s: pte1 %#x is section", __func__, pte1)); + if (!pte1_is_link(pte1)) { + pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(mpt2pg), + pte1_idx); + pte1_store(pte1p, PTE1_LINK(pt2_pa)); + } + pt2_wirecount_inc(mpt2pg, pte1_idx); + } else { + /* + * If the L2 page table page is mapped, we just + * increment the hold count, and activate it. 
+ */ + if (pte1_is_section(pte1)) { + return (NULL); + } else if (pte1_is_link(pte1)) { + mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); + pt2_wirecount_inc(mpt2pg, pte1_idx); + } else { + mpt2pg = _pmap_allocpte2(pmap, va, + PMAP_ENTER_NOSLEEP); + if (mpt2pg == NULL) + return (NULL); + } + } + } else { + mpt2pg = NULL; + } + + /* + * This call to pt2map_entry() makes the assumption that we are + * entering the page into the current pmap. In order to support + * quick entry into any pmap, one would likely use pmap_pte2_quick(). + * But that isn't as quick as pt2map_entry(). + */ + pte2p = pt2map_entry(va); + pte2 = pte2_load(pte2p); + if (pte2_is_valid(pte2)) { + if (mpt2pg != NULL) { + /* + * Remove extra pte2 reference + */ + pt2_wirecount_dec(mpt2pg, pte1_index(va)); + mpt2pg = NULL; + } + return (NULL); + } + + /* + * Enter on the PV list if part of our managed memory. + */ + if ((m->oflags & VPO_UNMANAGED) == 0 && + !pmap_try_insert_pv_entry(pmap, va, m)) { + if (mpt2pg != NULL) { + SLIST_INIT(&free); + if (pmap_unwire_pt2(pmap, va, mpt2pg, &free)) { + pmap_tlb_flush(pmap, va); + pmap_free_zero_pages(&free); + } + + mpt2pg = NULL; + } + return (NULL); + } + + /* + * Increment counters + */ + pmap->pm_stats.resident_count++; + + /* + * Now validate mapping with RO protection + */ + pa = VM_PAGE_TO_PHYS(m); + l2prot = PTE2_RO | PTE2_NM; + if (va < VM_MAXUSER_ADDRESS) + l2prot |= PTE2_U | PTE2_NG; + if ((prot & VM_PROT_EXECUTE) == 0) + l2prot |= PTE2_NX; + else if (m->md.pat_mode == PTE2_ATTR_WB_WA) { + /* + * Sync icache if exec permission and attribute PTE2_ATTR_WB_WA + * is set. QQQ: For more info, see comments in pmap_enter(). 
+ */ + cache_icache_sync_fresh(va, pa, PAGE_SIZE); + } + pte2_store(pte2p, PTE2(pa, l2prot, m->md.pat_mode)); + + return (mpt2pg); +} + +void +pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); + rw_wunlock(&pvh_global_lock); + PMAP_UNLOCK(pmap); +} + +/* + * Tries to create 1MB page mapping. Returns TRUE if successful and + * FALSE otherwise. Fails if (1) a page table page cannot be allocated without + * blocking, (2) a mapping already exists at the specified virtual address, or + * (3) a pv entry cannot be allocated without reclaiming another pv entry. + */ +static boolean_t +pmap_enter_pte1(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + pt1_entry_t *pte1p; + vm_paddr_t pa; + uint32_t l1prot; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + pte1p = pmap_pte1(pmap, va); + if (pte1_is_valid(pte1_load(pte1p))) { + CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p", __func__, + va, pmap); + return (FALSE); + } + if ((m->oflags & VPO_UNMANAGED) == 0) { + /* + * Abort this mapping if its PV entry could not be created. + */ + if (!pmap_pv_insert_pte1(pmap, va, VM_PAGE_TO_PHYS(m))) { + CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p", + __func__, va, pmap); + return (FALSE); + } + } + /* + * Increment counters. + */ + pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE; + + /* + * Map the section. + * + * QQQ: Why VM_PROT_WRITE is not evaluated and the mapping is + * made readonly? + */ + pa = VM_PAGE_TO_PHYS(m); + l1prot = PTE1_RO | PTE1_NM; + if (va < VM_MAXUSER_ADDRESS) + l1prot |= PTE1_U | PTE1_NG; + if ((prot & VM_PROT_EXECUTE) == 0) + l1prot |= PTE1_NX; + else if (m->md.pat_mode == PTE2_ATTR_WB_WA) { + /* + * Sync icache if exec permission and attribute PTE2_ATTR_WB_WA + * is set. QQQ: For more info, see comments in pmap_enter(). 
+ */ + cache_icache_sync_fresh(va, pa, PTE1_SIZE); + } + pte1_store(pte1p, PTE1(pa, l1prot, ATTR_TO_L1(m->md.pat_mode))); + + pmap_pte1_mappings++; + CTR3(KTR_PMAP, "%s: success for va %#lx in pmap %p", __func__, va, + pmap); + return (TRUE); +} + +/* + * Maps a sequence of resident pages belonging to the same object. + * The sequence begins with the given page m_start. This page is + * mapped at the given virtual address start. Each subsequent page is + * mapped at a virtual address that is offset from start by the same + * amount as the page is offset from m_start within the object. The + * last page in the sequence is the page with the largest offset from + * m_start that can be mapped at a virtual address less than the given + * virtual address end. Not every virtual page between start and end + * is mapped; only those for which a resident page exists with the + * corresponding offset from m_start are mapped. + */ +void +pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, + vm_page_t m_start, vm_prot_t prot) +{ + vm_offset_t va; + vm_page_t m, mpt2pg; + vm_pindex_t diff, psize; + + PDEBUG(6, printf("%s: pmap %p start %#x end %#x m %p prot %#x\n", + __func__, pmap, start, end, m_start, prot)); + + VM_OBJECT_ASSERT_LOCKED(m_start->object); + psize = atop(end - start); + mpt2pg = NULL; + m = m_start; + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { + va = start + ptoa(diff); + if ((va & PTE1_OFFSET) == 0 && va + PTE1_SIZE <= end && + m->psind == 1 && sp_enabled && + pmap_enter_pte1(pmap, va, m, prot)) + m = &m[PTE1_SIZE / PAGE_SIZE - 1]; + else + mpt2pg = pmap_enter_quick_locked(pmap, va, m, prot, + mpt2pg); + m = TAILQ_NEXT(m, listq); + } + rw_wunlock(&pvh_global_lock); + PMAP_UNLOCK(pmap); +} + +/* + * This code maps large physical mmap regions into the + * processor address space. Note that some shortcuts + * are taken, but the code works. 
+ */
+void
+pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
+    vm_pindex_t pindex, vm_size_t size)
+{
+	pt1_entry_t *pte1p;
+	vm_paddr_t pa, pte2_pa;
+	vm_page_t p;
+	int pat_mode;
+	u_int l1attr, l1prot;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
+	    ("%s: non-device object", __func__));
+	if ((addr & PTE1_OFFSET) == 0 && (size & PTE1_OFFSET) == 0) {
+		if (!vm_object_populate(object, pindex, pindex + atop(size)))
+			return;
+		p = vm_page_lookup(object, pindex);
+		KASSERT(p->valid == VM_PAGE_BITS_ALL,
+		    ("%s: invalid page %p", __func__, p));
+		pat_mode = p->md.pat_mode;
+
+		/*
+		 * Abort the mapping if the first page is not physically
+		 * aligned to a 1MB page boundary.
+		 */
+		pte2_pa = VM_PAGE_TO_PHYS(p);
+		if (pte2_pa & PTE1_OFFSET)
+			return;
+
+		/*
+		 * Skip the first page. Abort the mapping if the rest of
+		 * the pages are not physically contiguous or have differing
+		 * memory attributes.
+		 */
+		p = TAILQ_NEXT(p, listq);
+		for (pa = pte2_pa + PAGE_SIZE; pa < pte2_pa + size;
+		    pa += PAGE_SIZE) {
+			KASSERT(p->valid == VM_PAGE_BITS_ALL,
+			    ("%s: invalid page %p", __func__, p));
+			if (pa != VM_PAGE_TO_PHYS(p) ||
+			    pat_mode != p->md.pat_mode)
+				return;
+			p = TAILQ_NEXT(p, listq);
+		}
+
+		/*
+		 * Map using 1MB pages.
+		 *
+		 * QQQ: Well, we are mapping a section, so the same conditions
+		 * must hold as during promotion. It seems that only RW mapping
+		 * is done here, so readonly mapping must be done elsewhere.
+		 */
+		l1prot = PTE1_U | PTE1_NG | PTE1_RW | PTE1_M | PTE1_A;
+		l1attr = ATTR_TO_L1(pat_mode);
+		PMAP_LOCK(pmap);
+		for (pa = pte2_pa; pa < pte2_pa + size; pa += PTE1_SIZE) {
+			pte1p = pmap_pte1(pmap, addr);
+			if (!pte1_is_valid(pte1_load(pte1p))) {
+				pte1_store(pte1p, PTE1(pa, l1prot, l1attr));
+				pmap->pm_stats.resident_count += PTE1_SIZE /
+				    PAGE_SIZE;
+				pmap_pte1_mappings++;
+			}
+			/* Else continue on if the PTE1 is already valid.
*/ + addr += PTE1_SIZE; + } + PMAP_UNLOCK(pmap); + } +} + +/* + * Do the things to protect a 1mpage in a process. + */ +static boolean_t +pmap_protect_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva, + vm_prot_t prot) +{ + pt1_entry_t npte1, opte1; + vm_offset_t eva, va; + vm_page_t m; + boolean_t anychanged; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((sva & PTE1_OFFSET) == 0, + ("%s: sva is not 1mpage aligned", __func__)); + anychanged = FALSE; +retry: + opte1 = npte1 = pte1_load(pte1p); + if (pte1_is_managed(opte1)) { + eva = sva + PTE1_SIZE; + for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1)); + va < eva; va += PAGE_SIZE, m++) + if (pte1_is_dirty(opte1)) + vm_page_dirty(m); + } + if ((prot & VM_PROT_WRITE) == 0) + npte1 |= PTE1_RO | PTE1_NM; + if ((prot & VM_PROT_EXECUTE) == 0) + npte1 |= PTE1_NX; + + /* + * QQQ: Herein, execute permission is never set. + * It only can be cleared. So, no icache + * syncing is needed. + */ + + if (npte1 != opte1) { + if (!pte1_cmpset(pte1p, opte1, npte1)) + goto retry; + if (pte1_is_global(opte1)) + tlb_flush(sva); + else + anychanged = TRUE; + } + return (anychanged); +} + +/* + * Set the physical protection on the + * specified range of this map as requested. + */ +void +pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +{ + boolean_t anychanged, pv_lists_locked; + vm_offset_t nextva; + pt1_entry_t *pte1p, pte1; + pt2_entry_t *pte2p, opte2, npte2; + + KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); + if (prot == VM_PROT_NONE) { + pmap_remove(pmap, sva, eva); + return; + } + + if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == + (VM_PROT_WRITE | VM_PROT_EXECUTE)) + return; + + if (pmap_is_current(pmap)) + pv_lists_locked = FALSE; + else { + pv_lists_locked = TRUE; +resume: + rw_wlock(&pvh_global_lock); + sched_pin(); + } + anychanged = FALSE; + + PMAP_LOCK(pmap); + for (; sva < eva; sva = nextva) { + /* + * Calculate address for next L2 page table. 
+ */ + nextva = pte1_trunc(sva + PTE1_SIZE); + if (nextva < sva) + nextva = eva; + + pte1p = pmap_pte1(pmap, sva); + pte1 = pte1_load(pte1p); + + /* + * Weed out invalid mappings. Note: we assume that L1 page + * page table is always allocated, and in kernel virtual. + */ + if (pte1 == 0) + continue; + + if (pte1_is_section(pte1)) { + /* + * Are we protecting the entire large page? If not, + * demote the mapping and fall through. + */ + if (sva + PTE1_SIZE == nextva && eva >= nextva) { + /* + * The TLB entry for global mapping is + * invalidated by pmap_protect_pte1(). + */ + if (pmap_protect_pte1(pmap, pte1p, sva, prot)) + anychanged = TRUE; + continue; + } else { + if (!pv_lists_locked) { + pv_lists_locked = TRUE; + if (!rw_try_wlock(&pvh_global_lock)) { + if (anychanged) + pmap_tlb_flush_ng(pmap); + PMAP_UNLOCK(pmap); + goto resume; + } + sched_pin(); + } + if (!pmap_demote_pte1(pmap, pte1p, sva)) { + /* + * The large page mapping + * was destroyed. + */ + continue; + } +#ifdef INVARIANTS + else { + /* Update pte1 after demotion */ + pte1 = pte1_load(pte1p); + } +#endif + } + } + + KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p" + " is not link", __func__, pmap, sva, pte1, pte1p)); + + /* + * Limit our scan to either the end of the va represented + * by the current L2 page table page, or to the end of the + * range being protected. + */ + if (nextva > eva) + nextva = eva; + + for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++, + sva += PAGE_SIZE) { + vm_page_t m; +retry: + opte2 = npte2 = pte2_load(pte2p); + if (!pte2_is_valid(opte2)) + continue; + + if ((prot & VM_PROT_WRITE) == 0) { + if (pte2_is_managed(opte2) && + pte2_is_dirty(opte2)) { + m = PHYS_TO_VM_PAGE(pte2_pa(opte2)); + vm_page_dirty(m); + } + npte2 |= PTE2_RO | PTE2_NM; + } + + if ((prot & VM_PROT_EXECUTE) == 0) + npte2 |= PTE2_NX; + + /* + * QQQ: Herein, execute permission is never set. + * It only can be cleared. So, no icache + * syncing is needed. 
+ */ + + if (npte2 != opte2) { + + if (!pte2_cmpset(pte2p, opte2, npte2)) + goto retry; + + if (pte2_is_global(opte2)) + tlb_flush(sva); + else + anychanged = TRUE; + } + } + } + if (anychanged) + pmap_tlb_flush_ng(pmap); + if (pv_lists_locked) { + sched_unpin(); + rw_wunlock(&pvh_global_lock); + } + PMAP_UNLOCK(pmap); +} + +/* + * pmap_pvh_wired_mappings: + * + * Return the updated number "count" of managed mappings that are wired. + */ +static int +pmap_pvh_wired_mappings(struct md_page *pvh, int count) +{ + pmap_t pmap; + pt1_entry_t pte1; + pt2_entry_t pte2; + pv_entry_t pv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + sched_pin(); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); + if (pte1_is_section(pte1)) { + if (pte1_is_wired(pte1)) + count++; + } else { + KASSERT(pte1_is_link(pte1), + ("%s: pte1 %#x is not link", __func__, pte1)); + pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); + if (pte2_is_wired(pte2)) + count++; + } + PMAP_UNLOCK(pmap); + } + sched_unpin(); + return (count); +} + +/* + * pmap_page_wired_mappings: + * + * Return the number of managed mappings to the given physical page + * that are wired. + */ +int +pmap_page_wired_mappings(vm_page_t m) +{ + int count; + + count = 0; + if ((m->oflags & VPO_UNMANAGED) != 0) + return (count); + rw_wlock(&pvh_global_lock); + count = pmap_pvh_wired_mappings(&m->md, count); + if ((m->flags & PG_FICTITIOUS) == 0) { + count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), + count); + } + rw_wunlock(&pvh_global_lock); + return (count); +} + +/* + * Returns TRUE if any of the given mappings were used to modify + * physical memory. Otherwise, returns FALSE. Both page and 1mpage + * mappings are supported. 
+ */ +static boolean_t +pmap_is_modified_pvh(struct md_page *pvh) +{ + pv_entry_t pv; + pt1_entry_t pte1; + pt2_entry_t pte2; + pmap_t pmap; + boolean_t rv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + rv = FALSE; + sched_pin(); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); + if (pte1_is_section(pte1)) { + rv = pte1_is_dirty(pte1); + } else { + KASSERT(pte1_is_link(pte1), + ("%s: pte1 %#x is not link", __func__, pte1)); + pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); + rv = pte2_is_dirty(pte2); + } + PMAP_UNLOCK(pmap); + if (rv) + break; + } + sched_unpin(); + return (rv); +} + +/* + * pmap_is_modified: + * + * Return whether or not the specified physical page was modified + * in any physical maps. + */ +boolean_t +pmap_is_modified(vm_page_t m) +{ + boolean_t rv; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("%s: page %p is not managed", __func__, m)); + + /* + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PGA_WRITEABLE + * is clear, no PTE2s can have PG_M set. + */ + VM_OBJECT_ASSERT_WLOCKED(m->object); + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + return (FALSE); + rw_wlock(&pvh_global_lock); + rv = pmap_is_modified_pvh(&m->md) || + ((m->flags & PG_FICTITIOUS) == 0 && + pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); + rw_wunlock(&pvh_global_lock); + return (rv); +} + +/* + * pmap_is_prefaultable: + * + * Return whether or not the specified virtual address is eligible + * for prefault. 
+ */ +boolean_t +pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +{ + pt1_entry_t pte1; + pt2_entry_t pte2; + boolean_t rv; + + rv = FALSE; + PMAP_LOCK(pmap); + pte1 = pte1_load(pmap_pte1(pmap, addr)); + if (pte1_is_link(pte1)) { + pte2 = pte2_load(pt2map_entry(addr)); + rv = !pte2_is_valid(pte2) ; + } + PMAP_UNLOCK(pmap); + return (rv); +} + +/* + * Returns TRUE if any of the given mappings were referenced and FALSE + * otherwise. Both page and 1mpage mappings are supported. + */ +static boolean_t +pmap_is_referenced_pvh(struct md_page *pvh) +{ + + pv_entry_t pv; + pt1_entry_t pte1; + pt2_entry_t pte2; + pmap_t pmap; + boolean_t rv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + rv = FALSE; + sched_pin(); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); + if (pte1_is_section(pte1)) { + rv = (pte1 & (PTE1_A | PTE1_V)) == (PTE1_A | PTE1_V); + } else { + pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); + rv = (pte2 & (PTE2_A | PTE2_V)) == (PTE2_A | PTE2_V); + } + PMAP_UNLOCK(pmap); + if (rv) + break; + } + sched_unpin(); + return (rv); +} + +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + boolean_t rv; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("%s: page %p is not managed", __func__, m)); + rw_wlock(&pvh_global_lock); + rv = pmap_is_referenced_pvh(&m->md) || + ((m->flags & PG_FICTITIOUS) == 0 && + pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); + rw_wunlock(&pvh_global_lock); + return (rv); +} + +#define PMAP_TS_REFERENCED_MAX 5 + +/* + * pmap_ts_referenced: + * + * Return a count of reference bits for a page, clearing those bits. + * It is not necessary for every reference bit to be cleared, but it + * is necessary that 0 only be returned when there are truly no + * reference bits set. 
+ * + * XXX: The exact number of bits to check and clear is a matter that + * should be tested and standardized at some point in the future for + * optimal aging of shared pages. + */ +int +pmap_ts_referenced(vm_page_t m) +{ + struct md_page *pvh; + pv_entry_t pv, pvf; + pmap_t pmap; + pt1_entry_t *pte1p, opte1; + pt2_entry_t *pte2p; + vm_paddr_t pa; + int rtval = 0; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("%s: page %p is not managed", __func__, m)); + pa = VM_PAGE_TO_PHYS(m); + pvh = pa_to_pvh(pa); + rw_wlock(&pvh_global_lock); + sched_pin(); + if ((m->flags & PG_FICTITIOUS) != 0 || + (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) + goto small_mappings; + pv = pvf; + do { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1p = pmap_pte1(pmap, pv->pv_va); + opte1 = pte1_load(pte1p); + if ((opte1 & PTE1_A) != 0) { + /* + * Since this reference bit is shared by 256 4KB pages, + * it should not be cleared every time it is tested. + * Apply a simple "hash" function on the physical page + * number, the virtual section number, and the pmap + * address to select one 4KB page out of the 256 + * on which testing the reference bit will result + * in clearing that bit. This function is designed + * to avoid the selection of the same 4KB page + * for every 1MB page mapping. + * + * On demotion, a mapping that hasn't been referenced + * is simply destroyed. To avoid the possibility of a + * subsequent page fault on a demoted wired mapping, + * always leave its reference bit set. Moreover, + * since the section is wired, the current state of + * its reference bit won't affect page replacement. + */ + if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PTE1_SHIFT) ^ + (uintptr_t)pmap) & (NPTE2_IN_PG - 1)) == 0 && + !pte1_is_wired(opte1)) { + pte1_clear_bit(pte1p, PTE1_A); + pmap_tlb_flush(pmap, pv->pv_va); + } + rtval++; + } + PMAP_UNLOCK(pmap); + /* Rotate the PV list if it has more than one entry. 
*/ + if (TAILQ_NEXT(pv, pv_next) != NULL) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + } + if (rtval >= PMAP_TS_REFERENCED_MAX) + goto out; + } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); +small_mappings: + if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) + goto out; + pv = pvf; + do { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1p = pmap_pte1(pmap, pv->pv_va); + KASSERT(pte1_is_link(pte1_load(pte1p)), + ("%s: not found a link in page %p's pv list", __func__, m)); + + pte2p = pmap_pte2_quick(pmap, pv->pv_va); + if ((pte2_load(pte2p) & PTE2_A) != 0) { + pte2_clear_bit(pte2p, PTE2_A); + pmap_tlb_flush(pmap, pv->pv_va); + rtval++; + } + PMAP_UNLOCK(pmap); + /* Rotate the PV list if it has more than one entry. */ + if (TAILQ_NEXT(pv, pv_next) != NULL) { + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + } + } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval < + PMAP_TS_REFERENCED_MAX); +out: + sched_unpin(); + rw_wunlock(&pvh_global_lock); + return (rtval); +} + +/* + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range + * must have the wired attribute set. In contrast, invalid mappings + * cannot have the wired attribute set, so they are ignored. + * + * The wired attribute of the page table entry is not a hardware feature, + * so there is no need to invalidate any TLB entries. 
+ */
+void
+pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	vm_offset_t nextva;
+	pt1_entry_t *pte1p, pte1;
+	pt2_entry_t *pte2p, pte2;
+	boolean_t pv_lists_locked;
+
+	if (pmap_is_current(pmap))
+		pv_lists_locked = FALSE;
+	else {
+		pv_lists_locked = TRUE;
+resume:
+		rw_wlock(&pvh_global_lock);
+		sched_pin();
+	}
+	PMAP_LOCK(pmap);
+	for (; sva < eva; sva = nextva) {
+		nextva = pte1_trunc(sva + PTE1_SIZE);
+		if (nextva < sva)
+			nextva = eva;
+
+		pte1p = pmap_pte1(pmap, sva);
+		pte1 = pte1_load(pte1p);
+
+		/*
+		 * Weed out invalid mappings. Note: we assume that the L1
+		 * page table is always allocated, and in kernel virtual.
+		 */
+		if (pte1 == 0)
+			continue;
+
+		if (pte1_is_section(pte1)) {
+			if (!pte1_is_wired(pte1))
+				panic("%s: pte1 %#x not wired", __func__, pte1);
+
+			/*
+			 * Are we unwiring the entire large page? If not,
+			 * demote the mapping and fall through.
+			 */
+			if (sva + PTE1_SIZE == nextva && eva >= nextva) {
+				pte1_clear_bit(pte1p, PTE1_W);
+				pmap->pm_stats.wired_count -= PTE1_SIZE /
+				    PAGE_SIZE;
+				continue;
+			} else {
+				if (!pv_lists_locked) {
+					pv_lists_locked = TRUE;
+					if (!rw_try_wlock(&pvh_global_lock)) {
+						PMAP_UNLOCK(pmap);
+						/* Repeat sva. */
+						goto resume;
+					}
+					sched_pin();
+				}
+				if (!pmap_demote_pte1(pmap, pte1p, sva))
+					panic("%s: demotion failed", __func__);
+#ifdef INVARIANTS
+				else {
+					/* Update pte1 after demotion */
+					pte1 = pte1_load(pte1p);
+				}
+#endif
+			}
+		}
+
+		KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
+		    " is not link", __func__, pmap, sva, pte1, pte1p));
+
+		/*
+		 * Limit our scan to either the end of the va represented
+		 * by the current L2 page table page, or to the end of the
+		 * range being unwired.
+ */ + if (nextva > eva) + nextva = eva; + + for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++, + sva += PAGE_SIZE) { + pte2 = pte2_load(pte2p); + if (!pte2_is_valid(pte2)) + continue; + if (!pte2_is_wired(pte2)) + panic("%s: pte2 %#x is missing PTE2_W", + __func__, pte2); + + /* + * PTE2_W must be cleared atomically. Although the pmap + * lock synchronizes access to PTE2_W, another processor + * could be changing PTE2_NM and/or PTE2_A concurrently. + */ + pte2_clear_bit(pte2p, PTE2_W); + pmap->pm_stats.wired_count--; + } + } + if (pv_lists_locked) { + sched_unpin(); + rw_wunlock(&pvh_global_lock); + } + PMAP_UNLOCK(pmap); +} + +/* + * Clear the write and modified bits in each of the given page's mappings. + */ +void +pmap_remove_write(vm_page_t m) +{ + struct md_page *pvh; + pv_entry_t next_pv, pv; + pmap_t pmap; + pt1_entry_t *pte1p; + pt2_entry_t *pte2p, opte2; + vm_offset_t va; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("%s: page %p is not managed", __func__, m)); + + /* + * If the page is not exclusive busied, then PGA_WRITEABLE cannot be + * set by another thread while the object is locked. Thus, + * if PGA_WRITEABLE is clear, no page table entries need updating. 
+ */ + VM_OBJECT_ASSERT_WLOCKED(m->object); + if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) + return; + rw_wlock(&pvh_global_lock); + sched_pin(); + if ((m->flags & PG_FICTITIOUS) != 0) + goto small_mappings; + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { + va = pv->pv_va; + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1p = pmap_pte1(pmap, va); + if (!(pte1_load(pte1p) & PTE1_RO)) + (void)pmap_demote_pte1(pmap, pte1p, va); + PMAP_UNLOCK(pmap); + } +small_mappings: + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1p = pmap_pte1(pmap, pv->pv_va); + KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found" + " a section in page %p's pv list", __func__, m)); + pte2p = pmap_pte2_quick(pmap, pv->pv_va); +retry: + opte2 = pte2_load(pte2p); + if (!(opte2 & PTE2_RO)) { + if (!pte2_cmpset(pte2p, opte2, + opte2 | (PTE2_RO | PTE2_NM))) + goto retry; + if (pte2_is_dirty(opte2)) + vm_page_dirty(m); + pmap_tlb_flush(pmap, pv->pv_va); + } + PMAP_UNLOCK(pmap); + } + vm_page_aflag_clear(m, PGA_WRITEABLE); + sched_unpin(); + rw_wunlock(&pvh_global_lock); +} + +/* + * Apply the given advice to the specified range of addresses within the + * given pmap. Depending on the advice, clear the referenced and/or + * modified flags in each mapping and set the mapped page's dirty field. 
+ */ +void +pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) +{ + pt1_entry_t *pte1p, opte1; + pt2_entry_t *pte2p, pte2; + vm_offset_t pdnxt; + vm_page_t m; + boolean_t anychanged, pv_lists_locked; + + if (advice != MADV_DONTNEED && advice != MADV_FREE) + return; + if (pmap_is_current(pmap)) + pv_lists_locked = FALSE; + else { + pv_lists_locked = TRUE; +resume: + rw_wlock(&pvh_global_lock); + sched_pin(); + } + anychanged = FALSE; + PMAP_LOCK(pmap); + for (; sva < eva; sva = pdnxt) { + pdnxt = pte1_trunc(sva + PTE1_SIZE); + if (pdnxt < sva) + pdnxt = eva; + pte1p = pmap_pte1(pmap, sva); + opte1 = pte1_load(pte1p); + if (!pte1_is_valid(opte1)) /* XXX */ + continue; + else if (pte1_is_section(opte1)) { + if (!pte1_is_managed(opte1)) + continue; + if (!pv_lists_locked) { + pv_lists_locked = TRUE; + if (!rw_try_wlock(&pvh_global_lock)) { + if (anychanged) + pmap_tlb_flush_ng(pmap); + PMAP_UNLOCK(pmap); + goto resume; + } + sched_pin(); + } + if (!pmap_demote_pte1(pmap, pte1p, sva)) { + /* + * The large page mapping was destroyed. + */ + continue; + } + + /* + * Unless the page mappings are wired, remove the + * mapping to a single page so that a subsequent + * access may repromote. Since the underlying L2 page + * table is fully populated, this removal never + * frees a L2 page table page. + */ + if (!pte1_is_wired(opte1)) { + pte2p = pmap_pte2_quick(pmap, sva); + KASSERT(pte2_is_valid(pte2_load(pte2p)), + ("%s: invalid PTE2", __func__)); + pmap_remove_pte2(pmap, pte2p, sva, NULL); + anychanged = TRUE; + } + } + if (pdnxt > eva) + pdnxt = eva; + for (pte2p = pmap_pte2_quick(pmap, sva); sva != pdnxt; pte2p++, + sva += PAGE_SIZE) { + pte2 = pte2_load(pte2p); + if (!pte2_is_valid(pte2) || !pte2_is_managed(pte2)) + continue; + else if (pte2_is_dirty(pte2)) { + if (advice == MADV_DONTNEED) { + /* + * Future calls to pmap_is_modified() + * can be avoided by making the page + * dirty now. 
+ */ + m = PHYS_TO_VM_PAGE(pte2_pa(pte2)); + vm_page_dirty(m); + } + pte2_set_bit(pte2p, PTE2_NM); + pte2_clear_bit(pte2p, PTE2_A); + } else if ((pte2 & PTE2_A) != 0) + pte2_clear_bit(pte2p, PTE2_A); + else + continue; + if (pte2_is_global(pte2)) + tlb_flush(sva); + else + anychanged = TRUE; + } + } + if (anychanged) + pmap_tlb_flush_ng(pmap); + if (pv_lists_locked) { + sched_unpin(); + rw_wunlock(&pvh_global_lock); + } + PMAP_UNLOCK(pmap); +} + +/* + * Clear the modify bits on the specified physical page. + */ +void +pmap_clear_modify(vm_page_t m) +{ + struct md_page *pvh; + pv_entry_t next_pv, pv; + pmap_t pmap; + pt1_entry_t *pte1p, opte1; + pt2_entry_t *pte2p, opte2; + vm_offset_t va; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("%s: page %p is not managed", __func__, m)); + VM_OBJECT_ASSERT_WLOCKED(m->object); + KASSERT(!vm_page_xbusied(m), + ("%s: page %p is exclusive busy", __func__, m)); + + /* + * If the page is not PGA_WRITEABLE, then no PTE2s can have PTE2_NM + * cleared. If the object containing the page is locked and the page + * is not exclusive busied, then PGA_WRITEABLE cannot be concurrently + * set. + */ + if ((m->flags & PGA_WRITEABLE) == 0) + return; + rw_wlock(&pvh_global_lock); + sched_pin(); + if ((m->flags & PG_FICTITIOUS) != 0) + goto small_mappings; + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { + va = pv->pv_va; + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1p = pmap_pte1(pmap, va); + opte1 = pte1_load(pte1p); + if (!(opte1 & PTE1_RO)) { + if (pmap_demote_pte1(pmap, pte1p, va) && + !pte1_is_wired(opte1)) { + /* + * Write protect the mapping to a + * single page so that a subsequent + * write access may repromote. 
+ */ + va += VM_PAGE_TO_PHYS(m) - pte1_pa(opte1); + pte2p = pmap_pte2_quick(pmap, va); + opte2 = pte2_load(pte2p); + if ((opte2 & PTE2_V)) { + pte2_set_bit(pte2p, PTE2_NM | PTE2_RO); + vm_page_dirty(m); + pmap_tlb_flush(pmap, va); + } + } + } + PMAP_UNLOCK(pmap); + } +small_mappings: + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte1p = pmap_pte1(pmap, pv->pv_va); + KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found" + " a section in page %p's pv list", __func__, m)); + pte2p = pmap_pte2_quick(pmap, pv->pv_va); + if (pte2_is_dirty(pte2_load(pte2p))) { + pte2_set_bit(pte2p, PTE2_NM); + pmap_tlb_flush(pmap, pv->pv_va); + } + PMAP_UNLOCK(pmap); + } + sched_unpin(); + rw_wunlock(&pvh_global_lock); +} + + +/* + * Sets the memory attribute for the specified page. + */ +void +pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) +{ + struct sysmaps *sysmaps; + vm_memattr_t oma; + vm_paddr_t pa; + vm_offset_t va; + + oma = m->md.pat_mode; + m->md.pat_mode = ma; + + CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d, phys: 0x%08X", __func__, m, VM_PAGE_TO_PHYS(m), oma, ma); + if ((m->flags & PG_FICTITIOUS) != 0) + return; +#if 0 + /* + * If "m" is a normal page, flush it from the cache. + * + * First, try to find an existing mapping of the page by sf + * buffer. sf_buf_invalidate_cache() modifies mapping and + * flushes the cache. + */ + if (sf_buf_invalidate_cache(m, oma)) + return; +#endif + /* + * If page is not mapped by sf buffer, map the page + * transient and do invalidation. 
+ */ + if (ma != oma) { + pa = VM_PAGE_TO_PHYS(m); + sched_pin(); + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (*sysmaps->CMAP2) + panic("%s: CMAP2 busy", __func__); + pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(pa, PTE2_AP_KRW, ma)); + va = (vm_offset_t)sysmaps->CADDR2; + tlb_flush_local(va); + dcache_wbinv_poc(va, pa, PAGE_SIZE); + pte2_clear(sysmaps->CMAP2); + sched_unpin(); + mtx_unlock(&sysmaps->lock); + } +} + +/* + * Miscellaneous support routines follow + */ + +/* + * Returns TRUE if the given page is mapped individually or as part of + * a 1mpage. Otherwise, returns FALSE. + */ +boolean_t +pmap_page_is_mapped(vm_page_t m) +{ + boolean_t rv; + + if ((m->oflags & VPO_UNMANAGED) != 0) + return (FALSE); + rw_wlock(&pvh_global_lock); + rv = !TAILQ_EMPTY(&m->md.pv_list) || + ((m->flags & PG_FICTITIOUS) == 0 && + !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); + rw_wunlock(&pvh_global_lock); + return (rv); +} + +/* + * Returns true if the pmap's pv is one of the first + * 16 pvs linked to from this page. This count may + * be changed upwards or downwards in the future; it + * is only necessary that true be returned for a small + * subset of pmaps for proper page aging. 
+ */ +boolean_t +pmap_page_exists_quick(pmap_t pmap, vm_page_t m) +{ + struct md_page *pvh; + pv_entry_t pv; + int loops = 0; + boolean_t rv; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("%s: page %p is not managed", __func__, m)); + rv = FALSE; + rw_wlock(&pvh_global_lock); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + } + rw_wunlock(&pvh_global_lock); + return (rv); +} + +/* + * pmap_zero_page zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + */ +void +pmap_zero_page(vm_page_t m) +{ + struct sysmaps *sysmaps; + + sched_pin(); + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (pte2_load(sysmaps->CMAP2) != 0) + panic("%s: CMAP2 busy", __func__); + pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, + m->md.pat_mode)); + tlb_flush_local((vm_offset_t)sysmaps->CADDR2); + pagezero(sysmaps->CADDR2); + pte2_clear(sysmaps->CMAP2); + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + +/* + * pmap_zero_page_area zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + * + * off and size may not cover an area beyond a single hardware page. 
+ */ +void +pmap_zero_page_area(vm_page_t m, int off, int size) +{ + struct sysmaps *sysmaps; + + sched_pin(); + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (pte2_load(sysmaps->CMAP2) != 0) + panic("%s: CMAP2 busy", __func__); + pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, + m->md.pat_mode)); + tlb_flush_local((vm_offset_t)sysmaps->CADDR2); + if (off == 0 && size == PAGE_SIZE) + pagezero(sysmaps->CADDR2); + else + bzero(sysmaps->CADDR2 + off, size); + pte2_clear(sysmaps->CMAP2); + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + +/* + * pmap_zero_page_idle zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. This + * is intended to be called from the vm_pagezero process only and + * outside of Giant. + */ +void +pmap_zero_page_idle(vm_page_t m) +{ + + if (pte2_load(CMAP3) != 0) + panic("%s: CMAP3 busy", __func__); + sched_pin(); + pte2_store(CMAP3, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, + m->md.pat_mode)); + tlb_flush_local((vm_offset_t)CADDR3); + pagezero(CADDR3); + pte2_clear(CMAP3); + sched_unpin(); +} + +/* + * pmap_copy_page copies the specified (machine independent) + * page by mapping the page into virtual memory and using + * bcopy to copy the page, one machine dependent page at a + * time. 
+ */ +void +pmap_copy_page(vm_page_t src, vm_page_t dst) +{ + struct sysmaps *sysmaps; + + sched_pin(); + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (pte2_load(sysmaps->CMAP1) != 0) + panic("%s: CMAP1 busy", __func__); + if (pte2_load(sysmaps->CMAP2) != 0) + panic("%s: CMAP2 busy", __func__); + pte2_store(sysmaps->CMAP1, PTE2_KERN_NG(VM_PAGE_TO_PHYS(src), + PTE2_AP_KR | PTE2_NM, src->md.pat_mode)); + tlb_flush_local((vm_offset_t)sysmaps->CADDR1); + pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(dst), + PTE2_AP_KRW, dst->md.pat_mode)); + tlb_flush_local((vm_offset_t)sysmaps->CADDR2); + bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); + pte2_clear(sysmaps->CMAP1); + pte2_clear(sysmaps->CMAP2); + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + +int unmapped_buf_allowed = 1; + +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + struct sysmaps *sysmaps; + vm_page_t a_pg, b_pg; + char *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + sched_pin(); + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (*sysmaps->CMAP1 != 0) + panic("pmap_copy_pages: CMAP1 busy"); + if (*sysmaps->CMAP2 != 0) + panic("pmap_copy_pages: CMAP2 busy"); + while (xfersize > 0) { + a_pg = ma[a_offset >> PAGE_SHIFT]; + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + b_pg = mb[b_offset >> PAGE_SHIFT]; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + pte2_store(sysmaps->CMAP1, PTE2_KERN_NG(VM_PAGE_TO_PHYS(a_pg), + PTE2_AP_KR | PTE2_NM, a_pg->md.pat_mode)); + tlb_flush_local((vm_offset_t)sysmaps->CADDR1); + pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(b_pg), + PTE2_AP_KRW, b_pg->md.pat_mode)); + tlb_flush_local((vm_offset_t)sysmaps->CADDR2); + a_cp = sysmaps->CADDR1 + a_pg_offset; + b_cp = sysmaps->CADDR2 + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + 
b_offset += cnt; + xfersize -= cnt; + } + pte2_clear(sysmaps->CMAP1); + pte2_clear(sysmaps->CMAP2); + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. + */ +void +pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, + vm_offset_t src_addr) +{ + struct spglist free; + vm_offset_t addr; + vm_offset_t end_addr = src_addr + len; + vm_offset_t nextva; + + if (dst_addr != src_addr) + return; + + if (!pmap_is_current(src_pmap)) + return; + + rw_wlock(&pvh_global_lock); + if (dst_pmap < src_pmap) { + PMAP_LOCK(dst_pmap); + PMAP_LOCK(src_pmap); + } else { + PMAP_LOCK(src_pmap); + PMAP_LOCK(dst_pmap); + } + sched_pin(); + for (addr = src_addr; addr < end_addr; addr = nextva) { + pt2_entry_t *src_pte2p, *dst_pte2p; + vm_page_t dst_mpt2pg, src_mpt2pg; + pt1_entry_t src_pte1; + u_int pte1_idx; + + KASSERT(addr < VM_MAXUSER_ADDRESS, + ("%s: invalid to pmap_copy page tables", __func__)); + + nextva = pte1_trunc(addr + PTE1_SIZE); + if (nextva < addr) + nextva = end_addr; + + pte1_idx = pte1_index(addr); + src_pte1 = src_pmap->pm_pt1[pte1_idx]; + if (pte1_is_section(src_pte1)) { + if ((addr & PTE1_OFFSET) != 0 || + (addr + PTE1_SIZE) > end_addr) + continue; + if (dst_pmap->pm_pt1[pte1_idx] == 0 && + (!pte1_is_managed(src_pte1) || + pmap_pv_insert_pte1(dst_pmap, addr, + pte1_pa(src_pte1)))) { + dst_pmap->pm_pt1[pte1_idx] = src_pte1 & + ~PTE1_W; + dst_pmap->pm_stats.resident_count += + PTE1_SIZE / PAGE_SIZE; + } + continue; + } else if (!pte1_is_link(src_pte1)) + continue; + + src_mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(src_pte1)); + + /* + * We leave PT2s to be linked from PT1 even if they are not + * referenced until all PT2s in a page are without reference. + * + * QQQ: It could be changed ... 
+ */ +#if 0 /* single_pt2_link_is_cleared */ + KASSERT(pt2_wirecount_get(src_mpt2pg, pte1_idx) > 0, + ("%s: source page table page is unused", __func__)); +#else + if (pt2_wirecount_get(src_mpt2pg, pte1_idx) == 0) + continue; +#endif + if (nextva > end_addr) + nextva = end_addr; + + src_pte2p = pt2map_entry(addr); + while (addr < nextva) { + pt2_entry_t temp_pte2; + temp_pte2 = pte2_load(src_pte2p); + /* + * we only virtual copy managed pages + */ + if (pte2_is_managed(temp_pte2)) { + dst_mpt2pg = pmap_allocpte2(dst_pmap, addr, + PMAP_ENTER_NOSLEEP); + if (dst_mpt2pg == NULL) + goto out; + dst_pte2p = pmap_pte2_quick(dst_pmap, addr); + if (!pte2_is_valid(pte2_load(dst_pte2p)) && + pmap_try_insert_pv_entry(dst_pmap, addr, + PHYS_TO_VM_PAGE(pte2_pa(temp_pte2)))) { + /* + * Clear the wired, modified, and + * accessed (referenced) bits + * during the copy. + */ + temp_pte2 &= ~(PTE2_W | PTE2_A); + temp_pte2 |= PTE2_NM; + pte2_store(dst_pte2p, temp_pte2); + dst_pmap->pm_stats.resident_count++; + } else { + SLIST_INIT(&free); + if (pmap_unwire_pt2(dst_pmap, addr, + dst_mpt2pg, &free)) { + pmap_tlb_flush(dst_pmap, addr); + pmap_free_zero_pages(&free); + } + goto out; + } + if (pt2_wirecount_get(dst_mpt2pg, pte1_idx) >= + pt2_wirecount_get(src_mpt2pg, pte1_idx)) + break; + } + addr += PAGE_SIZE; + src_pte2p++; + } + } +out: + sched_unpin(); + rw_wunlock(&pvh_global_lock); + PMAP_UNLOCK(src_pmap); + PMAP_UNLOCK(dst_pmap); +} + +/* + * Increase the starting virtual address of the given mapping if a + * different alignment might result in more section mappings. 
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
    vm_offset_t pte1_offset;

    /* A mapping smaller than one section can never use a section. */
    if (size < PTE1_SIZE)
        return;
    if (object != NULL && (object->flags & OBJ_COLORED) != 0)
        offset += ptoa(object->pg_color);
    pte1_offset = offset & PTE1_OFFSET;
    /*
     * Give up if realigning cannot yield at least one full section,
     * or if *addr already has the superpage-friendly alignment.
     */
    if (size - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) < PTE1_SIZE ||
        (*addr & PTE1_OFFSET) == pte1_offset)
        return;
    if ((*addr & PTE1_OFFSET) < pte1_offset)
        *addr = pte1_trunc(*addr) + pte1_offset;
    else
        *addr = pte1_roundup(*addr) + pte1_offset;
}

/*
 * Switch this CPU to the address space of the given thread: update the
 * old and new pmaps' pm_active CPU sets, load the new translation table
 * base, and record the new pmap in PCPU and in the thread's PCB.  Runs
 * in a critical section so the thread cannot migrate mid-switch.
 */
void
pmap_activate(struct thread *td)
{
    pmap_t pmap, oldpmap;
    u_int cpuid, ttb;

    PDEBUG(9, printf("%s: td = %08x\n", __func__, (uint32_t)td));

    critical_enter();
    pmap = vmspace_pmap(td->td_proc->p_vmspace);
    oldpmap = PCPU_GET(curpmap);
    cpuid = PCPU_GET(cpuid);

#if defined(SMP)
    CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
    CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
#else
    CPU_CLR(cpuid, &oldpmap->pm_active);
    CPU_SET(cpuid, &pmap->pm_active);
#endif

    ttb = pmap_ttb_get(pmap);

    /*
     * pmap_activate is for the current thread on the current cpu
     */
    td->td_pcb->pcb_pagedir = ttb;
    cp15_ttbr_set(ttb);
    PCPU_SET(curpmap, pmap);
    critical_exit();
}

/* Report whether the given pmap is the one active on this CPU. */
int
pmap_dmap_iscurrent(pmap_t pmap)
{

    return (pmap_is_current(pmap));
}

/*
 * Perform the pmap work for mincore.
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
    pt1_entry_t *pte1p, pte1;
    pt2_entry_t *pte2p, pte2;
    vm_paddr_t pa;
    boolean_t managed;
    int val;

    PMAP_LOCK(pmap);
retry:
    pte1p = pmap_pte1(pmap, addr);
    pte1 = pte1_load(pte1p);
    if (pte1_is_section(pte1)) {
        /* 1MB section mapping: report it as a superpage. */
        pa = trunc_page(pte1_pa(pte1) | (addr & PTE1_OFFSET));
        managed = pte1_is_managed(pte1);
        val = MINCORE_SUPER | MINCORE_INCORE;
        if (pte1_is_dirty(pte1))
            val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
        if (pte1 & PTE1_A)
            val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
    } else if (pte1_is_link(pte1)) {
        /* Regular 4KB mapping via an L2 table. */
        pte2p = pmap_pte2(pmap, addr);
        pte2 = pte2_load(pte2p);
        pmap_pte2_release(pte2p);
        pa = pte2_pa(pte2);
        managed = pte2_is_managed(pte2);
        val = MINCORE_INCORE;
        if (pte2_is_dirty(pte2))
            val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
        if (pte2 & PTE2_A)
            val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
    } else {
        /* Not mapped at all. */
        managed = FALSE;
        val = 0;
    }
    if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
        (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
        /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
        if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
            goto retry;
    } else
        PA_UNLOCK_COND(*locked_pa);
    PMAP_UNLOCK(pmap);
    return (val);
}

/*
 * Map a physical page at the given kernel VA with device memory
 * attributes.  The mapping may be visible on all CPUs, hence the
 * global (not local) TLB flush.
 */
void
pmap_kenter_device(vm_offset_t va, vm_paddr_t pa)
{

    pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, PTE2_ATTR_DEVICE);
    tlb_flush(va);
}

/* Record the pmap's translation table base address in the given PCB. */
void
pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb)
{

    pcb->pcb_pagedir = pmap_ttb_get(pmap);
}


/*
 * Clean L1 data cache range on a single page, which is not mapped yet.
 */
static void
pmap_dcache_wb_pou(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
{
    struct sysmaps *sysmaps;
    vm_offset_t va;

    KASSERT(((pa & PAGE_MASK) + size) <= PAGE_SIZE,
        ("%s: not on single page", __func__));

    /* Map the page through this CPU's CMAP3 slot for the write-back. */
    sched_pin();
    sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
    mtx_lock(&sysmaps->lock);
    if (*sysmaps->CMAP3)
        panic("%s: CMAP3 busy", __func__);
    pte2_store(sysmaps->CMAP3, PTE2_KERN_NG(pa, PTE2_AP_KRW, ma));
    va = (vm_offset_t)sysmaps->CADDR3;
    tlb_flush_local(va);
    dcache_wb_pou(va, size);
    pte2_clear(sysmaps->CMAP3);
    sched_unpin();
    mtx_unlock(&sysmaps->lock);
}

/*
 * Sync instruction cache range which is not mapped yet.
 */
void
cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
{
    uint32_t len, offset;
    vm_page_t m;

    /* Write back d-cache on given address range. */
    offset = pa & PAGE_MASK;
    for ( ; size != 0; size -= len, pa += len, offset = 0) {
        len = min(PAGE_SIZE - offset, size);
        m = PHYS_TO_VM_PAGE(pa);
        KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
            __func__, pa));
        pmap_dcache_wb_pou(pa, len, m->md.pat_mode);
    }
    /*
     * I-cache is VIPT.  The only way to flush all virtual mappings of
     * a given physical address is to invalidate the whole i-cache.
     */
    icache_inv_all();
}

/*
 * Make the instruction cache coherent with recently written data in
 * the given range of the pmap (e.g. after loading code).
 */
void
pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t size)
{

    /* Write back d-cache on given address range. */
    if (va >= VM_MIN_KERNEL_ADDRESS) {
        dcache_wb_pou(va, size);
    } else {
        uint32_t len, offset;
        vm_paddr_t pa;
        vm_page_t m;

        /* User addresses: translate page by page and write back. */
        offset = va & PAGE_MASK;
        for ( ; size != 0; size -= len, va += len, offset = 0) {
            pa = pmap_extract(pmap, va); /* offset is preserved */
            len = min(PAGE_SIZE - offset, size);
            m = PHYS_TO_VM_PAGE(pa);
            KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
                __func__, pa));
            pmap_dcache_wb_pou(pa, len, m->md.pat_mode);
        }
    }
    /*
     * I-cache is VIPT.  The only way to flush all virtual mappings of
     * a given physical address is to invalidate the whole i-cache.
     */
    icache_inv_all();
}

/*
 * The implementation of pmap_fault() uses IN_RANGE2() macro which
 * depends on the fact that given range size is a power of 2.
 */
CTASSERT(powerof2(NB_IN_PT1));
CTASSERT(powerof2(PT2MAP_SIZE));

#define IN_RANGE2(addr, start, size)    \
    ((vm_offset_t)(start) == ((vm_offset_t)(addr) & ~((size) - 1)))

/*
 * Handle access and R/W emulation faults.
 */
int
pmap_fault(pmap_t pmap, vm_offset_t far, uint32_t fsr, int idx, int usermode)
{
    pt1_entry_t *pte1p, pte1;
    pt2_entry_t *pte2p, pte2;

    if (pmap == NULL)
        pmap = kernel_pmap;

    /*
     * In kernel, we should never get abort with FAR which is in range of
     * pmap->pm_pt1 or PT2MAP address spaces.  If it happens, stop here
     * and print out a useful abort message and even get to the debugger
     * otherwise it likely ends with never ending loop of aborts.
     */
    if (__predict_false(IN_RANGE2(far, pmap->pm_pt1, NB_IN_PT1))) {
        /*
         * All L1 tables should always be mapped and present.
         * However, we check only current one herein.  For user mode,
         * only permission abort from malicious user is not fatal.
         */
        if (!usermode || (idx != FAULT_PERM_L2)) {
            CTR4(KTR_PMAP, "%s: pmap %#x pm_pt1 %#x far %#x",
                __func__, pmap, pmap->pm_pt1, far);
            panic("%s: pm_pt1 abort", __func__);
        }
        return (EFAULT);
    }
    if (__predict_false(IN_RANGE2(far, PT2MAP, PT2MAP_SIZE))) {
        /*
         * PT2MAP should be always mapped and present in current
         * L1 table.  However, only existing L2 tables are mapped
         * in PT2MAP.  For user mode, only L2 translation abort and
         * permission abort from malicious user is not fatal.
         */
        if (!usermode ||
            (idx != FAULT_TRAN_L2 && idx != FAULT_PERM_L2)) {
            CTR4(KTR_PMAP, "%s: pmap %#x PT2MAP %#x far %#x",
                __func__, pmap, PT2MAP, far);
            panic("%s: PT2MAP abort", __func__);
        }
        return (EFAULT);
    }

    /*
     * Access bits for page and section.  Note that the entry
     * is not in TLB yet, so TLB flush is not necessary.
     *
     * QQQ: This is hardware emulation, we do not call userret()
     *      for aborts from user mode.
     *      We do not lock PMAP, so cmpset() is a need.  Hopefully,
     *      no one removes the mapping when we are here.
     */
    if (idx == FAULT_ACCESS_L2) {
        pte2p = pt2map_entry(far);
pte2_seta:
        pte2 = pte2_load(pte2p);
        if (pte2_is_valid(pte2)) {
            /* Retry if someone changed the PTE under us. */
            if (!pte2_cmpset(pte2p, pte2, pte2 | PTE2_A)) {
                goto pte2_seta;
            }
            return (0);
        }
    }
    if (idx == FAULT_ACCESS_L1) {
        pte1p = pmap_pte1(pmap, far);
pte1_seta:
        pte1 = pte1_load(pte1p);
        if (pte1_is_section(pte1)) {
            if (!pte1_cmpset(pte1p, pte1, pte1 | PTE1_A)) {
                goto pte1_seta;
            }
            return (0);
        }
    }

    /*
     * Handle modify bits for page and section.  Note that the modify
     * bit is emulated by software.  So PTEx_RO is software read only
     * bit and PTEx_NM flag is real hardware read only bit.
     *
     * QQQ: This is hardware emulation, we do not call userret()
     *      for aborts from user mode.
     *      We do not lock PMAP, so cmpset() is a need.  Hopefully,
     *      no one removes the mapping when we are here.
     */
    if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L2)) {
        pte2p = pt2map_entry(far);
pte2_setrw:
        pte2 = pte2_load(pte2p);
        if (pte2_is_valid(pte2) && !(pte2 & PTE2_RO) &&
            (pte2 & PTE2_NM)) {
            if (!pte2_cmpset(pte2p, pte2, pte2 & ~PTE2_NM)) {
                goto pte2_setrw;
            }
            /* The stale read-only entry may be cached in the TLB. */
            tlb_flush(trunc_page(far));
            return (0);
        }
    }
    if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L1)) {
        pte1p = pmap_pte1(pmap, far);
pte1_setrw:
        pte1 = pte1_load(pte1p);
        if (pte1_is_section(pte1) && !(pte1 & PTE1_RO) &&
            (pte1 & PTE1_NM)) {
            if (!pte1_cmpset(pte1p, pte1, pte1 & ~PTE1_NM)) {
                goto pte1_setrw;
            }
            tlb_flush(pte1_trunc(far));
            return (0);
        }
    }

    /*
     * QQQ: The previous code, mainly fast handling of access and
     *      modify bits aborts, could be moved to ASM.  Now we are
     *      starting to deal with not fast aborts.
     */

#ifdef INVARIANTS
    /*
     * Read an entry in PT2TAB associated with both pmap and far.
     * It's safe because PT2TAB is always mapped.
     *
     * QQQ: We do not lock PMAP, so false positives could happen if
     *      the mapping is removed concurrently.
     */
    pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, far));
    if (pte2_is_valid(pte2)) {
        /*
         * Now, when we know that L2 page table is allocated,
         * we can use PT2MAP to get L2 page table entry.
         */
        pte2 = pte2_load(pt2map_entry(far));
        if (pte2_is_valid(pte2)) {
            /*
             * If L2 page table entry is valid, make sure that
             * L1 page table entry is valid too.  Note that we
             * leave L2 page entries untouched when promoted.
             */
            pte1 = pte1_load(pmap_pte1(pmap, far));
            if (!pte1_is_valid(pte1)) {
                panic("%s: missing L1 page entry (%p, %#x)",
                    __func__, pmap, far);
            }
        }
    }
#endif
    /* Not handled here; let the caller resolve it through the VM system. */
    return (EAGAIN);
}

/* !!!! REMOVE !!!! */
void
pmap_pte_init_mmu_v6(void)
{
}

void vector_page_setprot(int p)
{
}

#if defined(PMAP_DEBUG)
/*
 * Reusing of KVA used in pmap_zero_page function !!!
+ */ +static void +pmap_zero_page_check(vm_page_t m) +{ + uint32_t *p, *end; + struct sysmaps *sysmaps; + + sched_pin(); + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (pte2_load(sysmaps->CMAP2) != 0) + panic("%s: CMAP2 busy", __func__); + pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, + m->md.pat_mode)); + tlb_flush_local((vm_offset_t)sysmaps->CADDR2); + end = (uint32_t*)(sysmaps->CADDR2 + PAGE_SIZE); + for (p = (uint32_t*)sysmaps->CADDR2; p < end; p++) + if (*p != 0) + panic("%s: page %p not zero, va: %p", __func__, m, + sysmaps->CADDR2); + pte2_clear(sysmaps->CMAP2); + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + +int +pmap_pid_dump(int pid) +{ + pmap_t pmap; + struct proc *p; + int npte2 = 0; + int i, j, index; + + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_pid != pid || p->p_vmspace == NULL) + continue; + index = 0; + pmap = vmspace_pmap(p->p_vmspace); + for (i = 0; i < NPTE1_IN_PT1; i++) { + pt1_entry_t pte1; + pt2_entry_t *pte2p, pte2; + vm_offset_t base, va; + vm_paddr_t pa; + vm_page_t m; + + base = i << PTE1_SHIFT; + pte1 = pte1_load(&pmap->pm_pt1[i]); + + if (pte1_is_section(pte1)) { + /* + * QQQ: Do something here! + */ + } else if (pte1_is_link(pte1)) { + for (j = 0; j < NPTE2_IN_PT2; j++) { + va = base + (j << PAGE_SHIFT); + if (va >= VM_MIN_KERNEL_ADDRESS) { + if (index) { + index = 0; + printf("\n"); + } + sx_sunlock(&allproc_lock); + return (npte2); + } + pte2p = pmap_pte2(pmap, va); + pte2 = pte2_load(pte2p); + pmap_pte2_release(pte2p); + if (!pte2_is_valid(pte2)) + continue; + + pa = pte2_pa(pte2); + m = PHYS_TO_VM_PAGE(pa); + printf("va: 0x%x, pa: 0x%x, h: %d, w:" + " %d, f: 0x%x", va, pa, + m->hold_count, m->wire_count, + m->flags); + npte2++; + index++; + if (index >= 2) { + index = 0; + printf("\n"); + } else { + printf(" "); + } + } + } + } + } + sx_sunlock(&allproc_lock); + return (npte2); +} + +/* + * Print address space of pmap. 
+ */ +static void +pads(pmap_t pmap) +{ + int i, j; + vm_paddr_t va; + pt1_entry_t pte1; + pt2_entry_t *pte2p, pte2; + + if (pmap == kernel_pmap) + return; + for (i = 0; i < NPTE1_IN_PT1; i++) { + pte1 = pte1_load(&pmap->pm_pt1[i]); + if (pte1_is_section(pte1)) { + /* + * QQQ: Do something here! + */ + } else if (pte1_is_link(pte1)) { + for (j = 0; j < NPTE2_IN_PT2; j++) { + va = (i << PTE1_SHIFT) + (j << PAGE_SHIFT); + if (pmap == kernel_pmap && va < KERNBASE) + continue; + if (pmap != kernel_pmap && va >= KERNBASE && + (va < UPT2V_MIN_ADDRESS || + va >= UPT2V_MAX_ADDRESS)) + continue; + + pte2p = pmap_pte2(pmap, va); + pte2 = pte2_load(pte2p); + pmap_pte2_release(pte2p); + if (!pte2_is_valid(pte2)) + continue; + printf("%x:%x ", va, pte2); + } + } + } +} + +void +pmap_pvdump(vm_paddr_t pa) +{ + pv_entry_t pv; + pmap_t pmap; + vm_page_t m; + + printf("pa %x", pa); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + pmap = PV_PMAP(pv); + printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); + pads(pmap); + } + printf(" "); +} +#endif + +#ifdef DDB +static pt2_entry_t * +pmap_pte2_ddb(pmap_t pmap, vm_offset_t va) +{ + pt1_entry_t pte1; + vm_paddr_t pt2pg_pa; + + pte1 = pte1_load(pmap_pte1(pmap, va)); + if (!pte1_is_link(pte1)) + return (NULL); + + if (pmap_is_current(pmap)) + return (pt2map_entry(va)); + + /* Note that L2 page table size is not equal to PAGE_SIZE. 
*/ + pt2pg_pa = trunc_page(pte1_link_pa(pte1)); + if (pte2_pa(pte2_load(PMAP3)) != pt2pg_pa) { + pte2_store(PMAP3, PTE2_KPT(pt2pg_pa)); +#ifdef SMP + PMAP3cpu = PCPU_GET(cpuid); +#endif + tlb_flush_local((vm_offset_t)PADDR3); + } +#ifdef SMP + else if (PMAP3cpu != PCPU_GET(cpuid)) { + PMAP3cpu = PCPU_GET(cpuid); + tlb_flush_local((vm_offset_t)PADDR3); + } +#endif + return (PADDR3 + (arm32_btop(va) & (NPTE2_IN_PG - 1))); +} + +static void +dump_pmap(pmap_t pmap) +{ + + printf("pmap %p\n", pmap); + printf(" pm_pt1: %p\n", pmap->pm_pt1); + printf(" pm_pt2tab: %p\n", pmap->pm_pt2tab); + printf(" pm_active: 0x%08lX\n", pmap->pm_active.__bits[0]); +} + +DB_SHOW_COMMAND(pmaps, pmap_list_pmaps) +{ + + pmap_t pmap; + LIST_FOREACH(pmap, &allpmaps, pm_list) { + dump_pmap(pmap); + } +} + +static int +pte2_class(pt2_entry_t pte2) +{ + int cls; + + cls = (pte2 >> 2) & 0x03; + cls |= (pte2 >> 4) & 0x04; + return (cls); +} + +static void +dump_section(pmap_t pmap, uint32_t pte1_idx) +{ +} + +static void +dump_link(pmap_t pmap, uint32_t pte1_idx, boolean_t invalid_ok) +{ + uint32_t i; + vm_offset_t va; + pt2_entry_t *pte2p, pte2; + vm_page_t m; + + va = pte1_idx << PTE1_SHIFT; + pte2p = pmap_pte2_ddb(pmap, va); + for (i = 0; i < NPTE2_IN_PT2; i++, pte2p++, va += PAGE_SIZE) { + pte2 = pte2_load(pte2p); + if (pte2 == 0) + continue; + if (!pte2_is_valid(pte2)) { + printf(" 0x%08X: 0x%08X", va, pte2); + if (!invalid_ok) + printf(" - not valid !!!"); + printf("\n"); + continue; + } + m = PHYS_TO_VM_PAGE(pte2_pa(pte2)); + printf(" 0x%08X: 0x%08X, TEX%d, s:%d, g:%d, m:%p", va , pte2, + pte2_class(pte2), !!(pte2 & PTE2_S), !(pte2 & PTE2_NG), m); + if (m != NULL) { + printf(" v:%d h:%d w:%d f:0x%04X\n", m->valid, + m->hold_count, m->wire_count, m->flags); + } else { + printf("\n"); + } + } +} + +static __inline boolean_t +is_pv_chunk_space(vm_offset_t va) +{ + + if ((((vm_offset_t)pv_chunkbase) <= va) && + (va < ((vm_offset_t)pv_chunkbase + PAGE_SIZE * pv_maxchunks))) + return (TRUE); + 
return (FALSE); +} + +DB_SHOW_COMMAND(pmap, pmap_pmap_print) +{ + /* XXX convert args. */ + pmap_t pmap = (pmap_t)addr; + pt1_entry_t pte1; + pt2_entry_t pte2; + vm_offset_t va, eva; + vm_page_t m; + uint32_t i; + boolean_t invalid_ok, dump_link_ok, dump_pv_chunk; + + if (have_addr) { + pmap_t pm; + + LIST_FOREACH(pm, &allpmaps, pm_list) + if (pm == pmap) break; + if (pm == NULL) { + printf("given pmap %p is not in allpmaps list\n", pmap); + return; + } + } else + pmap = PCPU_GET(curpmap); + + eva = (modif[0] == 'u') ? VM_MAXUSER_ADDRESS : 0xFFFFFFFF; + dump_pv_chunk = FALSE; /* XXX evaluate from modif[] */ + + printf("pmap: 0x%08X\n", (uint32_t)pmap); + printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP); + printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab); + + for(i = 0; i < NPTE1_IN_PT1; i++) { + pte1 = pte1_load(&pmap->pm_pt1[i]); + if (pte1 == 0) + continue; + va = i << PTE1_SHIFT; + if (va >= eva) + break; + + if (pte1_is_section(pte1)) { + printf("0x%08X: Section 0x%08X, s:%d g:%d\n", va, pte1, + !!(pte1 & PTE1_S), !(pte1 & PTE1_NG)); + dump_section(pmap, i); + } else if (pte1_is_link(pte1)) { + dump_link_ok = TRUE; + invalid_ok = FALSE; + pte2 = pte2_load(pmap_pt2tab_entry(pmap, va)); + m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); + printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X m: %p", + va, pte1, pte2, m); + if (is_pv_chunk_space(va)) { + printf(" - pv_chunk space"); + if (dump_pv_chunk) + invalid_ok = TRUE; + else + dump_link_ok = FALSE; + } + else if (m != NULL) + printf(" w:%d w2:%u", m->wire_count, + pt2_wirecount_get(m, pte1_index(va))); + if (pte2 == 0) + printf(" !!! pt2tab entry is ZERO"); + else if (pte2_pa(pte1) != pte2_pa(pte2)) + printf(" !!! 
pt2tab entry is DIFFERENT - m: %p", + PHYS_TO_VM_PAGE(pte2_pa(pte2))); + printf("\n"); + if (dump_link_ok) + dump_link(pmap, i, invalid_ok); + } else + printf("0x%08X: Invalid entry 0x%08X\n", va, pte1); + } +} + +static void +dump_pt2tab(pmap_t pmap) +{ + uint32_t i; + pt2_entry_t pte2; + vm_offset_t va; + vm_paddr_t pa; + vm_page_t m; + + printf("PT2TAB:\n"); + for (i = 0; i < PT2TAB_ENTRIES; i++) { + pte2 = pte2_load(&pmap->pm_pt2tab[i]); + if (!pte2_is_valid(pte2)) + continue; + va = i << PT2TAB_SHIFT; + pa = pte2_pa(pte2); + m = PHYS_TO_VM_PAGE(pa); + printf(" 0x%08X: 0x%08X, TEX%d, s:%d, m:%p", va, pte2, + pte2_class(pte2), !!(pte2 & PTE2_S), m); + if (m != NULL) + printf(" , h: %d, w: %d, f: 0x%04X pidx: %lld", + m->hold_count, m->wire_count, m->flags, m->pindex); + printf("\n"); + } +} + +DB_SHOW_COMMAND(pmap_pt2tab, pmap_pt2tab_print) +{ + /* XXX convert args. */ + pmap_t pmap = (pmap_t)addr; + pt1_entry_t pte1; + pt2_entry_t pte2; + vm_offset_t va; + uint32_t i, start; + + if (have_addr) { + printf("supported only on current pmap\n"); + return; + } + + pmap = PCPU_GET(curpmap); + printf("curpmap: 0x%08X\n", (uint32_t)pmap); + printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP); + printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab); + + start = pte1_index((vm_offset_t)PT2MAP); + for (i = start; i < (start + NPT2_IN_PT2TAB); i++) { + pte1 = pte1_load(&pmap->pm_pt1[i]); + if (pte1 == 0) + continue; + va = i << PTE1_SHIFT; + if (pte1_is_section(pte1)) { + printf("0x%08X: Section 0x%08X, s:%d\n", va, pte1, + !!(pte1 & PTE1_S)); + dump_section(pmap, i); + } else if (pte1_is_link(pte1)) { + pte2 = pte2_load(pmap_pt2tab_entry(pmap, va)); + printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X\n", va, + pte1, pte2); + if (pte2 == 0) + printf(" !!! 
pt2tab entry is ZERO\n"); + } else + printf("0x%08X: Invalid entry 0x%08X\n", va, pte1); + } + dump_pt2tab(pmap); +} +#endif Property changes on: head/sys/arm/arm/pmap-v6-new.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/arm/arm/swtch.S =================================================================== --- head/sys/arm/arm/swtch.S (revision 280711) +++ head/sys/arm/arm/swtch.S (revision 280712) @@ -1,442 +1,828 @@ /* $NetBSD: cpuswitch.S,v 1.41 2003/11/15 08:44:18 scw Exp $ */ /*- * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * cpuswitch.S * * cpu switching functions * * Created : 15/10/94 * */ #include "assym.s" #include "opt_sched.h" #include #include #include #include __FBSDID("$FreeBSD$"); -#define DOMAIN_CLIENT 0x01 - #if defined(_ARM_ARCH_6) && defined(SMP) #define GET_PCPU(tmp, tmp2) \ mrc p15, 0, tmp, c0, c0, 5; \ and tmp, tmp, #0xf; \ ldr tmp2, .Lcurpcpu+4; \ mul tmp, tmp, tmp2; \ ldr tmp2, .Lcurpcpu; \ add tmp, tmp, tmp2; #else #define GET_PCPU(tmp, tmp2) \ ldr tmp, .Lcurpcpu #endif #ifdef VFP .fpu vfp /* allow VFP instructions */ #endif .Lcurpcpu: - .word _C_LABEL(__pcpu) + .word _C_LABEL(__pcpu) .word PCPU_SIZE -.Lcpufuncs: - .word _C_LABEL(cpufuncs) .Lblocked_lock: .word _C_LABEL(blocked_lock) + +#ifndef ARM_NEW_PMAP + +#define DOMAIN_CLIENT 0x01 + +.Lcpufuncs: + .word _C_LABEL(cpufuncs) + /* * cpu_throw(oldtd, newtd) * * Remove current thread state, then select the next thread to run * and load its state. * r0 = oldtd * r1 = newtd */ ENTRY(cpu_throw) mov r5, r1 /* * r0 = oldtd * r5 = newtd */ #ifdef VFP /* This thread is dying, disable */ bl _C_LABEL(vfp_discard) /* VFP without preserving state. 
*/ #endif GET_PCPU(r7, r9) ldr r7, [r5, #(TD_PCB)] /* r7 = new thread's PCB */ /* Switch to lwp0 context */ ldr r9, .Lcpufuncs #if !defined(CPU_ARM11) && !defined(CPU_CORTEXA) && !defined(CPU_MV_PJ4B) && !defined(CPU_KRAIT) mov lr, pc ldr pc, [r9, #CF_IDCACHE_WBINV_ALL] #endif ldr r0, [r7, #(PCB_PL1VEC)] ldr r1, [r7, #(PCB_DACR)] /* * r0 = Pointer to L1 slot for vector_page (or NULL) * r1 = lwp0's DACR * r5 = lwp0 * r7 = lwp0's PCB * r9 = cpufuncs */ /* * Ensure the vector table is accessible by fixing up lwp0's L1 */ cmp r0, #0 /* No need to fixup vector table? */ ldrne r3, [r0] /* But if yes, fetch current value */ ldrne r2, [r7, #(PCB_L1VEC)] /* Fetch new vector_page value */ mcr p15, 0, r1, c3, c0, 0 /* Update DACR for lwp0's context */ cmpne r3, r2 /* Stuffing the same value? */ strne r2, [r0] /* Store if not. */ #ifdef PMAP_INCLUDE_PTE_SYNC /* * Need to sync the cache to make sure that last store is * visible to the MMU. */ movne r1, #4 movne lr, pc ldrne pc, [r9, #CF_DCACHE_WB_RANGE] #endif /* PMAP_INCLUDE_PTE_SYNC */ /* * Note: We don't do the same optimisation as cpu_switch() with * respect to avoiding flushing the TLB if we're switching to * the same L1 since this process' VM space may be about to go * away, so we don't want *any* turds left in the TLB. 
*/ /* Switch the memory to the new process */ ldr r0, [r7, #(PCB_PAGEDIR)] mov lr, pc ldr pc, [r9, #CF_CONTEXT_SWITCH] GET_PCPU(r6, r4) /* Hook in a new pcb */ str r7, [r6, #PC_CURPCB] /* We have a new curthread now so make a note it */ str r5, [r6, #PC_CURTHREAD] #ifndef ARM_TP_ADDRESS mcr p15, 0, r5, c13, c0, 4 #endif /* Set the new tp */ ldr r6, [r5, #(TD_MD + MD_TP)] #ifdef ARM_TP_ADDRESS ldr r4, =ARM_TP_ADDRESS str r6, [r4] ldr r6, [r5, #(TD_MD + MD_RAS_START)] str r6, [r4, #4] /* ARM_RAS_START */ ldr r6, [r5, #(TD_MD + MD_RAS_END)] str r6, [r4, #8] /* ARM_RAS_END */ #else mcr p15, 0, r6, c13, c0, 3 #endif /* Restore all the saved registers and exit */ add r3, r7, #PCB_R4 ldmia r3, {r4-r12, sp, pc} END(cpu_throw) /* * cpu_switch(oldtd, newtd, lock) * * Save the current thread state, then select the next thread to run * and load its state. * r0 = oldtd * r1 = newtd * r2 = lock (new lock for old thread) */ ENTRY(cpu_switch) /* Interrupts are disabled. */ /* Save all the registers in the old thread's pcb. */ ldr r3, [r0, #(TD_PCB)] /* Restore all the saved registers and exit */ add r3, #(PCB_R4) stmia r3, {r4-r12, sp, lr, pc} mov r6, r2 /* Save the mutex */ /* rem: r0 = old lwp */ /* rem: interrupts are disabled */ /* Process is now on a processor. */ /* We have a new curthread now so make a note it */ GET_PCPU(r7, r2) str r1, [r7, #PC_CURTHREAD] #ifndef ARM_TP_ADDRESS mcr p15, 0, r1, c13, c0, 4 #endif /* Hook in a new pcb */ ldr r2, [r1, #TD_PCB] str r2, [r7, #PC_CURPCB] /* Stage two : Save old context */ /* Get the user structure for the old thread. */ ldr r2, [r0, #(TD_PCB)] mov r4, r0 /* Save the old thread. */ #ifdef ARM_TP_ADDRESS /* Store the old tp; userland can change it on armv4. 
*/ ldr r3, =ARM_TP_ADDRESS ldr r9, [r3] str r9, [r0, #(TD_MD + MD_TP)] ldr r9, [r3, #4] str r9, [r0, #(TD_MD + MD_RAS_START)] ldr r9, [r3, #8] str r9, [r0, #(TD_MD + MD_RAS_END)] /* Set the new tp */ ldr r9, [r1, #(TD_MD + MD_TP)] str r9, [r3] ldr r9, [r1, #(TD_MD + MD_RAS_START)] str r9, [r3, #4] ldr r9, [r1, #(TD_MD + MD_RAS_END)] str r9, [r3, #8] #else /* * Set new tp. No need to store the old one first, userland can't * change it directly on armv6. */ ldr r9, [r1, #(TD_MD + MD_TP)] mcr p15, 0, r9, c13, c0, 3 #endif /* Get the user structure for the new process in r9 */ ldr r9, [r1, #(TD_PCB)] /* rem: r2 = old PCB */ /* rem: r9 = new PCB */ /* rem: interrupts are enabled */ #ifdef VFP fmrx r0, fpexc /* If the VFP is enabled */ tst r0, #(VFPEXC_EN) /* the current thread has */ movne r1, #1 /* used it, so go save */ addne r0, r2, #(PCB_VFPSTATE) /* the state into the PCB */ blne _C_LABEL(vfp_store) /* and disable the VFP. */ #endif /* r0-r3 now free! */ /* Third phase : restore saved context */ /* rem: r2 = old PCB */ /* rem: r9 = new PCB */ ldr r5, [r9, #(PCB_DACR)] /* r5 = new DACR */ mov r2, #DOMAIN_CLIENT cmp r5, r2, lsl #(PMAP_DOMAIN_KERNEL * 2) /* Sw to kernel thread? */ beq .Lcs_context_switched /* Yup. Don't flush cache */ mrc p15, 0, r0, c3, c0, 0 /* r0 = old DACR */ /* * Get the new L1 table pointer into r11. If we're switching to * an LWP with the same address space as the outgoing one, we can * skip the cache purge and the TTB load. * * To avoid data dep stalls that would happen anyway, we try * and get some useful work done in the mean time. */ mrc p15, 0, r10, c2, c0, 0 /* r10 = old L1 */ ldr r11, [r9, #(PCB_PAGEDIR)] /* r11 = new L1 */ teq r10, r11 /* Same L1? */ cmpeq r0, r5 /* Same DACR? */ beq .Lcs_context_switched /* yes! */ #if !defined(CPU_ARM11) && !defined(CPU_CORTEXA) && !defined(CPU_MV_PJ4B) && !defined(CPU_KRAIT) /* * Definately need to flush the cache. 
*/ ldr r1, .Lcpufuncs mov lr, pc ldr pc, [r1, #CF_IDCACHE_WBINV_ALL] #endif .Lcs_cache_purge_skipped: /* rem: r6 = lock */ /* rem: r9 = new PCB */ /* rem: r10 = old L1 */ /* rem: r11 = new L1 */ mov r2, #0x00000000 ldr r7, [r9, #(PCB_PL1VEC)] /* * Ensure the vector table is accessible by fixing up the L1 */ cmp r7, #0 /* No need to fixup vector table? */ ldrne r2, [r7] /* But if yes, fetch current value */ ldrne r0, [r9, #(PCB_L1VEC)] /* Fetch new vector_page value */ mcr p15, 0, r5, c3, c0, 0 /* Update DACR for new context */ cmpne r2, r0 /* Stuffing the same value? */ #ifndef PMAP_INCLUDE_PTE_SYNC strne r0, [r7] /* Nope, update it */ #else beq .Lcs_same_vector str r0, [r7] /* Otherwise, update it */ /* * Need to sync the cache to make sure that last store is * visible to the MMU. */ ldr r2, .Lcpufuncs mov r0, r7 mov r1, #4 mov lr, pc ldr pc, [r2, #CF_DCACHE_WB_RANGE] .Lcs_same_vector: #endif /* PMAP_INCLUDE_PTE_SYNC */ cmp r10, r11 /* Switching to the same L1? */ ldr r10, .Lcpufuncs beq .Lcs_same_l1 /* Yup. */ /* * Do a full context switch, including full TLB flush. */ mov r0, r11 mov lr, pc ldr pc, [r10, #CF_CONTEXT_SWITCH] b .Lcs_context_switched /* * We're switching to a different process in the same L1. * In this situation, we only need to flush the TLB for the * vector_page mapping, and even then only if r7 is non-NULL. */ .Lcs_same_l1: cmp r7, #0 movne r0, #0 /* We *know* vector_page's VA is 0x0 */ movne lr, pc ldrne pc, [r10, #CF_TLB_FLUSHID_SE] .Lcs_context_switched: /* Release the old thread */ str r6, [r4, #TD_LOCK] #if defined(SCHED_ULE) && defined(SMP) ldr r6, .Lblocked_lock GET_CURTHREAD_PTR(r3) 1: ldr r4, [r3, #TD_LOCK] cmp r4, r6 beq 1b #endif /* XXXSCW: Safe to re-enable FIQs here */ /* rem: r9 = new PCB */ /* Restore all the saved registers and exit */ add r3, r9, #PCB_R4 ldmia r3, {r4-r12, sp, pc} END(cpu_switch) + + +#else /* !ARM_NEW_PMAP */ +#include + +ENTRY(cpu_context_switch) /* QQQ: What about macro instead of function? 
*/ + DSB + mcr CP15_TTBR0(r0) /* set the new TTB */ + ISB + mov r0, #(CPU_ASID_KERNEL) + mcr CP15_TLBIASID(r0) /* flush non-global TLBs */ + /* + * Flush entire Branch Target Cache because the branch predictor + * is not architecturally invisible. See ARM Architecture Reference + * Manual ARMv7-A and ARMv7-R edition, page B2-1264(65), Branch + * predictors and Requirements for branch predictor maintenance + * operations sections. + * + * QQQ: The predictor is virtually addressed and holds virtual target + * addresses. Therefore, if mapping is changed, the predictor cache + * must be flushed. The flush is part of entire i-cache invalidation + * which is always called when code mapping is changed. So herein, + * it's the only place where standalone predictor flush must be + * executed in kernel (except self modifying code case). + */ + mcr CP15_BPIALL /* and flush entire Branch Target Cache */ + DSB + mov pc, lr +END(cpu_context_switch) + +/* + * cpu_throw(oldtd, newtd) + * + * Remove current thread state, then select the next thread to run + * and load its state. + * r0 = oldtd + * r1 = newtd + */ +ENTRY(cpu_throw) + mov r10, r0 /* r10 = oldtd */ + mov r11, r1 /* r11 = newtd */ + +#ifdef VFP /* This thread is dying, disable */ + bl _C_LABEL(vfp_discard) /* VFP without preserving state. */ +#endif + GET_PCPU(r8, r9) /* r8 = current pcpu */ + ldr r4, [r8, #PC_CPUID] /* r4 = current cpu id */ + + cmp r10, #0 /* old thread? */ + beq 2f /* no, skip */ + + /* Remove this CPU from the active list. */ + ldr r5, [r8, #PC_CURPMAP] + mov r0, #(PM_ACTIVE) + add r5, r0 /* r5 = old pm_active */ + + /* Compute position and mask. */ +#if _NCPUWORDS > 1 + lsr r0, r4, #3 + bic r0, #3 + add r5, r0 /* r5 = position in old pm_active */ + mov r2, #1 + and r0, r4, #31 + lsl r2, r0 /* r2 = mask */ +#else + mov r2, #1 + lsl r2, r4 /* r2 = mask */ +#endif + /* Clear cpu from old active list. 
*/ +#ifdef SMP +1: ldrex r0, [r5] + bic r0, r2 + strex r1, r0, [r5] + teq r1, #0 + bne 1b +#else + ldr r0, [r5] + bic r0, r2 + str r0, [r5] +#endif + +2: +#ifdef INVARIANTS + cmp r11, #0 /* new thread? */ + beq badsw1 /* no, panic */ +#endif + ldr r7, [r11, #(TD_PCB)] /* r7 = new PCB */ + + /* + * Registers at this point + * r4 = current cpu id + * r7 = new PCB + * r8 = current pcpu + * r11 = newtd + */ + + /* MMU switch to new thread. */ + ldr r0, [r7, #(PCB_PAGEDIR)] +#ifdef INVARIANTS + cmp r0, #0 /* new thread? */ + beq badsw4 /* no, panic */ +#endif + bl _C_LABEL(cpu_context_switch) + + /* + * Set new PMAP as current one. + * Insert cpu to new active list. + */ + + ldr r6, [r11, #(TD_PROC)] /* newtd->proc */ + ldr r6, [r6, #(P_VMSPACE)] /* newtd->proc->vmspace */ + add r6, #VM_PMAP /* newtd->proc->vmspace->pmap */ + str r6, [r8, #PC_CURPMAP] /* store to curpmap */ + + mov r0, #PM_ACTIVE + add r6, r0 /* r6 = new pm_active */ + + /* compute position and mask */ +#if _NCPUWORDS > 1 + lsr r0, r4, #3 + bic r0, #3 + add r6, r0 /* r6 = position in new pm_active */ + mov r2, #1 + and r0, r4, #31 + lsl r2, r0 /* r2 = mask */ +#else + mov r2, #1 + lsl r2, r4 /* r2 = mask */ +#endif + /* Set cpu to new active list. */ +#ifdef SMP +1: ldrex r0, [r6] + orr r0, r2 + strex r1, r0, [r6] + teq r1, #0 + bne 1b +#else + ldr r0, [r6] + orr r0, r2 + str r0, [r6] +#endif + /* + * Registers at this point. + * r7 = new PCB + * r8 = current pcpu + * r11 = newtd + * They must match the ones in sw1 position !!! + */ + DMB + b sw1 /* share new thread init with cpu_switch() */ +END(cpu_throw) + +/* + * cpu_switch(oldtd, newtd, lock) + * + * Save the current thread state, then select the next thread to run + * and load its state. + * r0 = oldtd + * r1 = newtd + * r2 = lock (new lock for old thread) + */ +ENTRY(cpu_switch) + /* Interrupts are disabled. */ +#ifdef INVARIANTS + cmp r0, #0 /* old thread? 
*/ + beq badsw2 /* no, panic */ +#endif + /* Save all the registers in the old thread's pcb. */ + ldr r3, [r0, #(TD_PCB)] + add r3, #(PCB_R4) + stmia r3, {r4-r12, sp, lr, pc} + +#ifdef INVARIANTS + cmp r1, #0 /* new thread? */ + beq badsw3 /* no, panic */ +#endif + /* + * Save arguments. Note that we can now use r0-r14 until + * it is time to restore them for the new thread. However, + * some registers are not safe over function call. + */ + mov r9, r2 /* r9 = lock */ + mov r10, r0 /* r10 = oldtd */ + mov r11, r1 /* r11 = newtd */ + + GET_PCPU(r8, r3) /* r8 = current PCPU */ + ldr r7, [r11, #(TD_PCB)] /* r7 = newtd->td_pcb */ + + + +#ifdef VFP + ldr r3, [r10, #(TD_PCB)] + fmrx r0, fpexc /* If the VFP is enabled */ + tst r0, #(VFPEXC_EN) /* the current thread has */ + movne r1, #1 /* used it, so go save */ + addne r0, r3, #(PCB_VFPSTATE) /* the state into the PCB */ + blne _C_LABEL(vfp_store) /* and disable the VFP. */ +#endif + + /* + * MMU switch. If we're switching to a thread with the same + * address space as the outgoing one, we can skip the MMU switch. + */ + mrc CP15_TTBR0(r1) /* r1 = old TTB */ + ldr r0, [r7, #(PCB_PAGEDIR)] /* r0 = new TTB */ + cmp r0, r1 /* Switching to the TTB? */ + beq sw0 /* same TTB, skip */ + +#if 1 /* Lazy context switch */ + /* Don't switch mapping for kernel threads */ + ldr r1, =pmap_kern_ttb + ldr r1, [r1] /* r1 = kernel TTB */ + cmp r0, r1 /* Switching to kernel TTB? */ + beq sw0 /* kernel TTB, skip */ +#endif + +#ifdef INVARIANTS + cmp r0, #0 /* new thread? */ + beq badsw4 /* no, panic */ +#endif + + bl cpu_context_switch /* new TTB as argument */ + + /* + * Registers at this point + * r7 = new PCB + * r8 = current pcpu + * r9 = lock + * r10 = oldtd + * r11 = newtd + */ + + /* + * Set new PMAP as current one. + * Update active list on PMAPs. 
+ */ + ldr r6, [r11, #TD_PROC] /* newtd->proc */ + ldr r6, [r6, #P_VMSPACE] /* newtd->proc->vmspace */ + add r6, #VM_PMAP /* newtd->proc->vmspace->pmap */ + + ldr r5, [r8, #PC_CURPMAP] /* get old curpmap */ + str r6, [r8, #PC_CURPMAP] /* and save new one */ + + mov r0, #PM_ACTIVE + add r5, r0 /* r5 = old pm_active */ + add r6, r0 /* r6 = new pm_active */ + + /* Compute position and mask. */ + ldr r4, [r8, #PC_CPUID] +#if _NCPUWORDS > 1 + lsr r0, r4, #3 + bic r0, #3 + add r5, r0 /* r5 = position in old pm_active */ + add r6, r0 /* r6 = position in new pm_active */ + mov r2, #1 + and r0, r4, #31 + lsl r2, r0 /* r2 = mask */ +#else + mov r2, #1 + lsl r2, r4 /* r2 = mask */ +#endif + /* Clear cpu from old active list. */ +#ifdef SMP +1: ldrex r0, [r5] + bic r0, r2 + strex r1, r0, [r5] + teq r1, #0 + bne 1b +#else + ldr r0, [r5] + bic r0, r2 + str r0, [r5] +#endif + /* Set cpu to new active list. */ +#ifdef SMP +1: ldrex r0, [r6] + orr r0, r2 + strex r1, r0, [r6] + teq r1, #0 + bne 1b +#else + ldr r0, [r6] + orr r0, r2 + str r0, [r6] +#endif + +sw0: + /* + * Registers at this point + * r7 = new PCB + * r8 = current pcpu + * r9 = lock + * r10 = oldtd + * r11 = newtd + */ + + /* Change the old thread lock. */ + add r5, r10, #TD_LOCK + DMB +1: ldrex r0, [r5] + strex r1, r9, [r5] + teq r1, #0 + bne 1b + DMB + +sw1: + clrex + /* + * Registers at this point + * r7 = new PCB + * r8 = current pcpu + * r11 = newtd + */ + +#if defined(SMP) && defined(SCHED_ULE) + /* + * 386 and amd64 do the blocked lock test only for SMP and SCHED_ULE + * QQQ: What does it mean in reality and why is it done? 
+ */ + ldr r6, =blocked_lock +1: + ldr r3, [r11, #TD_LOCK] /* atomic write regular read */ + cmp r3, r6 + beq 1b +#endif + /* Set the new tls */ + ldr r0, [r11, #(TD_MD + MD_TP)] + mcr CP15_TPIDRURO(r0) /* write tls thread reg 2 */ + + /* We have a new curthread now so make a note of it */ + str r11, [r8, #PC_CURTHREAD] + mcr CP15_TPIDRPRW(r11) + + /* store pcb in per cpu structure */ + str r7, [r8, #PC_CURPCB] + + /* + * Restore all saved registers and return. Note that some saved + * registers can be changed when either cpu_fork(), cpu_set_upcall(), + * cpu_set_fork_handler(), or makectx() was called. + */ + add r3, r7, #PCB_R4 + ldmia r3, {r4-r12, sp, pc} + +#ifdef INVARIANTS +badsw1: + ldr r0, =sw1_panic_str + bl _C_LABEL(panic) +1: nop + b 1b + +badsw2: + ldr r0, =sw2_panic_str + bl _C_LABEL(panic) +1: nop + b 1b + +badsw3: + ldr r0, =sw3_panic_str + bl _C_LABEL(panic) +1: nop + b 1b + +badsw4: + ldr r0, =sw4_panic_str + bl _C_LABEL(panic) +1: nop + b 1b + +sw1_panic_str: + .asciz "cpu_throw: no newthread supplied.\n" +sw2_panic_str: + .asciz "cpu_switch: no curthread supplied.\n" +sw3_panic_str: + .asciz "cpu_switch: no newthread supplied.\n" +sw4_panic_str: + .asciz "cpu_switch: new pagedir is NULL.\n" +#endif +END(cpu_switch) + + +#endif /* !ARM_NEW_PMAP */ ENTRY(savectx) stmfd sp!, {lr} sub sp, sp, #4 /* Store all the registers in the thread's pcb */ add r3, r0, #(PCB_R4) stmia r3, {r4-r12, sp, lr, pc} #ifdef VFP fmrx r2, fpexc /* If the VFP is enabled */ tst r2, #(VFPEXC_EN) /* the current thread has */ movne r1, #1 /* used it, so go save */ addne r0, r0, #(PCB_VFPSTATE) /* the state into the PCB */ blne _C_LABEL(vfp_store) /* and disable the VFP. */ #endif add sp, sp, #4; ldmfd sp!, {pc} END(savectx) ENTRY(fork_trampoline) STOP_UNWINDING /* EABI: Don't unwind beyond the thread entry point. */ mov fp, #0 /* OABI: Stack traceback via fp stops here. 
*/ mov r2, sp mov r1, r5 mov r0, r4 ldr lr, =swi_exit /* Go finish forking, then return */ b _C_LABEL(fork_exit) /* to userland via swi_exit code. */ END(fork_trampoline) Index: head/sys/arm/include/machdep.h =================================================================== --- head/sys/arm/include/machdep.h (revision 280711) +++ head/sys/arm/include/machdep.h (revision 280712) @@ -1,40 +1,46 @@ /* $NetBSD: machdep.h,v 1.7 2002/02/21 02:52:21 thorpej Exp $ */ /* $FreeBSD$ */ #ifndef _MACHDEP_BOOT_MACHDEP_H_ #define _MACHDEP_BOOT_MACHDEP_H_ /* Structs that need to be initialised by initarm */ +#ifdef ARM_NEW_PMAP +extern vm_offset_t irqstack; +extern vm_offset_t undstack; +extern vm_offset_t abtstack; +#else struct pv_addr; extern struct pv_addr irqstack; extern struct pv_addr undstack; extern struct pv_addr abtstack; +#endif /* Define various stack sizes in pages */ #define IRQ_STACK_SIZE 1 #define ABT_STACK_SIZE 1 #define UND_STACK_SIZE 1 /* misc prototypes used by the many arm machdeps */ struct trapframe; void arm_lock_cache_line(vm_offset_t); void init_proc0(vm_offset_t kstack); void halt(void); void abort_handler(struct trapframe *, int ); void set_stackptrs(int cpu); void undefinedinstruction_bounce(struct trapframe *); /* Early boot related helper functions */ struct arm_boot_params; vm_offset_t default_parse_boot_param(struct arm_boot_params *abp); vm_offset_t freebsd_parse_boot_param(struct arm_boot_params *abp); vm_offset_t linux_parse_boot_param(struct arm_boot_params *abp); vm_offset_t fake_preload_metadata(struct arm_boot_params *abp); vm_offset_t parse_boot_param(struct arm_boot_params *abp); void arm_generic_initclocks(void); /* Board-specific attributes */ void board_set_serial(uint64_t); void board_set_revision(uint32_t); #endif /* !_MACHINE_MACHDEP_H_ */ Index: head/sys/arm/include/pcb.h =================================================================== --- head/sys/arm/include/pcb.h (revision 280711) +++ head/sys/arm/include/pcb.h (revision 
280712) @@ -1,82 +1,85 @@ /* $NetBSD: pcb.h,v 1.10 2003/10/13 21:46:39 scw Exp $ */ /*- * Copyright (c) 2001 Matt Thomas . * Copyright (c) 1994 Mark Brinicombe. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the RiscBSD team. * 4. The name "RiscBSD" nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY RISCBSD ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL RISCBSD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCB_H_ #define _MACHINE_PCB_H_ #include #include /* * WARNING! 
* Keep pcb_regs first for faster access in switch.S */ struct pcb { struct switchframe pcb_regs; /* CPU state */ u_int pcb_flags; #define PCB_OWNFPU 0x00000001 #define PCB_NOALIGNFLT 0x00000002 caddr_t pcb_onfault; /* On fault handler */ +#ifdef ARM_NEW_PMAP + uint32_t pcb_pagedir; /* TTB0 value */ +#else vm_offset_t pcb_pagedir; /* PT hooks */ uint32_t *pcb_pl1vec; /* PTR to vector_base L1 entry*/ uint32_t pcb_l1vec; /* Value to stuff on ctx sw */ u_int pcb_dacr; /* Domain Access Control Reg */ - +#endif struct vfp_state pcb_vfpstate; /* VP/NEON state */ u_int pcb_vfpcpu; /* VP/NEON last cpu */ } __aligned(8); /* * We need the PCB to be aligned on 8 bytes, as we may * access it using ldrd/strd, and ARM ABI require it * to by aligned on 8 bytes. */ /* * No additional data for core dumps. */ struct md_coredump { int md_empty; }; void makectx(struct trapframe *tf, struct pcb *pcb); #ifdef _KERNEL void savectx(struct pcb *) __returns_twice; #endif /* _KERNEL */ #endif /* !_MACHINE_PCB_H_ */ Index: head/sys/arm/include/pmap-v6.h =================================================================== --- head/sys/arm/include/pmap-v6.h (nonexistent) +++ head/sys/arm/include/pmap-v6.h (revision 280712) @@ -0,0 +1,313 @@ +/*- + * Copyright 2014 Svatopluk Kraus + * Copyright 2014 Michal Meloun + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The ARM version of this file was more or less based on the i386 version, + * which has the following provenance... + * + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. 
+ * + * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 + * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 + * from: FreeBSD: src/sys/i386/include/pmap.h,v 1.70 2000/11/30 + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_PMAP_H_ +#define _MACHINE_PMAP_H_ + +#include +#include +#include +#include + +typedef uint32_t pt1_entry_t; /* L1 table entry */ +typedef uint32_t pt2_entry_t; /* L2 table entry */ +typedef uint32_t ttb_entry_t; /* TTB entry */ + +#ifdef _KERNEL + +#if 0 +#define PMAP_PTE_NOCACHE // Use uncached page tables +#endif + +/* + * (1) During pmap bootstrap, physical pages for L2 page tables are + * allocated in advance which are used for KVA continuous mapping + * starting from KERNBASE. This makes things more simple. + * (2) During vm subsystem initialization, only vm subsystem itself can + * allocate physical memory safely. As pmap_map() is called during + * this initialization, we must be prepared for that and have some + * preallocated physical pages for L2 page tables. + * + * Note that some more pages for L2 page tables are preallocated too + * for mappings laying above VM_MAX_KERNEL_ADDRESS. + */ +#ifndef NKPT2PG +/* + * The optimal way is to define this in board configuration as + * definition here must be safe enough. It means really big. + * + * 1 GB KVA <=> 256 kernel L2 page table pages + * + * From real platforms: + * 1 GB physical memory <=> 10 pages is enough + * 2 GB physical memory <=> 21 pages is enough + */ +#define NKPT2PG 32 +#endif + +extern vm_paddr_t phys_avail[]; +extern vm_paddr_t dump_avail[]; +extern char *_tmppt; /* poor name! 
*/ +extern vm_offset_t virtual_avail; +extern vm_offset_t virtual_end; + +/* + * Pmap stuff + */ + +/* + * This structure is used to hold a virtual<->physical address + * association and is used mostly by bootstrap code + */ +struct pv_addr { + SLIST_ENTRY(pv_addr) pv_list; + vm_offset_t pv_va; + vm_paddr_t pv_pa; +}; +#endif +struct pv_entry; +struct pv_chunk; + +struct md_page { + TAILQ_HEAD(,pv_entry) pv_list; + uint16_t pt2_wirecount[4]; + int pat_mode; +}; + +struct pmap { + struct mtx pm_mtx; + pt1_entry_t *pm_pt1; /* KVA of pt1 */ + pt2_entry_t *pm_pt2tab; /* KVA of pt2 pages table */ + TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ + cpuset_t pm_active; /* active on cpus */ + struct pmap_statistics pm_stats; /* pmap statictics */ + LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ +}; + +typedef struct pmap *pmap_t; + +#ifdef _KERNEL +extern struct pmap kernel_pmap_store; +#define kernel_pmap (&kernel_pmap_store) + +#define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) +#define PMAP_LOCK_ASSERT(pmap, type) \ + mtx_assert(&(pmap)->pm_mtx, (type)) +#define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) +#define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ + NULL, MTX_DEF | MTX_DUPOK) +#define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx) +#define PMAP_MTX(pmap) (&(pmap)->pm_mtx) +#define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) +#define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) +#endif + +/* + * For each vm_page_t, there is a list of all currently valid virtual + * mappings of that page. An entry is a pv_entry_t, the list is pv_list. + */ +typedef struct pv_entry { + vm_offset_t pv_va; /* virtual address for mapping */ + TAILQ_ENTRY(pv_entry) pv_next; +} *pv_entry_t; + +/* + * pv_entries are allocated in chunks per-process. This avoids the + * need to track per-pmap assignments. 
+ */ +#define _NPCM 11 +#define _NPCPV 336 +struct pv_chunk { + pmap_t pc_pmap; + TAILQ_ENTRY(pv_chunk) pc_list; + uint32_t pc_map[_NPCM]; /* bitmap; 1 = free */ + TAILQ_ENTRY(pv_chunk) pc_lru; + struct pv_entry pc_pventry[_NPCPV]; +}; + +#ifdef _KERNEL +struct pcb; +extern ttb_entry_t pmap_kern_ttb; /* TTB for kernel pmap */ + +#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) +#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) + +/* + * Only the following functions or macros may be used before pmap_bootstrap() + * is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), vtophys(), and + * vtopte2(). + */ +void pmap_bootstrap(vm_offset_t ); +void pmap_kenter(vm_offset_t , vm_paddr_t ); +void *pmap_kenter_temporary(vm_paddr_t , int ); +void pmap_kremove(vm_offset_t); +void *pmap_mapdev(vm_paddr_t, vm_size_t); +void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); +boolean_t pmap_page_is_mapped(vm_page_t ); +void pmap_page_set_memattr(vm_page_t , vm_memattr_t ); +void pmap_unmapdev(vm_offset_t, vm_size_t); +void pmap_kenter_device(vm_offset_t , vm_paddr_t ); +void pmap_set_pcb_pagedir(pmap_t , struct pcb *); +void pmap_lazyfix_action(void); + +void pmap_tlb_flush(pmap_t , vm_offset_t ); +void pmap_tlb_flush_range(pmap_t , vm_offset_t , vm_size_t ); +void pmap_tlb_flush_ng(pmap_t ); + +void pmap_dcache_wb_range(vm_paddr_t , vm_size_t , vm_memattr_t ); + +vm_paddr_t pmap_kextract(vm_offset_t ); +int pmap_fault(pmap_t , vm_offset_t , uint32_t , int , int ); +#define vtophys(va) pmap_kextract((vm_offset_t)(va)) + +void pmap_set_tex(void); +void reinit_mmu(ttb_entry_t ttb, u_int aux_clr, u_int aux_set); + +/* + * Pre-bootstrap epoch functions set. 
+ */ +void pmap_bootstrap_prepare(vm_paddr_t ); +vm_paddr_t pmap_preboot_get_pages(u_int ); +void pmap_preboot_map_pages(vm_paddr_t , vm_offset_t , u_int ); +vm_offset_t pmap_preboot_reserve_pages(u_int ); +vm_offset_t pmap_preboot_get_vpages(u_int ); +void pmap_preboot_map_attr(vm_paddr_t , vm_offset_t , vm_size_t , + int , int ); +static __inline void +pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, + vm_size_t size, int prot, int cache) +{ + pmap_preboot_map_attr(pa, va, size, prot, cache); +} + +/* + * This structure is used by machine-dependent code to describe + * static mappings of devices, created at bootstrap time. + */ +struct pmap_devmap { + vm_offset_t pd_va; /* virtual address */ + vm_paddr_t pd_pa; /* physical address */ + vm_size_t pd_size; /* size of region */ + vm_prot_t pd_prot; /* protection code */ + int pd_cache; /* cache attributes */ +}; + +void pmap_devmap_bootstrap(const struct pmap_devmap *); + +#endif /* _KERNEL */ + +// ----------------- TO BE DELETED --------------------------------------------- +#include + +#ifdef _KERNEL + +/* + * sys/arm/arm/elf_trampoline.c + * sys/arm/arm/genassym.c + * sys/arm/arm/machdep.c + * sys/arm/arm/mp_machdep.c + * sys/arm/arm/locore.S + * sys/arm/arm/pmap.c + * sys/arm/arm/swtch.S + * sys/arm/at91/at91_machdep.c + * sys/arm/cavium/cns11xx/econa_machdep.c + * sys/arm/s3c2xx0/s3c24x0_machdep.c + * sys/arm/xscale/ixp425/avila_machdep.c + * sys/arm/xscale/i8134x/crb_machdep.c + * sys/arm/xscale/i80321/ep80219_machdep.c + * sys/arm/xscale/i80321/iq31244_machdep.c + * sys/arm/xscale/pxa/pxa_machdep.c + */ +#define PMAP_DOMAIN_KERNEL 0 /* The kernel uses domain #0 */ + +/* + * sys/arm/arm/busdma_machdep-v6.c + */ +int pmap_dmap_iscurrent(pmap_t pmap); + +/* + * sys/arm/arm/cpufunc.c + */ +void pmap_pte_init_mmu_v6(void); +void vector_page_setprot(int); + + +/* + * sys/arm/arm/db_interface.c + * sys/arm/arm/machdep.c + * sys/arm/arm/minidump_machdep.c + * sys/arm/arm/pmap.c + */ +#define 
pmap_kernel() kernel_pmap + +/* + * sys/arm/arm/bus_space_generic.c (just comment) + * sys/arm/arm/devmap.c + * sys/arm/arm/pmap.c (just comment) + * sys/arm/at91/at91_machdep.c + * sys/arm/cavium/cns11xx/econa_machdep.c + * sys/arm/freescale/imx/imx6_machdep.c (just comment) + * sys/arm/mv/orion/db88f5xxx.c + * sys/arm/mv/mv_localbus.c + * sys/arm/mv/mv_machdep.c + * sys/arm/mv/mv_pci.c + * sys/arm/s3c2xx0/s3c24x0_machdep.c + * sys/arm/versatile/versatile_machdep.c + * sys/arm/xscale/ixp425/avila_machdep.c + * sys/arm/xscale/i8134x/crb_machdep.c + * sys/arm/xscale/i80321/ep80219_machdep.c + * sys/arm/xscale/i80321/iq31244_machdep.c + * sys/arm/xscale/pxa/pxa_machdep.c + */ +#define PTE_DEVICE PTE2_ATTR_DEVICE + + + +#endif /* _KERNEL */ +// ----------------------------------------------------------------------------- + +#endif /* !_MACHINE_PMAP_H_ */ Property changes on: head/sys/arm/include/pmap-v6.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/arm/include/pmap.h =================================================================== --- head/sys/arm/include/pmap.h (revision 280711) +++ head/sys/arm/include/pmap.h (revision 280712) @@ -1,708 +1,712 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. 
* * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 * from: FreeBSD: src/sys/i386/include/pmap.h,v 1.70 2000/11/30 * * $FreeBSD$ */ +#ifdef ARM_NEW_PMAP +#include +#else /* ARM_NEW_PMAP */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ #include #include /* * Pte related macros */ #if ARM_ARCH_6 || ARM_ARCH_7A #ifdef SMP #define PTE_NOCACHE 2 #else #define PTE_NOCACHE 1 #endif #define PTE_CACHE 6 #define PTE_DEVICE 2 #define PTE_PAGETABLE 6 #else #define PTE_NOCACHE 1 #define PTE_CACHE 2 #define PTE_DEVICE PTE_NOCACHE #define PTE_PAGETABLE 3 #endif enum mem_type { STRONG_ORD = 0, DEVICE_NOSHARE, DEVICE_SHARE, NRML_NOCACHE, NRML_IWT_OWT, NRML_IWB_OWB, NRML_IWBA_OWBA }; #ifndef LOCORE #include #include #include #include #define PDESIZE sizeof(pd_entry_t) /* for assembly files */ #define PTESIZE sizeof(pt_entry_t) /* for assembly files */ #ifdef _KERNEL #define vtophys(va) pmap_kextract((vm_offset_t)(va)) #endif #define pmap_page_get_memattr(m) ((m)->md.pv_memattr) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) #if (ARM_MMU_V6 + ARM_MMU_V7) > 0 boolean_t pmap_page_is_mapped(vm_page_t); #else #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) #endif void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); /* * Pmap stuff */ /* * This structure is used to hold a virtual<->physical address * association and is used mostly by bootstrap code */ struct pv_addr { SLIST_ENTRY(pv_addr) pv_list; vm_offset_t pv_va; vm_paddr_t pv_pa; }; struct pv_entry; struct pv_chunk; struct md_page { int pvh_attrs; vm_memattr_t pv_memattr; #if (ARM_MMU_V6 + ARM_MMU_V7) == 0 vm_offset_t pv_kva; /* first kernel VA mapping */ #endif TAILQ_HEAD(,pv_entry) pv_list; }; struct l1_ttable; struct l2_dtable; /* * The number of L2 descriptor tables which can be tracked by an l2_dtable. * A bucket size of 16 provides for 16MB of contiguous virtual address * space per l2_dtable. 
Most processes will, therefore, require only two or * three of these to map their whole working set. */ #define L2_BUCKET_LOG2 4 #define L2_BUCKET_SIZE (1 << L2_BUCKET_LOG2) /* * Given the above "L2-descriptors-per-l2_dtable" constant, the number * of l2_dtable structures required to track all possible page descriptors * mappable by an L1 translation table is given by the following constants: */ #define L2_LOG2 ((32 - L1_S_SHIFT) - L2_BUCKET_LOG2) #define L2_SIZE (1 << L2_LOG2) struct pmap { struct mtx pm_mtx; u_int8_t pm_domain; struct l1_ttable *pm_l1; struct l2_dtable *pm_l2[L2_SIZE]; cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statictics */ #if (ARM_MMU_V6 + ARM_MMU_V7) != 0 TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ #else TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ #endif }; typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #define pmap_kernel() kernel_pmap #define PMAP_ASSERT_LOCKED(pmap) \ mtx_assert(&(pmap)->pm_mtx, MA_OWNED) #define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) #define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) #define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ NULL, MTX_DEF | MTX_DUPOK) #define PMAP_OWNED(pmap) mtx_owned(&(pmap)->pm_mtx) #define PMAP_MTX(pmap) (&(pmap)->pm_mtx) #define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) #define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) #endif /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_list. */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_list; int pv_flags; /* flags (wired, etc...) 
*/ #if (ARM_MMU_V6 + ARM_MMU_V7) == 0 pmap_t pv_pmap; /* pmap where mapping lies */ TAILQ_ENTRY(pv_entry) pv_plist; #endif } *pv_entry_t; /* * pv_entries are allocated in chunks per-process. This avoids the * need to track per-pmap assignments. */ #define _NPCM 8 #define _NPCPV 252 struct pv_chunk { pmap_t pc_pmap; TAILQ_ENTRY(pv_chunk) pc_list; uint32_t pc_map[_NPCM]; /* bitmap; 1 = free */ uint32_t pc_dummy[3]; /* aligns pv_chunk to 4KB */ TAILQ_ENTRY(pv_chunk) pc_lru; struct pv_entry pc_pventry[_NPCPV]; }; #ifdef _KERNEL boolean_t pmap_get_pde_pte(pmap_t, vm_offset_t, pd_entry_t **, pt_entry_t **); /* * virtual address to page table entry and * to physical address. Likewise for alternate address space. * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. */ /* * The current top of kernel VM. */ extern vm_offset_t pmap_curmaxkvaddr; struct pcb; void pmap_set_pcb_pagedir(pmap_t, struct pcb *); /* Virtual address to page table entry */ static __inline pt_entry_t * vtopte(vm_offset_t va) { pd_entry_t *pdep; pt_entry_t *ptep; if (pmap_get_pde_pte(pmap_kernel(), va, &pdep, &ptep) == FALSE) return (NULL); return (ptep); } extern vm_paddr_t phys_avail[]; extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; void pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt); int pmap_change_attr(vm_offset_t, vm_size_t, int); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa); void pmap_kenter_device(vm_offset_t va, vm_paddr_t pa); void *pmap_kenter_temporary(vm_paddr_t pa, int i); void pmap_kenter_user(vm_offset_t va, vm_paddr_t pa); vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); void *pmap_mapdev(vm_offset_t, vm_size_t); void pmap_unmapdev(vm_offset_t, vm_size_t); vm_page_t pmap_use_pt(pmap_t, vm_offset_t); void pmap_debug(int); #if (ARM_MMU_V6 + ARM_MMU_V7) == 0 void pmap_map_section(vm_offset_t, vm_offset_t, vm_offset_t, 
int, int); #endif void pmap_link_l2pt(vm_offset_t, vm_offset_t, struct pv_addr *); vm_size_t pmap_map_chunk(vm_offset_t, vm_offset_t, vm_offset_t, vm_size_t, int, int); void pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, int cache); int pmap_fault_fixup(pmap_t, vm_offset_t, vm_prot_t, int); int pmap_dmap_iscurrent(pmap_t pmap); /* * Definitions for MMU domains */ #define PMAP_DOMAINS 15 /* 15 'user' domains (1-15) */ #define PMAP_DOMAIN_KERNEL 0 /* The kernel uses domain #0 */ /* * The new pmap ensures that page-tables are always mapping Write-Thru. * Thus, on some platforms we can run fast and loose and avoid syncing PTEs * on every change. * * Unfortunately, not all CPUs have a write-through cache mode. So we * define PMAP_NEEDS_PTE_SYNC for C code to conditionally do PTE syncs, * and if there is the chance for PTE syncs to be needed, we define * PMAP_INCLUDE_PTE_SYNC so e.g. assembly code can include (and run) * the code. */ extern int pmap_needs_pte_sync; /* * These macros define the various bit masks in the PTE. * * We use these macros since we use different bits on different processor * models. 
*/ #define L1_S_CACHE_MASK_generic (L1_S_B|L1_S_C) #define L1_S_CACHE_MASK_xscale (L1_S_B|L1_S_C|L1_S_XSCALE_TEX(TEX_XSCALE_X)|\ L1_S_XSCALE_TEX(TEX_XSCALE_T)) #define L2_L_CACHE_MASK_generic (L2_B|L2_C) #define L2_L_CACHE_MASK_xscale (L2_B|L2_C|L2_XSCALE_L_TEX(TEX_XSCALE_X) | \ L2_XSCALE_L_TEX(TEX_XSCALE_T)) #define L2_S_PROT_U_generic (L2_AP(AP_U)) #define L2_S_PROT_W_generic (L2_AP(AP_W)) #define L2_S_PROT_MASK_generic (L2_S_PROT_U|L2_S_PROT_W) #define L2_S_PROT_U_xscale (L2_AP0(AP_U)) #define L2_S_PROT_W_xscale (L2_AP0(AP_W)) #define L2_S_PROT_MASK_xscale (L2_S_PROT_U|L2_S_PROT_W) #define L2_S_CACHE_MASK_generic (L2_B|L2_C) #define L2_S_CACHE_MASK_xscale (L2_B|L2_C|L2_XSCALE_T_TEX(TEX_XSCALE_X)| \ L2_XSCALE_T_TEX(TEX_XSCALE_X)) #define L1_S_PROTO_generic (L1_TYPE_S | L1_S_IMP) #define L1_S_PROTO_xscale (L1_TYPE_S) #define L1_C_PROTO_generic (L1_TYPE_C | L1_C_IMP2) #define L1_C_PROTO_xscale (L1_TYPE_C) #define L2_L_PROTO (L2_TYPE_L) #define L2_S_PROTO_generic (L2_TYPE_S) #define L2_S_PROTO_xscale (L2_TYPE_XSCALE_XS) /* * User-visible names for the ones that vary with MMU class. 
*/ #if (ARM_MMU_V6 + ARM_MMU_V7) != 0 #define L2_AP(x) (L2_AP0(x)) #else #define L2_AP(x) (L2_AP0(x) | L2_AP1(x) | L2_AP2(x) | L2_AP3(x)) #endif #if (ARM_MMU_V6 + ARM_MMU_V7) != 0 /* * AP[2:1] access permissions model: * * AP[2](APX) - Write Disable * AP[1] - User Enable * AP[0] - Reference Flag * * AP[2] AP[1] Kernel User * 0 0 R/W N * 0 1 R/W R/W * 1 0 R N * 1 1 R R * */ #define L2_S_PROT_R (0) /* kernel read */ #define L2_S_PROT_U (L2_AP0(2)) /* user read */ #define L2_S_REF (L2_AP0(1)) /* reference flag */ #define L2_S_PROT_MASK (L2_S_PROT_U|L2_S_PROT_R|L2_APX) #define L2_S_EXECUTABLE(pte) (!(pte & L2_XN)) #define L2_S_WRITABLE(pte) (!(pte & L2_APX)) #define L2_S_REFERENCED(pte) (!!(pte & L2_S_REF)) #ifndef SMP #define L1_S_CACHE_MASK (L1_S_TEX_MASK|L1_S_B|L1_S_C) #define L2_L_CACHE_MASK (L2_L_TEX_MASK|L2_B|L2_C) #define L2_S_CACHE_MASK (L2_S_TEX_MASK|L2_B|L2_C) #else #define L1_S_CACHE_MASK (L1_S_TEX_MASK|L1_S_B|L1_S_C|L1_SHARED) #define L2_L_CACHE_MASK (L2_L_TEX_MASK|L2_B|L2_C|L2_SHARED) #define L2_S_CACHE_MASK (L2_S_TEX_MASK|L2_B|L2_C|L2_SHARED) #endif /* SMP */ #define L1_S_PROTO (L1_TYPE_S) #define L1_C_PROTO (L1_TYPE_C) #define L2_S_PROTO (L2_TYPE_S) /* * Promotion to a 1MB (SECTION) mapping requires that the corresponding * 4KB (SMALL) page mappings have identical settings for the following fields: */ #define L2_S_PROMOTE (L2_S_REF | L2_SHARED | L2_S_PROT_MASK | \ L2_XN | L2_S_PROTO) /* * In order to compare 1MB (SECTION) entry settings with the 4KB (SMALL) * page mapping it is necessary to read and shift appropriate bits from * L1 entry to positions of the corresponding bits in the L2 entry. 
*/ #define L1_S_DEMOTE(l1pd) ((((l1pd) & L1_S_PROTO) >> 0) | \ (((l1pd) & L1_SHARED) >> 6) | \ (((l1pd) & L1_S_REF) >> 6) | \ (((l1pd) & L1_S_PROT_MASK) >> 6) | \ (((l1pd) & L1_S_XN) >> 4)) #ifndef SMP #define ARM_L1S_STRONG_ORD (0) #define ARM_L1S_DEVICE_NOSHARE (L1_S_TEX(2)) #define ARM_L1S_DEVICE_SHARE (L1_S_B) #define ARM_L1S_NRML_NOCACHE (L1_S_TEX(1)) #define ARM_L1S_NRML_IWT_OWT (L1_S_C) #define ARM_L1S_NRML_IWB_OWB (L1_S_C|L1_S_B) #define ARM_L1S_NRML_IWBA_OWBA (L1_S_TEX(1)|L1_S_C|L1_S_B) #define ARM_L2L_STRONG_ORD (0) #define ARM_L2L_DEVICE_NOSHARE (L2_L_TEX(2)) #define ARM_L2L_DEVICE_SHARE (L2_B) #define ARM_L2L_NRML_NOCACHE (L2_L_TEX(1)) #define ARM_L2L_NRML_IWT_OWT (L2_C) #define ARM_L2L_NRML_IWB_OWB (L2_C|L2_B) #define ARM_L2L_NRML_IWBA_OWBA (L2_L_TEX(1)|L2_C|L2_B) #define ARM_L2S_STRONG_ORD (0) #define ARM_L2S_DEVICE_NOSHARE (L2_S_TEX(2)) #define ARM_L2S_DEVICE_SHARE (L2_B) #define ARM_L2S_NRML_NOCACHE (L2_S_TEX(1)) #define ARM_L2S_NRML_IWT_OWT (L2_C) #define ARM_L2S_NRML_IWB_OWB (L2_C|L2_B) #define ARM_L2S_NRML_IWBA_OWBA (L2_S_TEX(1)|L2_C|L2_B) #else #define ARM_L1S_STRONG_ORD (0) #define ARM_L1S_DEVICE_NOSHARE (L1_S_TEX(2)) #define ARM_L1S_DEVICE_SHARE (L1_S_B) #define ARM_L1S_NRML_NOCACHE (L1_S_TEX(1)|L1_SHARED) #define ARM_L1S_NRML_IWT_OWT (L1_S_C|L1_SHARED) #define ARM_L1S_NRML_IWB_OWB (L1_S_C|L1_S_B|L1_SHARED) #define ARM_L1S_NRML_IWBA_OWBA (L1_S_TEX(1)|L1_S_C|L1_S_B|L1_SHARED) #define ARM_L2L_STRONG_ORD (0) #define ARM_L2L_DEVICE_NOSHARE (L2_L_TEX(2)) #define ARM_L2L_DEVICE_SHARE (L2_B) #define ARM_L2L_NRML_NOCACHE (L2_L_TEX(1)|L2_SHARED) #define ARM_L2L_NRML_IWT_OWT (L2_C|L2_SHARED) #define ARM_L2L_NRML_IWB_OWB (L2_C|L2_B|L2_SHARED) #define ARM_L2L_NRML_IWBA_OWBA (L2_L_TEX(1)|L2_C|L2_B|L2_SHARED) #define ARM_L2S_STRONG_ORD (0) #define ARM_L2S_DEVICE_NOSHARE (L2_S_TEX(2)) #define ARM_L2S_DEVICE_SHARE (L2_B) #define ARM_L2S_NRML_NOCACHE (L2_S_TEX(1)|L2_SHARED) #define ARM_L2S_NRML_IWT_OWT (L2_C|L2_SHARED) #define ARM_L2S_NRML_IWB_OWB 
(L2_C|L2_B|L2_SHARED) #define ARM_L2S_NRML_IWBA_OWBA (L2_S_TEX(1)|L2_C|L2_B|L2_SHARED) #endif /* SMP */ #elif ARM_NMMUS > 1 /* More than one MMU class configured; use variables. */ #define L2_S_PROT_U pte_l2_s_prot_u #define L2_S_PROT_W pte_l2_s_prot_w #define L2_S_PROT_MASK pte_l2_s_prot_mask #define L1_S_CACHE_MASK pte_l1_s_cache_mask #define L2_L_CACHE_MASK pte_l2_l_cache_mask #define L2_S_CACHE_MASK pte_l2_s_cache_mask #define L1_S_PROTO pte_l1_s_proto #define L1_C_PROTO pte_l1_c_proto #define L2_S_PROTO pte_l2_s_proto #elif ARM_MMU_GENERIC != 0 #define L2_S_PROT_U L2_S_PROT_U_generic #define L2_S_PROT_W L2_S_PROT_W_generic #define L2_S_PROT_MASK L2_S_PROT_MASK_generic #define L1_S_CACHE_MASK L1_S_CACHE_MASK_generic #define L2_L_CACHE_MASK L2_L_CACHE_MASK_generic #define L2_S_CACHE_MASK L2_S_CACHE_MASK_generic #define L1_S_PROTO L1_S_PROTO_generic #define L1_C_PROTO L1_C_PROTO_generic #define L2_S_PROTO L2_S_PROTO_generic #elif ARM_MMU_XSCALE == 1 #define L2_S_PROT_U L2_S_PROT_U_xscale #define L2_S_PROT_W L2_S_PROT_W_xscale #define L2_S_PROT_MASK L2_S_PROT_MASK_xscale #define L1_S_CACHE_MASK L1_S_CACHE_MASK_xscale #define L2_L_CACHE_MASK L2_L_CACHE_MASK_xscale #define L2_S_CACHE_MASK L2_S_CACHE_MASK_xscale #define L1_S_PROTO L1_S_PROTO_xscale #define L1_C_PROTO L1_C_PROTO_xscale #define L2_S_PROTO L2_S_PROTO_xscale #endif /* ARM_NMMUS > 1 */ #if defined(CPU_XSCALE_81342) || ARM_ARCH_6 || ARM_ARCH_7A #define PMAP_NEEDS_PTE_SYNC 1 #define PMAP_INCLUDE_PTE_SYNC #else #define PMAP_NEEDS_PTE_SYNC 0 #endif /* * These macros return various bits based on kernel/user and protection. * Note that the compiler will usually fold these at compile time. */ #if (ARM_MMU_V6 + ARM_MMU_V7) == 0 #define L1_S_PROT_U (L1_S_AP(AP_U)) #define L1_S_PROT_W (L1_S_AP(AP_W)) #define L1_S_PROT_MASK (L1_S_PROT_U|L1_S_PROT_W) #define L1_S_WRITABLE(pd) ((pd) & L1_S_PROT_W) #define L1_S_PROT(ku, pr) ((((ku) == PTE_USER) ? L1_S_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? 
L1_S_PROT_W : 0)) #define L2_L_PROT_U (L2_AP(AP_U)) #define L2_L_PROT_W (L2_AP(AP_W)) #define L2_L_PROT_MASK (L2_L_PROT_U|L2_L_PROT_W) #define L2_L_PROT(ku, pr) ((((ku) == PTE_USER) ? L2_L_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? L2_L_PROT_W : 0)) #define L2_S_PROT(ku, pr) ((((ku) == PTE_USER) ? L2_S_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? L2_S_PROT_W : 0)) #else #define L1_S_PROT_U (L1_S_AP(AP_U)) #define L1_S_PROT_W (L1_S_APX) /* Write disable */ #define L1_S_PROT_MASK (L1_S_PROT_W|L1_S_PROT_U) #define L1_S_REF (L1_S_AP(AP_REF)) /* Reference flag */ #define L1_S_WRITABLE(pd) (!((pd) & L1_S_PROT_W)) #define L1_S_EXECUTABLE(pd) (!((pd) & L1_S_XN)) #define L1_S_REFERENCED(pd) ((pd) & L1_S_REF) #define L1_S_PROT(ku, pr) (((((ku) == PTE_KERNEL) ? 0 : L1_S_PROT_U) | \ (((pr) & VM_PROT_WRITE) ? 0 : L1_S_PROT_W) | \ (((pr) & VM_PROT_EXECUTE) ? 0 : L1_S_XN))) #define L2_L_PROT_MASK (L2_APX|L2_AP0(0x3)) #define L2_L_PROT(ku, pr) (L2_L_PROT_MASK & ~((((ku) == PTE_KERNEL) ? L2_S_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? L2_APX : 0))) #define L2_S_PROT(ku, pr) (L2_S_PROT_MASK & ~((((ku) == PTE_KERNEL) ? L2_S_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? L2_APX : 0))) #endif /* * Macros to test if a mapping is mappable with an L1 Section mapping * or an L2 Large Page mapping. */ #define L1_S_MAPPABLE_P(va, pa, size) \ ((((va) | (pa)) & L1_S_OFFSET) == 0 && (size) >= L1_S_SIZE) #define L2_L_MAPPABLE_P(va, pa, size) \ ((((va) | (pa)) & L2_L_OFFSET) == 0 && (size) >= L2_L_SIZE) /* * Provide a fallback in case we were not able to determine it at * compile-time. 
*/ #ifndef PMAP_NEEDS_PTE_SYNC #define PMAP_NEEDS_PTE_SYNC pmap_needs_pte_sync #define PMAP_INCLUDE_PTE_SYNC #endif #ifdef ARM_L2_PIPT #define _sync_l2(pte, size) cpu_l2cache_wb_range(vtophys(pte), size) #else #define _sync_l2(pte, size) cpu_l2cache_wb_range(pte, size) #endif #define PTE_SYNC(pte) \ do { \ if (PMAP_NEEDS_PTE_SYNC) { \ cpu_dcache_wb_range((vm_offset_t)(pte), sizeof(pt_entry_t));\ cpu_drain_writebuf(); \ _sync_l2((vm_offset_t)(pte), sizeof(pt_entry_t));\ } else \ cpu_drain_writebuf(); \ } while (/*CONSTCOND*/0) #define PTE_SYNC_RANGE(pte, cnt) \ do { \ if (PMAP_NEEDS_PTE_SYNC) { \ cpu_dcache_wb_range((vm_offset_t)(pte), \ (cnt) << 2); /* * sizeof(pt_entry_t) */ \ cpu_drain_writebuf(); \ _sync_l2((vm_offset_t)(pte), \ (cnt) << 2); /* * sizeof(pt_entry_t) */ \ } else \ cpu_drain_writebuf(); \ } while (/*CONSTCOND*/0) extern pt_entry_t pte_l1_s_cache_mode; extern pt_entry_t pte_l1_s_cache_mask; extern pt_entry_t pte_l2_l_cache_mode; extern pt_entry_t pte_l2_l_cache_mask; extern pt_entry_t pte_l2_s_cache_mode; extern pt_entry_t pte_l2_s_cache_mask; extern pt_entry_t pte_l1_s_cache_mode_pt; extern pt_entry_t pte_l2_l_cache_mode_pt; extern pt_entry_t pte_l2_s_cache_mode_pt; extern pt_entry_t pte_l2_s_prot_u; extern pt_entry_t pte_l2_s_prot_w; extern pt_entry_t pte_l2_s_prot_mask; extern pt_entry_t pte_l1_s_proto; extern pt_entry_t pte_l1_c_proto; extern pt_entry_t pte_l2_s_proto; extern void (*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t); extern void (*pmap_copy_page_offs_func)(vm_paddr_t a_phys, vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs, int cnt); extern void (*pmap_zero_page_func)(vm_paddr_t, int, int); #if (ARM_MMU_GENERIC + ARM_MMU_V6 + ARM_MMU_V7) != 0 || defined(CPU_XSCALE_81342) void pmap_copy_page_generic(vm_paddr_t, vm_paddr_t); void pmap_zero_page_generic(vm_paddr_t, int, int); void pmap_pte_init_generic(void); #if defined(CPU_ARM9) void pmap_pte_init_arm9(void); #endif /* CPU_ARM9 */ #if defined(CPU_ARM10) void 
pmap_pte_init_arm10(void); #endif /* CPU_ARM10 */ #if (ARM_MMU_V6 + ARM_MMU_V7) != 0 void pmap_pte_init_mmu_v6(void); #endif /* (ARM_MMU_V6 + ARM_MMU_V7) != 0 */ #endif /* (ARM_MMU_GENERIC + ARM_MMU_V6 + ARM_MMU_V7) != 0 */ #if ARM_MMU_XSCALE == 1 void pmap_copy_page_xscale(vm_paddr_t, vm_paddr_t); void pmap_zero_page_xscale(vm_paddr_t, int, int); void pmap_pte_init_xscale(void); void xscale_setup_minidata(vm_offset_t, vm_offset_t, vm_offset_t); void pmap_use_minicache(vm_offset_t, vm_size_t); #endif /* ARM_MMU_XSCALE == 1 */ #if defined(CPU_XSCALE_81342) #define ARM_HAVE_SUPERSECTIONS #endif #define PTE_KERNEL 0 #define PTE_USER 1 #define l1pte_valid(pde) ((pde) != 0) #define l1pte_section_p(pde) (((pde) & L1_TYPE_MASK) == L1_TYPE_S) #define l1pte_page_p(pde) (((pde) & L1_TYPE_MASK) == L1_TYPE_C) #define l1pte_fpage_p(pde) (((pde) & L1_TYPE_MASK) == L1_TYPE_F) #define l2pte_index(v) (((v) & L2_ADDR_BITS) >> L2_S_SHIFT) #define l2pte_valid(pte) ((pte) != 0) #define l2pte_pa(pte) ((pte) & L2_S_FRAME) #define l2pte_minidata(pte) (((pte) & \ (L2_B | L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X)))\ == (L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X))) /* L1 and L2 page table macros */ #define pmap_pde_v(pde) l1pte_valid(*(pde)) #define pmap_pde_section(pde) l1pte_section_p(*(pde)) #define pmap_pde_page(pde) l1pte_page_p(*(pde)) #define pmap_pde_fpage(pde) l1pte_fpage_p(*(pde)) #define pmap_pte_v(pte) l2pte_valid(*(pte)) #define pmap_pte_pa(pte) l2pte_pa(*(pte)) /* * Flags that indicate attributes of pages or mappings of pages. * * The PVF_MOD and PVF_REF flags are stored in the mdpage for each * page. PVF_WIRED, PVF_WRITE, and PVF_NC are kept in individual * pv_entry's for each page. They live in the same "namespace" so * that we can clear multiple attributes at a time. * * Note the "non-cacheable" flag generally means the page has * multiple mappings in a given address space. 
*/ #define PVF_MOD 0x01 /* page is modified */ #define PVF_REF 0x02 /* page is referenced */ #define PVF_WIRED 0x04 /* mapping is wired */ #define PVF_WRITE 0x08 /* mapping is writable */ #define PVF_EXEC 0x10 /* mapping is executable */ #define PVF_NC 0x20 /* mapping is non-cacheable */ #define PVF_MWC 0x40 /* mapping is used multiple times in userland */ #define PVF_UNMAN 0x80 /* mapping is unmanaged */ void vector_page_setprot(int); #define SECTION_CACHE 0x1 #define SECTION_PT 0x2 void pmap_kenter_section(vm_offset_t, vm_paddr_t, int flags); #ifdef ARM_HAVE_SUPERSECTIONS void pmap_kenter_supersection(vm_offset_t, uint64_t, int flags); #endif extern char *_tmppt; void pmap_postinit(void); extern vm_paddr_t dump_avail[]; #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ +#endif /* !ARM_NEW_PMAP */ \ No newline at end of file Index: head/sys/arm/include/pmap_var.h =================================================================== --- head/sys/arm/include/pmap_var.h (nonexistent) +++ head/sys/arm/include/pmap_var.h (revision 280712) @@ -0,0 +1,511 @@ +/*- + * Copyright 2014 Svatopluk Kraus + * Copyright 2014 Michal Meloun + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_PMAP_VAR_H_ +#define _MACHINE_PMAP_VAR_H_ + +#include +/* + * Various PMAP defines, exports, and inline functions + * definitions also usable in other MD code. + */ + +/* A number of pages in L1 page table. */ +#define NPG_IN_PT1 (NB_IN_PT1 / PAGE_SIZE) + +/* A number of L2 page tables in a page. */ +#define NPT2_IN_PG (PAGE_SIZE / NB_IN_PT2) + +/* A number of L2 page table entries in a page. */ +#define NPTE2_IN_PG (NPT2_IN_PG * NPTE2_IN_PT2) + +#ifdef _KERNEL + +/* + * A L2 page tables page contains NPT2_IN_PG L2 page tables. Masking of + * pte1_idx by PT2PG_MASK gives us an index to associated L2 page table + * in a page. The PT2PG_SHIFT definition depends on NPT2_IN_PG strictly. + * I.e., (1 << PT2PG_SHIFT) == NPT2_IN_PG must be fulfilled. + */ +#define PT2PG_SHIFT 2 +#define PT2PG_MASK ((1 << PT2PG_SHIFT) - 1) + +/* + * A PT2TAB holds all allocated L2 page table pages in a pmap. + * Right shifting of virtual address by PT2TAB_SHIFT gives us an index + * to L2 page table page in PT2TAB which holds the address mapping. + */ +#define PT2TAB_ENTRIES (NPTE1_IN_PT1 / NPT2_IN_PG) +#define PT2TAB_SHIFT (PTE1_SHIFT + PT2PG_SHIFT) + +/* + * All allocated L2 page table pages in a pmap are mapped into PT2MAP space. + * An virtual address right shifting by PT2MAP_SHIFT gives us an index to PTE2 + * which maps the address. 
+ */ +#define PT2MAP_SIZE (NPTE1_IN_PT1 * NB_IN_PT2) +#define PT2MAP_SHIFT PTE2_SHIFT + +extern pt1_entry_t *kern_pt1; +extern pt2_entry_t *kern_pt2tab; +extern pt2_entry_t *PT2MAP; + +/* + * Virtual interface for L1 page table management. + */ + +static __inline u_int +pte1_index(vm_offset_t va) +{ + + return (va >> PTE1_SHIFT); +} + +static __inline pt1_entry_t * +pte1_ptr(pt1_entry_t *pt1, vm_offset_t va) +{ + + return (pt1 + pte1_index(va)); +} + +static __inline vm_offset_t +pte1_trunc(vm_offset_t va) +{ + + return (va & PTE1_FRAME); +} + +static __inline vm_offset_t +pte1_roundup(vm_offset_t va) +{ + + return ((va + PTE1_OFFSET) & PTE1_FRAME); +} + +/* + * Virtual interface for L1 page table entries management. + * + * XXX: Some of the following functions now with a synchronization barrier + * are called in a loop, so it could be useful to have two versions of them. + * One with the barrier and one without the barrier. In this case, pure + * barrier pte1_sync() should be implemented as well. 
+ */ +static __inline void +pte1_sync(pt1_entry_t *pte1p) +{ + + dsb(); +#ifndef PMAP_PTE_NOCACHE + if (!cpuinfo.coherent_walk) + dcache_wb_pou((vm_offset_t)pte1p, sizeof(*pte1p)); +#endif +} + +static __inline void +pte1_sync_range(pt1_entry_t *pte1p, vm_size_t size) +{ + + dsb(); +#ifndef PMAP_PTE_NOCACHE + if (!cpuinfo.coherent_walk) + dcache_wb_pou((vm_offset_t)pte1p, size); +#endif +} + +static __inline void +pte1_store(pt1_entry_t *pte1p, pt1_entry_t pte1) +{ + + atomic_store_rel_int(pte1p, pte1); + pte1_sync(pte1p); +} + +static __inline void +pte1_clear(pt1_entry_t *pte1p) +{ + + pte1_store(pte1p, 0); +} + +static __inline void +pte1_clear_bit(pt1_entry_t *pte1p, uint32_t bit) +{ + + atomic_clear_int(pte1p, bit); + pte1_sync(pte1p); +} + +static __inline boolean_t +pte1_cmpset(pt1_entry_t *pte1p, pt1_entry_t opte1, pt1_entry_t npte1) +{ + boolean_t ret; + + ret = atomic_cmpset_int(pte1p, opte1, npte1); + if (ret) pte1_sync(pte1p); + + return (ret); +} + +static __inline boolean_t +pte1_is_link(pt1_entry_t pte1) +{ + + return ((pte1 & L1_TYPE_MASK) == L1_TYPE_C); +} + +static __inline int +pte1_is_section(pt1_entry_t pte1) +{ + + return ((pte1 & L1_TYPE_MASK) == L1_TYPE_S); +} + +static __inline boolean_t +pte1_is_dirty(pt1_entry_t pte1) +{ + + return ((pte1 & (PTE1_NM | PTE1_RO)) == 0); +} + +static __inline boolean_t +pte1_is_global(pt1_entry_t pte1) +{ + + return ((pte1 & PTE1_NG) == 0); +} + +static __inline boolean_t +pte1_is_valid(pt1_entry_t pte1) +{ + int l1_type; + + l1_type = pte1 & L1_TYPE_MASK; + return ((l1_type == L1_TYPE_C) || (l1_type == L1_TYPE_S)); +} + +static __inline boolean_t +pte1_is_wired(pt1_entry_t pte1) +{ + + return (pte1 & PTE1_W); +} + +static __inline pt1_entry_t +pte1_load(pt1_entry_t *pte1p) +{ + pt1_entry_t pte1; + + pte1 = *pte1p; + return (pte1); +} + +static __inline pt1_entry_t +pte1_load_clear(pt1_entry_t *pte1p) +{ + pt1_entry_t opte1; + + opte1 = atomic_readandclear_int(pte1p); + pte1_sync(pte1p); + return (opte1); +} 
+ +static __inline void +pte1_set_bit(pt1_entry_t *pte1p, uint32_t bit) +{ + + atomic_set_int(pte1p, bit); + pte1_sync(pte1p); +} + +static __inline vm_paddr_t +pte1_pa(pt1_entry_t pte1) +{ + + return ((vm_paddr_t)(pte1 & PTE1_FRAME)); +} + +static __inline vm_paddr_t +pte1_link_pa(pt1_entry_t pte1) +{ + + return ((vm_paddr_t)(pte1 & L1_C_ADDR_MASK)); +} + +/* + * Virtual interface for L2 page table entries management. + * + * XXX: Some of the following functions now with a synchronization barrier + * are called in a loop, so it could be useful to have two versions of them. + * One with the barrier and one without the barrier. + */ + +static __inline void +pte2_sync(pt2_entry_t *pte2p) +{ + + dsb(); +#ifndef PMAP_PTE_NOCACHE + if (!cpuinfo.coherent_walk) + dcache_wb_pou((vm_offset_t)pte2p, sizeof(*pte2p)); +#endif +} + +static __inline void +pte2_sync_range(pt2_entry_t *pte2p, vm_size_t size) +{ + + dsb(); +#ifndef PMAP_PTE_NOCACHE + if (!cpuinfo.coherent_walk) + dcache_wb_pou((vm_offset_t)pte2p, size); +#endif +} + +static __inline void +pte2_store(pt2_entry_t *pte2p, pt2_entry_t pte2) +{ + + atomic_store_rel_int(pte2p, pte2); + pte2_sync(pte2p); +} + +static __inline void +pte2_clear(pt2_entry_t *pte2p) +{ + + pte2_store(pte2p, 0); +} + +static __inline void +pte2_clear_bit(pt2_entry_t *pte2p, uint32_t bit) +{ + + atomic_clear_int(pte2p, bit); + pte2_sync(pte2p); +} + +static __inline boolean_t +pte2_cmpset(pt2_entry_t *pte2p, pt2_entry_t opte2, pt2_entry_t npte2) +{ + boolean_t ret; + + ret = atomic_cmpset_int(pte2p, opte2, npte2); + if (ret) pte2_sync(pte2p); + + return (ret); +} + +static __inline boolean_t +pte2_is_dirty(pt2_entry_t pte2) +{ + + return ((pte2 & (PTE2_NM | PTE2_RO)) == 0); +} + +static __inline boolean_t +pte2_is_global(pt2_entry_t pte2) +{ + + return ((pte2 & PTE2_NG) == 0); +} + +static __inline boolean_t +pte2_is_valid(pt2_entry_t pte2) +{ + + return (pte2 & PTE2_V); +} + +static __inline boolean_t +pte2_is_wired(pt2_entry_t pte2) +{ + + 
return (pte2 & PTE2_W); +} + +static __inline pt2_entry_t +pte2_load(pt2_entry_t *pte2p) +{ + pt2_entry_t pte2; + + pte2 = *pte2p; + return (pte2); +} + +static __inline pt2_entry_t +pte2_load_clear(pt2_entry_t *pte2p) +{ + pt2_entry_t opte2; + + opte2 = atomic_readandclear_int(pte2p); + pte2_sync(pte2p); + return (opte2); +} + +static __inline void +pte2_set_bit(pt2_entry_t *pte2p, uint32_t bit) +{ + + atomic_set_int(pte2p, bit); + pte2_sync(pte2p); +} + +static __inline void +pte2_set_wired(pt2_entry_t *pte2p, boolean_t wired) +{ + + /* + * Wired bit is transparent for page table walk, + * so pte2_sync() is not needed. + */ + if (wired) + atomic_set_int(pte2p, PTE2_W); + else + atomic_clear_int(pte2p, PTE2_W); +} + +static __inline vm_paddr_t +pte2_pa(pt2_entry_t pte2) +{ + + return ((vm_paddr_t)(pte2 & PTE2_FRAME)); +} + +static __inline u_int +pte2_attr(pt2_entry_t pte2) +{ + + return ((u_int)(pte2 & PTE2_ATTR_MASK)); +} + +/* + * Virtual interface for L2 page tables mapping management. + */ + +static __inline u_int +pt2tab_index(vm_offset_t va) +{ + + return (va >> PT2TAB_SHIFT); +} + +static __inline pt2_entry_t * +pt2tab_entry(pt2_entry_t *pt2tab, vm_offset_t va) +{ + + return (pt2tab + pt2tab_index(va)); +} + +static __inline void +pt2tab_store(pt2_entry_t *pte2p, pt2_entry_t pte2) +{ + + pte2_store(pte2p,pte2); +} + +static __inline pt2_entry_t +pt2tab_load(pt2_entry_t *pte2p) +{ + + return (pte2_load(pte2p)); +} + +static __inline pt2_entry_t +pt2tab_load_clear(pt2_entry_t *pte2p) +{ + + return (pte2_load_clear(pte2p)); +} + +static __inline u_int +pt2map_index(vm_offset_t va) +{ + + return (va >> PT2MAP_SHIFT); +} + +static __inline pt2_entry_t * +pt2map_entry(vm_offset_t va) +{ + + return (PT2MAP + pt2map_index(va)); +} + +/* + * Virtual interface for pmap structure & kernel shortcuts. 
+ */ + +static __inline pt1_entry_t * +pmap_pte1(pmap_t pmap, vm_offset_t va) +{ + + return (pte1_ptr(pmap->pm_pt1, va)); +} + +static __inline pt1_entry_t * +kern_pte1(vm_offset_t va) +{ + + return (pte1_ptr(kern_pt1, va)); +} + +static __inline pt2_entry_t * +pmap_pt2tab_entry(pmap_t pmap, vm_offset_t va) +{ + + return (pt2tab_entry(pmap->pm_pt2tab, va)); +} + +static __inline pt2_entry_t * +kern_pt2tab_entry(vm_offset_t va) +{ + + return (pt2tab_entry(kern_pt2tab, va)); +} + +static __inline vm_page_t +pmap_pt2_page(pmap_t pmap, vm_offset_t va) +{ + pt2_entry_t pte2; + + pte2 = pte2_load(pmap_pt2tab_entry(pmap, va)); + return (PHYS_TO_VM_PAGE(pte2 & PTE2_FRAME)); +} + +static __inline vm_page_t +kern_pt2_page(vm_offset_t va) +{ + pt2_entry_t pte2; + + pte2 = pte2_load(kern_pt2tab_entry(va)); + return (PHYS_TO_VM_PAGE(pte2 & PTE2_FRAME)); +} + +#endif /* _KERNEL */ +#endif /* !_MACHINE_PMAP_VAR_H_ */ Property changes on: head/sys/arm/include/pmap_var.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/arm/include/pte-v6.h =================================================================== --- head/sys/arm/include/pte-v6.h (nonexistent) +++ head/sys/arm/include/pte-v6.h (revision 280712) @@ -0,0 +1,327 @@ +/*- + * Copyright 2014 Svatopluk Kraus + * Copyright 2014 Michal Meloun + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_PTE_H_ +#define _MACHINE_PTE_H_ + +/* + * Domain Types for the Domain Access Control Register. 
+ */ +#define DOMAIN_FAULT 0x00 /* no access */ +#define DOMAIN_CLIENT 0x01 /* client */ +#define DOMAIN_RESERVED 0x02 /* reserved */ +#define DOMAIN_MANAGER 0x03 /* manager */ + +/* + * TEX remap registers attributes + */ +#define PRRR_SO 0 /* Strongly ordered memory */ +#define PRRR_DEV 1 /* Device memory */ +#define PRRR_MEM 2 /* Normal memory */ +#define PRRR_DS0 (1 << 16) /* Shared bit for Device, S = 0 */ +#define PRRR_DS1 (1 << 17) /* Shared bit for Device, S = 1 */ +#define PRRR_NS0 (1 << 18) /* Shared bit for Normal, S = 0 */ +#define PRRR_NS1 (1 << 19) /* Shared bit for Normal, S = 1 */ +#define PRRR_NOS_SHIFT 24 /* base shift for Not Outer Shared bits */ + +#define NMRR_NC 0 /* Noncacheable */ +#define NMRR_WB_WA 1 /* Write Back, Write Allocate */ +#define NMRR_WT 2 /* Write Through, Non-Write Allocate */ +#define NMRR_WB 3 /* Write Back, Non-Write Allocate */ + +/* + * + * The ARM MMU is capable of mapping memory in the following chunks: + * + * 16M Supersections (L1 table) + * + * 1M Sections (L1 table) + * + * 64K Large Pages (L2 table) + * + * 4K Small Pages (L2 table) + * + * + * Coarse Tables can map Large and Small Pages. + * Coarse Tables are 1K in length. + * + * The Translation Table Base register holds the pointer to the + * L1 Table. The L1 Table is a 16K contiguous chunk of memory + * aligned to a 16K boundary. Each entry in the L1 Table maps + * 1M of virtual address space, either via a Section mapping or + * via an L2 Table. + * + */ +#define L1_TABLE_SIZE 0x4000 /* 16K */ +#define L1_ENTRIES 0x1000 /* 4K */ +#define L2_TABLE_SIZE 0x0400 /* 1K */ +#define L2_ENTRIES 0x0100 /* 256 */ + +/* ARMv6 super-sections.
*/ +#define L1_SUP_SIZE 0x01000000 /* 16M */ +#define L1_SUP_OFFSET (L1_SUP_SIZE - 1) +#define L1_SUP_FRAME (~L1_SUP_OFFSET) +#define L1_SUP_SHIFT 24 + +#define L1_S_SIZE 0x00100000 /* 1M */ +#define L1_S_OFFSET (L1_S_SIZE - 1) +#define L1_S_FRAME (~L1_S_OFFSET) +#define L1_S_SHIFT 20 + +#define L2_L_SIZE 0x00010000 /* 64K */ +#define L2_L_OFFSET (L2_L_SIZE - 1) +#define L2_L_FRAME (~L2_L_OFFSET) +#define L2_L_SHIFT 16 + +#define L2_S_SIZE 0x00001000 /* 4K */ +#define L2_S_OFFSET (L2_S_SIZE - 1) +#define L2_S_FRAME (~L2_S_OFFSET) +#define L2_S_SHIFT 12 + +/* + * ARM MMU L1 Descriptors + */ +#define L1_TYPE_INV 0x00 /* Invalid (fault) */ +#define L1_TYPE_C 0x01 /* Coarse L2 */ +#define L1_TYPE_S 0x02 /* Section */ +#define L1_TYPE_MASK 0x03 /* Mask of type bits */ + +/* L1 Section Descriptor */ +#define L1_S_B 0x00000004 /* bufferable Section */ +#define L1_S_C 0x00000008 /* cacheable Section */ +#define L1_S_NX 0x00000010 /* not executable */ +#define L1_S_DOM(x) ((x) << 5) /* domain */ +#define L1_S_DOM_MASK L1_S_DOM(0xf) +#define L1_S_P 0x00000200 /* ECC enable for this section */ +#define L1_S_AP(x) ((x) << 10) /* access permissions */ +#define L1_S_AP0 0x00000400 /* access permissions bit 0 */ +#define L1_S_AP1 0x00000800 /* access permissions bit 1 */ +#define L1_S_TEX(x) ((x) << 12) /* type extension */ +#define L1_S_TEX0 0x00001000 /* type extension bit 0 */ +#define L1_S_TEX1 0x00002000 /* type extension bit 1 */ +#define L1_S_TEX2 0x00004000 /* type extension bit 2 */ +#define L1_S_AP2 0x00008000 /* access permissions bit 2 */ +#define L1_S_SHARED 0x00010000 /* shared */ +#define L1_S_NG 0x00020000 /* not global */ +#define L1_S_SUPERSEC 0x00040000 /* Section is a super-section.
*/ +#define L1_S_ADDR_MASK 0xfff00000 /* phys address of section */ + +/* L1 Coarse Descriptor */ +#define L1_C_DOM(x) ((x) << 5) /* domain */ +#define L1_C_DOM_MASK L1_C_DOM(0xf) +#define L1_C_P 0x00000200 /* ECC enable for this section */ +#define L1_C_ADDR_MASK 0xfffffc00 /* phys address of L2 Table */ + +/* + * ARM MMU L2 Descriptors + */ +#define L2_TYPE_INV 0x00 /* Invalid (fault) */ +#define L2_TYPE_L 0x01 /* Large Page - 64k - not used yet*/ +#define L2_TYPE_S 0x02 /* Small Page - 4 */ +#define L2_TYPE_MASK 0x03 + +#define L2_NX 0x00000001 /* Not executable */ +#define L2_B 0x00000004 /* Bufferable page */ +#define L2_C 0x00000008 /* Cacheable page */ +#define L2_AP(x) ((x) << 4) +#define L2_AP0 0x00000010 /* access permissions bit 0*/ +#define L2_AP1 0x00000020 /* access permissions bit 1*/ +#define L2_TEX(x) ((x) << 6) /* type extension */ +#define L2_TEX0 0x00000040 /* type extension bit 0 */ +#define L2_TEX1 0x00000080 /* type extension bit 1 */ +#define L2_TEX2 0x00000100 /* type extension bit 2 */ +#define L2_AP2 0x00000200 /* access permissions bit 2*/ +#define L2_SHARED 0x00000400 /* shared */ +#define L2_NG 0x00000800 /* not global */ + +/* + * TEX classes encoding + */ +#define TEX1_CLASS_0 ( 0) +#define TEX1_CLASS_1 ( L1_S_B) +#define TEX1_CLASS_2 ( L1_S_C ) +#define TEX1_CLASS_3 ( L1_S_C | L1_S_B) +#define TEX1_CLASS_4 (L1_S_TEX0 ) +#define TEX1_CLASS_5 (L1_S_TEX0 | L1_S_B) +#define TEX1_CLASS_6 (L1_S_TEX0 | L1_S_C ) /* Reserved for ARM11 */ +#define TEX1_CLASS_7 (L1_S_TEX0 | L1_S_C | L1_S_B) + +#define TEX2_CLASS_0 ( 0) +#define TEX2_CLASS_1 ( L2_B) +#define TEX2_CLASS_2 ( L2_C ) +#define TEX2_CLASS_3 ( L2_C | L2_B) +#define TEX2_CLASS_4 (L2_TEX0 ) +#define TEX2_CLASS_5 (L2_TEX0 | L2_B) +#define TEX2_CLASS_6 (L2_TEX0 | L2_C ) /* Reserved for ARM11 */ +#define TEX2_CLASS_7 (L2_TEX0 | L2_C | L2_B) + +/* L1 table definitions. */ +#define NB_IN_PT1 L1_TABLE_SIZE +#define NPTE1_IN_PT1 L1_ENTRIES + +/* L2 table definitions. 
*/ +#define NB_IN_PT2 L2_TABLE_SIZE +#define NPTE2_IN_PT2 L2_ENTRIES + +/* + * Map memory attributes to TEX classes + */ +#define PTE2_ATTR_WB_WA TEX2_CLASS_0 +#define PTE2_ATTR_NOCACHE TEX2_CLASS_1 +#define PTE2_ATTR_DEVICE TEX2_CLASS_2 +#define PTE2_ATTR_SO TEX2_CLASS_3 +/* + * Software defined bits for L1 descriptors + * - L1_AP0 is used as page accessed bit + * - L1_AP2 (RO / not RW) is used as page not modified bit + * - L1_TEX0 is used as software emulated RO bit + */ +#define PTE1_V L1_TYPE_S /* Valid bit */ +#define PTE1_A L1_S_AP0 /* Accessed - software emulated */ +#define PTE1_NM L1_S_AP2 /* not modified bit - software emulated + * used as real write enable bit */ +#define PTE1_M 0 /* Modified (dummy) */ +#define PTE1_S L1_S_SHARED /* Shared */ +#define PTE1_NG L1_S_NG /* Not global */ +#define PTE1_G 0 /* Global (dummy) */ +#define PTE1_NX L1_S_NX /* Not executable */ +#define PTE1_X 0 /* Executable (dummy) */ +#define PTE1_RO L1_S_TEX1 /* Read Only */ +#define PTE1_RW 0 /* Read-Write (dummy) */ +#define PTE1_U L1_S_AP1 /* User */ +#define PTE1_NU 0 /* Not user (kernel only) (dummy) */ +#define PTE1_W L1_S_TEX2 /* Wired */ + +#define PTE1_SHIFT L1_S_SHIFT +#define PTE1_SIZE L1_S_SIZE +#define PTE1_OFFSET L1_S_OFFSET +#define PTE1_FRAME L1_S_FRAME + +#define PTE1_ATTR_MASK (L1_S_TEX0 | L1_S_C | L1_S_B) + +#define PTE1_AP_KR (PTE1_RO | PTE1_NM) +#define PTE1_AP_KRW 0 +#define PTE1_AP_KRUR (PTE1_RO | PTE1_NM | PTE1_U) +#define PTE1_AP_KRWURW PTE1_U + +/* + * PTE1 descriptors creation macros. 
+ */ +#define PTE1_PA(pa) ((pa) & PTE1_FRAME) +#define PTE1_AP_COMMON (PTE1_V | PTE1_S) + +#define PTE1(pa, ap, attr) (PTE1_PA(pa) | (ap) | (attr) | PTE1_AP_COMMON) + +#define PTE1_KERN(pa, ap, attr) PTE1(pa, (ap) | PTE1_A | PTE1_G, attr) +#define PTE1_KERN_NG(pa, ap, attr) PTE1(pa, (ap) | PTE1_A | PTE1_NG, attr) + +#define PTE1_LINK(pa) (((pa) & L1_C_ADDR_MASK) | L1_TYPE_C) + +/* + * Software defined bits for L2 descriptors + * - L2_AP0 is used as page accessed bit + * - L2_AP2 (RO / not RW) is used as page not modified bit + * - L2_TEX0 is used as software emulated RO bit + */ +#define PTE2_V L2_TYPE_S /* Valid bit */ +#define PTE2_A L2_AP0 /* Accessed - software emulated */ +#define PTE2_NM L2_AP2 /* not modified bit - software emulated + * used as real write enable bit */ +#define PTE2_M 0 /* Modified (dummy) */ +#define PTE2_S L2_SHARED /* Shared */ +#define PTE2_NG L2_NG /* Not global */ +#define PTE2_G 0 /* Global (dummy) */ +#define PTE2_NX L2_NX /* Not executable */ +#define PTE2_X 0 /* Not executable (dummy) */ +#define PTE2_RO L2_TEX1 /* Read Only */ +#define PTE2_U L2_AP1 /* User */ +#define PTE2_NU 0 /* Not user (kernel only) (dummy) */ +#define PTE2_W L2_TEX2 /* Wired */ + +#define PTE2_SHIFT L2_S_SHIFT +#define PTE2_SIZE L2_S_SIZE +#define PTE2_OFFSET L2_S_OFFSET +#define PTE2_FRAME L2_S_FRAME + +#define PTE2_ATTR_MASK (L2_TEX0 | L2_C | L2_B) + +#define PTE2_AP_KR (PTE2_RO | PTE2_NM) +#define PTE2_AP_KRW 0 +#define PTE2_AP_KRUR (PTE2_RO | PTE2_NM | PTE2_U) +#define PTE2_AP_KRWURW PTE2_U + +/* + * PTE2 descriptors creation macros. 
+ */ +#define PTE2_PA(pa) ((pa) & PTE2_FRAME) +#define PTE2_AP_COMMON (PTE2_V | PTE2_S) + +#define PTE2(pa, ap, attr) (PTE2_PA(pa) | (ap) | (attr) | PTE2_AP_COMMON) + +#define PTE2_KERN(pa, ap, attr) PTE2(pa, (ap) | PTE2_A | PTE2_G, attr) +#define PTE2_KERN_NG(pa, ap, attr) PTE2(pa, (ap) | PTE2_A | PTE2_NG, attr) + + +// ----------------- TO BE DELETED --------------------------------------------- + +/* + * sys/arm/arm/elf_trampoline.c + */ +#define AP_KRW 0x01 /* kernel read/write */ + +/* + * lib/libkvm/kvm_arm.c + */ +#define L1_ADDR_MASK 0xfffffc00 + +/* + * lib/libkvm/kvm_arm.c + */ +#define L2_ADDR_BITS 0x000ff000 /* L2 PTE address bits */ + +#ifndef LOCORE +/* + * sys/arm/arm/minidump_machdep.c + * sys/arm/arm/pmap.c + * sys/arm/arm/pmap.h (hack for our hack in pmap.h ) + * lib/libkvm/kvm_arm.c + */ +typedef uint32_t pd_entry_t; /* page directory entry */ + +/* + * sys/arm/arm/minidump_machdep.c + * sys/arm/arm/pmap.c + * sys/arm/arm/pmap.h (hack for our hack in pmap.h ) + * sys/arm/include/param.h + */ +typedef uint32_t pt_entry_t; /* page table entry */ +#endif +// ----------------------------------------------------------------------------- + +#endif /* !_MACHINE_PTE_H_ */ Property changes on: head/sys/arm/include/pte-v6.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/arm/include/pte.h =================================================================== --- head/sys/arm/include/pte.h (revision 280711) +++ head/sys/arm/include/pte.h (revision 280712) @@ -1,356 +1,360 @@ /* $NetBSD: pte.h,v 1.1 2001/11/23 17:39:04 thorpej Exp $ */ /*- * Copyright (c) 1994 Mark Brinicombe. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the RiscBSD team. * 4. The name "RiscBSD" nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY RISCBSD ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL RISCBSD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ +#ifdef ARM_NEW_PMAP +#include +#else /* ARM_NEW_PMAP */ #ifndef _MACHINE_PTE_H_ #define _MACHINE_PTE_H_ #ifndef LOCORE typedef uint32_t pd_entry_t; /* page directory entry */ typedef uint32_t pt_entry_t; /* page table entry */ #endif #define PG_FRAME 0xfffff000 /* The PT_SIZE definition is misleading... A page table is only 0x400 * bytes long. 
But since VM mapping can only be done to 0x1000 a single * 1KB blocks cannot be steered to a va by itself. Therefore the * pages tables are allocated in blocks of 4. i.e. if a 1 KB block * was allocated for a PT then the other 3KB would also get mapped * whenever the 1KB was mapped. */ #define PT_RSIZE 0x0400 /* Real page table size */ #define PT_SIZE 0x1000 #define PD_SIZE 0x4000 /* Page table types and masks */ #define L1_PAGE 0x01 /* L1 page table mapping */ #define L1_SECTION 0x02 /* L1 section mapping */ #define L1_FPAGE 0x03 /* L1 fine page mapping */ #define L1_MASK 0x03 /* Mask for L1 entry type */ #define L2_LPAGE 0x01 /* L2 large page (64KB) */ #define L2_SPAGE 0x02 /* L2 small page (4KB) */ #define L2_MASK 0x03 /* Mask for L2 entry type */ #define L2_INVAL 0x00 /* L2 invalid type */ /* L1 and L2 address masks */ #define L1_ADDR_MASK 0xfffffc00 #define L2_ADDR_MASK 0xfffff000 /* * The ARM MMU architecture was introduced with ARM v3 (previous ARM * architecture versions used an optional off-CPU memory controller * to perform address translation). * * The ARM MMU consists of a TLB and translation table walking logic. * There is typically one TLB per memory interface (or, put another * way, one TLB per software-visible cache). * * The ARM MMU is capable of mapping memory in the following chunks: * * 1M Sections (L1 table) * * 64K Large Pages (L2 table) * * 4K Small Pages (L2 table) * * 1K Tiny Pages (L2 table) * * There are two types of L2 tables: Coarse Tables and Fine Tables. * Coarse Tables can map Large and Small Pages. Fine Tables can * map Tiny Pages. * * Coarse Tables can define 4 Subpages within Large and Small pages. * Subpages define different permissions for each Subpage within * a Page. * * Coarse Tables are 1K in length. Fine tables are 4K in length. * * The Translation Table Base register holds the pointer to the * L1 Table. The L1 Table is a 16K contiguous chunk of memory * aligned to a 16K boundary. 
Each entry in the L1 Table maps * 1M of virtual address space, either via a Section mapping or * via an L2 Table. * * In addition, the Fast Context Switching Extension (FCSE) is available * on some ARM v4 and ARM v5 processors. FCSE is a way of eliminating * TLB/cache flushes on context switch by use of a smaller address space * and a "process ID" that modifies the virtual address before being * presented to the translation logic. */ /* ARMv6 super-sections. */ #define L1_SUP_SIZE 0x01000000 /* 16M */ #define L1_SUP_OFFSET (L1_SUP_SIZE - 1) #define L1_SUP_FRAME (~L1_SUP_OFFSET) #define L1_SUP_SHIFT 24 #define L1_S_SIZE 0x00100000 /* 1M */ #define L1_S_OFFSET (L1_S_SIZE - 1) #define L1_S_FRAME (~L1_S_OFFSET) #define L1_S_SHIFT 20 #define L2_L_SIZE 0x00010000 /* 64K */ #define L2_L_OFFSET (L2_L_SIZE - 1) #define L2_L_FRAME (~L2_L_OFFSET) #define L2_L_SHIFT 16 #define L2_S_SIZE 0x00001000 /* 4K */ #define L2_S_OFFSET (L2_S_SIZE - 1) #define L2_S_FRAME (~L2_S_OFFSET) #define L2_S_SHIFT 12 #define L2_T_SIZE 0x00000400 /* 1K */ #define L2_T_OFFSET (L2_T_SIZE - 1) #define L2_T_FRAME (~L2_T_OFFSET) #define L2_T_SHIFT 10 /* * The NetBSD VM implementation only works on whole pages (4K), * whereas the ARM MMU's Coarse tables are sized in terms of 1K * (16K L1 table, 1K L2 table). * * So, we allocate L2 tables 4 at a time, thus yielding a 4K L2 * table. */ #define L1_ADDR_BITS 0xfff00000 /* L1 PTE address bits */ #define L2_ADDR_BITS 0x000ff000 /* L2 PTE address bits */ #define L1_TABLE_SIZE 0x4000 /* 16K */ #define L2_TABLE_SIZE 0x1000 /* 4K */ /* * The new pmap deals with the 1KB coarse L2 tables by * allocating them from a pool. Until every port has been converted, * keep the old L2_TABLE_SIZE define lying around. Converted ports * should use L2_TABLE_SIZE_REAL until then. 
*/ #define L2_TABLE_SIZE_REAL 0x400 /* 1K */ /* Total number of page table entries in L2 table */ #define L2_PTE_NUM_TOTAL (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)) /* * ARM L1 Descriptors */ #define L1_TYPE_INV 0x00 /* Invalid (fault) */ #define L1_TYPE_C 0x01 /* Coarse L2 */ #define L1_TYPE_S 0x02 /* Section */ #define L1_TYPE_F 0x03 /* Fine L2 */ #define L1_TYPE_MASK 0x03 /* mask of type bits */ /* L1 Section Descriptor */ #define L1_S_B 0x00000004 /* bufferable Section */ #define L1_S_C 0x00000008 /* cacheable Section */ #define L1_S_IMP 0x00000010 /* implementation defined */ #define L1_S_XN (1 << 4) /* execute not */ #define L1_S_DOM(x) ((x) << 5) /* domain */ #define L1_S_DOM_MASK L1_S_DOM(0xf) #define L1_S_AP(x) ((x) << 10) /* access permissions */ #define L1_S_ADDR_MASK 0xfff00000 /* phys address of section */ #define L1_S_TEX(x) (((x) & 0x7) << 12) /* Type Extension */ #define L1_S_TEX_MASK (0x7 << 12) /* Type Extension */ #define L1_S_APX (1 << 15) #define L1_SHARED (1 << 16) #define L1_S_XSCALE_P 0x00000200 /* ECC enable for this section */ #define L1_S_XSCALE_TEX(x) ((x) << 12) /* Type Extension */ #define L1_S_SUPERSEC ((1) << 18) /* Section is a super-section. 
*/ /* L1 Coarse Descriptor */ #define L1_C_IMP0 0x00000004 /* implementation defined */ #define L1_C_IMP1 0x00000008 /* implementation defined */ #define L1_C_IMP2 0x00000010 /* implementation defined */ #define L1_C_DOM(x) ((x) << 5) /* domain */ #define L1_C_DOM_MASK L1_C_DOM(0xf) #define L1_C_ADDR_MASK 0xfffffc00 /* phys address of L2 Table */ #define L1_C_XSCALE_P 0x00000200 /* ECC enable for this section */ /* L1 Fine Descriptor */ #define L1_F_IMP0 0x00000004 /* implementation defined */ #define L1_F_IMP1 0x00000008 /* implementation defined */ #define L1_F_IMP2 0x00000010 /* implementation defined */ #define L1_F_DOM(x) ((x) << 5) /* domain */ #define L1_F_DOM_MASK L1_F_DOM(0xf) #define L1_F_ADDR_MASK 0xfffff000 /* phys address of L2 Table */ #define L1_F_XSCALE_P 0x00000200 /* ECC enable for this section */ /* * ARM L2 Descriptors */ #define L2_TYPE_INV 0x00 /* Invalid (fault) */ #define L2_TYPE_L 0x01 /* Large Page */ #define L2_TYPE_S 0x02 /* Small Page */ #define L2_TYPE_T 0x03 /* Tiny Page */ #define L2_TYPE_MASK 0x03 /* mask of type bits */ /* * This L2 Descriptor type is available on XScale processors * when using a Coarse L1 Descriptor. The Extended Small * Descriptor has the same format as the XScale Tiny Descriptor, * but describes a 4K page, rather than a 1K page. 
*/ #define L2_TYPE_XSCALE_XS 0x03 /* XScale Extended Small Page */ #define L2_B 0x00000004 /* Bufferable page */ #define L2_C 0x00000008 /* Cacheable page */ #define L2_AP0(x) ((x) << 4) /* access permissions (sp 0) */ #define L2_AP1(x) ((x) << 6) /* access permissions (sp 1) */ #define L2_AP2(x) ((x) << 8) /* access permissions (sp 2) */ #define L2_AP3(x) ((x) << 10) /* access permissions (sp 3) */ #define L2_SHARED (1 << 10) #define L2_APX (1 << 9) #define L2_XN (1 << 0) #define L2_L_TEX_MASK (0x7 << 12) /* Type Extension */ #define L2_L_TEX(x) (((x) & 0x7) << 12) #define L2_S_TEX_MASK (0x7 << 6) /* Type Extension */ #define L2_S_TEX(x) (((x) & 0x7) << 6) #define L2_XSCALE_L_TEX(x) ((x) << 12) /* Type Extension */ #define L2_XSCALE_L_S(x) (1 << 15) /* Shared */ #define L2_XSCALE_T_TEX(x) ((x) << 6) /* Type Extension */ /* * Access Permissions for L1 and L2 Descriptors. */ #define AP_W 0x01 /* writable */ #define AP_REF 0x01 /* referenced flag */ #define AP_U 0x02 /* user */ /* * Short-hand for common AP_* constants. * * Note: These values assume the S (System) bit is set and * the R (ROM) bit is clear in CP15 register 1. */ #define AP_KR 0x00 /* kernel read */ #define AP_KRW 0x01 /* kernel read/write */ #define AP_KRWUR 0x02 /* kernel read/write usr read */ #define AP_KRWURW 0x03 /* kernel read/write usr read/write */ /* * Domain Types for the Domain Access Control Register. */ #define DOMAIN_FAULT 0x00 /* no access */ #define DOMAIN_CLIENT 0x01 /* client */ #define DOMAIN_RESERVED 0x02 /* reserved */ #define DOMAIN_MANAGER 0x03 /* manager */ /* * Type Extension bits for XScale processors. 
* * Behavior of C and B when X == 0: * * C B Cacheable Bufferable Write Policy Line Allocate Policy * 0 0 N N - - * 0 1 N Y - - * 1 0 Y Y Write-through Read Allocate * 1 1 Y Y Write-back Read Allocate * * Behavior of C and B when X == 1: * C B Cacheable Bufferable Write Policy Line Allocate Policy * 0 0 - - - - DO NOT USE * 0 1 N Y - - * 1 0 Mini-Data - - - * 1 1 Y Y Write-back R/W Allocate */ #define TEX_XSCALE_X 0x01 /* X modifies C and B */ #define TEX_XSCALE_E 0x02 #define TEX_XSCALE_T 0x04 /* Xscale core 3 */ /* * * Cache attributes with L2 present, S = 0 * T E X C B L1 i-cache L1 d-cache L1 DC WP L2 cacheable write coalesce * 0 0 0 0 0 N N - N N * 0 0 0 0 1 N N - N Y * 0 0 0 1 0 Y Y WT N Y * 0 0 0 1 1 Y Y WB Y Y * 0 0 1 0 0 N N - Y Y * 0 0 1 0 1 N N - N N * 0 0 1 1 0 Y Y - - N * 0 0 1 1 1 Y Y WT Y Y * 0 1 0 0 0 N N - N N * 0 1 0 0 1 N/A N/A N/A N/A N/A * 0 1 0 1 0 N/A N/A N/A N/A N/A * 0 1 0 1 1 N/A N/A N/A N/A N/A * 0 1 1 X X N/A N/A N/A N/A N/A * 1 X 0 0 0 N N - N Y * 1 X 0 0 1 Y N WB N Y * 1 X 0 1 0 Y N WT N Y * 1 X 0 1 1 Y N WB Y Y * 1 X 1 0 0 N N - Y Y * 1 X 1 0 1 Y Y WB Y Y * 1 X 1 1 0 Y Y WT Y Y * 1 X 1 1 1 Y Y WB Y Y * * * * * Cache attributes with L2 present, S = 1 * T E X C B L1 i-cache L1 d-cache L1 DC WP L2 cacheable write coalesce * 0 0 0 0 0 N N - N N * 0 0 0 0 1 N N - N Y * 0 0 0 1 0 Y Y - N Y * 0 0 0 1 1 Y Y WT Y Y * 0 0 1 0 0 N N - Y Y * 0 0 1 0 1 N N - N N * 0 0 1 1 0 Y Y - - N * 0 0 1 1 1 Y Y WT Y Y * 0 1 0 0 0 N N - N N * 0 1 0 0 1 N/A N/A N/A N/A N/A * 0 1 0 1 0 N/A N/A N/A N/A N/A * 0 1 0 1 1 N/A N/A N/A N/A N/A * 0 1 1 X X N/A N/A N/A N/A N/A * 1 X 0 0 0 N N - N Y * 1 X 0 0 1 Y N - N Y * 1 X 0 1 0 Y N - N Y * 1 X 0 1 1 Y N - Y Y * 1 X 1 0 0 N N - Y Y * 1 X 1 0 1 Y Y WT Y Y * 1 X 1 1 0 Y Y WT Y Y * 1 X 1 1 1 Y Y WT Y Y */ #endif /* !_MACHINE_PTE_H_ */ +#endif /* !ARM_NEW_PMAP */ /* End of pte.h */ Index: head/sys/arm/include/sf_buf.h =================================================================== --- head/sys/arm/include/sf_buf.h 
(revision 280711) +++ head/sys/arm/include/sf_buf.h (revision 280712) @@ -1,46 +1,50 @@ /*- * Copyright (c) 2003 Alan L. Cox * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ static inline void sf_buf_map(struct sf_buf *sf, int flags) { +#ifdef ARM_NEW_PMAP + pmap_qenter(sf->kva, &(sf->m), 1); +#else pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m)); +#endif } static inline int sf_buf_unmap(struct sf_buf *sf) { pmap_kremove(sf->kva); return (1); } #endif /* !_MACHINE_SF_BUF_H_ */ Index: head/sys/arm/include/smp.h =================================================================== --- head/sys/arm/include/smp.h (revision 280711) +++ head/sys/arm/include/smp.h (revision 280712) @@ -1,40 +1,42 @@ /* $FreeBSD$ */ #ifndef _MACHINE_SMP_H_ #define _MACHINE_SMP_H_ #include #include #define IPI_AST 0 #define IPI_PREEMPT 2 #define IPI_RENDEZVOUS 3 #define IPI_STOP 4 #define IPI_STOP_HARD 4 #define IPI_HARDCLOCK 6 #define IPI_TLB 7 +#define IPI_CACHE 8 +#define IPI_LAZYPMAP 9 void init_secondary(int cpu); void mpentry(void); void ipi_all_but_self(u_int ipi); void ipi_cpu(int cpu, u_int ipi); void ipi_selected(cpuset_t cpus, u_int ipi); /* PIC interface */ void pic_ipi_send(cpuset_t cpus, u_int ipi); void pic_ipi_clear(int ipi); int pic_ipi_read(int arg); /* Platform interface */ void platform_mp_setmaxid(void); int platform_mp_probe(void); void platform_mp_start_ap(void); void platform_mp_init_secondary(void); void platform_ipi_send(cpuset_t cpus, u_int ipi); /* global data in mp_machdep.c */ extern struct pcb stoppcbs[]; #endif /* !_MACHINE_SMP_H_ */ Index: head/sys/arm/include/vm.h =================================================================== --- head/sys/arm/include/vm.h (revision 280711) +++ head/sys/arm/include/vm.h (revision 280712) @@ -1,36 +1,50 @@ /*- * Copyright (c) 2009 Alan L. Cox * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_VM_H_ #define _MACHINE_VM_H_ +#ifdef ARM_NEW_PMAP +#include + +#define VM_MEMATTR_WB_WA ((vm_memattr_t)PTE2_ATTR_WB_WA) +#define VM_MEMATTR_NOCACHE ((vm_memattr_t)PTE2_ATTR_NOCACHE) +#define VM_MEMATTR_DEVICE ((vm_memattr_t)PTE2_ATTR_DEVICE) +#define VM_MEMATTR_SO ((vm_memattr_t)PTE2_ATTR_SO) + +#define VM_MEMATTR_DEFAULT VM_MEMATTR_WB_WA +#define VM_MEMATTR_UNCACHEABLE VM_MEMATTR_SO /*name is misused by DMA */ + + +#else /* Memory attribute configuration. 
*/ #define VM_MEMATTR_DEFAULT 0 #define VM_MEMATTR_UNCACHEABLE 1 +#endif #endif /* !_MACHINE_VM_H_ */ Index: head/sys/arm/include/vmparam.h =================================================================== --- head/sys/arm/include/vmparam.h (revision 280711) +++ head/sys/arm/include/vmparam.h (revision 280712) @@ -1,178 +1,175 @@ /* $NetBSD: vmparam.h,v 1.26 2003/08/07 16:27:47 agc Exp $ */ /*- * Copyright (c) 1988 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ /* * Machine dependent constants for ARM. */ /* * Virtual memory related constants, all in bytes */ #ifndef MAXTSIZ #define MAXTSIZ (64UL*1024*1024) /* max text size */ #endif #ifndef DFLDSIZ #define DFLDSIZ (128UL*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (512UL*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (2UL*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (8UL*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128UL*1024) /* amount to grow stack */ #endif /* * Address space constants */ /* * The line between user space and kernel space * Mappings >= KERNEL_BASE are constant across all processes */ #ifndef KERNBASE #define KERNBASE 0xc0000000 #endif /* * max number of non-contig chunks of physical RAM you can have */ #define VM_PHYSSEG_MAX 32 /* * The physical address space may be sparsely populated on some ARM systems. */ #define VM_PHYSSEG_SPARSE /* * Create two free page pools. Since the ARM kernel virtual address * space does not include a mapping onto the machine's entire physical * memory, VM_FREEPOOL_DIRECT is defined as an alias for the default * pool, VM_FREEPOOL_DEFAULT. */ #define VM_NFREEPOOL 2 #define VM_FREEPOOL_CACHE 1 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 0 /* * We need just one free list: DEFAULT. */ #define VM_NFREELIST 1 #define VM_FREELIST_DEFAULT 0 /* * The largest allocation size is 1MB. */ #define VM_NFREEORDER 9 /* * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 1 #endif /* * Level 0 reservations consist of 256 pages. 
*/ #ifndef VM_LEVEL_0_ORDER #define VM_LEVEL_0_ORDER 8 #endif -#define UPT_MAX_ADDRESS VADDR(UPTPTDI + 3, 0) -#define UPT_MIN_ADDRESS VADDR(UPTPTDI, 0) - #define VM_MIN_ADDRESS (0x00001000) #ifndef VM_MAXUSER_ADDRESS -#define VM_MAXUSER_ADDRESS KERNBASE -#endif /* VM_MAXUSER_ADDRESS */ +#define VM_MAXUSER_ADDRESS (KERNBASE - 0x00400000) /* !!! PT2MAP_SIZE */ +#endif #define VM_MAX_ADDRESS VM_MAXUSER_ADDRESS #define USRSTACK VM_MAXUSER_ADDRESS /* initial pagein size of beginning of executable file */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif #ifndef VM_MIN_KERNEL_ADDRESS #define VM_MIN_KERNEL_ADDRESS KERNBASE #endif #define VM_MAX_KERNEL_ADDRESS (vm_max_kernel_address) /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (3) #endif /* * Optional floor (in bytes) on the size of the kmem arena. */ #ifndef VM_KMEM_SIZE_MIN #define VM_KMEM_SIZE_MIN (12 * 1024 * 1024) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 40% of the * kernel map. 
*/ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((vm_max_kernel_address - \ VM_MIN_KERNEL_ADDRESS + 1) * 2 / 5) #endif extern vm_offset_t vm_max_kernel_address; #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ #ifndef VM_MAX_AUTOTUNE_MAXUSERS #define VM_MAX_AUTOTUNE_MAXUSERS 384 #endif #define SFBUF #define SFBUF_MAP #endif /* _MACHINE_VMPARAM_H_ */ Index: head/sys/conf/files.arm =================================================================== --- head/sys/conf/files.arm (revision 280711) +++ head/sys/conf/files.arm (revision 280712) @@ -1,108 +1,109 @@ # $FreeBSD$ arm/arm/autoconf.c standard arm/arm/bcopy_page.S standard arm/arm/bcopyinout.S standard arm/arm/blockio.S standard arm/arm/bootconfig.c standard arm/arm/bus_space_asm_generic.S standard arm/arm/busdma_machdep.c optional !armv6 arm/arm/busdma_machdep-v6.c optional armv6 arm/arm/copystr.S standard arm/arm/cpufunc.c standard arm/arm/cpufunc_asm.S standard arm/arm/cpufunc_asm_armv4.S standard arm/arm/cpuinfo.c standard arm/arm/cpu_asm-v6.S optional armv6 arm/arm/db_disasm.c optional ddb arm/arm/db_interface.c optional ddb arm/arm/db_trace.c optional ddb arm/arm/devmap.c standard arm/arm/disassem.c optional ddb arm/arm/dump_machdep.c standard arm/arm/elf_machdep.c standard arm/arm/elf_note.S standard arm/arm/exception.S standard arm/arm/fiq.c standard arm/arm/fiq_subr.S standard arm/arm/fusu.S standard arm/arm/gdb_machdep.c optional gdb arm/arm/identcpu.c standard arm/arm/in_cksum.c optional inet | inet6 arm/arm/in_cksum_arm.S optional inet | inet6 arm/arm/intr.c standard arm/arm/locore.S standard no-obj arm/arm/machdep.c standard arm/arm/mem.c optional mem arm/arm/minidump_machdep.c optional mem arm/arm/mp_machdep.c optional smp arm/arm/nexus.c standard arm/arm/physmem.c standard arm/arm/pl190.c optional pl190 arm/arm/pl310.c optional pl310 arm/arm/platform.c optional platform arm/arm/platform_if.m optional platform arm/arm/pmap.c optional !armv6 -arm/arm/pmap-v6.c optional armv6 +arm/arm/pmap-v6.c 
optional armv6 !arm_new_pmap +arm/arm/pmap-v6-new.c optional armv6 arm_new_pmap arm/arm/sc_machdep.c optional sc arm/arm/setcpsr.S standard arm/arm/setstack.s standard arm/arm/stack_machdep.c optional ddb | stack arm/arm/stdatomic.c standard \ compile-with "${NORMAL_C:N-Wmissing-prototypes}" arm/arm/support.S standard arm/arm/swtch.S standard arm/arm/sys_machdep.c standard arm/arm/syscall.c standard arm/arm/trap.c optional !armv6 arm/arm/trap-v6.c optional armv6 arm/arm/uio_machdep.c standard arm/arm/undefined.c standard arm/arm/unwind.c optional ddb | kdtrace_hooks arm/arm/vm_machdep.c standard arm/arm/vfp.c standard board_id.h standard \ dependency "$S/arm/conf/genboardid.awk $S/arm/conf/mach-types" \ compile-with "${AWK} -f $S/arm/conf/genboardid.awk $S/arm/conf/mach-types > board_id.h" \ no-obj no-implicit-rule before-depend \ clean "board_id.h" cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs compile-with "${ZFS_C}" crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb dev/fb/fb.c optional sc dev/fdt/fdt_arm_platform.c optional platform fdt dev/hwpmc/hwpmc_arm.c optional hwpmc dev/hwpmc/hwpmc_armv7.c optional hwpmc dev/kbd/kbd.c optional sc | vt dev/syscons/scgfbrndr.c optional sc dev/syscons/scterm-teken.c optional sc dev/syscons/scvtb.c optional sc dev/uart/uart_cpu_fdt.c optional uart fdt font.h optional sc \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" kern/subr_busdma_bufalloc.c 
standard kern/subr_dummy_vdso_tc.c standard kern/subr_sfbuf.c standard libkern/arm/aeabi_unwind.c standard libkern/arm/divsi3.S standard libkern/arm/ffs.S standard libkern/arm/ldivmod.S standard libkern/arm/ldivmod_helper.c standard libkern/arm/memcpy.S standard libkern/arm/memset.S standard libkern/arm/muldi3.c standard libkern/ashldi3.c standard libkern/ashrdi3.c standard libkern/divdi3.c standard libkern/ffsl.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/lshrdi3.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard Index: head/sys/conf/options.arm =================================================================== --- head/sys/conf/options.arm (revision 280711) +++ head/sys/conf/options.arm (revision 280712) @@ -1,67 +1,68 @@ #$FreeBSD$ ARM9_CACHE_WRITE_THROUGH opt_global.h ARMV6 opt_global.h ARM_CACHE_LOCK_ENABLE opt_global.h ARM_KERN_DIRECTMAP opt_vm.h ARM_L2_PIPT opt_global.h ARM_MANY_BOARD opt_global.h ARM_NEW_PMAP opt_global.h +NKPT2PG opt_pmap.h ARM_WANT_TP_ADDRESS opt_global.h COUNTS_PER_SEC opt_timer.h CPU_ARM9 opt_global.h CPU_ARM9E opt_global.h CPU_ARM1136 opt_global.h CPU_ARM1176 opt_global.h CPU_CORTEXA opt_global.h CPU_KRAIT opt_global.h CPU_FA526 opt_global.h CPU_FA626TE opt_global.h CPU_MV_PJ4B opt_global.h CPU_XSCALE_80219 opt_global.h CPU_XSCALE_80321 opt_global.h CPU_XSCALE_81342 opt_global.h CPU_XSCALE_IXP425 opt_global.h CPU_XSCALE_IXP435 opt_global.h CPU_XSCALE_PXA2X0 opt_global.h FLASHADDR opt_global.h IPI_IRQ_START opt_smp.h IPI_IRQ_END opt_smp.h FREEBSD_BOOT_LOADER opt_global.h IXP4XX_FLASH_SIZE opt_global.h KERNBASE opt_global.h KERNPHYSADDR opt_global.h KERNVIRTADDR opt_global.h LINUX_BOOT_ABI opt_global.h LOADERRAMADDR opt_global.h PHYSADDR opt_global.h PLATFORM opt_global.h SOCDEV_PA opt_global.h SOCDEV_VA opt_global.h PV_STATS opt_pmap.h QEMU_WORKAROUNDS opt_global.h SOC_BCM2835 opt_global.h 
SOC_BCM2836 opt_global.h SOC_MV_ARMADAXP opt_global.h SOC_MV_DISCOVERY opt_global.h SOC_MV_DOVE opt_global.h SOC_MV_FREY opt_global.h SOC_MV_KIRKWOOD opt_global.h SOC_MV_LOKIPLUS opt_global.h SOC_MV_ORION opt_global.h SOC_OMAP3 opt_global.h SOC_OMAP4 opt_global.h SOC_TI_AM335X opt_global.h SOC_TEGRA2 opt_global.h XSCALE_CACHE_READ_WRITE_ALLOCATE opt_global.h XSACLE_DISABLE_CCNT opt_timer.h VERBOSE_INIT_ARM opt_global.h VM_MAXUSER_ADDRESS opt_global.h AT91_ATE_USE_RMII opt_at91.h AT91_MCI_ALLOW_OVERCLOCK opt_at91.h AT91_MCI_HAS_4WIRE opt_at91.h AT91_MCI_SLOT_B opt_at91.h GFB_DEBUG opt_gfb.h GFB_NO_FONT_LOADING opt_gfb.h GFB_NO_MODE_CHANGE opt_gfb.h AT91C_MAIN_CLOCK opt_at91.h VFP opt_global.h