Index: head/sys/arm64/arm64/genassym.c =================================================================== --- head/sys/arm64/arm64/genassym.c (revision 297445) +++ head/sys/arm64/arm64/genassym.c (revision 297446) @@ -1,68 +1,68 @@ /*- * Copyright (c) 2004 Olivier Houchard * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include ASSYM(KERNBASE, KERNBASE); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(PCPU_SIZE, sizeof(struct pcpu)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); /* Size of pcb, rounded to keep stack alignment */ ASSYM(PCB_SIZE, roundup2(sizeof(struct pcb), STACKALIGNBYTES + 1)); ASSYM(PCB_SINGLE_STEP_SHIFT, PCB_SINGLE_STEP_SHIFT); ASSYM(PCB_REGS, offsetof(struct pcb, pcb_x)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); -ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr)); +ASSYM(PCB_L0ADDR, offsetof(struct pcb, pcb_l0addr)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(SF_UC, offsetof(struct sigframe, sf_uc)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TF_SIZE, sizeof(struct trapframe)); ASSYM(TF_SP, offsetof(struct trapframe, tf_sp)); ASSYM(TF_ELR, offsetof(struct trapframe, tf_elr)); ASSYM(TF_X, offsetof(struct trapframe, tf_x)); Index: head/sys/arm64/arm64/locore.S =================================================================== --- head/sys/arm64/arm64/locore.S (revision 297445) +++ head/sys/arm64/arm64/locore.S (revision 297446) @@ -1,695 +1,681 @@ /*- * Copyright (c) 2012-2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "assym.s" #include "opt_kstack_pages.h" #include #include #include #include #include #include -#define VIRT_BITS 39 +#define VIRT_BITS 48 .globl kernbase .set kernbase, KERNBASE #define DEVICE_MEM 0 #define NORMAL_UNCACHED 1 #define NORMAL_MEM 2 /* * We assume: * MMU on with an identity map, or off * D-Cache: off * I-Cache: on or off * We are loaded at a 2MiB aligned address */ .text .globl _start _start: /* Drop to EL1 */ bl drop_to_el1 /* * Disable the MMU. We may have entered the kernel with it on and * will need to update the tables later. If this has been set up * with anything other than a VA == PA map then this will fail, * but in this case the code to find where we are running from * would have also failed. */ dsb sy mrs x2, sctlr_el1 bic x2, x2, SCTLR_M msr sctlr_el1, x2 isb /* Set the context id */ msr contextidr_el1, xzr /* Get the virt -> phys offset */ bl get_virt_delta /* * At this point: * x29 = PA - VA * x28 = Our physical load address */ /* Create the page tables */ bl create_pagetables /* * At this point: * x27 = TTBR0 table - * x26 = TTBR1 table + * x26 = Kernel L1 table + * x24 = TTBR1 table */ /* Enable the mmu */ bl start_mmu /* Jump to the virtual address space */ ldr x15, .Lvirtdone br x15 virtdone: - /* - * Now that we are in virtual address space, - * we don't need the identity mapping in TTBR0 and - * can set the TCR to a more useful value. - */ - ldr x2, tcr - mrs x3, id_aa64mmfr0_el1 - bfi x2, x3, #32, #3 - msr tcr_el1, x2 - /* Set up the stack */ adr x25, initstack_end mov sp, x25 sub sp, sp, #PCB_SIZE /* Zero the BSS */ ldr x15, .Lbss ldr x14, .Lend 1: str xzr, [x15], #8 cmp x15, x14 b.lo 1b /* Backup the module pointer */ mov x1, x0 /* Make the page table base a virtual address */ sub x26, x26, x29 + sub x24, x24, x29 sub sp, sp, #(64 * 4) mov x0, sp /* Degate the delda so it is VA -> PA */ neg x29, x29 str x1, [x0] /* modulep */ str x26, [x0, 8] /* kern_l1pt */ str x29, [x0, 16] /* kern_delta */ str x25, [x0, 24] /* kern_stack */ + str x24, [x0, 32] /* kern_l0pt */ /* trace back starts here */ mov fp, #0 /* Branch to C code */ bl initarm bl mi_startup /* We should not get here */ brk 0 .align 3 .Lvirtdone: .quad virtdone .Lbss: .quad __bss_start .Lend: .quad _end #ifdef SMP /* * mpentry(unsigned long) * * Called by a core when it is being brought online. * The data in x0 is passed straight to init_secondary. */ ENTRY(mpentry) /* Disable interrupts */ msr daifset, #2 /* Drop to EL1 */ bl drop_to_el1 /* Set the context id */ msr contextidr_el1, x1 /* Load the kernel page table */ - adr x26, pagetable_l1_ttbr1 + adr x24, pagetable_l0_ttbr1 /* Load the identity page table */ adr x27, pagetable_l0_ttbr0 /* Enable the mmu */ bl start_mmu /* Jump to the virtual address space */ ldr x15, =mp_virtdone br x15 mp_virtdone: - /* - * Now that we are in virtual address space, - * we don't need the identity mapping in TTBR0 and - * can set the TCR to a more useful value. - */ - ldr x2, tcr - mrs x3, id_aa64mmfr0_el1 - bfi x2, x3, #32, #3 - msr tcr_el1, x2 - ldr x4, =secondary_stacks mov x5, #(PAGE_SIZE * KSTACK_PAGES) mul x5, x0, x5 add sp, x4, x5 b init_secondary END(mpentry) #endif /* * If we are started in EL2, configure the required hypervisor * registers and drop to EL1. */ drop_to_el1: mrs x1, CurrentEL lsr x1, x1, #2 cmp x1, #0x2 b.eq 1f ret 1: /* Configure the Hypervisor */ mov x2, #(HCR_RW) msr hcr_el2, x2 /* Load the Virtualization Process ID Register */ mrs x2, midr_el1 msr vpidr_el2, x2 /* Load the Virtualization Multiprocess ID Register */ mrs x2, mpidr_el1 msr vmpidr_el2, x2 /* Set the bits that need to be 1 in sctlr_el1 */ ldr x2, .Lsctlr_res1 msr sctlr_el1, x2 /* Don't trap to EL2 for exceptions */ mov x2, #CPTR_RES1 msr cptr_el2, x2 /* Don't trap to EL2 for CP15 traps */ msr hstr_el2, xzr /* Enable access to the physical timers at EL1 */ mrs x2, cnthctl_el2 orr x2, x2, #(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) msr cnthctl_el2, x2 /* Set the counter offset to a known value */ msr cntvoff_el2, xzr /* Hypervisor trap functions */ adr x2, hyp_vectors msr vbar_el2, x2 mov x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h) msr spsr_el2, x2 /* Configure GICv3 CPU interface */ mrs x2, id_aa64pfr0_el1 /* Extract GIC bits from the register */ ubfx x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS /* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */ cmp x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT) b.ne 2f mrs x2, icc_sre_el2 orr x2, x2, #ICC_SRE_EL2_EN /* Enable access from insecure EL1 */ orr x2, x2, #ICC_SRE_EL2_SRE /* Enable system registers */ msr icc_sre_el2, x2 2: /* Set the address to return to our return address */ msr elr_el2, x30 isb eret .align 3 .Lsctlr_res1: .quad SCTLR_RES1 #define VECT_EMPTY \ .align 7; \ 1: b 1b .align 11 hyp_vectors: VECT_EMPTY /* Synchronous EL2t */ VECT_EMPTY /* IRQ EL2t */ VECT_EMPTY /* FIQ EL2t */ VECT_EMPTY /* Error EL2t */ VECT_EMPTY /* Synchronous EL2h */ VECT_EMPTY /* IRQ EL2h */ VECT_EMPTY /* FIQ EL2h */ VECT_EMPTY /* Error EL2h */ VECT_EMPTY /* Synchronous 64-bit EL1 */ VECT_EMPTY /* IRQ 64-bit EL1 */ VECT_EMPTY /* FIQ 64-bit EL1 */ VECT_EMPTY /* Error 64-bit EL1 */ VECT_EMPTY /* Synchronous 32-bit EL1 */ VECT_EMPTY /* IRQ 32-bit EL1 */ VECT_EMPTY /* FIQ 32-bit EL1 */ VECT_EMPTY /* Error 32-bit EL1 */ /* * Get the delta between the physical address we were loaded to and the * virtual address we expect to run from. This is used when building the * initial page table. */ get_virt_delta: /* Load the physical address of virt_map */ adr x29, virt_map /* Load the virtual address of virt_map stored in virt_map */ ldr x28, [x29] /* Find PA - VA as PA' = VA' - VA + PA = VA' + (PA - VA) = VA' + x29 */ sub x29, x29, x28 /* Find the load address for the kernel */ mov x28, #(KERNBASE) add x28, x28, x29 ret .align 3 virt_map: .quad virt_map /* * This builds the page tables containing the identity map, and the kernel * virtual map. * * It relys on: * We were loaded to an address that is on a 2MiB boundary * All the memory must not cross a 1GiB boundaty * x28 contains the physical address we were loaded from * * TODO: This is out of date. * There are at least 5 pages before that address for the page tables * The pages used are: * - The identity (PA = VA) table (TTBR0) * - The Kernel L1 table (TTBR1)(not yet) * - The PA != VA L2 table to jump into (not yet) * - The FDT L2 table (not yet) */ create_pagetables: /* Save the Link register */ mov x5, x30 /* Clean the page table */ adr x6, pagetable mov x26, x6 adr x27, pagetable_end 1: stp xzr, xzr, [x6], #16 stp xzr, xzr, [x6], #16 stp xzr, xzr, [x6], #16 stp xzr, xzr, [x6], #16 cmp x6, x27 b.lo 1b /* * Build the TTBR1 maps. */ /* Find the size of the kernel */ mov x6, #(KERNBASE) ldr x7, .Lend /* Find the end - begin */ sub x8, x7, x6 /* Get the number of l2 pages to allocate, rounded down */ lsr x10, x8, #(L2_SHIFT) /* Add 8 MiB for any rounding above and the module data */ add x10, x10, #4 /* Create the kernel space L2 table */ mov x6, x26 mov x7, #NORMAL_MEM mov x8, #(KERNBASE & L2_BLOCK_MASK) mov x9, x28 bl build_l2_block_pagetable /* Move to the l1 table */ add x26, x26, #PAGE_SIZE /* Link the l1 -> l2 table */ mov x9, x6 mov x6, x26 bl link_l1_pagetable + /* Move to the l0 table */ + add x24, x26, #PAGE_SIZE + /* Link the l0 -> l1 table */ + mov x9, x6 + mov x6, x24 + bl link_l0_pagetable + /* * Build the TTBR0 maps. */ - add x27, x26, #PAGE_SIZE + add x27, x24, #PAGE_SIZE mov x6, x27 /* The initial page table */ #if defined(SOCDEV_PA) && defined(SOCDEV_VA) /* Create a table for the UART */ mov x7, #DEVICE_MEM mov x8, #(SOCDEV_VA) /* VA start */ mov x9, #(SOCDEV_PA) /* PA start */ mov x10, #1 bl build_l1_block_pagetable #endif /* Create the VA = PA map */ mov x7, #NORMAL_UNCACHED /* Uncached as it's only needed early on */ mov x9, x27 mov x8, x9 /* VA start (== PA start) */ mov x10, #1 bl build_l1_block_pagetable /* Move to the l0 table */ add x27, x27, #PAGE_SIZE /* Link the l0 -> l1 table */ mov x9, x6 mov x6, x27 bl link_l0_pagetable /* Restore the Link register */ mov x30, x5 ret /* * Builds an L0 -> L1 table descriptor * * This is a link for a 512GiB block of memory with up to 1GiB regions mapped * within it by build_l1_block_pagetable. * * x6 = L0 table * x8 = Virtual Address * x9 = L1 PA (trashed) * x11, x12 and x13 are trashed */ link_l0_pagetable: /* * Link an L0 -> L1 table entry. */ /* Find the table index */ lsr x11, x8, #L0_SHIFT - and x11, x11, #Ln_ADDR_MASK + and x11, x11, #L0_ADDR_MASK /* Build the L0 block entry */ mov x12, #L0_TABLE /* Only use the output address bits */ lsr x9, x9, #12 orr x12, x12, x9, lsl #12 /* Store the entry */ str x12, [x6, x11, lsl #3] ret /* * Builds an L1 -> L2 table descriptor * * This is a link for a 1GiB block of memory with up to 2MiB regions mapped * within it by build_l2_block_pagetable. * * x6 = L1 table * x8 = Virtual Address * x9 = L2 PA (trashed) * x11, x12 and x13 are trashed */ link_l1_pagetable: /* * Link an L1 -> L2 table entry. */ /* Find the table index */ lsr x11, x8, #L1_SHIFT and x11, x11, #Ln_ADDR_MASK /* Build the L1 block entry */ mov x12, #L1_TABLE /* Only use the output address bits */ lsr x9, x9, #12 orr x12, x12, x9, lsl #12 /* Store the entry */ str x12, [x6, x11, lsl #3] ret /* * Builds count 1 GiB page table entry * x6 = L1 table * x7 = Type (0 = Device, 1 = Normal) * x8 = VA start * x9 = PA start (trashed) * x10 = Entry count (TODO) * x11, x12 and x13 are trashed */ build_l1_block_pagetable: /* * Build the L1 table entry. */ /* Find the table index */ lsr x11, x8, #L1_SHIFT and x11, x11, #Ln_ADDR_MASK /* Build the L1 block entry */ lsl x12, x7, #2 orr x12, x12, #L1_BLOCK orr x12, x12, #(ATTR_AF) #ifdef SMP orr x12, x12, ATTR_SH(ATTR_SH_IS) #endif /* Only use the output address bits */ lsr x9, x9, #L1_SHIFT /* Set the physical address for this virtual address */ 1: orr x12, x12, x9, lsl #L1_SHIFT /* Store the entry */ str x12, [x6, x11, lsl #3] /* Clear the address bits */ and x12, x12, #ATTR_MASK_L sub x10, x10, #1 add x11, x11, #1 add x9, x9, #1 cbnz x10, 1b 2: ret /* * Builds count 2 MiB page table entry * x6 = L2 table * x7 = Type (0 = Device, 1 = Normal) * x8 = VA start * x9 = PA start (trashed) * x10 = Entry count (TODO) * x11, x12 and x13 are trashed */ build_l2_block_pagetable: /* * Build the L2 table entry. */ /* Find the table index */ lsr x11, x8, #L2_SHIFT and x11, x11, #Ln_ADDR_MASK /* Build the L2 block entry */ lsl x12, x7, #2 orr x12, x12, #L2_BLOCK orr x12, x12, #(ATTR_AF) #ifdef SMP orr x12, x12, ATTR_SH(ATTR_SH_IS) #endif /* Only use the output address bits */ lsr x9, x9, #L2_SHIFT /* Set the physical address for this virtual address */ 1: orr x12, x12, x9, lsl #L2_SHIFT /* Store the entry */ str x12, [x6, x11, lsl #3] /* Clear the address bits */ and x12, x12, #ATTR_MASK_L sub x10, x10, #1 add x11, x11, #1 add x9, x9, #1 cbnz x10, 1b 2: ret start_mmu: dsb sy /* Load the exception vectors */ ldr x2, =exception_vectors msr vbar_el1, x2 /* Load ttbr0 and ttbr1 */ msr ttbr0_el1, x27 - msr ttbr1_el1, x26 + msr ttbr1_el1, x24 isb /* Clear the Monitor Debug System control register */ msr mdscr_el1, xzr /* Invalidate the TLB */ tlbi vmalle1is ldr x2, mair msr mair_el1, x2 /* * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1. - * Some machines have physical memory mapped >512GiB, which can not - * be identity-mapped using the default 39 VA bits. Thus, use - * 48 VA bits for now and switch back to 39 after the VA jump. */ - ldr x2, tcr_early + ldr x2, tcr mrs x3, id_aa64mmfr0_el1 bfi x2, x3, #32, #3 msr tcr_el1, x2 /* Setup SCTLR */ ldr x2, sctlr_set ldr x3, sctlr_clear mrs x1, sctlr_el1 bic x1, x1, x3 /* Clear the required bits */ orr x1, x1, x2 /* Set the required bits */ msr sctlr_el1, x1 isb ret .align 3 mair: /* Device Normal, no cache Normal, write-back */ .quad MAIR_ATTR(0x00, 0) | MAIR_ATTR(0x44, 1) | MAIR_ATTR(0xff, 2) tcr: .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \ TCR_CACHE_ATTRS | TCR_SMP_ATTRS) -tcr_early: - .quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \ - TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS) sctlr_set: /* Bits to set */ .quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \ SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | SCTLR_M) sctlr_clear: /* Bits to clear */ .quad (SCTLR_EE | SCTLR_EOE | SCTLR_WXN | SCTLR_UMA | SCTLR_ITD | \ SCTLR_THEE | SCTLR_CP15BEN | SCTLR_A) .globl abort abort: b abort //.section .init_pagetable .align 12 /* 4KiB aligned */ /* * 3 initial tables (in the following order): * L2 for kernel (High addresses) * L1 for kernel * L1 for user (Low addresses) */ pagetable: .space PAGE_SIZE pagetable_l1_ttbr1: + .space PAGE_SIZE +pagetable_l0_ttbr1: .space PAGE_SIZE pagetable_l1_ttbr0: .space PAGE_SIZE pagetable_l0_ttbr0: .space PAGE_SIZE pagetable_end: el2_pagetable: .space PAGE_SIZE .globl init_pt_va init_pt_va: .quad pagetable /* XXX: Keep page tables VA */ .align 4 initstack: .space (PAGE_SIZE * KSTACK_PAGES) initstack_end: ENTRY(sigcode) mov x0, sp add x0, x0, #SF_UC 1: mov x8, #SYS_sigreturn svc 0 /* sigreturn failed, exit */ mov x8, #SYS_exit svc 0 b 1b END(sigcode) /* This may be copied to the stack, keep it 16-byte aligned */ .align 3 esigcode: .data .align 3 .global szsigcode szsigcode: .quad esigcode - sigcode Index: head/sys/arm64/arm64/machdep.c =================================================================== --- head/sys/arm64/arm64/machdep.c (revision 297445) +++ head/sys/arm64/arm64/machdep.c (revision 297446) @@ -1,1016 +1,1016 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_platform.h" #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef FDT #include #include #endif struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } void bzero(void *buf, size_t len) { uint8_t *p; p = buf; while(len-- > 0) *p++ = 0; } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sp = frame->tf_sp; regs->lr = frame->tf_lr; regs->elr = frame->tf_elr; regs->spsr = frame->tf_spsr; memcpy(regs->x, frame->tf_x, sizeof(regs->x)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sp = regs->sp; frame->tf_lr = regs->lr; frame->tf_elr = regs->elr; frame->tf_spsr = regs->spsr; memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, pcb); memcpy(regs->fp_q, pcb->pcb_vfp, sizeof(regs->fp_q)); regs->fp_cr = pcb->pcb_fpcr; regs->fp_sr = pcb->pcb_fpsr; } else #endif memset(regs->fp_q, 0, sizeof(regs->fp_q)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; memcpy(pcb->pcb_vfp, regs->fp_q, sizeof(regs->fp_q)); pcb->pcb_fpcr = regs->fp_cr; pcb->pcb_fpsr = regs->fp_sr; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { panic("ARM64TODO: ptrace_set_pc"); return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_spsr |= PSR_SS; td->td_pcb->pcb_flags |= PCB_SINGLE_STEP; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = td->td_retval[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_lr = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct gpregs *)0)->gp_x); CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct reg *)0)->x); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_x[0] = 0; mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C; } else { mcp->mc_gpregs.gp_x[0] = tf->tf_x[0]; mcp->mc_gpregs.gp_spsr = tf->tf_spsr; } memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1], sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1)); mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_lr = tf->tf_lr; mcp->mc_gpregs.gp_elr = tf->tf_elr; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x)); tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_lr = mcp->mc_gpregs.gp_lr; tf->tf_elr = mcp->mc_gpregs.gp_elr; tf->tf_spsr = mcp->mc_gpregs.gp_spsr; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, curpcb); memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_vfp, sizeof(mcp->mc_fpregs)); mcp->mc_fpregs.fp_cr = curpcb->pcb_fpcr; mcp->mc_fpregs.fp_sr = curpcb->pcb_fpsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* * Discard any vfp state for the current thread, we * are about to override it. */ vfp_discard(td); memcpy(curpcb->pcb_vfp, mcp->mc_fpregs.fp_q, sizeof(mcp->mc_fpregs)); curpcb->pcb_fpcr = mcp->mc_fpregs.fp_cr; curpcb->pcb_fpsr = mcp->mc_fpregs.fp_sr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags; } critical_exit(); #endif } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "dsb sy \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { /* We should have shutdown by now, if not enter a low power sleep */ intr_disable(); while (1) { __asm __volatile("wfi"); } } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* ARM64TODO TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("ARM64TODO: cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t daif; td = curthread; if (td->td_md.md_spinlock_count == 0) { daif = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = daif; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t daif; td = curthread; critical_exit(); daif = td->td_md.md_saved_daif; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(daif); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; uint32_t spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); spsr = uc.uc_mcontext.mc_gpregs.gp_spsr; if ((spsr & PSR_M_MASK) != PSR_M_EL0t || (spsr & (PSR_F | PSR_I | PSR_A | PSR_D)) != 0) return (EINVAL); set_mcontext(td, &uc.uc_mcontext); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { int i; for (i = 0; i < PCB_LR; i++) pcb->pcb_x[i] = tf->tf_x[i]; pcb->pcb_x[PCB_LR] = tf->tf_lr; pcb->pcb_pc = tf->tf_elr; pcb->pcb_sp = tf->tf_sp; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int code, onstack, sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_x[0]= sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { struct pcpu *pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } typedef struct { uint32_t type; uint64_t phys_start; uint64_t virt_start; uint64_t num_pages; uint64_t attr; } EFI_MEMORY_DESCRIPTOR; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } #ifdef FDT static void add_fdt_mem_regions(struct mem_region *mr, int mrcnt, vm_paddr_t *physmap, u_int *physmap_idxp) { for (int i = 0; i < mrcnt; i++) { if (!add_physmap_entry(mr[i].mr_start, mr[i].mr_size, physmap, physmap_idxp)) break; } } #endif #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, u_int *physmap_idxp) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type <= EFI_MD_TYPE_PALCODE) type = types[p->md_type]; else type = ""; printf("%23s %012lx %12p %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ break; default: continue; } if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), physmap, physmap_idxp)) break; } } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif static void cache_setup(void) { int dcache_line_shift, icache_line_shift; uint32_t ctr_el0; ctr_el0 = READ_SPECIALREG(ctr_el0); /* Read the log2 words in each D cache line */ dcache_line_shift = CTR_DLINE_SIZE(ctr_el0); /* Get the D cache line size */ dcache_line_size = sizeof(int) << dcache_line_shift; /* And the same for the I cache */ icache_line_shift = CTR_ILINE_SIZE(ctr_el0); icache_line_size = sizeof(int) << icache_line_shift; idcache_line_size = MIN(dcache_line_size, icache_line_size); } void initarm(struct arm64_bootparams *abp) { struct efi_map_header *efihdr; struct pcpu *pcpup; #ifdef FDT struct mem_region mem_regions[FDT_MEM_REGIONS]; int mem_regions_sz; #endif vm_offset_t lastaddr; caddr_t kmdp; vm_paddr_t mem_len; int i; /* Set the module data location */ preload_metadata = (caddr_t)(uintptr_t)(abp->modulep); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), 0); #ifdef FDT try_load_dtb(kmdp); #endif /* Find the address to start allocating from */ lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); /* Load the physical memory ranges */ physmap_idx = 0; efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) add_efi_map_entries(efihdr, physmap, &physmap_idx); #ifdef FDT else { /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); add_fdt_mem_regions(mem_regions, mem_regions_sz, physmap, &physmap_idx); } #endif /* Print the memory map */ mem_len = 0; for (i = 0; i < physmap_idx; i += 2) { dump_avail[i] = physmap[i]; dump_avail[i + 1] = physmap[i + 1]; mem_len += physmap[i + 1] - physmap[i]; } dump_avail[i] = 0; dump_avail[i + 1] = 0; /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ - pmap_bootstrap(abp->kern_l1pt, KERNBASE - abp->kern_delta, - lastaddr - KERNBASE); + pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt, + KERNBASE - abp->kern_delta, lastaddr - KERNBASE); arm_devmap_bootstrap(0, NULL); cninit(); init_proc0(abp->kern_stack); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); dbg_monitor_init(); kdb_init(); early_boot = 0; } uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, struct timecounter *); uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) { return (arm_cpu_fill_vdso_timehands != NULL ? arm_cpu_fill_vdso_timehands(vdso_th, tc) : 0); } #ifdef DDB #include DB_SHOW_COMMAND(specialregs, db_show_spregs) { #define PRINT_REG(reg) \ db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg)) PRINT_REG(actlr_el1); PRINT_REG(afsr0_el1); PRINT_REG(afsr1_el1); PRINT_REG(aidr_el1); PRINT_REG(amair_el1); PRINT_REG(ccsidr_el1); PRINT_REG(clidr_el1); PRINT_REG(contextidr_el1); PRINT_REG(cpacr_el1); PRINT_REG(csselr_el1); PRINT_REG(ctr_el0); PRINT_REG(currentel); PRINT_REG(daif); PRINT_REG(dczid_el0); PRINT_REG(elr_el1); PRINT_REG(esr_el1); PRINT_REG(far_el1); #if 0 /* ARM64TODO: Enable VFP before reading floating-point registers */ PRINT_REG(fpcr); PRINT_REG(fpsr); #endif PRINT_REG(id_aa64afr0_el1); PRINT_REG(id_aa64afr1_el1); PRINT_REG(id_aa64dfr0_el1); PRINT_REG(id_aa64dfr1_el1); PRINT_REG(id_aa64isar0_el1); PRINT_REG(id_aa64isar1_el1); PRINT_REG(id_aa64pfr0_el1); PRINT_REG(id_aa64pfr1_el1); PRINT_REG(id_afr0_el1); PRINT_REG(id_dfr0_el1); PRINT_REG(id_isar0_el1); PRINT_REG(id_isar1_el1); PRINT_REG(id_isar2_el1); PRINT_REG(id_isar3_el1); PRINT_REG(id_isar4_el1); PRINT_REG(id_isar5_el1); PRINT_REG(id_mmfr0_el1); PRINT_REG(id_mmfr1_el1); PRINT_REG(id_mmfr2_el1); PRINT_REG(id_mmfr3_el1); #if 0 /* Missing from llvm */ PRINT_REG(id_mmfr4_el1); #endif PRINT_REG(id_pfr0_el1); PRINT_REG(id_pfr1_el1); PRINT_REG(isr_el1); PRINT_REG(mair_el1); PRINT_REG(midr_el1); PRINT_REG(mpidr_el1); PRINT_REG(mvfr0_el1); PRINT_REG(mvfr1_el1); PRINT_REG(mvfr2_el1); PRINT_REG(revidr_el1); PRINT_REG(sctlr_el1); PRINT_REG(sp_el0); PRINT_REG(spsel); PRINT_REG(spsr_el1); PRINT_REG(tcr_el1); PRINT_REG(tpidr_el0); PRINT_REG(tpidr_el1); PRINT_REG(tpidrro_el0); PRINT_REG(ttbr0_el1); PRINT_REG(ttbr1_el1); PRINT_REG(vbar_el1); #undef PRINT_REG } DB_SHOW_COMMAND(vtop, db_show_vtop) { uint64_t phys; if (have_addr) { phys = arm64_address_translate_s1e1r(addr); db_printf("Physical address reg: 0x%016lx\n", phys); } else db_printf("show vtop \n"); } #endif Index: head/sys/arm64/arm64/minidump_machdep.c =================================================================== --- head/sys/arm64/arm64/minidump_machdep.c (revision 297445) +++ head/sys/arm64/arm64/minidump_machdep.c (revision 297446) @@ -1,465 +1,465 @@ /*- * Copyright (c) 2006 Peter Wemm * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_watchdog.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct kerneldumpheader) == 512); /* * Don't touch the first SIZEOF_METADATA bytes on the dump device. This * is to protect us from metadata and to protect metadata from us. */ #define SIZEOF_METADATA (64*1024) uint64_t *vm_page_dump; int vm_page_dump_size; static struct kerneldumpheader kdh; static off_t dumplo; /* Handle chunked writes. */ static size_t fragsz; static void *dump_va; static size_t counter, progress, dumpsize; static uint64_t tmpbuffer[PAGE_SIZE / sizeof(uint64_t)]; CTASSERT(sizeof(*vm_page_dump) == 8); static int is_dumpable(vm_paddr_t pa) { vm_page_t m; int i; if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) return ((m->flags & PG_NODUMP) == 0); for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) return (1); } return (0); } static int blk_flush(struct dumperinfo *di) { int error; if (fragsz == 0) return (0); error = dump_write(di, dump_va, 0, dumplo, fragsz); dumplo += fragsz; fragsz = 0; return (error); } static struct { int min_per; int max_per; int visited; } progress_track[10] = { { 0, 10, 0}, { 10, 20, 0}, { 20, 30, 0}, { 30, 40, 0}, { 40, 50, 0}, { 50, 60, 0}, { 60, 70, 0}, { 70, 80, 0}, { 80, 90, 0}, { 90, 100, 0} }; static void report_progress(size_t progress, size_t dumpsize) { int sofar, i; sofar = 100 - ((progress * 100) / dumpsize); for (i = 0; i < nitems(progress_track); i++) { if (sofar < progress_track[i].min_per || sofar > progress_track[i].max_per) continue; if (progress_track[i].visited) return; progress_track[i].visited = 1; printf("..%d%%", sofar); return; } } static int blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) { size_t len; int error, c; u_int maxdumpsz; maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); if (maxdumpsz == 0) /* seatbelt */ maxdumpsz = PAGE_SIZE; error = 0; if ((sz % PAGE_SIZE) != 0) { printf("size not page aligned\n"); return (EINVAL); } if (ptr != NULL && pa != 0) { printf("cant have both va and pa!\n"); return (EINVAL); } if ((((uintptr_t)pa) % PAGE_SIZE) != 0) { printf("address not page aligned %p\n", ptr); return (EINVAL); } if (ptr != NULL) { /* * If we're doing a virtual dump, flush any * pre-existing pa pages. */ error = blk_flush(di); if (error) return (error); } while (sz) { len = maxdumpsz - fragsz; if (len > sz) len = sz; counter += len; progress -= len; if (counter >> 22) { report_progress(progress, dumpsize); counter &= (1 << 22) - 1; } wdog_kern_pat(WD_LASTVAL); if (ptr) { error = dump_write(di, ptr, 0, dumplo, len); if (error) return (error); dumplo += len; ptr += len; sz -= len; } else { dump_va = (void *)PHYS_TO_DMAP(pa); fragsz += len; pa += len; sz -= len; error = blk_flush(di); if (error) return (error); } /* Check for user abort. */ c = cncheckc(); if (c == 0x03) return (ECANCELED); if (c != -1) printf(" (CTRL-C to abort) "); } return (0); } int minidumpsys(struct dumperinfo *di) { - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; uint32_t pmapsize; vm_offset_t va; vm_paddr_t pa; int error; uint64_t bits; int i, bit; int retry_count; struct minidumphdr mdhdr; retry_count = 0; retry: retry_count++; error = 0; pmapsize = 0; for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) { pmapsize += PAGE_SIZE; - if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) + if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) continue; /* We should always be using the l2 table for kvm */ if (l2 == NULL) continue; if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK) { pa = *l2 & ~ATTR_MASK; for (i = 0; i < Ln_ENTRIES; i++, pa += PAGE_SIZE) { if (is_dumpable(pa)) dump_add_page(pa); } } else if ((*l2 & ATTR_DESCR_MASK) == L2_TABLE) { for (i = 0; i < Ln_ENTRIES; i++) { if ((l3[i] & ATTR_DESCR_MASK) != L3_PAGE) continue; pa = l3[i] & ~ATTR_MASK; if (is_dumpable(pa)) dump_add_page(pa); } } } /* Calculate dump size. */ dumpsize = pmapsize; dumpsize += round_page(msgbufp->msg_size); dumpsize += round_page(vm_page_dump_size); for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { bits = vm_page_dump[i]; while (bits) { bit = ffsl(bits) - 1; pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; /* Clear out undumpable pages now if needed */ if (is_dumpable(pa)) dumpsize += PAGE_SIZE; else dump_drop_page(pa); bits &= ~(1ul << bit); } } dumpsize += PAGE_SIZE; /* Determine dump offset on device. */ if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { error = E2BIG; goto fail; } dumplo = di->mediaoffset + di->mediasize - dumpsize; dumplo -= sizeof(kdh) * 2; progress = dumpsize; /* Initialize mdhdr */ bzero(&mdhdr, sizeof(mdhdr)); strcpy(mdhdr.magic, MINIDUMP_MAGIC); mdhdr.version = MINIDUMP_VERSION; mdhdr.msgbufsize = msgbufp->msg_size; mdhdr.bitmapsize = vm_page_dump_size; mdhdr.pmapsize = pmapsize; mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; mdhdr.dmapphys = DMAP_MIN_PHYSADDR; mdhdr.dmapbase = DMAP_MIN_ADDRESS; mdhdr.dmapend = DMAP_MAX_ADDRESS; mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AARCH64_VERSION, dumpsize, di->blocksize); printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, ptoa((uintmax_t)physmem) / 1048576); /* Dump leader */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Dump my header */ bzero(&tmpbuffer, sizeof(tmpbuffer)); bcopy(&mdhdr, &tmpbuffer, sizeof(mdhdr)); error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; /* Dump msgbuf up front */ error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); if (error) goto fail; /* Dump bitmap */ error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); if (error) goto fail; /* Dump kernel page directory pages */ bzero(&tmpbuffer, sizeof(tmpbuffer)); for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) { - if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) { + if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) { /* We always write a page, even if it is zero */ error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; /* flush, in case we reuse tmpbuffer in the same block*/ error = blk_flush(di); if (error) goto fail; } else if (l2 == NULL) { pa = (*l1 & ~ATTR_MASK) | (va & L1_OFFSET); /* Generate fake l3 entries based upon the l1 entry */ for (i = 0; i < Ln_ENTRIES; i++) { tmpbuffer[i] = pa + (i * PAGE_SIZE) | ATTR_DEFAULT | L3_PAGE; } /* We always write a page, even if it is zero */ error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; /* flush, in case we reuse tmpbuffer in the same block*/ error = blk_flush(di); if (error) goto fail; bzero(&tmpbuffer, sizeof(tmpbuffer)); } else if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK) { /* TODO: Handle an invalid L2 entry */ pa = (*l2 & ~ATTR_MASK) | (va & L2_OFFSET); /* Generate fake l3 entries based upon the l1 entry */ for (i = 0; i < Ln_ENTRIES; i++) { tmpbuffer[i] = pa + (i * PAGE_SIZE) | ATTR_DEFAULT | L3_PAGE; } /* We always write a page, even if it is zero */ error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; /* flush, in case we reuse fakepd in the same block */ error = blk_flush(di); if (error) goto fail; bzero(&tmpbuffer, sizeof(tmpbuffer)); continue; } else { pa = *l2 & ~ATTR_MASK; /* We always write a page, even if it is zero */ error = blk_write(di, NULL, pa, PAGE_SIZE); if (error) goto fail; } } /* Dump memory chunks */ /* XXX cluster it up and use blk_dump() */ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { bits = vm_page_dump[i]; while (bits) { bit = ffsl(bits) - 1; pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; error = blk_write(di, 0, pa, PAGE_SIZE); if (error) goto fail; bits &= ~(1ul << bit); } } error = blk_flush(di); if (error) goto fail; /* Dump trailer */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Signal completion, signoff and exit stage left. */ dump_write(di, NULL, 0, 0, 0); printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; printf("\n"); if (error == ENOSPC) { printf("Dump map grown while dumping. "); if (retry_count < 5) { printf("Retrying...\n"); goto retry; } printf("Dump failed.\n"); } else if (error == ECANCELED) printf("Dump aborted\n"); else if (error == E2BIG) printf("Dump failed. Partition too small.\n"); else printf("** DUMP FAILED (ERROR %d) **\n", error); return (error); } void dump_add_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 6; /* 2^6 = 64 */ bit = pa & 63; atomic_set_long(&vm_page_dump[idx], 1ul << bit); } void dump_drop_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 6; /* 2^6 = 64 */ bit = pa & 63; atomic_clear_long(&vm_page_dump[idx], 1ul << bit); } Index: head/sys/arm64/arm64/pmap.c =================================================================== --- head/sys/arm64/arm64/pmap.c (revision 297445) +++ head/sys/arm64/arm64/pmap.c (revision 297446) @@ -1,3267 +1,3586 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. - * Copyright (c) 2014 The FreeBSD Foundation + * Copyright (c) 2014-2016 The FreeBSD Foundation * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * This software was developed by Andrew Turner under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) -#define NUPDE (NPDEPG * NPDEPG) -#define NUSERPGTBLS (NUPDE + NPDEPG) +#define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) +#define NUL0E L0_ENTRIES +#define NUL1E (NUL0E * NL1PG) +#define NUL2E (NUL1E * NL2PG) + #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif /* * These are configured by the mair_el1 register. This is set up in locore.S */ #define DEVICE_MEMORY 0 #define UNCACHED_MEMORY 1 #define CACHED_MEMORY 2 #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; static struct rwlock_padalign pvh_global_lock; vm_paddr_t dmap_phys_base; /* The start of the dmap region */ /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); /* * These load the old table data and store the new value. * They need to be atomic as the System MMU may write to the table at * the same time as the CPU. */ #define pmap_load_store(table, entry) atomic_swap_64(table, entry) #define pmap_set(table, mask) atomic_set_64(table, mask) #define pmap_load_clear(table) atomic_swap_64(table, 0) #define pmap_load(table) (*table) /********************/ /* Inline functions */ /********************/ static __inline void pagecopy(void *s, void *d) { memcpy(d, s, PAGE_SIZE); } static __inline void pagezero(void *p) { bzero(p, PAGE_SIZE); } +#define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK) #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) static __inline pd_entry_t * +pmap_l0(pmap_t pmap, vm_offset_t va) +{ + + return (&pmap->pm_l0[pmap_l0_index(va)]); +} + +static __inline pd_entry_t * +pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) +{ + pd_entry_t *l1; + + l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); + return (&l1[pmap_l1_index(va)]); +} + +static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { + pd_entry_t *l0; - return (&pmap->pm_l1[pmap_l1_index(va)]); + l0 = pmap_l0(pmap, va); + if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) + return (NULL); + + return (pmap_l0_to_l1(l0, va)); } static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) { pd_entry_t *l2; l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); return (&l2[pmap_l2_index(va)]); } static __inline pd_entry_t * pmap_l2(pmap_t pmap, vm_offset_t va) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE) return (NULL); return (pmap_l1_to_l2(l1, va)); } static __inline pt_entry_t * pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) { pt_entry_t *l3; l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); return (&l3[pmap_l3_index(va)]); } +/* + * Returns the lowest valid pde for a given virtual address. + * The next level may or may not point to a valid page or block. + */ +static __inline pd_entry_t * +pmap_pde(pmap_t pmap, vm_offset_t va, int *level) +{ + pd_entry_t *l0, *l1, *l2, desc; + + l0 = pmap_l0(pmap, va); + desc = pmap_load(l0) & ATTR_DESCR_MASK; + if (desc != L0_TABLE) { + *level = -1; + return (NULL); + } + + l1 = pmap_l0_to_l1(l0, va); + desc = pmap_load(l1) & ATTR_DESCR_MASK; + if (desc != L1_TABLE) { + *level = 0; + return (l0); + } + + l2 = pmap_l1_to_l2(l1, va); + desc = pmap_load(l2) & ATTR_DESCR_MASK; + if (desc != L2_TABLE) { + *level = 1; + return (l1); + } + + *level = 2; + return (l2); +} + +/* + * Returns the lowest valid pte block or table entry for a given virtual + * address. If there are no valid entries return NULL and set the level to + * the first invalid level. + */ static __inline pt_entry_t * -pmap_l3(pmap_t pmap, vm_offset_t va) +pmap_pte(pmap_t pmap, vm_offset_t va, int *level) { - pd_entry_t *l2; + pd_entry_t *l1, *l2, desc; + pt_entry_t *l3; - l2 = pmap_l2(pmap, va); - if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) + l1 = pmap_l1(pmap, va); + if (l1 == NULL) { + *level = 0; return (NULL); + } + desc = pmap_load(l1) & ATTR_DESCR_MASK; + if (desc == L1_BLOCK) { + *level = 1; + return (l1); + } - return (pmap_l2_to_l3(l2, va)); + if (desc != L1_TABLE) { + *level = 1; + return (NULL); + } + + l2 = pmap_l1_to_l2(l1, va); + desc = pmap_load(l2) & ATTR_DESCR_MASK; + if (desc == L2_BLOCK) { + *level = 2; + return (l2); + } + + if (desc != L2_TABLE) { + *level = 2; + return (NULL); + } + + *level = 3; + l3 = pmap_l2_to_l3(l2, va); + if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE) + return (NULL); + + return (l3); } bool -pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2, - pt_entry_t **l3) +pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, + pd_entry_t **l2, pt_entry_t **l3) { - pd_entry_t *l1p, *l2p; + pd_entry_t *l0p, *l1p, *l2p; - if (pmap->pm_l1 == NULL) + if (pmap->pm_l0 == NULL) return (false); - l1p = pmap_l1(pmap, va); + l0p = pmap_l0(pmap, va); + *l0 = l0p; + + if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) + return (false); + + l1p = pmap_l0_to_l1(l0p, va); *l1 = l1p; if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { *l2 = NULL; *l3 = NULL; return (true); } if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE) return (false); l2p = pmap_l1_to_l2(l1p, va); *l2 = l2p; if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) { *l3 = NULL; return (true); } *l3 = pmap_l2_to_l3(l2p, va); return (true); } static __inline int pmap_is_current(pmap_t pmap) { return ((pmap == pmap_kernel()) || (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); } static __inline int pmap_l3_valid(pt_entry_t l3) { return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); } static __inline int pmap_l3_valid_cacheable(pt_entry_t l3) { return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) && ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); } #define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) /* * Checks if the page is dirty. We currently lack proper tracking of this on * arm64 so for now assume is a page mapped as rw was accessed it is. */ static inline int pmap_page_dirty(pt_entry_t pte) { return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) == (ATTR_AF | ATTR_AP(ATTR_AP_RW))); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } static pt_entry_t * pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, u_int *l2_slot) { pt_entry_t *l2; pd_entry_t *l1; l1 = (pd_entry_t *)l1pt; *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; /* Check locore has used a table L1 map */ KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, ("Invalid bootstrap L1 table")); /* Find the address of the L2 table */ l2 = (pt_entry_t *)init_pt_va; *l2_slot = pmap_l2_index(va); return (l2); } static vm_paddr_t pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) { u_int l1_slot, l2_slot; pt_entry_t *l2; l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); } static void pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) { vm_offset_t va; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; pa = dmap_phys_base = kernstart & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; l1 = (pd_entry_t *)l1pt; l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); for (; va < DMAP_MAX_ADDRESS; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); pmap_load_store(&l1[l1_slot], (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_IDX(CACHED_MEMORY) | L1_BLOCK); } cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); cpu_tlb_flushID(); } static vm_offset_t pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) { vm_offset_t l2pt; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); l1 = (pd_entry_t *)l1pt; l1_slot = pmap_l1_index(va); l2pt = l2_start; for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); pa = pmap_early_vtophys(l1pt, l2pt); pmap_load_store(&l1[l1_slot], (pa & ~Ln_TABLE_MASK) | L1_TABLE); l2pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l2_start, 0, l2pt - l2_start); cpu_dcache_wb_range(l2_start, l2pt - l2_start); /* Flush the l1 table to ram */ cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); return l2pt; } static vm_offset_t pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) { vm_offset_t l2pt, l3pt; vm_paddr_t pa; pd_entry_t *l2; u_int l2_slot; KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); l2 = pmap_l2(kernel_pmap, va); l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1)); l2pt = (vm_offset_t)l2; l2_slot = pmap_l2_index(va); l3pt = l3_start; for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); pa = pmap_early_vtophys(l1pt, l3pt); pmap_load_store(&l2[l2_slot], (pa & ~Ln_TABLE_MASK) | L2_TABLE); l3pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l3_start, 0, l3pt - l3_start); cpu_dcache_wb_range(l3_start, l3pt - l3_start); cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); return l3pt; } /* * Bootstrap the system enough to run with virtual memory. */ void -pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) +pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, + vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; uint64_t kern_delta; pt_entry_t *l2; vm_offset_t va, freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t pa, min_pa; int i; kern_delta = KERNBASE - kernstart; physmem = 0; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); printf("%lx\n", l1pt); printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ - kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; + kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; PMAP_LOCK_INIT(kernel_pmap); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /* Assume the address we were loaded to is a valid physical address */ min_pa = KERNBASE - kern_delta; /* * Find the minimum physical address. physmap is sorted, * but may contain empty ranges. */ for (i = 0; i < (physmap_idx * 2); i += 2) { if (physmap[i] == physmap[i + 1]) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; break; } /* Create a direct map region early so we can use it for pa -> va */ pmap_bootstrap_dmap(l1pt, min_pa); va = KERNBASE; pa = KERNBASE - kern_delta; /* * Start to initialise phys_avail by copying from physmap * up to the physical address KERNBASE points at. */ map_slot = avail_slot = 0; for (; map_slot < (physmap_idx * 2) && avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; if (physmap[map_slot] <= pa && physmap[map_slot + 1] > pa) break; phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } /* Add the memory before the kernel */ if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = pa; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } used_map_slot = map_slot; /* * Read the page table to find out what is already mapped. * This assumes we have mapped a block of memory from KERNBASE * using a single L1 entry. */ l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); /* Sanity check the index, KERNBASE should be the first VA */ KASSERT(l2_slot == 0, ("The L2 index is non-zero")); /* Find how many pages we have mapped */ for (; l2_slot < Ln_ENTRIES; l2_slot++) { if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) break; /* Check locore used L2 blocks */ KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, ("Invalid bootstrap L2 table")); KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, ("Incorrect PA in L2 table")); va += L2_SIZE; pa += L2_SIZE; } va = roundup2(va, L1_SIZE); freemempos = KERNBASE + kernlen; freemempos = roundup2(freemempos, PAGE_SIZE); /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); /* And the l3 tables for the early devmap */ freemempos = pmap_bootstrap_l3(l1pt, VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); cpu_tlb_flushID(); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); /* Allocate dynamic per-cpu area. */ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); msgbufp = (void *)msgbufpv; virtual_avail = roundup2(freemempos, L1_SIZE); virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; kernel_vm_end = virtual_avail; pa = pmap_early_vtophys(l1pt, freemempos); /* Finish initialising physmap */ map_slot = used_map_slot; for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && map_slot < (physmap_idx * 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; /* Have we used the current range? */ if (physmap[map_slot + 1] <= pa) continue; /* Do we need to split the entry? */ if (physmap[map_slot] < pa) { phys_avail[avail_slot] = pa; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } else { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } phys_avail[avail_slot] = 0; phys_avail[avail_slot + 1] = 0; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = atop(phys_avail[avail_slot - 1]); cpu_tlb_flushID(); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { int i; /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); } /* * Normal, non-SMP, invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { sched_pin(); __asm __volatile( "dsb sy \n" "tlbi vaae1is, %0 \n" "dsb sy \n" "isb \n" : : "r"(va >> PAGE_SHIFT)); sched_unpin(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; sched_pin(); __asm __volatile("dsb sy"); for (addr = sva; addr < eva; addr += PAGE_SIZE) { __asm __volatile( "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT)); } __asm __volatile( "dsb sy \n" "isb \n"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { sched_pin(); __asm __volatile( "dsb sy \n" "tlbi vmalle1is \n" "dsb sy \n" "isb \n"); sched_unpin(); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { - pd_entry_t *l2p, l2; - pt_entry_t *l3p, l3; + pt_entry_t *pte, tpte; vm_paddr_t pa; + int lvl; pa = 0; PMAP_LOCK(pmap); /* - * Start with the l2 tabel. We are unable to allocate - * pages in the l1 table. + * Find the block or page map for this virtual address. pmap_pte + * will return either a valid block/page entry, or NULL. */ - l2p = pmap_l2(pmap, va); - if (l2p != NULL) { - l2 = pmap_load(l2p); - if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) { - l3p = pmap_l2_to_l3(l2p, va); - if (l3p != NULL) { - l3 = pmap_load(l3p); - - if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) - pa = (l3 & ~ATTR_MASK) | - (va & L3_OFFSET); - } - } else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) - pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET); + pte = pmap_pte(pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + pa = tpte & ~ATTR_MASK; + switch(lvl) { + case 1: + KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, + ("pmap_extract: Invalid L1 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L1_OFFSET); + break; + case 2: + KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_extract: Invalid L2 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L2_OFFSET); + break; + case 3: + KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, + ("pmap_extract: Invalid L3 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L3_OFFSET); + break; + } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { - pt_entry_t *l3p, l3; + pt_entry_t *pte, tpte; vm_paddr_t pa; vm_page_t m; + int lvl; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: - l3p = pmap_l3(pmap, va); - if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { - if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || + pte = pmap_pte(pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + + KASSERT(lvl > 0 && lvl <= 3, + ("pmap_extract_and_hold: Invalid level %d", lvl)); + CTASSERT(L1_BLOCK == L2_BLOCK); + KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || + (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), + ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, + tpte & ATTR_DESCR_MASK)); + if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || ((prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa)) + if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa)) goto retry; - m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK); + m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); vm_page_hold(m); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { - pd_entry_t *l2p, l2; - pt_entry_t *l3; + pt_entry_t *pte, tpte; vm_paddr_t pa; + int lvl; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { - l2p = pmap_l2(kernel_pmap, va); - if (l2p == NULL) - panic("pmap_kextract: No l2"); - l2 = pmap_load(l2p); - if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) - return ((l2 & ~ATTR_MASK) | - (va & L2_OFFSET)); - - l3 = pmap_l2_to_l3(l2p, va); - if (l3 == NULL) - panic("pmap_kextract: No l3..."); - pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK); + pa = 0; + pte = pmap_pte(kernel_pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + pa = tpte & ~ATTR_MASK; + switch(lvl) { + case 1: + KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, + ("pmap_kextract: Invalid L1 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L1_OFFSET); + break; + case 2: + KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_kextract: Invalid L2 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L2_OFFSET); + break; + case 3: + KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, + ("pmap_kextract: Invalid L3 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L3_OFFSET); + break; + } + } } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ void pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) { - pt_entry_t *l3; + pd_entry_t *pde; + pt_entry_t *pte; vm_offset_t va; + int lvl; KASSERT((pa & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kenter_device: Mapping is not page-sized")); va = sva; while (size != 0) { - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); - pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT | + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_kenter_device: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, + ("pmap_kenter_device: Invalid level %d", lvl)); + + pte = pmap_l2_to_l3(pde, va); + pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT | ATTR_IDX(DEVICE_MEMORY) | L3_PAGE); - PTE_SYNC(l3); + PTE_SYNC(pte); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Remove a page from the kernel pagetables. - * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { - pt_entry_t *l3; + pt_entry_t *pte; + int lvl; - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); + KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); - if (pmap_l3_valid_cacheable(pmap_load(l3))) + if (pmap_l3_valid_cacheable(pmap_load(pte))) cpu_dcache_wb_range(va, L3_SIZE); - pmap_load_clear(l3); - PTE_SYNC(l3); + pmap_load_clear(pte); + PTE_SYNC(pte); pmap_invalidate_page(kernel_pmap, va); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { - pt_entry_t *l3; + pt_entry_t *pte; vm_offset_t va; + int lvl; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); va = sva; while (size != 0) { - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); - pmap_load_clear(l3); - PTE_SYNC(l3); + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); + KASSERT(lvl == 3, + ("Invalid device pagetable level: %d != 3", lvl)); + pmap_load_clear(pte); + PTE_SYNC(pte); va += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { - pt_entry_t *l3, pa; + pd_entry_t *pde; + pt_entry_t *pte, pa; vm_offset_t va; vm_page_t m; - int i; + int i, lvl; va = sva; for (i = 0; i < count; i++) { + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, + ("pmap_qenter: Invalid level %d", lvl)); + m = ma[i]; pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_IDX(m->md.pv_memattr) | L3_PAGE; - l3 = pmap_l3(kernel_pmap, va); - pmap_load_store(l3, pa); - PTE_SYNC(l3); + pte = pmap_l2_to_l3(pde, va); + pmap_load_store(pte, pa); + PTE_SYNC(pte); va += L3_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. - * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { - pt_entry_t *l3; + pt_entry_t *pte; vm_offset_t va; + int lvl; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); va = sva; while (count-- > 0) { - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(lvl == 3, + ("Invalid device pagetable level: %d != 3", lvl)); + if (pte != NULL) { + if (pmap_l3_valid_cacheable(pmap_load(pte))) + cpu_dcache_wb_range(va, L3_SIZE); + pmap_load_clear(pte); + PTE_SYNC(pte); + } - if (pmap_l3_valid_cacheable(pmap_load(l3))) - cpu_dcache_wb_range(va, L3_SIZE); - pmap_load_clear(l3); - PTE_SYNC(l3); - va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_l3(pmap, va, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ - if (m->pindex >= NUPDE) { - /* PD page */ + if (m->pindex >= (NUL2E + NUL1E)) { + /* l1 page */ + pd_entry_t *l0; + + l0 = pmap_l0(pmap, va); + pmap_load_clear(l0); + PTE_SYNC(l0); + } else if (m->pindex >= NUL2E) { + /* l2 page */ pd_entry_t *l1; + l1 = pmap_l1(pmap, va); pmap_load_clear(l1); PTE_SYNC(l1); } else { - /* PTE page */ + /* l3 page */ pd_entry_t *l2; + l2 = pmap_l2(pmap, va); pmap_load_clear(l2); PTE_SYNC(l2); } pmap_resident_count_dec(pmap, 1); - if (m->pindex < NUPDE) { - /* We just released a PT, unhold the matching PD */ - vm_page_t pdpg; + if (m->pindex < NUL2E) { + /* We just released an l3, unhold the matching l2 */ + pd_entry_t *l1, tl1; + vm_page_t l2pg; - pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK); - pmap_unwire_l3(pmap, va, pdpg, free); + l1 = pmap_l1(pmap, va); + tl1 = pmap_load(l1); + l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); + pmap_unwire_l3(pmap, va, l2pg, free); + } else if (m->pindex < (NUL2E + NUL1E)) { + /* We just released an l2, unhold the matching l1 */ + pd_entry_t *l0, tl0; + vm_page_t l1pg; + + l0 = pmap_l0(pmap, va); + tl0 = pmap_load(l0); + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + pmap_unwire_l3(pmap, va, l1pg, free); } pmap_invalidate_page(pmap, va); /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing an l3 entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); return (pmap_unwire_l3(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); - pmap->pm_l1 = kernel_pmap->pm_l1; + pmap->pm_l0 = kernel_pmap->pm_l0; } int pmap_pinit(pmap_t pmap) { - vm_paddr_t l1phys; - vm_page_t l1pt; + vm_paddr_t l0phys; + vm_page_t l0pt; /* - * allocate the l1 page + * allocate the l0 page */ - while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | + while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; - l1phys = VM_PAGE_TO_PHYS(l1pt); - pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); + l0phys = VM_PAGE_TO_PHYS(l0pt); + pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); - if ((l1pt->flags & PG_ZERO) == 0) - pagezero(pmap->pm_l1); + if ((l0pt->flags & PG_ZERO) == 0) + pagezero(pmap->pm_l0); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); return (1); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { - vm_page_t m, /*pdppg, */pdpg; + vm_page_t m, l1pg, l2pg; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); rw_runlock(&pvh_global_lock); VM_WAIT; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ - if (ptepindex >= NUPDE) { - pd_entry_t *l1; - vm_pindex_t l1index; + if (ptepindex >= (NUL2E + NUL1E)) { + pd_entry_t *l0; + vm_pindex_t l0index; - l1index = ptepindex - NUPDE; - l1 = &pmap->pm_l1[l1index]; + l0index = ptepindex - (NUL2E + NUL1E); + l0 = &pmap->pm_l0[l0index]; + pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); + PTE_SYNC(l0); + } else if (ptepindex >= NUL2E) { + vm_pindex_t l0index, l1index; + pd_entry_t *l0, *l1; + pd_entry_t tl0; + + l1index = ptepindex - NUL2E; + l0index = l1index >> L0_ENTRIES_SHIFT; + + l0 = &pmap->pm_l0[l0index]; + tl0 = pmap_load(l0); + if (tl0 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, + lockp) == NULL) { + --m->wire_count; + /* XXX: release mem barrier? */ + atomic_subtract_int(&vm_cnt.v_wire_count, 1); + vm_page_free_zero(m); + return (NULL); + } + } else { + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + l1pg->wire_count++; + } + + l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); + l1 = &l1[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); PTE_SYNC(l1); - } else { - vm_pindex_t l1index; - pd_entry_t *l1, *l2; + vm_pindex_t l0index, l1index; + pd_entry_t *l0, *l1, *l2; + pd_entry_t tl0, tl1; - l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); - l1 = &pmap->pm_l1[l1index]; - if (pmap_load(l1) == 0) { + l1index = ptepindex >> Ln_ENTRIES_SHIFT; + l0index = l1index >> L0_ENTRIES_SHIFT; + + l0 = &pmap->pm_l0[l0index]; + tl0 = pmap_load(l0); + if (tl0 == 0) { /* recurse for allocating page dir */ - if (_pmap_alloc_l3(pmap, NUPDE + l1index, + if (_pmap_alloc_l3(pmap, NUL2E + l1index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } + tl0 = pmap_load(l0); + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = &l1[l1index & Ln_ADDR_MASK]; } else { - pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK); - pdpg->wire_count++; + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = &l1[l1index & Ln_ADDR_MASK]; + tl1 = pmap_load(l1); + if (tl1 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + l1index, + lockp) == NULL) { + --m->wire_count; + /* XXX: release mem barrier? */ + atomic_subtract_int( + &vm_cnt.v_wire_count, 1); + vm_page_free_zero(m); + return (NULL); + } + } else { + l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); + l2pg->wire_count++; + } } l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); l2 = &l2[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); PTE_SYNC(l2); } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; - pd_entry_t *l2; + pd_entry_t *pde, tpde; vm_page_t m; + int lvl; /* * Calculate pagetable page index */ ptepindex = pmap_l2_pindex(va); retry: /* * Get the page directory entry */ - l2 = pmap_l2(pmap, va); + pde = pmap_pde(pmap, va, &lvl); /* - * If the page table page is mapped, we just increment the - * hold count, and activate it. + * If the page table page is mapped, we just increment the hold count, + * and activate it. If we get a level 2 pde it will point to a level 3 + * table. */ - if (l2 != NULL && pmap_load(l2) != 0) { - m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); - m->wire_count++; - } else { - /* - * Here if the pte page isn't mapped, or if it has been - * deallocated. - */ - m = _pmap_alloc_l3(pmap, ptepindex, lockp); - if (m == NULL && lockp != NULL) - goto retry; + if (lvl == 2) { + tpde = pmap_load(pde); + if (tpde != 0) { + m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); + m->wire_count++; + return (m); + } } + + /* + * Here if the pte page isn't mapped, or if it has been deallocated. + */ + m = _pmap_alloc_l3(pmap, ptepindex, lockp); + if (m == NULL && lockp != NULL) + goto retry; + return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } #if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); #endif /* 0 */ /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { - l1 = pmap_l1(kernel_pmap, kernel_vm_end); + l0 = pmap_l0(kernel_pmap, kernel_vm_end); + KASSERT(pmap_load(l0) != 0, + ("pmap_growkernel: No level 0 kernel entry")); + + l1 = pmap_l0_to_l1(l0, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pmap_load_store(l1, paddr | L1_TABLE); PTE_SYNC(l1); continue; /* try again */ } l2 = pmap_l1_to_l2(l1, kernel_vm_end); if ((pmap_load(l2) & ATTR_AF) != 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pmap_load_store(l2, paddr | L2_TABLE); PTE_SYNC(l2); pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif #endif /* 0 */ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { panic("ARM64TODO: reclaim_pv_chunk"); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. */ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire(m, PQ_NONE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffsl(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. */ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * pmap_remove_l3: do the things to unmap a page in a process */ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { pt_entry_t old_l3; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); old_l3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); if (old_l3 & ATTR_SW_WIRED) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (old_l3 & ATTR_SW_MANAGED) { m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); if (pmap_page_dirty(old_l3)) vm_page_dirty(m); if (old_l3 & ATTR_AF) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); } return (pmap_unuse_l3(pmap, va, l2e, free)); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va, va_next; - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; pt_entry_t l3_paddr, *l3; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; - l1 = pmap_l1(pmap, sva); + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; l3_paddr = pmap_load(l2); /* * Weed out invalid mappings. */ if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) continue; /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (va_next > eva) va_next = eva; va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (l3 == NULL) panic("l3 == NULL"); if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if (va == va_next) va = sva; if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, &lock)) { sva += L3_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); if (anyvalid) pmap_invalidate_all(pmap); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; pmap_t pmap; - pt_entry_t *l3, tl3; - pd_entry_t *l2, tl2; + pd_entry_t *pde, tpde; + pt_entry_t *pte, tpte; struct spglist free; + int lvl; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); - l2 = pmap_l2(pmap, pv->pv_va); - KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); - tl2 = pmap_load(l2); - KASSERT((tl2 & ATTR_DESCR_MASK) == L2_TABLE, - ("pmap_remove_all: found a table when expecting " - "a block in %p's pv list", m)); - l3 = pmap_l2_to_l3(l2, pv->pv_va); + + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, + ("pmap_remove_all: no page directory entry found")); + KASSERT(lvl == 2, + ("pmap_remove_all: invalid pde level %d", lvl)); + tpde = pmap_load(pde); + + pte = pmap_l2_to_l3(pde, pv->pv_va); + tpte = pmap_load(pte); if (pmap_is_current(pmap) && - pmap_l3_valid_cacheable(pmap_load(l3))) + pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); - tl3 = pmap_load_clear(l3); - PTE_SYNC(l3); + pmap_load_clear(pte); + PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); - if (tl3 & ATTR_SW_WIRED) + if (tpte & ATTR_SW_WIRED) pmap->pm_stats.wired_count--; - if ((tl3 & ATTR_AF) != 0) + if ((tpte & ATTR_AF) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ - if (pmap_page_dirty(tl3)) + if (pmap_page_dirty(tpte)) vm_page_dirty(m); - pmap_unuse_l3(pmap, pv->pv_va, tl2, &free); + pmap_unuse_l3(pmap, pv->pv_va, tpde, &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va, va_next; - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; pt_entry_t *l3p, l3; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) return; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { - l1 = pmap_l1(pmap, sva); + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) continue; if (va_next > eva) va_next = eva; va = va_next; for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, sva += L3_SIZE) { l3 = pmap_load(l3p); if (pmap_l3_valid(l3)) { pmap_set(l3p, ATTR_AP(ATTR_AP_RO)); PTE_SYNC(l3p); /* XXX: Use pmap_invalidate_range */ pmap_invalidate_page(pmap, va); } } } PMAP_UNLOCK(pmap); /* TODO: Only invalidate entries we are touching */ pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { struct rwlock *lock; - pd_entry_t *l1, *l2; + pd_entry_t *pde; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; - vm_paddr_t opa, pa, l2_pa, l3_pa; - vm_page_t mpte, om, l2_m, l3_m; + vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; + vm_page_t mpte, om, l1_m, l2_m, l3_m; boolean_t nosleep; + int lvl; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | L3_PAGE); if ((prot & VM_PROT_WRITE) == 0) new_l3 |= ATTR_AP(ATTR_AP_RO); if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= ATTR_SW_WIRED; if ((va >> 63) == 0) new_l3 |= ATTR_AP(ATTR_AP_USER); CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); mpte = NULL; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); if (mpte == NULL && nosleep) { CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } - l3 = pmap_l3(pmap, va); + pde = pmap_pde(pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_enter: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, + ("pmap_enter: Invalid level %d", lvl)); + + l3 = pmap_l2_to_l3(pde, va); } else { - l3 = pmap_l3(pmap, va); - /* TODO: This is not optimal, but should mostly work */ - if (l3 == NULL) { - l2 = pmap_l2(pmap, va); + pde = pmap_pde(pmap, va, &lvl); + /* + * If we get a level 2 pde it must point to a level 3 entry + * otherwise we will need to create the intermediate tables + */ + if (lvl < 2) { + switch(lvl) { + default: + case -1: + /* Get the l0 pde to update */ + pde = pmap_l0(pmap, va); + KASSERT(pde != NULL, ("...")); - if (l2 == NULL) { + l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); + if (l1_m == NULL) + panic("pmap_enter: l1 pte_m == NULL"); + if ((l1_m->flags & PG_ZERO) == 0) + pmap_zero_page(l1_m); + + l1_pa = VM_PAGE_TO_PHYS(l1_m); + pmap_load_store(pde, l1_pa | L0_TABLE); + PTE_SYNC(pde); + /* FALLTHROUGH */ + case 0: + /* Get the l1 pde to update */ + pde = pmap_l1_to_l2(pde, va); + KASSERT(pde != NULL, ("...")); + l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l2_m == NULL) panic("pmap_enter: l2 pte_m == NULL"); if ((l2_m->flags & PG_ZERO) == 0) pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); - l1 = pmap_l1(pmap, va); - pmap_load_store(l1, l2_pa | L1_TABLE); - PTE_SYNC(l1); - l2 = pmap_l1_to_l2(l1, va); - } + pmap_load_store(pde, l2_pa | L1_TABLE); + PTE_SYNC(pde); + /* FALLTHROUGH */ + case 1: + /* Get the l2 pde to update */ + pde = pmap_l1_to_l2(pde, va); - KASSERT(l2 != NULL, - ("No l2 table after allocating one")); + l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); + if (l3_m == NULL) + panic("pmap_enter: l3 pte_m == NULL"); + if ((l3_m->flags & PG_ZERO) == 0) + pmap_zero_page(l3_m); - l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); - if (l3_m == NULL) - panic("pmap_enter: l3 pte_m == NULL"); - if ((l3_m->flags & PG_ZERO) == 0) - pmap_zero_page(l3_m); - - l3_pa = VM_PAGE_TO_PHYS(l3_m); - pmap_load_store(l2, l3_pa | L2_TABLE); - PTE_SYNC(l2); - l3 = pmap_l2_to_l3(l2, va); + l3_pa = VM_PAGE_TO_PHYS(l3_m); + pmap_load_store(pde, l3_pa | L2_TABLE); + PTE_SYNC(pde); + break; + } } + l3 = pmap_l2_to_l3(pde, va); pmap_invalidate_page(pmap, va); } om = NULL; orig_l3 = pmap_load(l3); opa = orig_l3 & ~ATTR_MASK; /* * Is the specified virtual address already mapped? */ if (pmap_l3_valid(orig_l3)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((flags & PMAP_ENTER_WIRED) != 0 && (orig_l3 & ATTR_SW_WIRED) == 0) pmap->pm_stats.wired_count++; else if ((flags & PMAP_ENTER_WIRED) == 0 && (orig_l3 & ATTR_SW_WIRED) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((orig_l3 & ATTR_SW_MANAGED) != 0) { new_l3 |= ATTR_SW_MANAGED; if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == ATTR_AP(ATTR_AP_RW)) { vm_page_aflag_set(m, PGA_WRITEABLE); } } goto validate; } /* Flush the cache, there might be uncommitted data in it */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) cpu_dcache_wb_range(va, L3_SIZE); } else { /* * Increment the counters. */ if ((new_l3 & ATTR_SW_WIRED) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { new_l3 |= ATTR_SW_MANAGED; pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the L3 entry. */ if (orig_l3 != 0) { validate: orig_l3 = pmap_load_store(l3, new_l3); PTE_SYNC(l3); opa = orig_l3 & ~ATTR_MASK; if (opa != pa) { if ((orig_l3 & ATTR_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if (pmap_page_dirty(orig_l3)) vm_page_dirty(om); if ((orig_l3 & ATTR_AF) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); } } else if (pmap_page_dirty(orig_l3)) { if ((orig_l3 & ATTR_SW_MANAGED) != 0) vm_page_dirty(m); } } else { pmap_load_store(l3, new_l3); PTE_SYNC(l3); } pmap_invalidate_page(pmap, va); if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; - pd_entry_t *l2; + pd_entry_t *pde; pt_entry_t *l3; vm_paddr_t pa; + int lvl; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t l2pindex; /* * Calculate pagetable page index */ l2pindex = pmap_l2_pindex(va); if (mpte && (mpte->pindex == l2pindex)) { mpte->wire_count++; } else { /* * Get the l2 entry */ - l2 = pmap_l2(pmap, va); + pde = pmap_pde(pmap, va, &lvl); /* * If the page table page is mapped, we just increment * the hold count, and activate it. Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ - if (l2 != NULL && pmap_load(l2) != 0) { + if (lvl == 2 && pmap_load(pde) != 0) { mpte = - PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); + PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); if (mpte == NULL) return (mpte); } } l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; - l3 = pmap_l3(kernel_pmap, va); + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", + va)); + KASSERT(lvl == 2, + ("pmap_enter_quick_locked: Invalid level %d", lvl)); + l3 = pmap_l2_to_l3(pde, va); } - if (l3 == NULL) - panic("pmap_enter_quick_locked: No l3"); + if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_l3(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | ATTR_AP(ATTR_AP_RW) | L3_PAGE; /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) == 0) pa |= ATTR_SW_MANAGED; pmap_load_store(l3, pa); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); return (mpte); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; boolean_t pv_lists_locked; pv_lists_locked = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { - l1 = pmap_l1(pmap, sva); + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (pmap_load(l2) == 0) continue; if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (pmap_load(l3) == 0) continue; if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(l3, ATTR_SW_WIRED); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t m_a, m_b; vm_paddr_t p_a, p_b; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; b_pg_offset = b_offset & PAGE_MASK; m_b = mb[b_offset >> PAGE_SHIFT]; p_b = m_b->phys_addr; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); if (__predict_false(!PHYS_IN_DMAP(p_a))) { panic("!DMAP a %lx", p_a); } else { a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; } if (__predict_false(!PHYS_IN_DMAP(p_b))) { panic("!DMAP b %lx", p_b); } else { b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; } bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } void pmap_quick_remove_page(vm_offset_t addr) { } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; pmap_t pmap; - pt_entry_t *l3; + pt_entry_t *pte; pv_entry_t pv; - int count, md_gen; + int count, lvl, md_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } - l3 = pmap_l3(pmap, pv->pv_va); - if (l3 != NULL && (pmap_load(l3) & ATTR_SW_WIRED) != 0) + pte = pmap_pte(pmap, pv->pv_va, &lvl); + if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { - pd_entry_t ptepde, *l2; - pt_entry_t *l3, tl3; + pd_entry_t *pde; + pt_entry_t *pte, tpte; struct spglist free; vm_page_t m; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; - int allfree, field, freed, idx; + int allfree, field, freed, idx, lvl; vm_paddr_t pa; lock = NULL; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffsl(inuse) - 1; bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; - l2 = pmap_l2(pmap, pv->pv_va); - ptepde = pmap_load(l2); - l3 = pmap_l2_to_l3(l2, pv->pv_va); - tl3 = pmap_load(l3); + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, + ("Attempting to remove an unmapped page")); + KASSERT(lvl == 2, + ("Invalid page directory level: %d", lvl)); + pte = pmap_l2_to_l3(pde, pv->pv_va); + KASSERT(pte != NULL, + ("Attempting to remove an unmapped page")); + + tpte = pmap_load(pte); + /* * We cannot remove wired pages from a process' mapping at this time */ - if (tl3 & ATTR_SW_WIRED) { + if (tpte & ATTR_SW_WIRED) { allfree = 0; continue; } - pa = tl3 & ~ATTR_MASK; + pa = tpte & ~ATTR_MASK; m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, - (uintmax_t)tl3)); + (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], - ("pmap_remove_pages: bad l3 %#jx", - (uintmax_t)tl3)); + ("pmap_remove_pages: bad pte %#jx", + (uintmax_t)tpte)); + /* XXX: assumes tpte is level 3 */ if (pmap_is_current(pmap) && - pmap_l3_valid_cacheable(pmap_load(l3))) + pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); - pmap_load_clear(l3); - PTE_SYNC(l3); + pmap_load_clear(pte); + PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); /* * Update the vm_page_t clean/reference bits. */ - if ((tl3 & ATTR_AP_RW_BIT) == - ATTR_AP(ATTR_AP_RW)) + if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) vm_page_dirty(m); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; - pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); + pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde), + &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } pmap_invalidate_all(pmap); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * This is used to check if a page has been accessed or modified. As we * don't have a bit to see if it has been modified we have to assume it * has been if the page is read/write. */ static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; - pt_entry_t *l3, mask, value; + pt_entry_t *pte, mask, value; pmap_t pmap; - int md_gen; + int lvl, md_gen; boolean_t rv; rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } - l3 = pmap_l3(pmap, pv->pv_va); + pte = pmap_pte(pmap, pv->pv_va, &lvl); + KASSERT(lvl == 3, + ("pmap_page_test_mappings: Invalid level %d", lvl)); mask = 0; value = 0; if (modified) { mask |= ATTR_AP_RW_BIT; value |= ATTR_AP(ATTR_AP_RW); } if (accessed) { mask |= ATTR_AF | ATTR_DESCR_MASK; value |= ATTR_AF | L3_PAGE; } - rv = (pmap_load(l3) & mask) == value; + rv = (pmap_load(pte) & mask) == value; PMAP_UNLOCK(pmap); if (rv) goto out; } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { - pt_entry_t *l3; + pt_entry_t *pte; boolean_t rv; + int lvl; rv = FALSE; PMAP_LOCK(pmap); - l3 = pmap_l3(pmap, addr); - if (l3 != NULL && pmap_load(l3) != 0) { + pte = pmap_pte(pmap, addr, &lvl); + if (pte != NULL && pmap_load(pte) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { pmap_t pmap; struct rwlock *lock; pv_entry_t pv; - pt_entry_t *l3, oldl3; - int md_gen; + pt_entry_t oldpte, *pte; + int lvl, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } - l3 = pmap_l3(pmap, pv->pv_va); + pte = pmap_pte(pmap, pv->pv_va, &lvl); retry: - oldl3 = pmap_load(l3); - if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { - if (!atomic_cmpset_long(l3, oldl3, - oldl3 | ATTR_AP(ATTR_AP_RO))) + oldpte = pmap_load(pte); + if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { + if (!atomic_cmpset_long(pte, oldpte, + oldpte | ATTR_AP(ATTR_AP_RO))) goto retry; - if ((oldl3 & ATTR_AF) != 0) + if ((oldpte & ATTR_AF) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); rw_runlock(&pvh_global_lock); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { return (FALSE); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; - pd_entry_t *l2p, l2; - pt_entry_t *l3; + pd_entry_t *pde, tpde; + pt_entry_t *pte, tpte; vm_paddr_t pa; - int cleared, md_gen, not_cleared; + int cleared, md_gen, not_cleared, lvl; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } - l2p = pmap_l2(pmap, pv->pv_va); - KASSERT(l2p != NULL, ("pmap_ts_referenced: no l2 table found")); - l2 = pmap_load(l2p); - KASSERT((l2 & ATTR_DESCR_MASK) == L2_TABLE, + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); + KASSERT(lvl == 2, + ("pmap_ts_referenced: invalid pde level %d", lvl)); + tpde = pmap_load(pde); + KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, ("pmap_ts_referenced: found an invalid l2 table")); - l3 = pmap_l2_to_l3(l2p, pv->pv_va); - if ((pmap_load(l3) & ATTR_AF) != 0) { - if (safe_to_clear_referenced(pmap, pmap_load(l3))) { + pte = pmap_l2_to_l3(pde, pv->pv_va); + tpte = pmap_load(pte); + if ((tpte & ATTR_AF) != 0) { + if (safe_to_clear_referenced(pmap, tpte)) { /* * TODO: We don't handle the access flag * at all. We need to be able to set it in * the exception handler. */ panic("ARM64TODO: safe_to_clear_referenced\n"); - } else if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) { + } else if ((tpte & ATTR_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ - pmap_remove_l3(pmap, l3, pv->pv_va, l2, + pmap_remove_l3(pmap, pte, pv->pv_va, tpde, &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); pmap_free_zero_pages(&free); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; /* ARM64TODO: We lack support for tracking if a page is modified */ } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return ((void *)PHYS_TO_DMAP(pa)); } void pmap_unmapbios(vm_paddr_t pa, vm_size_t size) { } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pv_memattr = ma; /* * ARM64TODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) panic("ARM64TODO: pmap_page_set_memattr"); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *l1p, l1; pd_entry_t *l2p, l2; pt_entry_t *l3p, l3; vm_paddr_t pa; bool managed; int val; PMAP_LOCK(pmap); retry: pa = 0; val = 0; managed = false; l1p = pmap_l1(pmap, addr); if (l1p == NULL) /* No l1 */ goto done; l1 = pmap_load(l1p); if ((l1 & ATTR_DESCR_MASK) == L1_INVAL) goto done; if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) { pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET); managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_SUPER | MINCORE_INCORE; if (pmap_page_dirty(l1)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l1 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; goto done; } l2p = pmap_l1_to_l2(l1p, addr); if (l2p == NULL) /* No l2 */ goto done; l2 = pmap_load(l2p); if ((l2 & ATTR_DESCR_MASK) == L2_INVAL) goto done; if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) { pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET); managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_SUPER | MINCORE_INCORE; if (pmap_page_dirty(l2)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l2 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; goto done; } l3p = pmap_l2_to_l3(l2p, addr); if (l3p == NULL) /* No l3 */ goto done; l3 = pmap_load(l2p); if ((l3 & ATTR_DESCR_MASK) == L3_INVAL) goto done; if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) { pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET); managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_INCORE; if (pmap_page_dirty(l3)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l3 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } done: if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); - td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); - __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr)); + td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0); + __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr)); pmap_invalidate_all(pmap); critical_exit(); } void pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) { if (va >= VM_MIN_KERNEL_ADDRESS) { cpu_icache_sync_range(va, sz); } else { u_int len, offset; vm_paddr_t pa; /* Find the length of data in this page to flush */ offset = va & PAGE_MASK; len = imin(PAGE_SIZE - offset, sz); while (sz != 0) { /* Extract the physical address & find it in the DMAP */ pa = pmap_extract(pmap, va); if (pa != 0) cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); /* Move to the next page */ sz -= len; va += len; /* Set the length for the next iteration */ len = imin(PAGE_SIZE, sz); } } } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient. * * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; int error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic( "pmap_map_io_transient: TODO: Map out of DMAP data"); } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); } } } Index: head/sys/arm64/arm64/swtch.S =================================================================== --- head/sys/arm64/arm64/swtch.S (revision 297445) +++ head/sys/arm64/arm64/swtch.S (revision 297446) @@ -1,319 +1,319 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "assym.s" #include "opt_kstack_pages.h" #include "opt_sched.h" #include __FBSDID("$FreeBSD$"); .macro clear_step_flag pcbflags, tmp tbz \pcbflags, #PCB_SINGLE_STEP_SHIFT, 999f mrs \tmp, mdscr_el1 bic \tmp, \tmp, #1 msr mdscr_el1, \tmp isb 999: .endm .macro set_step_flag pcbflags, tmp tbz \pcbflags, #PCB_SINGLE_STEP_SHIFT, 999f mrs \tmp, mdscr_el1 orr \tmp, \tmp, #1 msr mdscr_el1, \tmp isb 999: .endm /* * void cpu_throw(struct thread *old, struct thread *new) */ ENTRY(cpu_throw) /* Of old == NULL skip disabling stepping */ cbz x0, 1f /* If we were single stepping, disable it */ ldr x4, [x0, #TD_PCB] ldr w5, [x4, #PCB_FLAGS] clear_step_flag w5, x6 1: #ifdef VFP /* Backup the new thread pointer around a call to C code */ mov x19, x1 bl vfp_discard mov x1, x19 #endif /* Store the new curthread */ str x1, [x18, #PC_CURTHREAD] /* And the new pcb */ ldr x4, [x1, #TD_PCB] str x4, [x18, #PC_CURPCB] /* * TODO: We may need to flush the cache here. */ /* Switch to the new pmap */ - ldr x5, [x4, #PCB_L1ADDR] + ldr x5, [x4, #PCB_L0ADDR] msr ttbr0_el1, x5 isb /* Invalidate the TLB */ dsb sy tlbi vmalle1is dsb sy isb /* If we are single stepping, enable it */ ldr w5, [x4, #PCB_FLAGS] set_step_flag w5, x6 /* Restore the registers */ ldp x5, x6, [x4, #PCB_SP] mov sp, x5 msr tpidr_el0, x6 ldp x8, x9, [x4, #PCB_REGS + 8 * 8] ldp x10, x11, [x4, #PCB_REGS + 10 * 8] ldp x12, x13, [x4, #PCB_REGS + 12 * 8] ldp x14, x15, [x4, #PCB_REGS + 14 * 8] ldp x16, x17, [x4, #PCB_REGS + 16 * 8] ldr x19, [x4, #PCB_REGS + 19 * 8] ldp x20, x21, [x4, #PCB_REGS + 20 * 8] ldp x22, x23, [x4, #PCB_REGS + 22 * 8] ldp x24, x25, [x4, #PCB_REGS + 24 * 8] ldp x26, x27, [x4, #PCB_REGS + 26 * 8] ldp x28, x29, [x4, #PCB_REGS + 28 * 8] ldr x30, [x4, #PCB_REGS + 30 * 8] ret END(cpu_throw) /* * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx) * * x0 = old * x1 = new * x2 = mtx * x3 to x7, x16 and x17 are caller saved */ ENTRY(cpu_switch) /* * Save the old context. */ ldr x4, [x0, #TD_PCB] /* Store the callee-saved registers */ stp x8, x9, [x4, #PCB_REGS + 8 * 8] stp x10, x11, [x4, #PCB_REGS + 10 * 8] stp x12, x13, [x4, #PCB_REGS + 12 * 8] stp x14, x15, [x4, #PCB_REGS + 14 * 8] stp x16, x17, [x4, #PCB_REGS + 16 * 8] stp x18, x19, [x4, #PCB_REGS + 18 * 8] stp x20, x21, [x4, #PCB_REGS + 20 * 8] stp x22, x23, [x4, #PCB_REGS + 22 * 8] stp x24, x25, [x4, #PCB_REGS + 24 * 8] stp x26, x27, [x4, #PCB_REGS + 26 * 8] stp x28, x29, [x4, #PCB_REGS + 28 * 8] str x30, [x4, #PCB_REGS + 30 * 8] /* And the old stack pointer */ mov x5, sp mrs x6, tpidr_el0 stp x5, x6, [x4, #PCB_SP] /* If we were single stepping, disable it */ ldr w5, [x4, #PCB_FLAGS] clear_step_flag w5, x6 #ifdef VFP mov x19, x0 mov x20, x1 mov x21, x2 /* Load the pcb address */ mov x1, x4 bl vfp_save_state mov x2, x21 mov x1, x20 mov x0, x19 #endif /* Store the new curthread */ str x1, [x18, #PC_CURTHREAD] /* * Restore the saved context and set it as curpcb. */ ldr x4, [x1, #TD_PCB] str x4, [x18, #PC_CURPCB] /* * TODO: We may need to flush the cache here if switching * to a user process. */ /* Switch to the new pmap */ - ldr x5, [x4, #PCB_L1ADDR] + ldr x5, [x4, #PCB_L0ADDR] msr ttbr0_el1, x5 isb /* Invalidate the TLB */ dsb sy tlbi vmalle1is dsb sy isb /* * Release the old thread. This doesn't need to be a store-release * as the above dsb instruction will provide release semantics. */ str x2, [x0, #TD_LOCK] #if defined(SCHED_ULE) && defined(SMP) /* Spin if TD_LOCK points to a blocked_lock */ ldr x2, =_C_LABEL(blocked_lock) 1: ldar x3, [x1, #TD_LOCK] cmp x3, x2 b.eq 1b #endif /* If we are single stepping, enable it */ ldr w5, [x4, #PCB_FLAGS] set_step_flag w5, x6 /* Restore the registers */ ldp x5, x6, [x4, #PCB_SP] mov sp, x5 msr tpidr_el0, x6 ldp x8, x9, [x4, #PCB_REGS + 8 * 8] ldp x10, x11, [x4, #PCB_REGS + 10 * 8] ldp x12, x13, [x4, #PCB_REGS + 12 * 8] ldp x14, x15, [x4, #PCB_REGS + 14 * 8] ldp x16, x17, [x4, #PCB_REGS + 16 * 8] ldr x19, [x4, #PCB_REGS + 19 * 8] ldp x20, x21, [x4, #PCB_REGS + 20 * 8] ldp x22, x23, [x4, #PCB_REGS + 22 * 8] ldp x24, x25, [x4, #PCB_REGS + 24 * 8] ldp x26, x27, [x4, #PCB_REGS + 26 * 8] ldp x28, x29, [x4, #PCB_REGS + 28 * 8] ldr x30, [x4, #PCB_REGS + 30 * 8] str xzr, [x4, #PCB_REGS + 18 * 8] ret .Lcpu_switch_panic_str: .asciz "cpu_switch: %p\0" END(cpu_switch) ENTRY(fork_trampoline) mov x0, x8 mov x1, x9 mov x2, sp mov fp, #0 /* Stack traceback stops here. */ bl _C_LABEL(fork_exit) /* Restore sp and lr */ ldp x0, x1, [sp] msr sp_el0, x0 mov lr, x1 /* Restore the registers other than x0 and x1 */ ldp x2, x3, [sp, #TF_X + 2 * 8] ldp x4, x5, [sp, #TF_X + 4 * 8] ldp x6, x7, [sp, #TF_X + 6 * 8] ldp x8, x9, [sp, #TF_X + 8 * 8] ldp x10, x11, [sp, #TF_X + 10 * 8] ldp x12, x13, [sp, #TF_X + 12 * 8] ldp x14, x15, [sp, #TF_X + 14 * 8] ldp x16, x17, [sp, #TF_X + 16 * 8] ldr x19, [sp, #TF_X + 19 * 8] ldp x20, x21, [sp, #TF_X + 20 * 8] ldp x22, x23, [sp, #TF_X + 22 * 8] ldp x24, x25, [sp, #TF_X + 24 * 8] ldp x26, x27, [sp, #TF_X + 26 * 8] ldp x28, x29, [sp, #TF_X + 28 * 8] /* Skip x30 as it was restored above as lr */ /* * Disable interrupts to avoid * overwriting spsr_el1 by an IRQ exception. */ msr daifset, #2 /* Restore elr and spsr */ ldp x0, x1, [sp, #16] msr elr_el1, x0 msr spsr_el1, x1 /* Finally x0 and x1 */ ldp x0, x1, [sp, #TF_X + 0 * 8] ldr x18, [sp, #TF_X + 18 * 8] /* * No need for interrupts reenabling since PSR * will be set to the desired value anyway. */ eret END(fork_trampoline) ENTRY(savectx) /* Store the callee-saved registers */ stp x8, x9, [x0, #PCB_REGS + 8 * 8] stp x10, x11, [x0, #PCB_REGS + 10 * 8] stp x12, x13, [x0, #PCB_REGS + 12 * 8] stp x14, x15, [x0, #PCB_REGS + 14 * 8] stp x16, x17, [x0, #PCB_REGS + 16 * 8] stp x18, x19, [x0, #PCB_REGS + 18 * 8] stp x20, x21, [x0, #PCB_REGS + 20 * 8] stp x22, x23, [x0, #PCB_REGS + 22 * 8] stp x24, x25, [x0, #PCB_REGS + 24 * 8] stp x26, x27, [x0, #PCB_REGS + 26 * 8] stp x28, x29, [x0, #PCB_REGS + 28 * 8] str x30, [x0, #PCB_REGS + 30 * 8] /* And the old stack pointer */ mov x5, sp mrs x6, tpidr_el0 stp x5, x6, [x0, #PCB_SP] /* Store the VFP registers */ #ifdef VFP mov x28, lr mov x1, x0 /* move pcb to the correct register */ mov x0, xzr /* td = NULL */ bl vfp_save_state mov lr, x28 #endif ret END(savectx) Index: head/sys/arm64/arm64/vm_machdep.c =================================================================== --- head/sys/arm64/arm64/vm_machdep.c (revision 297445) +++ head/sys/arm64/arm64/vm_machdep.c (revision 297446) @@ -1,260 +1,260 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; if ((flags & RFPROC) == 0) return; if (td1 == curthread) { /* * Save the tpidr_el0 and the vfp state, these normally happen * in cpu_switch, but if userland changes these then forks * this may not have happened. */ td1->td_pcb->pcb_tpidr_el0 = READ_SPECIALREG(tpidr_el0); #ifdef VFP if ((td1->td_pcb->pcb_fpflags & PCB_FP_STARTED) != 0) vfp_save_state(td1, td1->td_pcb); #endif } pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); - td2->td_pcb->pcb_l1addr = - vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1); + td2->td_pcb->pcb_l0addr = + vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l0); tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); tf->tf_x[0] = 0; tf->tf_x[1] = 0; tf->tf_spsr = 0; td2->td_frame = tf; /* Set the return value registers for fork() */ td2->td_pcb->pcb_x[8] = (uintptr_t)fork_return; td2->td_pcb->pcb_x[9] = (uintptr_t)td2; td2->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline; td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame; td2->td_pcb->pcb_vfpcpu = UINT_MAX; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_daif = 0; } void cpu_reset(void) { printf("cpu_reset"); while(1) __asm volatile("wfi" ::: "memory"); } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_x[0] = td->td_retval[0]; frame->tf_x[1] = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; /* carry bit */ break; case ERESTART: frame->tf_elr -= 4; break; case EJUSTRETURN: break; default: frame->tf_spsr |= PSR_C; /* carry bit */ frame->tf_x[0] = error; break; } } /* * Initialize machine state (pcb and trap frame) for a new thread about to * upcall. Put enough state in the new thread's PCB to get it to go back * userret(), where we can intercept it again to set the return (upcall) * Address and stack, along with those from upcals that are from other sources * such as those generated in thread_userret() itself. */ void cpu_set_upcall(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_x[8] = (uintptr_t)fork_return; td->td_pcb->pcb_x[9] = (uintptr_t)td; td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; td->td_pcb->pcb_vfpcpu = UINT_MAX; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = 0; } /* * Set that machine state for performing an upcall that has to * be done in thread_userret() so that those upcalls generated * in thread_userret() itself can be done as well. */ void cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf = td->td_frame; tf->tf_sp = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size); tf->tf_elr = (register_t)entry; tf->tf_x[0] = (register_t)arg; } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct pcb *pcb; if ((uintptr_t)tls_base >= VM_MAXUSER_ADDRESS) return (EINVAL); pcb = td->td_pcb; pcb->pcb_tpidr_el0 = (register_t)tls_base; return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)STACKALIGN( td->td_pcb - 1); } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_x[8] = (uintptr_t)func; td->td_pcb->pcb_x[9] = (uintptr_t)arg; td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; td->td_pcb->pcb_vfpcpu = UINT_MAX; } void cpu_exit(struct thread *td) { } void swi_vm(void *v) { /* Nothing to do here - busdma bounce buffers are not implemented. */ } Index: head/sys/arm64/include/machdep.h =================================================================== --- head/sys/arm64/include/machdep.h (revision 297445) +++ head/sys/arm64/include/machdep.h (revision 297446) @@ -1,44 +1,45 @@ /*- * Copyright (c) 2013 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_MACHDEP_H_ #define _MACHINE_MACHDEP_H_ struct arm64_bootparams { vm_offset_t modulep; vm_offset_t kern_l1pt; /* L1 page table for the kernel */ uint64_t kern_delta; vm_offset_t kern_stack; + vm_offset_t kern_l0pt; /* L1 page table for the kernel */ }; extern vm_paddr_t physmap[]; extern u_int physmap_idx; void initarm(struct arm64_bootparams *); #endif /* _MACHINE_MACHDEP_H_ */ Index: head/sys/arm64/include/pcb.h =================================================================== --- head/sys/arm64/include/pcb.h (revision 297445) +++ head/sys/arm64/include/pcb.h (revision 297446) @@ -1,68 +1,68 @@ /*- * Copyright (c) 2001 Jake Burkholder. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCB_H_ #define _MACHINE_PCB_H_ #ifndef LOCORE struct trapframe; #define PCB_LR 30 struct pcb { uint64_t pcb_x[31]; uint64_t pcb_pc; /* These two need to be in order as we access them together */ uint64_t pcb_sp; uint64_t pcb_tpidr_el0; - vm_offset_t pcb_l1addr; + vm_offset_t pcb_l0addr; /* Fault handler, the error value is passed in x0 */ vm_offset_t pcb_onfault; u_int pcb_flags; #define PCB_SINGLE_STEP_SHIFT 0 #define PCB_SINGLE_STEP (1 << PCB_SINGLE_STEP_SHIFT) /* Place last to simplify the asm to access the rest if the struct */ __uint128_t pcb_vfp[32]; uint32_t pcb_fpcr; uint32_t pcb_fpsr; int pcb_fpflags; #define PCB_FP_STARTED 0x01 u_int pcb_vfpcpu; /* Last cpu this thread ran VFP code */ }; #ifdef _KERNEL void makectx(struct trapframe *tf, struct pcb *pcb); int savectx(struct pcb *pcb) __returns_twice; #endif #endif /* !LOCORE */ #endif /* !_MACHINE_PCB_H_ */ Index: head/sys/arm64/include/pmap.h =================================================================== --- head/sys/arm64/include/pmap.h (revision 297445) +++ head/sys/arm64/include/pmap.h (revision 297446) @@ -1,160 +1,160 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ #include #ifndef LOCORE #include #include #include #ifdef _KERNEL #define vtophys(va) pmap_kextract((vm_offset_t)(va)) #endif #define pmap_page_get_memattr(m) ((m)->md.pv_memattr) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); /* * Pmap stuff */ struct md_page { TAILQ_HEAD(,pv_entry) pv_list; int pv_gen; vm_memattr_t pv_memattr; }; /* * This structure is used to hold a virtual<->physical address * association and is used mostly by bootstrap code */ struct pv_addr { SLIST_ENTRY(pv_addr) pv_list; vm_offset_t pv_va; vm_paddr_t pv_pa; }; struct pmap { struct mtx pm_mtx; struct pmap_statistics pm_stats; /* pmap statictics */ - pd_entry_t *pm_l1; + pd_entry_t *pm_l0; TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ }; typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* * pv_entries are allocated in chunks per-process. This avoids the * need to track per-pmap assignments. */ #define _NPCM 3 #define _NPCPV 168 struct pv_chunk { struct pmap * pc_pmap; TAILQ_ENTRY(pv_chunk) pc_list; uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */ TAILQ_ENTRY(pv_chunk) pc_lru; struct pv_entry pc_pventry[_NPCPV]; }; typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #define pmap_kernel() kernel_pmap #define PMAP_ASSERT_LOCKED(pmap) \ mtx_assert(&(pmap)->pm_mtx, MA_OWNED) #define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) #define PMAP_LOCK_ASSERT(pmap, type) \ mtx_assert(&(pmap)->pm_mtx, (type)) #define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) #define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ NULL, MTX_DEF | MTX_DUPOK) #define PMAP_OWNED(pmap) mtx_owned(&(pmap)->pm_mtx) #define PMAP_MTX(pmap) (&(pmap)->pm_mtx) #define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) #define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) #define PHYS_AVAIL_SIZE 32 extern vm_paddr_t phys_avail[]; extern vm_paddr_t dump_avail[]; extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; /* * Macros to test if a mapping is mappable with an L1 Section mapping * or an L2 Large Page mapping. */ #define L1_MAPPABLE_P(va, pa, size) \ ((((va) | (pa)) & L1_OFFSET) == 0 && (size) >= L1_SIZE) -void pmap_bootstrap(vm_offset_t, vm_paddr_t, vm_size_t); +void pmap_bootstrap(vm_offset_t, vm_offset_t, vm_paddr_t, vm_size_t); void pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t); vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); void pmap_kremove_device(vm_offset_t, vm_size_t); void *pmap_mapdev(vm_offset_t, vm_size_t); void *pmap_mapbios(vm_paddr_t, vm_size_t); void pmap_unmapdev(vm_offset_t, vm_size_t); void pmap_unmapbios(vm_offset_t, vm_size_t); boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); bool pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t **, pd_entry_t **, - pt_entry_t **); + pd_entry_t **, pt_entry_t **); #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ Index: head/sys/arm64/include/pte.h =================================================================== --- head/sys/arm64/include/pte.h (revision 297445) +++ head/sys/arm64/include/pte.h (revision 297446) @@ -1,115 +1,122 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2014-2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PTE_H_ #define _MACHINE_PTE_H_ #ifndef LOCORE typedef uint64_t pd_entry_t; /* page directory entry */ typedef uint64_t pt_entry_t; /* page table entry */ #endif /* Block and Page attributes */ /* TODO: Add the upper attributes */ #define ATTR_MASK_H UINT64_C(0xfff0000000000000) #define ATTR_MASK_L UINT64_C(0x0000000000000fff) #define ATTR_MASK (ATTR_MASK_H | ATTR_MASK_L) /* Bits 58:55 are reserved for software */ #define ATTR_SW_MANAGED (1UL << 56) #define ATTR_SW_WIRED (1UL << 55) #define ATTR_nG (1 << 11) #define ATTR_AF (1 << 10) #define ATTR_SH(x) ((x) << 8) #define ATTR_SH_MASK ATTR_SH(3) #define ATTR_SH_NS 0 /* Non-shareable */ #define ATTR_SH_OS 2 /* Outer-shareable */ #define ATTR_SH_IS 3 /* Inner-shareable */ #define ATTR_AP_RW_BIT (1 << 7) #define ATTR_AP(x) ((x) << 6) #define ATTR_AP_MASK ATTR_AP(3) #define ATTR_AP_RW (0 << 1) #define ATTR_AP_RO (1 << 1) #define ATTR_AP_USER (1 << 0) #define ATTR_NS (1 << 5) #define ATTR_IDX(x) ((x) << 2) #define ATTR_IDX_MASK (7 << 2) #ifdef SMP #define ATTR_DEFAULT (ATTR_AF | ATTR_SH(ATTR_SH_IS)) #else #define ATTR_DEFAULT (ATTR_AF) #endif #define ATTR_DESCR_MASK 3 /* Level 0 table, 512GiB per entry */ #define L0_SHIFT 39 +#define L0_SIZE (1ul << L0_SHIFT) +#define L0_OFFSET (L0_SIZE - 1ul) #define L0_INVAL 0x0 /* An invalid address */ /* 0x1 Level 0 doesn't support block translation */ /* 0x2 also marks an invalid address */ #define L0_TABLE 0x3 /* A next-level table */ /* Level 1 table, 1GiB per entry */ #define L1_SHIFT 30 #define L1_SIZE (1 << L1_SHIFT) #define L1_OFFSET (L1_SIZE - 1) #define L1_INVAL L0_INVAL #define L1_BLOCK 0x1 #define L1_TABLE L0_TABLE /* Level 2 table, 2MiB per entry */ #define L2_SHIFT 21 #define L2_SIZE (1 << L2_SHIFT) #define L2_OFFSET (L2_SIZE - 1) #define L2_INVAL L1_INVAL #define L2_BLOCK L1_BLOCK #define L2_TABLE L1_TABLE #define L2_BLOCK_MASK UINT64_C(0xffffffe00000) /* Level 3 table, 4KiB per entry */ #define L3_SHIFT 12 #define L3_SIZE (1 << L3_SHIFT) #define L3_OFFSET (L3_SIZE - 1) #define L3_SHIFT 12 #define L3_INVAL 0x0 /* 0x1 is reserved */ /* 0x2 also marks an invalid address */ #define L3_PAGE 0x3 -#define Ln_ENTRIES (1 << 9) +#define L0_ENTRIES_SHIFT 9 +#define L0_ENTRIES (1 << L0_ENTRIES_SHIFT) +#define L0_ADDR_MASK (L0_ENTRIES - 1) + +#define Ln_ENTRIES_SHIFT 9 +#define Ln_ENTRIES (1 << Ln_ENTRIES_SHIFT) #define Ln_ADDR_MASK (Ln_ENTRIES - 1) #define Ln_TABLE_MASK ((1 << 12) - 1) #endif /* !_MACHINE_PTE_H_ */ /* End of pte.h */ Index: head/sys/arm64/include/vmparam.h =================================================================== --- head/sys/arm64/include/vmparam.h (revision 297445) +++ head/sys/arm64/include/vmparam.h (revision 297446) @@ -1,238 +1,238 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 * from: FreeBSD: src/sys/i386/include/vmparam.h,v 1.33 2000/03/30 * $FreeBSD$ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ /* * Virtual memory related constants, all in bytes */ #ifndef MAXTSIZ #define MAXTSIZ (1*1024*1024*1024) /* max text size */ #endif #ifndef DFLDSIZ #define DFLDSIZ (128*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (1*1024*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (128*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (1*1024*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128*1024) /* amount to grow stack */ #endif /* * The physical address space is sparsely populated. */ #define VM_PHYSSEG_SPARSE /* * The number of PHYSSEG entries must be one greater than the number * of phys_avail entries because the phys_avail entry that spans the * largest physical address that is accessible by ISA DMA is split * into two PHYSSEG entries. */ #define VM_PHYSSEG_MAX 64 /* * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool * from which physical pages are allocated and VM_FREEPOOL_DIRECT is * the pool from which physical pages for small UMA objects are * allocated. */ #define VM_NFREEPOOL 2 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 1 /* * Create two free page lists: VM_FREELIST_DEFAULT is for physical * pages that are above the largest physical address that is * accessible by ISA DMA and VM_FREELIST_ISADMA is for physical pages * that are below that address. */ #define VM_NFREELIST 2 #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_ISADMA 1 /* * An allocation size of 16MB is supported in order to optimize the * use of the direct map by UMA. Specifically, a cache line contains * at most four TTEs, collectively mapping 16MB of physical memory. * By reducing the number of distinct 16MB "pages" that are used by UMA, * the physical memory allocator reduces the likelihood of both 4MB * page TLB misses and cache misses caused by 4MB page TLB misses. */ #define VM_NFREEORDER 12 /* * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 1 #endif /* * Level 0 reservations consist of 512 pages. */ #ifndef VM_LEVEL_0_ORDER #define VM_LEVEL_0_ORDER 9 #endif /** * Address space layout. * * ARMv8 implements up to a 48 bit virtual address space. The address space is * split into 2 regions at each end of the 64 bit address space, with an * out of range "hole" in the middle. * * We limit the size of the two spaces to 39 bits each. * * Upper region: 0xffffffffffffffff * 0xffffff8000000000 * * Hole: 0xffffff7fffffffff * 0x0000008000000000 * * Lower region: 0x0000007fffffffff * 0x0000000000000000 * * We use the upper region for the kernel, and the lower region for userland. * * We define some interesting address constants: * * VM_MIN_ADDRESS and VM_MAX_ADDRESS define the start and end of the entire * 64 bit address space, mostly just for convenience. * * VM_MIN_KERNEL_ADDRESS and VM_MAX_KERNEL_ADDRESS define the start and end of * mappable kernel virtual address space. * * VM_MIN_USER_ADDRESS and VM_MAX_USER_ADDRESS define the start and end of the * user address space. */ #define VM_MIN_ADDRESS (0x0000000000000000UL) #define VM_MAX_ADDRESS (0xffffffffffffffffUL) /* 32 GiB of kernel addresses */ #define VM_MIN_KERNEL_ADDRESS (0xffffff8000000000UL) #define VM_MAX_KERNEL_ADDRESS (0xffffff8800000000UL) /* Direct Map for 128 GiB of PA: 0x0 - 0x1fffffffff */ #define DMAP_MIN_ADDRESS (0xffffffc000000000UL) #define DMAP_MAX_ADDRESS (0xffffffdfffffffffUL) extern vm_paddr_t dmap_phys_base; #define DMAP_MIN_PHYSADDR (dmap_phys_base) #define DMAP_MAX_PHYSADDR (dmap_phys_base + (DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS)) /* True if pa is in the dmap range */ #define PHYS_IN_DMAP(pa) ((pa) >= DMAP_MIN_PHYSADDR && \ (pa) <= DMAP_MAX_PHYSADDR) /* True if va is in the dmap range */ #define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ (va) <= DMAP_MAX_ADDRESS) #define PHYS_TO_DMAP(pa) \ ({ \ KASSERT(PHYS_IN_DMAP(pa), \ ("%s: PA out of range, PA: 0x%lx", __func__, \ (vm_paddr_t)(pa))); \ ((pa) - dmap_phys_base) | DMAP_MIN_ADDRESS; \ }) #define DMAP_TO_PHYS(va) \ ({ \ KASSERT(VIRT_IN_DMAP(va), \ ("%s: VA out of range, VA: 0x%lx", __func__, \ (vm_offset_t)(va))); \ ((va) & ~DMAP_MIN_ADDRESS) + dmap_phys_base; \ }) #define VM_MIN_USER_ADDRESS (0x0000000000000000UL) -#define VM_MAX_USER_ADDRESS (0x0000008000000000UL) +#define VM_MAX_USER_ADDRESS (0x0001000000000000UL) #define VM_MINUSER_ADDRESS (VM_MIN_USER_ADDRESS) #define VM_MAXUSER_ADDRESS (VM_MAX_USER_ADDRESS) #define KERNBASE (VM_MIN_KERNEL_ADDRESS) #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (3) #endif /* * Optional floor (in bytes) on the size of the kmem arena. */ #ifndef VM_KMEM_SIZE_MIN #define VM_KMEM_SIZE_MIN (16 * 1024 * 1024) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 60% of the * kernel map. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5) #endif /* * Initial pagein size of beginning of executable file. */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif #define UMA_MD_SMALL_ALLOC extern u_int tsb_kernel_ldd_phys; extern vm_offset_t vm_max_kernel_address; extern vm_offset_t init_pt_va; #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ #endif /* !_MACHINE_VMPARAM_H_ */