Index: head/sys/arm64/arm64/genassym.c =================================================================== --- head/sys/arm64/arm64/genassym.c +++ head/sys/arm64/arm64/genassym.c @@ -52,7 +52,7 @@ ASSYM(PCB_SINGLE_STEP_SHIFT, PCB_SINGLE_STEP_SHIFT); ASSYM(PCB_REGS, offsetof(struct pcb, pcb_x)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); -ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr)); +ASSYM(PCB_L0ADDR, offsetof(struct pcb, pcb_l0addr)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); Index: head/sys/arm64/arm64/locore.S =================================================================== --- head/sys/arm64/arm64/locore.S +++ head/sys/arm64/arm64/locore.S @@ -35,7 +35,7 @@ #include #include -#define VIRT_BITS 39 +#define VIRT_BITS 48 .globl kernbase .set kernbase, KERNBASE @@ -89,7 +89,8 @@ /* * At this point: * x27 = TTBR0 table - * x26 = TTBR1 table + * x26 = Kernel L1 table + * x24 = TTBR1 table */ /* Enable the mmu */ @@ -100,16 +101,6 @@ br x15 virtdone: - /* - * Now that we are in virtual address space, - * we don't need the identity mapping in TTBR0 and - * can set the TCR to a more useful value. - */ - ldr x2, tcr - mrs x3, id_aa64mmfr0_el1 - bfi x2, x3, #32, #3 - msr tcr_el1, x2 - /* Set up the stack */ adr x25, initstack_end mov sp, x25 @@ -128,6 +119,7 @@ /* Make the page table base a virtual address */ sub x26, x26, x29 + sub x24, x24, x29 sub sp, sp, #(64 * 4) mov x0, sp @@ -139,6 +131,7 @@ str x26, [x0, 8] /* kern_l1pt */ str x29, [x0, 16] /* kern_delta */ str x25, [x0, 24] /* kern_stack */ + str x24, [x0, 32] /* kern_l0pt */ /* trace back starts here */ mov fp, #0 @@ -175,7 +168,7 @@ msr contextidr_el1, x1 /* Load the kernel page table */ - adr x26, pagetable_l1_ttbr1 + adr x24, pagetable_l0_ttbr1 /* Load the identity page table */ adr x27, pagetable_l0_ttbr0 @@ -187,16 +180,6 @@ br x15 mp_virtdone: - /* - * Now that we are in virtual address space, - * we don't need the identity mapping in TTBR0 and - * can set the TCR to a more useful value. - */ - ldr x2, tcr - mrs x3, id_aa64mmfr0_el1 - bfi x2, x3, #32, #3 - msr tcr_el1, x2 - ldr x4, =secondary_stacks mov x5, #(PAGE_SIZE * KSTACK_PAGES) mul x5, x0, x5 @@ -388,11 +371,18 @@ mov x6, x26 bl link_l1_pagetable + /* Move to the l0 table */ + add x24, x26, #PAGE_SIZE + + /* Link the l0 -> l1 table */ + mov x9, x6 + mov x6, x24 + bl link_l0_pagetable /* * Build the TTBR0 maps. */ - add x27, x26, #PAGE_SIZE + add x27, x24, #PAGE_SIZE mov x6, x27 /* The initial page table */ #if defined(SOCDEV_PA) && defined(SOCDEV_VA) @@ -440,7 +430,7 @@ */ /* Find the table index */ lsr x11, x8, #L0_SHIFT - and x11, x11, #Ln_ADDR_MASK + and x11, x11, #L0_ADDR_MASK /* Build the L0 block entry */ mov x12, #L0_TABLE @@ -582,7 +572,7 @@ /* Load ttbr0 and ttbr1 */ msr ttbr0_el1, x27 - msr ttbr1_el1, x26 + msr ttbr1_el1, x24 isb /* Clear the Monitor Debug System control register */ @@ -596,11 +586,8 @@ /* * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1. - * Some machines have physical memory mapped >512GiB, which can not - * be identity-mapped using the default 39 VA bits. Thus, use - * 48 VA bits for now and switch back to 39 after the VA jump. 
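
The jump from 39 to 48 VIRT_BITS is what forces the extra translation level, and it is also why the temporary tcr_early value and the post-MMU TCR reprogramming in virtdone/mp_virtdone can go away: both address spaces are now sized identically, so one TCR value works from the start. A small arithmetic sketch, not part of the patch, assuming the usual 4 KiB-granule constants and that TCR_TxSZ() sets T0SZ and T1SZ together:

    /*
     * 4 KiB granule: 12 bits of page offset, 9 bits of index per level.
     *   VIRT_BITS = 39 -> 12 + 3*9, three levels (L1..L3), TxSZ = 64 - 39 = 25
     *   VIRT_BITS = 48 -> 12 + 4*9, four levels  (L0..L3), TxSZ = 64 - 48 = 16
     * TCR_TxSZ(64 - VIRT_BITS) therefore programs 48-bit spaces for both
     * TTBR0 and TTBR1, matching what the identity map already needed.
     */
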
*/ - ldr x2, tcr_early + ldr x2, tcr mrs x3, id_aa64mmfr0_el1 bfi x2, x3, #32, #3 msr tcr_el1, x2 @@ -623,9 +610,6 @@ tcr: .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \ TCR_CACHE_ATTRS | TCR_SMP_ATTRS) -tcr_early: - .quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \ - TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS) sctlr_set: /* Bits to set */ .quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \ @@ -651,6 +635,8 @@ .space PAGE_SIZE pagetable_l1_ttbr1: .space PAGE_SIZE +pagetable_l0_ttbr1: + .space PAGE_SIZE pagetable_l1_ttbr0: .space PAGE_SIZE pagetable_l0_ttbr0: Index: head/sys/arm64/arm64/machdep.c =================================================================== --- head/sys/arm64/arm64/machdep.c +++ head/sys/arm64/arm64/machdep.c @@ -896,8 +896,8 @@ cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ - pmap_bootstrap(abp->kern_l1pt, KERNBASE - abp->kern_delta, - lastaddr - KERNBASE); + pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt, + KERNBASE - abp->kern_delta, lastaddr - KERNBASE); arm_devmap_bootstrap(0, NULL); Index: head/sys/arm64/arm64/minidump_machdep.c =================================================================== --- head/sys/arm64/arm64/minidump_machdep.c +++ head/sys/arm64/arm64/minidump_machdep.c @@ -218,7 +218,7 @@ int minidumpsys(struct dumperinfo *di) { - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; uint32_t pmapsize; vm_offset_t va; @@ -236,7 +236,7 @@ pmapsize = 0; for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) { pmapsize += PAGE_SIZE; - if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) + if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) continue; /* We should always be using the l2 table for kvm */ @@ -335,7 +335,7 @@ /* Dump kernel page directory pages */ bzero(&tmpbuffer, sizeof(tmpbuffer)); for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) { - if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) { + if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) { /* We always write a page, even if it is zero */ error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) Index: head/sys/arm64/arm64/pmap.c =================================================================== --- head/sys/arm64/arm64/pmap.c +++ head/sys/arm64/arm64/pmap.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. - * Copyright (c) 2014 The FreeBSD Foundation + * Copyright (c) 2014-2016 The FreeBSD Foundation * All rights reserved. 
* * This code is derived from software contributed to Berkeley by @@ -142,9 +142,14 @@ #include #include -#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) -#define NUPDE (NPDEPG * NPDEPG) -#define NUSERPGTBLS (NUPDE + NPDEPG) +#define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) + +#define NUL0E L0_ENTRIES +#define NUL1E (NUL0E * NL1PG) +#define NUL2E (NUL1E * NL2PG) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ @@ -273,15 +278,37 @@ bzero(p, PAGE_SIZE); } +#define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK) #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) static __inline pd_entry_t * +pmap_l0(pmap_t pmap, vm_offset_t va) +{ + + return (&pmap->pm_l0[pmap_l0_index(va)]); +} + +static __inline pd_entry_t * +pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) +{ + pd_entry_t *l1; + + l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); + return (&l1[pmap_l1_index(va)]); +} + +static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { + pd_entry_t *l0; + + l0 = pmap_l0(pmap, va); + if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) + return (NULL); - return (&pmap->pm_l1[pmap_l1_index(va)]); + return (pmap_l0_to_l1(l0, va)); } static __inline pd_entry_t * @@ -314,28 +341,103 @@ return (&l3[pmap_l3_index(va)]); } +/* + * Returns the lowest valid pde for a given virtual address. + * The next level may or may not point to a valid page or block. + */ +static __inline pd_entry_t * +pmap_pde(pmap_t pmap, vm_offset_t va, int *level) +{ + pd_entry_t *l0, *l1, *l2, desc; + + l0 = pmap_l0(pmap, va); + desc = pmap_load(l0) & ATTR_DESCR_MASK; + if (desc != L0_TABLE) { + *level = -1; + return (NULL); + } + + l1 = pmap_l0_to_l1(l0, va); + desc = pmap_load(l1) & ATTR_DESCR_MASK; + if (desc != L1_TABLE) { + *level = 0; + return (l0); + } + + l2 = pmap_l1_to_l2(l1, va); + desc = pmap_load(l2) & ATTR_DESCR_MASK; + if (desc != L2_TABLE) { + *level = 1; + return (l1); + } + + *level = 2; + return (l2); +} + +/* + * Returns the lowest valid pte block or table entry for a given virtual + * address. If there are no valid entries return NULL and set the level to + * the first invalid level. 
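
The two walkers introduced here, pmap_pde() and pmap_pte(), replace the fixed-depth pmap_l1()/pmap_l2()/pmap_l3() lookups, and most of the remaining churn in the patch is callers switching to them and asserting the level they expect. A summary of the contract as a sketch, not part of the patch:

    /*
     * pmap_pde(pmap, va, &lvl) returns the lowest valid table entry:
     *   lvl -1  no valid L0 entry; returns NULL
     *   lvl  0  returns the L0 entry; the L1 entry below is not a table
     *   lvl  1  returns the L1 entry; the L2 entry below is not a table
     *   lvl  2  returns the L2 entry; an L3 table exists below it
     *
     * pmap_pte(pmap, va, &lvl) returns a valid leaf entry:
     *   lvl 1 or 2  an L1/L2 block entry
     *   lvl 3       an L3 page entry
     *   NULL        nothing valid; lvl is the first invalid level
     */
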
+ */ static __inline pt_entry_t * -pmap_l3(pmap_t pmap, vm_offset_t va) +pmap_pte(pmap_t pmap, vm_offset_t va, int *level) { - pd_entry_t *l2; + pd_entry_t *l1, *l2, desc; + pt_entry_t *l3; - l2 = pmap_l2(pmap, va); - if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) + l1 = pmap_l1(pmap, va); + if (l1 == NULL) { + *level = 0; return (NULL); + } + desc = pmap_load(l1) & ATTR_DESCR_MASK; + if (desc == L1_BLOCK) { + *level = 1; + return (l1); + } - return (pmap_l2_to_l3(l2, va)); + if (desc != L1_TABLE) { + *level = 1; + return (NULL); + } + + l2 = pmap_l1_to_l2(l1, va); + desc = pmap_load(l2) & ATTR_DESCR_MASK; + if (desc == L2_BLOCK) { + *level = 2; + return (l2); + } + + if (desc != L2_TABLE) { + *level = 2; + return (NULL); + } + + *level = 3; + l3 = pmap_l2_to_l3(l2, va); + if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE) + return (NULL); + + return (l3); } bool -pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2, - pt_entry_t **l3) +pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, + pd_entry_t **l2, pt_entry_t **l3) { - pd_entry_t *l1p, *l2p; + pd_entry_t *l0p, *l1p, *l2p; + + if (pmap->pm_l0 == NULL) + return (false); + + l0p = pmap_l0(pmap, va); + *l0 = l0p; - if (pmap->pm_l1 == NULL) + if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) return (false); - l1p = pmap_l1(pmap, va); + l1p = pmap_l0_to_l1(l0p, va); *l1 = l1p; if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { @@ -544,7 +646,8 @@ * Bootstrap the system enough to run with virtual memory. */ void -pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) +pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, + vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; uint64_t kern_delta; @@ -562,7 +665,7 @@ printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ - kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; + kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; PMAP_LOCK_INIT(kernel_pmap); /* @@ -805,30 +908,40 @@ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { - pd_entry_t *l2p, l2; - pt_entry_t *l3p, l3; + pt_entry_t *pte, tpte; vm_paddr_t pa; + int lvl; pa = 0; PMAP_LOCK(pmap); /* - * Start with the l2 tabel. We are unable to allocate - * pages in the l1 table. + * Find the block or page map for this virtual address. pmap_pte + * will return either a valid block/page entry, or NULL. 
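
Since the lookup can now stop at an L1 or L2 block as well as an L3 page, the physical address must be composed with the offset mask matching the level actually found, which is what the switch below does. A worked sketch of the arithmetic, not part of the patch:

    /*
     * pa = (pte & ~ATTR_MASK) | (va & <offset for that level>)
     *   lvl 1: 1 GiB L1 block -> va & L1_OFFSET (0x3fffffff)
     *   lvl 2: 2 MiB L2 block -> va & L2_OFFSET (0x1fffff)
     *   lvl 3: 4 KiB L3 page  -> va & L3_OFFSET (0xfff)
     */
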
*/ - l2p = pmap_l2(pmap, va); - if (l2p != NULL) { - l2 = pmap_load(l2p); - if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) { - l3p = pmap_l2_to_l3(l2p, va); - if (l3p != NULL) { - l3 = pmap_load(l3p); - - if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) - pa = (l3 & ~ATTR_MASK) | - (va & L3_OFFSET); - } - } else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) - pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET); + pte = pmap_pte(pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + pa = tpte & ~ATTR_MASK; + switch(lvl) { + case 1: + KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, + ("pmap_extract: Invalid L1 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L1_OFFSET); + break; + case 2: + KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_extract: Invalid L2 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L2_OFFSET); + break; + case 3: + KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, + ("pmap_extract: Invalid L3 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L3_OFFSET); + break; + } } PMAP_UNLOCK(pmap); return (pa); @@ -844,21 +957,31 @@ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { - pt_entry_t *l3p, l3; + pt_entry_t *pte, tpte; vm_paddr_t pa; vm_page_t m; + int lvl; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: - l3p = pmap_l3(pmap, va); - if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { - if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || + pte = pmap_pte(pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + + KASSERT(lvl > 0 && lvl <= 3, + ("pmap_extract_and_hold: Invalid level %d", lvl)); + CTASSERT(L1_BLOCK == L2_BLOCK); + KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || + (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), + ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, + tpte & ATTR_DESCR_MASK)); + if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || ((prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa)) + if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa)) goto retry; - m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK); + m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); vm_page_hold(m); } } @@ -870,25 +993,39 @@ vm_paddr_t pmap_kextract(vm_offset_t va) { - pd_entry_t *l2p, l2; - pt_entry_t *l3; + pt_entry_t *pte, tpte; vm_paddr_t pa; + int lvl; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { - l2p = pmap_l2(kernel_pmap, va); - if (l2p == NULL) - panic("pmap_kextract: No l2"); - l2 = pmap_load(l2p); - if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) - return ((l2 & ~ATTR_MASK) | - (va & L2_OFFSET)); - - l3 = pmap_l2_to_l3(l2p, va); - if (l3 == NULL) - panic("pmap_kextract: No l3..."); - pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK); + pa = 0; + pte = pmap_pte(kernel_pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + pa = tpte & ~ATTR_MASK; + switch(lvl) { + case 1: + KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, + ("pmap_kextract: Invalid L1 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L1_OFFSET); + break; + case 2: + KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_kextract: Invalid L2 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L2_OFFSET); + break; + case 3: + KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, + ("pmap_kextract: Invalid L3 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L3_OFFSET); + break; + } + } } return (pa); } @@ -900,8 +1037,10 @@ void pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) { - pt_entry_t *l3; + pd_entry_t *pde; + pt_entry_t *pte; vm_offset_t va; + 
int lvl; KASSERT((pa & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid physical address")); @@ -912,11 +1051,16 @@ va = sva; while (size != 0) { - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); - pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT | + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_kenter_device: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, + ("pmap_kenter_device: Invalid level %d", lvl)); + + pte = pmap_l2_to_l3(pde, va); + pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT | ATTR_IDX(DEVICE_MEMORY) | L3_PAGE); - PTE_SYNC(l3); + PTE_SYNC(pte); va += PAGE_SIZE; pa += PAGE_SIZE; @@ -927,28 +1071,30 @@ /* * Remove a page from the kernel pagetables. - * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { - pt_entry_t *l3; + pt_entry_t *pte; + int lvl; - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); + KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); - if (pmap_l3_valid_cacheable(pmap_load(l3))) + if (pmap_l3_valid_cacheable(pmap_load(pte))) cpu_dcache_wb_range(va, L3_SIZE); - pmap_load_clear(l3); - PTE_SYNC(l3); + pmap_load_clear(pte); + PTE_SYNC(pte); pmap_invalidate_page(kernel_pmap, va); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { - pt_entry_t *l3; + pt_entry_t *pte; vm_offset_t va; + int lvl; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); @@ -957,10 +1103,12 @@ va = sva; while (size != 0) { - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); - pmap_load_clear(l3); - PTE_SYNC(l3); + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); + KASSERT(lvl == 3, + ("Invalid device pagetable level: %d != 3", lvl)); + pmap_load_clear(pte); + PTE_SYNC(pte); va += PAGE_SIZE; size -= PAGE_SIZE; @@ -999,19 +1147,26 @@ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { - pt_entry_t *l3, pa; + pd_entry_t *pde; + pt_entry_t *pte, pa; vm_offset_t va; vm_page_t m; - int i; + int i, lvl; va = sva; for (i = 0; i < count; i++) { + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, + ("pmap_qenter: Invalid level %d", lvl)); + m = ma[i]; pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_IDX(m->md.pv_memattr) | L3_PAGE; - l3 = pmap_l3(kernel_pmap, va); - pmap_load_store(l3, pa); - PTE_SYNC(l3); + pte = pmap_l2_to_l3(pde, va); + pmap_load_store(pte, pa); + PTE_SYNC(pte); va += L3_SIZE; } @@ -1021,25 +1176,27 @@ /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. - * Note: SMP coherent. Uses a ranged shootdown IPI. 
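
Note that the mapping-installation paths above (pmap_kenter_device(), pmap_qenter(), and later pmap_enter()) stop the walk at the L2 entry instead of calling pmap_pte(): the new pmap_pte() only returns a pointer once the final entry is already a valid block or page, so it cannot hand back an empty L3 slot to store into. A minimal sketch of the shared pattern, not part of the patch and with the memory attributes abridged:

    pde = pmap_pde(kernel_pmap, va, &lvl);   /* an L3 table must exist */
    KASSERT(pde != NULL && lvl == 2, ("missing kernel L3 table"));
    pte = pmap_l2_to_l3(pde, va);            /* slot may currently be empty */
    pmap_load_store(pte, pa | ATTR_DEFAULT | L3_PAGE);
    PTE_SYNC(pte);
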
*/ void pmap_qremove(vm_offset_t sva, int count) { - pt_entry_t *l3; + pt_entry_t *pte; vm_offset_t va; + int lvl; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); va = sva; while (count-- > 0) { - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); - - if (pmap_l3_valid_cacheable(pmap_load(l3))) - cpu_dcache_wb_range(va, L3_SIZE); - pmap_load_clear(l3); - PTE_SYNC(l3); + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(lvl == 3, + ("Invalid device pagetable level: %d != 3", lvl)); + if (pte != NULL) { + if (pmap_l3_valid_cacheable(pmap_load(pte))) + cpu_dcache_wb_range(va, L3_SIZE); + pmap_load_clear(pte); + PTE_SYNC(pte); + } va += PAGE_SIZE; } @@ -1104,26 +1261,47 @@ /* * unmap the page table page */ - if (m->pindex >= NUPDE) { - /* PD page */ + if (m->pindex >= (NUL2E + NUL1E)) { + /* l1 page */ + pd_entry_t *l0; + + l0 = pmap_l0(pmap, va); + pmap_load_clear(l0); + PTE_SYNC(l0); + } else if (m->pindex >= NUL2E) { + /* l2 page */ pd_entry_t *l1; + l1 = pmap_l1(pmap, va); pmap_load_clear(l1); PTE_SYNC(l1); } else { - /* PTE page */ + /* l3 page */ pd_entry_t *l2; + l2 = pmap_l2(pmap, va); pmap_load_clear(l2); PTE_SYNC(l2); } pmap_resident_count_dec(pmap, 1); - if (m->pindex < NUPDE) { - /* We just released a PT, unhold the matching PD */ - vm_page_t pdpg; + if (m->pindex < NUL2E) { + /* We just released an l3, unhold the matching l2 */ + pd_entry_t *l1, tl1; + vm_page_t l2pg; - pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK); - pmap_unwire_l3(pmap, va, pdpg, free); + l1 = pmap_l1(pmap, va); + tl1 = pmap_load(l1); + l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); + pmap_unwire_l3(pmap, va, l2pg, free); + } else if (m->pindex < (NUL2E + NUL1E)) { + /* We just released an l2, unhold the matching l1 */ + pd_entry_t *l0, tl0; + vm_page_t l1pg; + + l0 = pmap_l0(pmap, va); + tl0 = pmap_load(l0); + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + pmap_unwire_l3(pmap, va, l1pg, free); } pmap_invalidate_page(pmap, va); @@ -1164,27 +1342,27 @@ PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); - pmap->pm_l1 = kernel_pmap->pm_l1; + pmap->pm_l0 = kernel_pmap->pm_l0; } int pmap_pinit(pmap_t pmap) { - vm_paddr_t l1phys; - vm_page_t l1pt; + vm_paddr_t l0phys; + vm_page_t l0pt; /* - * allocate the l1 page + * allocate the l0 page */ - while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | + while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; - l1phys = VM_PAGE_TO_PHYS(l1pt); - pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); + l0phys = VM_PAGE_TO_PHYS(l0pt); + pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); - if ((l1pt->flags & PG_ZERO) == 0) - pagezero(pmap->pm_l1); + if ((l0pt->flags & PG_ZERO) == 0) + pagezero(pmap->pm_l0); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); @@ -1205,7 +1383,7 @@ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { - vm_page_t m, /*pdppg, */pdpg; + vm_page_t m, l1pg, l2pg; PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -1237,33 +1415,84 @@ * it isn't already there. 
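
The wire-count bookkeeping in pmap_unwire_l3() above and the allocator below both key off a single pindex namespace for page-table pages, now extended with a third band for L1 tables. A sketch of the layout, not part of the patch (the leaf pindex is derived from va >> L2_SHIFT):

    /*
     * Page-table-page pindex bands:
     *   [0, NUL2E)                        L3 tables: va >> L2_SHIFT
     *   [NUL2E, NUL2E + NUL1E)            L2 tables: NUL2E + (va >> L1_SHIFT)
     *   [NUL2E + NUL1E, + NUL0E entries)  L1 tables: NUL2E + NUL1E + (va >> L0_SHIFT)
     * The >= NUL2E and >= (NUL2E + NUL1E) comparisons select the band, and
     * the recursive _pmap_alloc_l3() calls pass the parent table's pindex
     * so a missing intermediate level is allocated first.
     */
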
*/ - if (ptepindex >= NUPDE) { - pd_entry_t *l1; - vm_pindex_t l1index; + if (ptepindex >= (NUL2E + NUL1E)) { + pd_entry_t *l0; + vm_pindex_t l0index; + + l0index = ptepindex - (NUL2E + NUL1E); + l0 = &pmap->pm_l0[l0index]; + pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); + PTE_SYNC(l0); + } else if (ptepindex >= NUL2E) { + vm_pindex_t l0index, l1index; + pd_entry_t *l0, *l1; + pd_entry_t tl0; + + l1index = ptepindex - NUL2E; + l0index = l1index >> L0_ENTRIES_SHIFT; + + l0 = &pmap->pm_l0[l0index]; + tl0 = pmap_load(l0); + if (tl0 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, + lockp) == NULL) { + --m->wire_count; + /* XXX: release mem barrier? */ + atomic_subtract_int(&vm_cnt.v_wire_count, 1); + vm_page_free_zero(m); + return (NULL); + } + } else { + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + l1pg->wire_count++; + } - l1index = ptepindex - NUPDE; - l1 = &pmap->pm_l1[l1index]; + l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); + l1 = &l1[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); PTE_SYNC(l1); - } else { - vm_pindex_t l1index; - pd_entry_t *l1, *l2; - - l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); - l1 = &pmap->pm_l1[l1index]; - if (pmap_load(l1) == 0) { + vm_pindex_t l0index, l1index; + pd_entry_t *l0, *l1, *l2; + pd_entry_t tl0, tl1; + + l1index = ptepindex >> Ln_ENTRIES_SHIFT; + l0index = l1index >> L0_ENTRIES_SHIFT; + + l0 = &pmap->pm_l0[l0index]; + tl0 = pmap_load(l0); + if (tl0 == 0) { /* recurse for allocating page dir */ - if (_pmap_alloc_l3(pmap, NUPDE + l1index, + if (_pmap_alloc_l3(pmap, NUL2E + l1index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } + tl0 = pmap_load(l0); + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = &l1[l1index & Ln_ADDR_MASK]; } else { - pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK); - pdpg->wire_count++; + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = &l1[l1index & Ln_ADDR_MASK]; + tl1 = pmap_load(l1); + if (tl1 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + l1index, + lockp) == NULL) { + --m->wire_count; + /* XXX: release mem barrier? */ + atomic_subtract_int( + &vm_cnt.v_wire_count, 1); + vm_page_free_zero(m); + return (NULL); + } + } else { + l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); + l2pg->wire_count++; + } } l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); @@ -1281,8 +1510,9 @@ pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; - pd_entry_t *l2; + pd_entry_t *pde, tpde; vm_page_t m; + int lvl; /* * Calculate pagetable page index @@ -1292,24 +1522,29 @@ /* * Get the page directory entry */ - l2 = pmap_l2(pmap, va); + pde = pmap_pde(pmap, va, &lvl); /* - * If the page table page is mapped, we just increment the - * hold count, and activate it. + * If the page table page is mapped, we just increment the hold count, + * and activate it. If we get a level 2 pde it will point to a level 3 + * table. */ - if (l2 != NULL && pmap_load(l2) != 0) { - m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); - m->wire_count++; - } else { - /* - * Here if the pte page isn't mapped, or if it has been - * deallocated. 
- */ - m = _pmap_alloc_l3(pmap, ptepindex, lockp); - if (m == NULL && lockp != NULL) - goto retry; + if (lvl == 2) { + tpde = pmap_load(pde); + if (tpde != 0) { + m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); + m->wire_count++; + return (m); + } } + + /* + * Here if the pte page isn't mapped, or if it has been deallocated. + */ + m = _pmap_alloc_l3(pmap, ptepindex, lockp); + if (m == NULL && lockp != NULL) + goto retry; + return (m); } @@ -1332,7 +1567,7 @@ ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); @@ -1369,7 +1604,7 @@ { vm_paddr_t paddr; vm_page_t nkpg; - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; mtx_assert(&kernel_map->system_mtx, MA_OWNED); @@ -1377,7 +1612,11 @@ if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { - l1 = pmap_l1(kernel_pmap, kernel_vm_end); + l0 = pmap_l0(kernel_pmap, kernel_vm_end); + KASSERT(pmap_load(l0) != 0, + ("pmap_growkernel: No level 0 kernel entry")); + + l1 = pmap_l0_to_l1(l0, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, @@ -1716,7 +1955,7 @@ { struct rwlock *lock; vm_offset_t va, va_next; - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; pt_entry_t l3_paddr, *l3; struct spglist free; int anyvalid; @@ -1739,7 +1978,15 @@ if (pmap->pm_stats.resident_count == 0) break; - l1 = pmap_l1(pmap, sva); + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) @@ -1824,9 +2071,10 @@ { pv_entry_t pv; pmap_t pmap; - pt_entry_t *l3, tl3; - pd_entry_t *l2, tl2; + pd_entry_t *pde, tpde; + pt_entry_t *pte, tpte; struct spglist free; + int lvl; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); @@ -1836,30 +2084,33 @@ pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); - l2 = pmap_l2(pmap, pv->pv_va); - KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); - tl2 = pmap_load(l2); - KASSERT((tl2 & ATTR_DESCR_MASK) == L2_TABLE, - ("pmap_remove_all: found a table when expecting " - "a block in %p's pv list", m)); - l3 = pmap_l2_to_l3(l2, pv->pv_va); + + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, + ("pmap_remove_all: no page directory entry found")); + KASSERT(lvl == 2, + ("pmap_remove_all: invalid pde level %d", lvl)); + tpde = pmap_load(pde); + + pte = pmap_l2_to_l3(pde, pv->pv_va); + tpte = pmap_load(pte); if (pmap_is_current(pmap) && - pmap_l3_valid_cacheable(pmap_load(l3))) + pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); - tl3 = pmap_load_clear(l3); - PTE_SYNC(l3); + pmap_load_clear(pte); + PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); - if (tl3 & ATTR_SW_WIRED) + if (tpte & ATTR_SW_WIRED) pmap->pm_stats.wired_count--; - if ((tl3 & ATTR_AF) != 0) + if ((tpte & ATTR_AF) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. 
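
The range loops in pmap_remove() above, and the same pattern in pmap_protect() and pmap_unwire() further down, gain one more early-out: when an entire L0 slot is empty the scan skips a full 512 GiB slice rather than probing the missing lower levels. A small sketch of the idiom with its magnitude spelled out, not part of the patch:

    /* L0_SIZE = 1ul << L0_SHIFT = 512 GiB covered by one L0 entry */
    if (pmap_load(l0) == 0) {
        va_next = (sva + L0_SIZE) & ~L0_OFFSET;  /* next 512 GiB boundary */
        if (va_next < sva)                       /* clamp on address wrap */
            va_next = eva;
        continue;
    }
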
*/ - if (pmap_page_dirty(tl3)) + if (pmap_page_dirty(tpte)) vm_page_dirty(m); - pmap_unuse_l3(pmap, pv->pv_va, tl2, &free); + pmap_unuse_l3(pmap, pv->pv_va, tpde, &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); @@ -1878,7 +2129,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va, va_next; - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; pt_entry_t *l3p, l3; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { @@ -1892,7 +2143,15 @@ PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { - l1 = pmap_l1(pmap, sva); + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) @@ -1946,13 +2205,14 @@ u_int flags, int8_t psind __unused) { struct rwlock *lock; - pd_entry_t *l1, *l2; + pd_entry_t *pde; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; - vm_paddr_t opa, pa, l2_pa, l3_pa; - vm_page_t mpte, om, l2_m, l3_m; + vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; + vm_page_t mpte, om, l1_m, l2_m, l3_m; boolean_t nosleep; + int lvl; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) @@ -1986,14 +2246,44 @@ PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } - l3 = pmap_l3(pmap, va); + pde = pmap_pde(pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_enter: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, + ("pmap_enter: Invalid level %d", lvl)); + + l3 = pmap_l2_to_l3(pde, va); } else { - l3 = pmap_l3(pmap, va); - /* TODO: This is not optimal, but should mostly work */ - if (l3 == NULL) { - l2 = pmap_l2(pmap, va); + pde = pmap_pde(pmap, va, &lvl); + /* + * If we get a level 2 pde it must point to a level 3 entry + * otherwise we will need to create the intermediate tables + */ + if (lvl < 2) { + switch(lvl) { + default: + case -1: + /* Get the l0 pde to update */ + pde = pmap_l0(pmap, va); + KASSERT(pde != NULL, ("...")); + + l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); + if (l1_m == NULL) + panic("pmap_enter: l1 pte_m == NULL"); + if ((l1_m->flags & PG_ZERO) == 0) + pmap_zero_page(l1_m); + + l1_pa = VM_PAGE_TO_PHYS(l1_m); + pmap_load_store(pde, l1_pa | L0_TABLE); + PTE_SYNC(pde); + /* FALLTHROUGH */ + case 0: + /* Get the l1 pde to update */ + pde = pmap_l1_to_l2(pde, va); + KASSERT(pde != NULL, ("...")); - if (l2 == NULL) { l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); @@ -2003,27 +2293,28 @@ pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); - l1 = pmap_l1(pmap, va); - pmap_load_store(l1, l2_pa | L1_TABLE); - PTE_SYNC(l1); - l2 = pmap_l1_to_l2(l1, va); - } - - KASSERT(l2 != NULL, - ("No l2 table after allocating one")); + pmap_load_store(pde, l2_pa | L1_TABLE); + PTE_SYNC(pde); + /* FALLTHROUGH */ + case 1: + /* Get the l2 pde to update */ + pde = pmap_l1_to_l2(pde, va); - l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); - if (l3_m == NULL) - panic("pmap_enter: l3 pte_m == NULL"); - if ((l3_m->flags & PG_ZERO) == 0) - pmap_zero_page(l3_m); - - l3_pa = VM_PAGE_TO_PHYS(l3_m); - pmap_load_store(l2, l3_pa | L2_TABLE); - PTE_SYNC(l2); - l3 = pmap_l2_to_l3(l2, va); + l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); + if (l3_m == NULL) + panic("pmap_enter: l3 
pte_m == NULL"); + if ((l3_m->flags & PG_ZERO) == 0) + pmap_zero_page(l3_m); + + l3_pa = VM_PAGE_TO_PHYS(l3_m); + pmap_load_store(pde, l3_pa | L2_TABLE); + PTE_SYNC(pde); + break; + } } + l3 = pmap_l2_to_l3(pde, va); pmap_invalidate_page(pmap, va); } @@ -2207,9 +2498,10 @@ vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; - pd_entry_t *l2; + pd_entry_t *pde; pt_entry_t *l3; vm_paddr_t pa; + int lvl; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, @@ -2235,7 +2527,7 @@ /* * Get the l2 entry */ - l2 = pmap_l2(pmap, va); + pde = pmap_pde(pmap, va, &lvl); /* * If the page table page is mapped, we just increment @@ -2243,9 +2535,9 @@ * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ - if (l2 != NULL && pmap_load(l2) != 0) { + if (lvl == 2 && pmap_load(pde) != 0) { mpte = - PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); + PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); mpte->wire_count++; } else { /* @@ -2261,10 +2553,15 @@ l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; - l3 = pmap_l3(kernel_pmap, va); + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", + va)); + KASSERT(lvl == 2, + ("pmap_enter_quick_locked: Invalid level %d", lvl)); + l3 = pmap_l2_to_l3(pde, va); } - if (l3 == NULL) - panic("pmap_enter_quick_locked: No l3"); + if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->wire_count--; @@ -2336,14 +2633,22 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; boolean_t pv_lists_locked; pv_lists_locked = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { - l1 = pmap_l1(pmap, sva); + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) @@ -2551,9 +2856,9 @@ { struct rwlock *lock; pmap_t pmap; - pt_entry_t *l3; + pt_entry_t *pte; pv_entry_t pv; - int count, md_gen; + int count, lvl, md_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); @@ -2574,8 +2879,8 @@ goto restart; } } - l3 = pmap_l3(pmap, pv->pv_va); - if (l3 != NULL && (pmap_load(l3) & ATTR_SW_WIRED) != 0) + pte = pmap_pte(pmap, pv->pv_va, &lvl); + if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } @@ -2603,8 +2908,8 @@ void pmap_remove_pages(pmap_t pmap) { - pd_entry_t ptepde, *l2; - pt_entry_t *l3, tl3; + pd_entry_t *pde; + pt_entry_t *pte, tpte; struct spglist free; vm_page_t m; pv_entry_t pv; @@ -2612,7 +2917,7 @@ struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; - int allfree, field, freed, idx; + int allfree, field, freed, idx, lvl; vm_paddr_t pa; lock = NULL; @@ -2632,44 +2937,51 @@ pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; - l2 = pmap_l2(pmap, pv->pv_va); - ptepde = pmap_load(l2); - l3 = pmap_l2_to_l3(l2, pv->pv_va); - tl3 = pmap_load(l3); + pde = pmap_pde(pmap, pv->pv_va, &lvl); + KASSERT(pde != NULL, + ("Attempting to remove an unmapped page")); + KASSERT(lvl == 2, + ("Invalid page directory level: %d", lvl)); + + pte = pmap_l2_to_l3(pde, pv->pv_va); + KASSERT(pte != NULL, + ("Attempting to remove an unmapped page")); + + tpte = pmap_load(pte); /* * We cannot remove wired pages from a process' mapping at this time */ - if (tl3 & ATTR_SW_WIRED) { + if (tpte & 
ATTR_SW_WIRED) { allfree = 0; continue; } - pa = tl3 & ~ATTR_MASK; + pa = tpte & ~ATTR_MASK; m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, - (uintmax_t)tl3)); + (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], - ("pmap_remove_pages: bad l3 %#jx", - (uintmax_t)tl3)); + ("pmap_remove_pages: bad pte %#jx", + (uintmax_t)tpte)); + /* XXX: assumes tpte is level 3 */ if (pmap_is_current(pmap) && - pmap_l3_valid_cacheable(pmap_load(l3))) + pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); - pmap_load_clear(l3); - PTE_SYNC(l3); + pmap_load_clear(pte); + PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); /* * Update the vm_page_t clean/reference bits. */ - if ((tl3 & ATTR_AP_RW_BIT) == - ATTR_AP(ATTR_AP_RW)) + if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) vm_page_dirty(m); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); @@ -2681,7 +2993,8 @@ TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; - pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); + pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde), + &free); freed++; } } @@ -2711,9 +3024,9 @@ { struct rwlock *lock; pv_entry_t pv; - pt_entry_t *l3, mask, value; + pt_entry_t *pte, mask, value; pmap_t pmap; - int md_gen; + int lvl, md_gen; boolean_t rv; rv = FALSE; @@ -2733,7 +3046,9 @@ goto restart; } } - l3 = pmap_l3(pmap, pv->pv_va); + pte = pmap_pte(pmap, pv->pv_va, &lvl); + KASSERT(lvl == 3, + ("pmap_page_test_mappings: Invalid level %d", lvl)); mask = 0; value = 0; if (modified) { @@ -2744,7 +3059,7 @@ mask |= ATTR_AF | ATTR_DESCR_MASK; value |= ATTR_AF | L3_PAGE; } - rv = (pmap_load(l3) & mask) == value; + rv = (pmap_load(pte) & mask) == value; PMAP_UNLOCK(pmap); if (rv) goto out; @@ -2788,13 +3103,14 @@ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { - pt_entry_t *l3; + pt_entry_t *pte; boolean_t rv; + int lvl; rv = FALSE; PMAP_LOCK(pmap); - l3 = pmap_l3(pmap, addr); - if (l3 != NULL && pmap_load(l3) != 0) { + pte = pmap_pte(pmap, addr, &lvl); + if (pte != NULL && pmap_load(pte) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); @@ -2825,8 +3141,8 @@ pmap_t pmap; struct rwlock *lock; pv_entry_t pv; - pt_entry_t *l3, oldl3; - int md_gen; + pt_entry_t oldpte, *pte; + int lvl, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); @@ -2856,14 +3172,14 @@ goto retry_pv_loop; } } - l3 = pmap_l3(pmap, pv->pv_va); + pte = pmap_pte(pmap, pv->pv_va, &lvl); retry: - oldl3 = pmap_load(l3); - if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { - if (!atomic_cmpset_long(l3, oldl3, - oldl3 | ATTR_AP(ATTR_AP_RO))) + oldpte = pmap_load(pte); + if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { + if (!atomic_cmpset_long(pte, oldpte, + oldpte | ATTR_AP(ATTR_AP_RO))) goto retry; - if ((oldl3 & ATTR_AF) != 0) + if ((oldpte & ATTR_AF) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } @@ -2901,10 +3217,10 @@ pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; - pd_entry_t *l2p, l2; - pt_entry_t *l3; + pd_entry_t *pde, tpde; + pt_entry_t *pte, tpte; vm_paddr_t pa; - int cleared, md_gen, not_cleared; + int cleared, md_gen, not_cleared, lvl; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, @@ -2934,28 +3250,31 @@ goto retry; } } - l2p = pmap_l2(pmap, pv->pv_va); - KASSERT(l2p != NULL, ("pmap_ts_referenced: no l2 table found")); - l2 = pmap_load(l2p); - KASSERT((l2 & ATTR_DESCR_MASK) == L2_TABLE, + pde = pmap_pde(pmap, 
pv->pv_va, &lvl); + KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); + KASSERT(lvl == 2, + ("pmap_ts_referenced: invalid pde level %d", lvl)); + tpde = pmap_load(pde); + KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, ("pmap_ts_referenced: found an invalid l2 table")); - l3 = pmap_l2_to_l3(l2p, pv->pv_va); - if ((pmap_load(l3) & ATTR_AF) != 0) { - if (safe_to_clear_referenced(pmap, pmap_load(l3))) { + pte = pmap_l2_to_l3(pde, pv->pv_va); + tpte = pmap_load(pte); + if ((tpte & ATTR_AF) != 0) { + if (safe_to_clear_referenced(pmap, tpte)) { /* * TODO: We don't handle the access flag * at all. We need to be able to set it in * the exception handler. */ panic("ARM64TODO: safe_to_clear_referenced\n"); - } else if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) { + } else if ((tpte & ATTR_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ - pmap_remove_l3(pmap, l3, pv->pv_va, l2, + pmap_remove_l3(pmap, pte, pv->pv_va, tpde, &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; @@ -3145,8 +3464,8 @@ critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); - td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); - __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr)); + td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0); + __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr)); pmap_invalidate_all(pmap); critical_exit(); } Index: head/sys/arm64/arm64/swtch.S =================================================================== --- head/sys/arm64/arm64/swtch.S +++ head/sys/arm64/arm64/swtch.S @@ -86,7 +86,7 @@ */ /* Switch to the new pmap */ - ldr x5, [x4, #PCB_L1ADDR] + ldr x5, [x4, #PCB_L0ADDR] msr ttbr0_el1, x5 isb @@ -183,7 +183,7 @@ */ /* Switch to the new pmap */ - ldr x5, [x4, #PCB_L1ADDR] + ldr x5, [x4, #PCB_L0ADDR] msr ttbr0_el1, x5 isb Index: head/sys/arm64/arm64/vm_machdep.c =================================================================== --- head/sys/arm64/arm64/vm_machdep.c +++ head/sys/arm64/arm64/vm_machdep.c @@ -84,8 +84,8 @@ td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); - td2->td_pcb->pcb_l1addr = - vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1); + td2->td_pcb->pcb_l0addr = + vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l0); tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); Index: head/sys/arm64/include/machdep.h =================================================================== --- head/sys/arm64/include/machdep.h +++ head/sys/arm64/include/machdep.h @@ -34,6 +34,7 @@ vm_offset_t kern_l1pt; /* L1 page table for the kernel */ uint64_t kern_delta; vm_offset_t kern_stack; + vm_offset_t kern_l0pt; /* L1 page table for the kernel */ }; extern vm_paddr_t physmap[]; Index: head/sys/arm64/include/pcb.h =================================================================== --- head/sys/arm64/include/pcb.h +++ head/sys/arm64/include/pcb.h @@ -40,7 +40,7 @@ /* These two need to be in order as we access them together */ uint64_t pcb_sp; uint64_t pcb_tpidr_el0; - vm_offset_t pcb_l1addr; + vm_offset_t pcb_l0addr; /* Fault handler, the error value is passed in x0 */ vm_offset_t pcb_onfault; Index: head/sys/arm64/include/pmap.h =================================================================== --- head/sys/arm64/include/pmap.h +++ head/sys/arm64/include/pmap.h @@ -78,7 +78,7 @@ struct pmap { struct mtx pm_mtx; struct pmap_statistics pm_stats; /* pmap statictics 
*/ - pd_entry_t *pm_l1; + pd_entry_t *pm_l0; TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ }; @@ -134,7 +134,7 @@ #define L1_MAPPABLE_P(va, pa, size) \ ((((va) | (pa)) & L1_OFFSET) == 0 && (size) >= L1_SIZE) -void pmap_bootstrap(vm_offset_t, vm_paddr_t, vm_size_t); +void pmap_bootstrap(vm_offset_t, vm_offset_t, vm_paddr_t, vm_size_t); void pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t); vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); @@ -149,7 +149,7 @@ void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); bool pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t **, pd_entry_t **, - pt_entry_t **); + pd_entry_t **, pt_entry_t **); #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) Index: head/sys/arm64/include/pte.h =================================================================== --- head/sys/arm64/include/pte.h +++ head/sys/arm64/include/pte.h @@ -73,6 +73,8 @@ /* Level 0 table, 512GiB per entry */ #define L0_SHIFT 39 +#define L0_SIZE (1ul << L0_SHIFT) +#define L0_OFFSET (L0_SIZE - 1ul) #define L0_INVAL 0x0 /* An invalid address */ /* 0x1 Level 0 doesn't support block translation */ /* 0x2 also marks an invalid address */ @@ -106,7 +108,12 @@ /* 0x2 also marks an invalid address */ #define L3_PAGE 0x3 -#define Ln_ENTRIES (1 << 9) +#define L0_ENTRIES_SHIFT 9 +#define L0_ENTRIES (1 << L0_ENTRIES_SHIFT) +#define L0_ADDR_MASK (L0_ENTRIES - 1) + +#define Ln_ENTRIES_SHIFT 9 +#define Ln_ENTRIES (1 << Ln_ENTRIES_SHIFT) #define Ln_ADDR_MASK (Ln_ENTRIES - 1) #define Ln_TABLE_MASK ((1 << 12) - 1) Index: head/sys/arm64/include/vmparam.h =================================================================== --- head/sys/arm64/include/vmparam.h +++ head/sys/arm64/include/vmparam.h @@ -188,7 +188,7 @@ }) #define VM_MIN_USER_ADDRESS (0x0000000000000000UL) -#define VM_MAX_USER_ADDRESS (0x0000008000000000UL) +#define VM_MAX_USER_ADDRESS (0x0001000000000000UL) #define VM_MINUSER_ADDRESS (VM_MIN_USER_ADDRESS) #define VM_MAXUSER_ADDRESS (VM_MAX_USER_ADDRESS)
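
The vmparam.h change is the user-visible payoff of the series: with a per-process level 0 table loaded into TTBR0 (cached in pcb_l0addr), user virtual addresses span the full 48 bits the 4 KiB granule supports. A short arithmetic check, not part of the patch:

    /*
     * 4 levels x 9 index bits + 12 offset bits = 48 VA bits.
     * VM_MAX_USER_ADDRESS: 0x0001000000000000UL = 1ul << 48 (256 TiB),
     * up from 1ul << 39 (512 GiB).  Each of the 512 L0 entries maps an
     * L0_SIZE = 1ul << 39 = 512 GiB slice.
     */
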