Index: sys/arm64/arm64/locore.S
===================================================================
--- sys/arm64/arm64/locore.S
+++ sys/arm64/arm64/locore.S
@@ -35,7 +35,7 @@
 #include
 #include

-#define VIRT_BITS   39
+#define VIRT_BITS   48

     .globl  kernbase
     .set    kernbase, KERNBASE
@@ -90,6 +90,7 @@
      * At this point:
      * x27 = TTBR0 table
      * x26 = TTBR1 table
+     * x24 = Kernel L0 table (temporary)
      */

     /* Enable the mmu */
@@ -128,6 +129,7 @@

     /* Make the page table base a virtual address */
     sub     x26, x26, x29
+    sub     x24, x24, x29

     sub     sp, sp, #(64 * 4)
     mov     x0, sp
@@ -139,6 +141,7 @@
     str     x26, [x0, 8]    /* kern_l1pt */
     str     x29, [x0, 16]   /* kern_delta */
     str     x25, [x0, 24]   /* kern_stack */
+    str     x24, [x0, 32]   /* kern_l0pt */

     /* trace back starts here */
     mov     fp, #0
@@ -175,7 +178,7 @@
     msr     contextidr_el1, x1

     /* Load the kernel page table */
-    adr     x26, pagetable_l1_ttbr1
+    adr     x24, pagetable_l0_ttbr1
     /* Load the identity page table */
     adr     x27, pagetable_l0_ttbr0

@@ -388,11 +391,18 @@
     mov     x6, x26
     bl      link_l1_pagetable

+    /* Move to the l0 table */
+    add     x24, x26, #PAGE_SIZE
+
+    /* Link the l0 -> l1 table */
+    mov     x9, x6
+    mov     x6, x24
+    bl      link_kern_l0_pagetable

     /*
      * Build the TTBR0 maps.
      */
-    add     x27, x26, #PAGE_SIZE
+    add     x27, x24, #PAGE_SIZE

     mov     x6, x27         /* The initial page table */
 #if defined(SOCDEV_PA) && defined(SOCDEV_VA)
@@ -455,6 +465,34 @@
     ret

 /*
+ * Builds an L0 -> L1 table descriptor
+ *
+ * As with link_l0_pagetable, but handles a different number of entries at
+ * level 0. This is temporary to help the transition to 4 level page tables
+ * and will be removed when both TTBR0 and TTBR1 handle the same number
+ * of address bits in this early boot code.
+ */
+link_kern_l0_pagetable:
+    /*
+     * Link an L0 -> L1 table entry.
+     */
+    /* Find the table index */
+    lsr     x11, x8, #L0_SHIFT
+    and     x11, x11, #L0_ADDR_MASK
+
+    /* Build the L0 block entry */
+    mov     x12, #L0_TABLE
+
+    /* Only use the output address bits */
+    lsr     x9, x9, #12
+    orr     x12, x12, x9, lsl #12
+
+    /* Store the entry */
+    str     x12, [x6, x11, lsl #3]
+
+    ret
+
+/*
  * Builds an L1 -> L2 table descriptor
  *
  * This is a link for a 1GiB block of memory with up to 2MiB regions mapped
@@ -582,7 +620,7 @@

     /* Load ttbr0 and ttbr1 */
     msr     ttbr0_el1, x27
-    msr     ttbr1_el1, x26
+    msr     ttbr1_el1, x24
     isb

     /* Clear the Monitor Debug System control register */
@@ -651,6 +689,8 @@
     .space  PAGE_SIZE
 pagetable_l1_ttbr1:
     .space  PAGE_SIZE
+pagetable_l0_ttbr1:
+    .space  PAGE_SIZE
 pagetable_l1_ttbr0:
     .space  PAGE_SIZE
 pagetable_l0_ttbr0:
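The locore.S changes above add one new early-boot step: before the MMU is enabled, the kernel links its existing L1 table into a freshly reserved L0 page (pagetable_l0_ttbr1), loads that L0 page into ttbr1_el1, and hands its address to the kernel through the new kern_l0pt boot parameter. A rough C equivalent of what link_kern_l0_pagetable computes is sketched below; it is illustrative only, and the helper name and standalone constants are not part of the patch:

    #include <stdint.h>

    #define L0_SHIFT     39
    #define L0_ADDR_MASK 0x1ffULL   /* 512 entries in the L0 table */
    #define L0_TABLE     0x3ULL     /* "next level is a table" descriptor */

    /* Hypothetical equivalent of link_kern_l0_pagetable in locore.S. */
    static void
    link_l0_entry(uint64_t *l0, uint64_t va, uint64_t l1_pa)
    {
        uint64_t idx, desc;

        idx = (va >> L0_SHIFT) & L0_ADDR_MASK;  /* lsr/and into x11 */
        desc = L0_TABLE | (l1_pa & ~0xfffULL);  /* keep only the output address bits */
        l0[idx] = desc;                         /* str x12, [x6, x11, lsl #3] */
    }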
Index: sys/arm64/arm64/machdep.c
===================================================================
--- sys/arm64/arm64/machdep.c
+++ sys/arm64/arm64/machdep.c
@@ -896,8 +896,8 @@
     cache_setup();

     /* Bootstrap enough of pmap to enter the kernel proper */
-    pmap_bootstrap(abp->kern_l1pt, KERNBASE - abp->kern_delta,
-        lastaddr - KERNBASE);
+    pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt,
+        KERNBASE - abp->kern_delta, lastaddr - KERNBASE);

     arm_devmap_bootstrap(0, NULL);

Index: sys/arm64/arm64/minidump_machdep.c
===================================================================
--- sys/arm64/arm64/minidump_machdep.c
+++ sys/arm64/arm64/minidump_machdep.c
@@ -218,7 +218,7 @@
 int
 minidumpsys(struct dumperinfo *di)
 {
-    pd_entry_t *l1, *l2;
+    pd_entry_t *l0, *l1, *l2;
     pt_entry_t *l3;
     uint32_t pmapsize;
     vm_offset_t va;
@@ -236,7 +236,7 @@
     pmapsize = 0;
     for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
         pmapsize += PAGE_SIZE;
-        if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3))
+        if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3))
             continue;

         /* We should always be using the l2 table for kvm */
@@ -335,7 +335,7 @@
     /* Dump kernel page directory pages */
     bzero(&tmpbuffer, sizeof(tmpbuffer));
     for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
-        if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) {
+        if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) {
             /* We always write a page, even if it is zero */
             error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
             if (error)

Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -142,9 +142,14 @@
 #include
 #include

-#define NPDEPG      (PAGE_SIZE/(sizeof (pd_entry_t)))
-#define NUPDE       (NPDEPG * NPDEPG)
-#define NUSERPGTBLS (NUPDE + NPDEPG)
+#define NL0PG       (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NL1PG       (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NL2PG       (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NL3PG       (PAGE_SIZE/(sizeof (pt_entry_t)))
+
+#define NUL0E       L0_ENTRIES
+#define NUL1E       (NUL0E * NL1PG)
+#define NUL2E       (NUL1E * NL2PG)

 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
@@ -273,15 +278,37 @@
     bzero(p, PAGE_SIZE);
 }

+#define pmap_l0_index(va)   (((va) >> L0_SHIFT) & L0_ADDR_MASK)
 #define pmap_l1_index(va)   (((va) >> L1_SHIFT) & Ln_ADDR_MASK)
 #define pmap_l2_index(va)   (((va) >> L2_SHIFT) & Ln_ADDR_MASK)
 #define pmap_l3_index(va)   (((va) >> L3_SHIFT) & Ln_ADDR_MASK)

 static __inline pd_entry_t *
+pmap_l0(pmap_t pmap, vm_offset_t va)
+{
+
+    return (&pmap->pm_l0[pmap_l0_index(va)]);
+}
+
+static __inline pd_entry_t *
+pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
+{
+    pd_entry_t *l1;
+
+    l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+    return (&l1[pmap_l1_index(va)]);
+}
+
+static __inline pd_entry_t *
 pmap_l1(pmap_t pmap, vm_offset_t va)
 {
+    pd_entry_t *l0;

-    return (&pmap->pm_l1[pmap_l1_index(va)]);
+    l0 = pmap_l0(pmap, va);
+    if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
+        return (NULL);
+
+    return (pmap_l0_to_l1(l0, va));
 }

 static __inline pd_entry_t *
@@ -314,28 +341,103 @@
     return (&l3[pmap_l3_index(va)]);
 }

+/*
+ * Returns the lowest valid pde for a given virtual address.
+ * The next level may or may not point to a valid page or block.
+ */
+static __inline pd_entry_t *
+pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
+{
+    pd_entry_t *l0, *l1, *l2, desc;
+
+    l0 = pmap_l0(pmap, va);
+    desc = pmap_load(l0) & ATTR_DESCR_MASK;
+    if (desc != L0_TABLE) {
+        *level = -1;
+        return (NULL);
+    }
+
+    l1 = pmap_l0_to_l1(l0, va);
+    desc = pmap_load(l1) & ATTR_DESCR_MASK;
+    if (desc != L1_TABLE) {
+        *level = 0;
+        return (l0);
+    }
+
+    l2 = pmap_l1_to_l2(l1, va);
+    desc = pmap_load(l2) & ATTR_DESCR_MASK;
+    if (desc != L2_TABLE) {
+        *level = 1;
+        return (l1);
+    }
+
+    *level = 2;
+    return (l2);
+}
+
+/*
+ * Returns the lowest valid pte block or table entry for a given virtual
+ * address. If there are no valid entries return NULL and set the level to
+ * the first invalid level.
+ */
 static __inline pt_entry_t *
-pmap_l3(pmap_t pmap, vm_offset_t va)
+pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
 {
-    pd_entry_t *l2;
+    pd_entry_t *l1, *l2, desc;
+    pt_entry_t *l3;

-    l2 = pmap_l2(pmap, va);
-    if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
+    l1 = pmap_l1(pmap, va);
+    if (l1 == NULL) {
+        *level = 0;
         return (NULL);
+    }
+    desc = pmap_load(l1) & ATTR_DESCR_MASK;
+    if (desc == L1_BLOCK) {
+        *level = 1;
+        return (l1);
+    }

-    return (pmap_l2_to_l3(l2, va));
+    if (desc != L1_TABLE) {
+        *level = 1;
+        return (NULL);
+    }
+
+    l2 = pmap_l1_to_l2(l1, va);
+    desc = pmap_load(l2) & ATTR_DESCR_MASK;
+    if (desc == L2_BLOCK) {
+        *level = 2;
+        return (l2);
+    }
+
+    if (desc != L2_TABLE) {
+        *level = 2;
+        return (NULL);
+    }
+
+    *level = 3;
+    l3 = pmap_l2_to_l3(l2, va);
+    if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
+        return (NULL);
+
+    return (l3);
 }

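pmap_pde() and pmap_pte() report which translation level produced the entry they return, and the callers updated later in this patch key off that level to pick the right offset mask. As a standalone illustration (a hypothetical helper, not part of the patch), the level-to-offset mapping looks like this:

    /*
     * Illustrative only: combine the entry returned by pmap_pte() with the
     * reported level, mirroring the switch statements added to
     * pmap_extract() and pmap_kextract() below.
     */
    static vm_paddr_t
    pte_to_pa(pt_entry_t tpte, vm_offset_t va, int lvl)
    {
        vm_paddr_t pa;

        pa = tpte & ~ATTR_MASK;
        switch (lvl) {
        case 1:                 /* 1GiB L1 block */
            return (pa | (va & L1_OFFSET));
        case 2:                 /* 2MiB L2 block */
            return (pa | (va & L2_OFFSET));
        case 3:                 /* 4KiB L3 page */
            return (pa | (va & L3_OFFSET));
        default:
            return (0);
        }
    }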
 bool
-pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2,
-    pt_entry_t **l3)
+pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
+    pd_entry_t **l2, pt_entry_t **l3)
 {
-    pd_entry_t *l1p, *l2p;
+    pd_entry_t *l0p, *l1p, *l2p;

-    if (pmap->pm_l1 == NULL)
+    if (pmap->pm_l0 == NULL)
         return (false);

-    l1p = pmap_l1(pmap, va);
+    l0p = pmap_l0(pmap, va);
+    *l0 = l0p;
+
+    if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
+        return (false);
+
+    l1p = pmap_l0_to_l1(l0p, va);
     *l1 = l1p;

     if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
@@ -544,7 +646,8 @@
  * Bootstrap the system enough to run with virtual memory.
  */
 void
-pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
+pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
+    vm_size_t kernlen)
 {
     u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
     uint64_t kern_delta;
@@ -562,7 +665,7 @@
     printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

     /* Set this early so we can use the pagetable walking functions */
-    kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
+    kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
     PMAP_LOCK_INIT(kernel_pmap);

     /*
@@ -805,9 +908,9 @@
 vm_paddr_t
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
-    pd_entry_t *l2p, l2;
-    pt_entry_t *l3p, l3;
+    pt_entry_t *pte;
     vm_paddr_t pa;
+    int lvl;

     pa = 0;
     PMAP_LOCK(pmap);
@@ -815,20 +918,20 @@
      * Start with the l2 tabel. We are unable to allocate
      * pages in the l1 table.
      */
-    l2p = pmap_l2(pmap, va);
-    if (l2p != NULL) {
-        l2 = pmap_load(l2p);
-        if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) {
-            l3p = pmap_l2_to_l3(l2p, va);
-            if (l3p != NULL) {
-                l3 = pmap_load(l3p);
-
-                if ((l3 & ATTR_DESCR_MASK) == L3_PAGE)
-                    pa = (l3 & ~ATTR_MASK) |
-                        (va & L3_OFFSET);
-            }
-        } else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
-            pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET);
+    pte = pmap_pte(pmap, va, &lvl);
+    if (pte != NULL) {
+        pa = pmap_load(pte) & ~ATTR_MASK;
+        switch(lvl) {
+        case 1:
+            pa |= (va & L1_OFFSET);
+            break;
+        case 2:
+            pa |= (va & L2_OFFSET);
+            break;
+        case 3:
+            pa |= (va & L3_OFFSET);
+            break;
+        }
     }
     PMAP_UNLOCK(pmap);
     return (pa);
@@ -844,21 +947,23 @@
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
-    pt_entry_t *l3p, l3;
+    pt_entry_t *pte, tpte;
     vm_paddr_t pa;
     vm_page_t m;
+    int lvl;

     pa = 0;
     m = NULL;
     PMAP_LOCK(pmap);
 retry:
-    l3p = pmap_l3(pmap, va);
-    if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
-        if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
+    pte = pmap_pte(pmap, va, &lvl);
+    if (pte != NULL) {
+        tpte = pmap_load(pte);
+        if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
             ((prot & VM_PROT_WRITE) == 0)) {
-            if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa))
+            if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa))
                 goto retry;
-            m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK);
+            m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
             vm_page_hold(m);
         }
     }
@@ -870,25 +975,29 @@
 vm_paddr_t
 pmap_kextract(vm_offset_t va)
 {
-    pd_entry_t *l2p, l2;
-    pt_entry_t *l3;
+    pt_entry_t *pte;
     vm_paddr_t pa;
+    int lvl;

     if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
         pa = DMAP_TO_PHYS(va);
     } else {
-        l2p = pmap_l2(kernel_pmap, va);
-        if (l2p == NULL)
-            panic("pmap_kextract: No l2");
-        l2 = pmap_load(l2p);
-        if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
-            return ((l2 & ~ATTR_MASK) |
-                (va & L2_OFFSET));
-
-        l3 = pmap_l2_to_l3(l2p, va);
-        if (l3 == NULL)
-            panic("pmap_kextract: No l3...");
-        pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK);
+        pa = 0;
+        pte = pmap_pte(kernel_pmap, va, &lvl);
+        if (pte != NULL) {
+            pa = pmap_load(pte) & ~ATTR_MASK;
+            switch(lvl) {
+            case 1:
+                pa |= (va & L1_OFFSET);
+                break;
+            case 2:
+                pa |= (va & L2_OFFSET);
+                break;
+            case 3:
+                pa |= (va & L3_OFFSET);
+                break;
+            }
+        }
     }
     return (pa);
 }
@@ -900,8 +1009,10 @@
 void
 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
 {
-    pt_entry_t *l3;
+    pd_entry_t *pde;
+    pt_entry_t *pte;
     vm_offset_t va;
+    int lvl;

     KASSERT((pa & L3_OFFSET) == 0,
        ("pmap_kenter_device: Invalid physical address"));
@@ -912,11 +1023,16 @@

     va = sva;
     while (size != 0) {
-        l3 = pmap_l3(kernel_pmap, va);
-        KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
-        pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
+        pde = pmap_pde(kernel_pmap, va, &lvl);
+        KASSERT(pde != NULL,
+            ("pmap_kenter_device: Invalid page entry, va: 0x%lx", va));
+        KASSERT(lvl == 2,
+            ("pmap_kenter_device: Invalid level %d", lvl));
+
+        pte = pmap_l2_to_l3(pde, va);
+        pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
             ATTR_IDX(DEVICE_MEMORY) | L3_PAGE);
-        PTE_SYNC(l3);
+        PTE_SYNC(pte);

         va += PAGE_SIZE;
         pa += PAGE_SIZE;
@@ -927,28 +1043,30 @@

 /*
  * Remove a page from the kernel pagetables.
- * Note: not SMP coherent.
 */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
-    pt_entry_t *l3;
+    pt_entry_t *pte;
+    int lvl;

-    l3 = pmap_l3(kernel_pmap, va);
-    KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
+    pte = pmap_pte(kernel_pmap, va, &lvl);
+    KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
+    KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));

-    if (pmap_l3_valid_cacheable(pmap_load(l3)))
+    if (pmap_l3_valid_cacheable(pmap_load(pte)))
         cpu_dcache_wb_range(va, L3_SIZE);
-    pmap_load_clear(l3);
-    PTE_SYNC(l3);
+    pmap_load_clear(pte);
+    PTE_SYNC(pte);
     pmap_invalidate_page(kernel_pmap, va);
 }

 void
 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 {
-    pt_entry_t *l3;
+    pt_entry_t *pte;
     vm_offset_t va;
+    int lvl;

     KASSERT((sva & L3_OFFSET) == 0,
        ("pmap_kremove_device: Invalid virtual address"));
@@ -957,10 +1075,12 @@

     va = sva;
     while (size != 0) {
-        l3 = pmap_l3(kernel_pmap, va);
-        KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
-        pmap_load_clear(l3);
-        PTE_SYNC(l3);
+        pte = pmap_pte(kernel_pmap, va, &lvl);
+        KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
+        KASSERT(lvl == 3,
+            ("Invalid device pagetable level: %d != 3", lvl));
+        pmap_load_clear(pte);
+        PTE_SYNC(pte);

         va += PAGE_SIZE;
         size -= PAGE_SIZE;
@@ -999,19 +1119,26 @@
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
-    pt_entry_t *l3, pa;
+    pd_entry_t *pde;
+    pt_entry_t *pte, pa;
     vm_offset_t va;
     vm_page_t m;
-    int i;
+    int i, lvl;

     va = sva;
     for (i = 0; i < count; i++) {
+        pde = pmap_pde(kernel_pmap, va, &lvl);
+        KASSERT(pde != NULL,
+            ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
+        KASSERT(lvl == 2,
+            ("pmap_qenter: Invalid level %d", lvl));
+
         m = ma[i];
         pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
             ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
-        l3 = pmap_l3(kernel_pmap, va);
-        pmap_load_store(l3, pa);
-        PTE_SYNC(l3);
+        pte = pmap_l2_to_l3(pde, va);
+        pmap_load_store(pte, pa);
+        PTE_SYNC(pte);

         va += L3_SIZE;
     }
@@ -1021,25 +1148,27 @@
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
- * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
-    pt_entry_t *l3;
+    pt_entry_t *pte;
     vm_offset_t va;
+    int lvl;

     KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));

     va = sva;
     while (count-- > 0) {
-        l3 = pmap_l3(kernel_pmap, va);
-        KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
-
-        if (pmap_l3_valid_cacheable(pmap_load(l3)))
-            cpu_dcache_wb_range(va, L3_SIZE);
-        pmap_load_clear(l3);
-        PTE_SYNC(l3);
+        pte = pmap_pte(kernel_pmap, va, &lvl);
+        KASSERT(lvl == 3,
+            ("Invalid device pagetable level: %d != 3", lvl));
+        if (pte != NULL) {
+            if (pmap_l3_valid_cacheable(pmap_load(pte)))
+                cpu_dcache_wb_range(va, L3_SIZE);
+            pmap_load_clear(pte);
+            PTE_SYNC(pte);
+        }

         va += PAGE_SIZE;
     }
@@ -1104,26 +1233,47 @@
     /*
      * unmap the page table page
      */
-    if (m->pindex >= NUPDE) {
-        /* PD page */
+    if (m->pindex >= (NUL2E + NUL1E)) {
+        /* l1 page */
+        pd_entry_t *l0;
+
+        l0 = pmap_l0(pmap, va);
+        pmap_load_clear(l0);
+        PTE_SYNC(l0);
+    } else if (m->pindex >= NUL2E) {
+        /* l2 page */
         pd_entry_t *l1;
+
         l1 = pmap_l1(pmap, va);
         pmap_load_clear(l1);
         PTE_SYNC(l1);
     } else {
-        /* PTE page */
+        /* l3 page */
         pd_entry_t *l2;
+
         l2 = pmap_l2(pmap, va);
         pmap_load_clear(l2);
         PTE_SYNC(l2);
     }
     pmap_resident_count_dec(pmap, 1);
-    if (m->pindex < NUPDE) {
-        /* We just released a PT, unhold the matching PD */
-        vm_page_t pdpg;
+    if (m->pindex < NUL2E) {
+        /* We just released an l3, unhold the matching l2 */
+        pd_entry_t *l1, tl1;
+        vm_page_t l2pg;

-        pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK);
-        pmap_unwire_l3(pmap, va, pdpg, free);
+        l1 = pmap_l1(pmap, va);
+        tl1 = pmap_load(l1);
+        l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+        pmap_unwire_l3(pmap, va, l2pg, free);
+    } else if (m->pindex < (NUL2E + NUL1E)) {
+        /* We just released an l2, unhold the matching l1 */
+        pd_entry_t *l0, tl0;
+        vm_page_t l1pg;
+
+        l0 = pmap_l0(pmap, va);
+        tl0 = pmap_load(l0);
+        l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+        pmap_unwire_l3(pmap, va, l1pg, free);
     }
     pmap_invalidate_page(pmap, va);

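The pindex comparisons in _pmap_unwire_l3() above rely on how page-table pages are numbered once four levels exist: indices below NUL2E belong to pages holding L3 entries, indices in [NUL2E, NUL2E + NUL1E) hold L2 entries, and anything at or above NUL2E + NUL1E holds L1 entries and is linked from the L0 table. A minimal sketch of that classification, assuming the NUL*E definitions added earlier in this patch (the helper itself is hypothetical):

    /* Illustrative only: which level of entries does page-table page 'pindex' hold? */
    static int
    ptp_level(vm_pindex_t pindex)
    {

        if (pindex >= NUL2E + NUL1E)
            return (1);     /* L1 entries; parent entry lives in the L0 table */
        if (pindex >= NUL2E)
            return (2);     /* L2 entries; parent entry lives in an L1 table */
        return (3);         /* L3 entries; parent entry lives in an L2 table */
    }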
@@ -1164,27 +1314,27 @@

     PMAP_LOCK_INIT(pmap);
     bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
-    pmap->pm_l1 = kernel_pmap->pm_l1;
+    pmap->pm_l0 = kernel_pmap->pm_l0;
 }

 int
 pmap_pinit(pmap_t pmap)
 {
-    vm_paddr_t l1phys;
-    vm_page_t l1pt;
+    vm_paddr_t l0phys;
+    vm_page_t l0pt;

     /*
-     * allocate the l1 page
+     * allocate the l0 page
      */
-    while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
+    while ((l0pt = vm_page_alloc(NULL, 0xdeadc0de, VM_ALLOC_NORMAL |
         VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
         VM_WAIT;

-    l1phys = VM_PAGE_TO_PHYS(l1pt);
-    pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
+    l0phys = VM_PAGE_TO_PHYS(l0pt);
+    pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);

-    if ((l1pt->flags & PG_ZERO) == 0)
-        pagezero(pmap->pm_l1);
+    if ((l0pt->flags & PG_ZERO) == 0)
+        pagezero(pmap->pm_l0);

     bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));

@@ -1205,7 +1355,7 @@
 static vm_page_t
 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 {
-    vm_page_t m, /*pdppg, */pdpg;
+    vm_page_t m, l1pg, l2pg;

     PMAP_LOCK_ASSERT(pmap, MA_OWNED);

@@ -1237,33 +1387,84 @@
      * it isn't already there.
      */

-    if (ptepindex >= NUPDE) {
-        pd_entry_t *l1;
-        vm_pindex_t l1index;
+    if (ptepindex >= (NUL2E + NUL1E)) {
+        pd_entry_t *l0;
+        vm_pindex_t l0index;
+
+        l0index = ptepindex - (NUL2E + NUL1E);
+        l0 = &pmap->pm_l0[l0index];
+        pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
+        PTE_SYNC(l0);
+    } else if (ptepindex >= NUL2E) {
+        vm_pindex_t l0index, l1index;
+        pd_entry_t *l0, *l1;
+        pd_entry_t tl0;
+
+        l1index = ptepindex - NUL2E;
+        l0index = l1index >> 9;
+
+        l0 = &pmap->pm_l0[l0index];
+        tl0 = pmap_load(l0);
+        if (tl0 == 0) {
+            /* recurse for allocating page dir */
+            if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
+                lockp) == NULL) {
+                --m->wire_count;
+                /* XXX: release mem barrier? */
+                atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+                vm_page_free_zero(m);
+                return (NULL);
+            }
+        } else {
+            l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+            l1pg->wire_count++;
+        }

-        l1index = ptepindex - NUPDE;
-        l1 = &pmap->pm_l1[l1index];
+        l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+        l1 = &l1[ptepindex & Ln_ADDR_MASK];
         pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
         PTE_SYNC(l1);
     } else {
-        vm_pindex_t l1index;
-        pd_entry_t *l1, *l2;
+        vm_pindex_t l0index, l1index;
+        pd_entry_t *l0, *l1, *l2;
+        pd_entry_t tl0, tl1;

-        l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
-        l1 = &pmap->pm_l1[l1index];
-        if (pmap_load(l1) == 0) {
+        l1index = ptepindex >> 9;
+        l0index = l1index >> 9;
+
+        l0 = &pmap->pm_l0[l0index];
+        tl0 = pmap_load(l0);
+        if (tl0 == 0) {
             /* recurse for allocating page dir */
-            if (_pmap_alloc_l3(pmap, NUPDE + l1index,
+            if (_pmap_alloc_l3(pmap, NUL2E + l1index,
                 lockp) == NULL) {
                 --m->wire_count;
                 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
                 vm_page_free_zero(m);
                 return (NULL);
             }
+            tl0 = pmap_load(l0);
+            l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+            l1 = &l1[l1index & Ln_ADDR_MASK];
         } else {
-            pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK);
-            pdpg->wire_count++;
+            l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+            l1 = &l1[l1index & Ln_ADDR_MASK];
+            tl1 = pmap_load(l1);
+            if (tl1 == 0) {
+                /* recurse for allocating page dir */
+                if (_pmap_alloc_l3(pmap, NUL2E + l1index,
+                    lockp) == NULL) {
+                    --m->wire_count;
+                    /* XXX: release mem barrier? */
+                    atomic_subtract_int(
+                        &vm_cnt.v_wire_count, 1);
+                    vm_page_free_zero(m);
+                    return (NULL);
+                }
+            } else {
+                l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+                l2pg->wire_count++;
+            }
         }

         l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
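The recursion in _pmap_alloc_l3() above depends on being able to name the parent page-table page of any page by index alone; the shifts by 9 correspond to the 512 entries per level. A hedged sketch of that index arithmetic (hypothetical helpers; the patch itself open-codes these shifts):

    /*
     * Illustrative only: page-table-page indices as used by _pmap_alloc_l3().
     * The page holding the L3 entries for 'va' is indexed by va >> L2_SHIFT,
     * its parent (holding L2 entries) by NUL2E + (va >> L1_SHIFT), and its
     * grandparent (holding L1 entries) by NUL2E + NUL1E + (va >> L0_SHIFT).
     */
    static vm_pindex_t
    l3_table_pindex(vm_offset_t va)
    {

        return (va >> L2_SHIFT);
    }

    static vm_pindex_t
    l2_table_pindex(vm_offset_t va)
    {

        return (NUL2E + (va >> L1_SHIFT));
    }

    static vm_pindex_t
    l1_table_pindex(vm_offset_t va)
    {

        return (NUL2E + NUL1E + (va >> L0_SHIFT));
    }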
@@ -1281,8 +1482,9 @@
 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
     vm_pindex_t ptepindex;
-    pd_entry_t *l2;
+    pd_entry_t *pde, tpde;
     vm_page_t m;
+    int lvl;

     /*
      * Calculate pagetable page index
      */
@@ -1292,24 +1494,29 @@
     /*
      * Get the page directory entry
      */
-    l2 = pmap_l2(pmap, va);
+    pde = pmap_pde(pmap, va, &lvl);

     /*
-     * If the page table page is mapped, we just increment the
-     * hold count, and activate it.
+     * If the page table page is mapped, we just increment the hold count,
+     * and activate it. If we get a level 2 pde it will point to a level 3
+     * table.
      */
-    if (l2 != NULL && pmap_load(l2) != 0) {
-        m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
-        m->wire_count++;
-    } else {
-        /*
-         * Here if the pte page isn't mapped, or if it has been
-         * deallocated.
-         */
-        m = _pmap_alloc_l3(pmap, ptepindex, lockp);
-        if (m == NULL && lockp != NULL)
-            goto retry;
+    if (lvl == 2) {
+        tpde = pmap_load(pde);
+        if (tpde != 0) {
+            m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
+            m->wire_count++;
+            return (m);
+        }
     }
+
+    /*
+     * Here if the pte page isn't mapped, or if it has been deallocated.
+     */
+    m = _pmap_alloc_l3(pmap, ptepindex, lockp);
+    if (m == NULL && lockp != NULL)
+        goto retry;
+
     return (m);
 }

@@ -1332,7 +1539,7 @@
         ("pmap_release: pmap resident count %ld != 0",
         pmap->pm_stats.resident_count));

-    m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
+    m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));

     m->wire_count--;
     atomic_subtract_int(&vm_cnt.v_wire_count, 1);
@@ -1369,7 +1576,7 @@
 {
     vm_paddr_t paddr;
     vm_page_t nkpg;
-    pd_entry_t *l1, *l2;
+    pd_entry_t *l0, *l1, *l2;

     mtx_assert(&kernel_map->system_mtx, MA_OWNED);

@@ -1377,7 +1584,11 @@
     if (addr - 1 >= kernel_map->max_offset)
         addr = kernel_map->max_offset;
     while (kernel_vm_end < addr) {
-        l1 = pmap_l1(kernel_pmap, kernel_vm_end);
+        l0 = pmap_l0(kernel_pmap, kernel_vm_end);
+        KASSERT(pmap_load(l0) != 0,
+            ("pmap_growkernel: No level 0 kernel entry"));
+
+        l1 = pmap_l0_to_l1(l0, kernel_vm_end);
         if (pmap_load(l1) == 0) {
             /* We need a new PDP entry */
             nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
@@ -1716,7 +1927,7 @@
 {
     struct rwlock *lock;
     vm_offset_t va, va_next;
-    pd_entry_t *l1, *l2;
+    pd_entry_t *l0, *l1, *l2;
     pt_entry_t l3_paddr, *l3;
     struct spglist free;
     int anyvalid;
@@ -1739,7 +1950,15 @@
         if (pmap->pm_stats.resident_count == 0)
             break;

-        l1 = pmap_l1(pmap, sva);
+        l0 = pmap_l0(pmap, sva);
+        if (pmap_load(l0) == 0) {
+            va_next = (sva + L0_SIZE) & ~L0_OFFSET;
+            if (va_next < sva)
+                va_next = eva;
+            continue;
+        }
+
+        l1 = pmap_l0_to_l1(l0, sva);
         if (pmap_load(l1) == 0) {
             va_next = (sva + L1_SIZE) & ~L1_OFFSET;
             if (va_next < sva)
@@ -1824,9 +2043,10 @@
 {
     pv_entry_t pv;
     pmap_t pmap;
-    pt_entry_t *l3, tl3;
-    pd_entry_t *l2, tl2;
+    pd_entry_t *pde, tpde;
+    pt_entry_t *pte, tpte;
     struct spglist free;
+    int lvl;

     KASSERT((m->oflags & VPO_UNMANAGED) == 0,
         ("pmap_remove_all: page %p is not managed", m));
@@ -1836,30 +2056,33 @@
         pmap = PV_PMAP(pv);
         PMAP_LOCK(pmap);
         pmap_resident_count_dec(pmap, 1);
-        l2 = pmap_l2(pmap, pv->pv_va);
-        KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
-        tl2 = pmap_load(l2);
-        KASSERT((tl2 & ATTR_DESCR_MASK) == L2_TABLE,
-            ("pmap_remove_all: found a table when expecting "
-             "a block in %p's pv list", m));
-        l3 = pmap_l2_to_l3(l2, pv->pv_va);
+
+        pde = pmap_pde(pmap, pv->pv_va, &lvl);
+        KASSERT(pde != NULL,
+            ("pmap_remove_all: no page directory entry found"));
+        KASSERT(lvl == 2,
+            ("pmap_remove_all: invalid pde level %d", lvl));
+        tpde = pmap_load(pde);
+
+        pte = pmap_l2_to_l3(pde, pv->pv_va);
+        tpte = pmap_load(pte);
         if (pmap_is_current(pmap) &&
-            pmap_l3_valid_cacheable(pmap_load(l3)))
+            pmap_l3_valid_cacheable(tpte))
             cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
-        tl3 = pmap_load_clear(l3);
-        PTE_SYNC(l3);
+        pmap_load_clear(pte);
+        PTE_SYNC(pte);
         pmap_invalidate_page(pmap, pv->pv_va);
-        if (tl3 & ATTR_SW_WIRED)
+        if (tpte & ATTR_SW_WIRED)
             pmap->pm_stats.wired_count--;
-        if ((tl3 & ATTR_AF) != 0)
+        if ((tpte & ATTR_AF) != 0)
             vm_page_aflag_set(m, PGA_REFERENCED);

         /*
          * Update the vm_page_t clean and reference bits.
          */
-        if (pmap_page_dirty(tl3))
+        if (pmap_page_dirty(tpte))
             vm_page_dirty(m);
-        pmap_unuse_l3(pmap, pv->pv_va, tl2, &free);
+        pmap_unuse_l3(pmap, pv->pv_va, tpde, &free);
         TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
         m->md.pv_gen++;
         free_pv_entry(pmap, pv);
@@ -1878,7 +2101,7 @@
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
     vm_offset_t va, va_next;
-    pd_entry_t *l1, *l2;
+    pd_entry_t *l0, *l1, *l2;
     pt_entry_t *l3p, l3;

     if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
@@ -1892,7 +2115,15 @@

     PMAP_LOCK(pmap);
     for (; sva < eva; sva = va_next) {
-        l1 = pmap_l1(pmap, sva);
+        l0 = pmap_l0(pmap, sva);
+        if (pmap_load(l0) == 0) {
+            va_next = (sva + L0_SIZE) & ~L0_OFFSET;
+            if (va_next < sva)
+                va_next = eva;
+            continue;
+        }
+
+        l1 = pmap_l0_to_l1(l0, sva);
         if (pmap_load(l1) == 0) {
             va_next = (sva + L1_SIZE) & ~L1_OFFSET;
             if (va_next < sva)
@@ -1946,13 +2177,14 @@
     u_int flags, int8_t psind __unused)
 {
     struct rwlock *lock;
-    pd_entry_t *l1, *l2;
+    pd_entry_t *pde;
     pt_entry_t new_l3, orig_l3;
     pt_entry_t *l3;
     pv_entry_t pv;
-    vm_paddr_t opa, pa, l2_pa, l3_pa;
-    vm_page_t mpte, om, l2_m, l3_m;
+    vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa;
+    vm_page_t mpte, om, l1_m, l2_m, l3_m;
     boolean_t nosleep;
+    int lvl;

     va = trunc_page(va);
     if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
@@ -1986,14 +2218,44 @@
             PMAP_UNLOCK(pmap);
             return (KERN_RESOURCE_SHORTAGE);
         }
-        l3 = pmap_l3(pmap, va);
+        pde = pmap_pde(pmap, va, &lvl);
+        KASSERT(pde != NULL,
+            ("pmap_enter: Invalid page entry, va: 0x%lx", va));
+        KASSERT(lvl == 2,
+            ("pmap_enter: Invalid level %d", lvl));
+
+        l3 = pmap_l2_to_l3(pde, va);
     } else {
-        l3 = pmap_l3(pmap, va);
-        /* TODO: This is not optimal, but should mostly work */
-        if (l3 == NULL) {
-            l2 = pmap_l2(pmap, va);
+        pde = pmap_pde(pmap, va, &lvl);
+        /*
+         * If we get a level 2 pde it must point to a level 3 entry
+         * otherwise we will need to create the intermediate tables
+         */
+        if (lvl < 2) {
+            switch(lvl) {
+            default:
+            case -1:
+                /* Get the l0 pde to update */
+                pde = pmap_l0(pmap, va);
+                KASSERT(pde != NULL, ("..."));
+
+                l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
+                    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+                    VM_ALLOC_ZERO);
+                if (l1_m == NULL)
+                    panic("pmap_enter: l1 pte_m == NULL");
+                if ((l1_m->flags & PG_ZERO) == 0)
+                    pmap_zero_page(l1_m);
+
+                l1_pa = VM_PAGE_TO_PHYS(l1_m);
+                pmap_load_store(pde, l1_pa | L0_TABLE);
+                PTE_SYNC(pde);
+                /* FALLTHROUGH */
+            case 0:
+                /* Get the l1 pde to update */
+                pde = pmap_l0_to_l1(pde, va);
+                KASSERT(pde != NULL, ("..."));

-            if (l2 == NULL) {
                 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
                     VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
                     VM_ALLOC_ZERO);
@@ -2003,27 +2265,28 @@
                     pmap_zero_page(l2_m);

                 l2_pa = VM_PAGE_TO_PHYS(l2_m);
-                l1 = pmap_l1(pmap, va);
-                pmap_load_store(l1, l2_pa | L1_TABLE);
-                PTE_SYNC(l1);
-                l2 = pmap_l1_to_l2(l1, va);
+                pmap_load_store(pde, l2_pa | L1_TABLE);
+                PTE_SYNC(pde);
+                /* FALLTHROUGH */
+            case 1:
+                /* Get the l2 pde to update */
+                pde = pmap_l1_to_l2(pde, va);
+
+                l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
+                    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+                    VM_ALLOC_ZERO);
+                if (l3_m == NULL)
+                    panic("pmap_enter: l3 pte_m == NULL");
+                if ((l3_m->flags & PG_ZERO) == 0)
+                    pmap_zero_page(l3_m);
+
+                l3_pa = VM_PAGE_TO_PHYS(l3_m);
+                pmap_load_store(pde, l3_pa | L2_TABLE);
+                PTE_SYNC(pde);
+                break;
             }
-
-            KASSERT(l2 != NULL,
-                ("No l2 table after allocating one"));
-
-            l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
-                VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
-            if (l3_m == NULL)
-                panic("pmap_enter: l3 pte_m == NULL");
-            if ((l3_m->flags & PG_ZERO) == 0)
-                pmap_zero_page(l3_m);
-
-            l3_pa = VM_PAGE_TO_PHYS(l3_m);
-            pmap_load_store(l2, l3_pa | L2_TABLE);
-            PTE_SYNC(l2);
-            l3 = pmap_l2_to_l3(l2, va);
         }
+        l3 = pmap_l2_to_l3(pde, va);
         pmap_invalidate_page(pmap, va);
     }
@@ -2207,9 +2470,10 @@
     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 {
     struct spglist free;
-    pd_entry_t *l2;
+    pd_entry_t *pde;
     pt_entry_t *l3;
     vm_paddr_t pa;
+    int lvl;

     KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
         (m->oflags & VPO_UNMANAGED) != 0,
@@ -2235,7 +2499,7 @@
         /*
          * Get the l2 entry
          */
-        l2 = pmap_l2(pmap, va);
+        pde = pmap_pde(pmap, va, &lvl);

         /*
          * If the page table page is mapped, we just increment
@@ -2243,9 +2507,9 @@
          * attempt to allocate a page table page.  If this
          * attempt fails, we don't retry.  Instead, we give up.
          */
-        if (l2 != NULL && pmap_load(l2) != 0) {
+        if (lvl == 2 && pmap_load(pde) != 0) {
             mpte =
-                PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
+                PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
             mpte->wire_count++;
         } else {
             /*
@@ -2261,10 +2525,15 @@
         l3 = &l3[pmap_l3_index(va)];
     } else {
         mpte = NULL;
-        l3 = pmap_l3(kernel_pmap, va);
+        pde = pmap_pde(kernel_pmap, va, &lvl);
+        KASSERT(pde != NULL,
+            ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
+            va));
+        KASSERT(lvl == 2,
+            ("pmap_enter_quick_locked: Invalid level %d", lvl));
+        l3 = pmap_l2_to_l3(pde, va);
     }
-    if (l3 == NULL)
-        panic("pmap_enter_quick_locked: No l3");
+
     if (pmap_load(l3) != 0) {
         if (mpte != NULL) {
             mpte->wire_count--;
@@ -2336,14 +2605,22 @@
 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
     vm_offset_t va_next;
-    pd_entry_t *l1, *l2;
+    pd_entry_t *l0, *l1, *l2;
     pt_entry_t *l3;
     boolean_t pv_lists_locked;

     pv_lists_locked = FALSE;
     PMAP_LOCK(pmap);
     for (; sva < eva; sva = va_next) {
-        l1 = pmap_l1(pmap, sva);
+        l0 = pmap_l0(pmap, sva);
+        if (pmap_load(l0) == 0) {
+            va_next = (sva + L0_SIZE) & ~L0_OFFSET;
+            if (va_next < sva)
+                va_next = eva;
+            continue;
+        }
+
+        l1 = pmap_l0_to_l1(l0, sva);
         if (pmap_load(l1) == 0) {
             va_next = (sva + L1_SIZE) & ~L1_OFFSET;
             if (va_next < sva)
@@ -2551,9 +2828,9 @@
 {
     struct rwlock *lock;
     pmap_t pmap;
-    pt_entry_t *l3;
+    pt_entry_t *pte;
     pv_entry_t pv;
-    int count, md_gen;
+    int count, lvl, md_gen;

     if ((m->oflags & VPO_UNMANAGED) != 0)
         return (0);
@@ -2574,8 +2851,8 @@
                 goto restart;
             }
         }
-        l3 = pmap_l3(pmap, pv->pv_va);
-        if (l3 != NULL && (pmap_load(l3) & ATTR_SW_WIRED) != 0)
+        pte = pmap_pte(pmap, pv->pv_va, &lvl);
+        if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
             count++;
         PMAP_UNLOCK(pmap);
     }
@@ -2603,8 +2880,8 @@
 void
 pmap_remove_pages(pmap_t pmap)
 {
-    pd_entry_t ptepde, *l2;
-    pt_entry_t *l3, tl3;
+    pd_entry_t *pde;
+    pt_entry_t *pte, tpte;
     struct spglist free;
     vm_page_t m;
     pv_entry_t pv;
@@ -2612,7 +2889,7 @@
     struct rwlock *lock;
     int64_t bit;
     uint64_t inuse, bitmask;
-    int allfree, field, freed, idx;
+    int allfree, field, freed, idx, lvl;
     vm_paddr_t pa;

     lock = NULL;
@@ -2632,44 +2909,51 @@
                 pv = &pc->pc_pventry[idx];
                 inuse &= ~bitmask;

-                l2 = pmap_l2(pmap, pv->pv_va);
-                ptepde = pmap_load(l2);
-                l3 = pmap_l2_to_l3(l2, pv->pv_va);
-                tl3 = pmap_load(l3);
+                pde = pmap_pde(pmap, pv->pv_va, &lvl);
+                KASSERT(pde != NULL,
+                    ("Attempting to remove an unmapped page"));
+                KASSERT(lvl == 2,
+                    ("Invalid page directory level: %d", lvl));
+
+                pte = pmap_l2_to_l3(pde, pv->pv_va);
+                KASSERT(pte != NULL,
+                    ("Attempting to remove an unmapped page"));
+
+                tpte = pmap_load(pte);

 /*
  * We cannot remove wired pages from a process' mapping at this time
 */
-                if (tl3 & ATTR_SW_WIRED) {
+                if (tpte & ATTR_SW_WIRED) {
                     allfree = 0;
                     continue;
                 }

-                pa = tl3 & ~ATTR_MASK;
+                pa = tpte & ~ATTR_MASK;

                 m = PHYS_TO_VM_PAGE(pa);
                 KASSERT(m->phys_addr == pa,
                     ("vm_page_t %p phys_addr mismatch %016jx %016jx",
                     m, (uintmax_t)m->phys_addr,
-                    (uintmax_t)tl3));
+                    (uintmax_t)tpte));

                 KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
                     m < &vm_page_array[vm_page_array_size],
-                    ("pmap_remove_pages: bad l3 %#jx",
-                    (uintmax_t)tl3));
+                    ("pmap_remove_pages: bad pte %#jx",
+                    (uintmax_t)tpte));

+                /* XXX: assumes tpte is level 3 */
                 if (pmap_is_current(pmap) &&
-                    pmap_l3_valid_cacheable(pmap_load(l3)))
+                    pmap_l3_valid_cacheable(tpte))
                     cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
-                pmap_load_clear(l3);
-                PTE_SYNC(l3);
+                pmap_load_clear(pte);
+                PTE_SYNC(pte);
                 pmap_invalidate_page(pmap, pv->pv_va);

                 /*
                  * Update the vm_page_t clean/reference bits.
                  */
-                if ((tl3 & ATTR_AP_RW_BIT) ==
-                    ATTR_AP(ATTR_AP_RW))
+                if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
                     vm_page_dirty(m);

                 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
@@ -2681,7 +2965,8 @@
                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
                 m->md.pv_gen++;

-                pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free);
+                pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde),
+                    &free);
                 freed++;
             }
         }
@@ -2711,9 +2996,9 @@
 {
     struct rwlock *lock;
     pv_entry_t pv;
-    pt_entry_t *l3, mask, value;
+    pt_entry_t *pte, mask, value;
     pmap_t pmap;
-    int md_gen;
+    int lvl, md_gen;
     boolean_t rv;

     rv = FALSE;
@@ -2733,7 +3018,9 @@
                 goto restart;
             }
         }
-        l3 = pmap_l3(pmap, pv->pv_va);
+        pte = pmap_pte(pmap, pv->pv_va, &lvl);
+        KASSERT(lvl == 3,
+            ("pmap_page_test_mappings: Invalid level %d", lvl));
         mask = 0;
         value = 0;
         if (modified) {
@@ -2744,7 +3031,7 @@
             mask |= ATTR_AF | ATTR_DESCR_MASK;
             value |= ATTR_AF | L3_PAGE;
         }
-        rv = (pmap_load(l3) & mask) == value;
+        rv = (pmap_load(pte) & mask) == value;
         PMAP_UNLOCK(pmap);
         if (rv)
             goto out;
@@ -2788,13 +3075,14 @@
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
-    pt_entry_t *l3;
+    pt_entry_t *pte;
     boolean_t rv;
+    int lvl;

     rv = FALSE;
     PMAP_LOCK(pmap);
-    l3 = pmap_l3(pmap, addr);
-    if (l3 != NULL && pmap_load(l3) != 0) {
+    pte = pmap_pte(pmap, addr, &lvl);
+    if (pte != NULL && pmap_load(pte) != 0) {
         rv = TRUE;
     }
     PMAP_UNLOCK(pmap);
@@ -2825,8 +3113,8 @@
     pmap_t pmap;
     struct rwlock *lock;
     pv_entry_t pv;
-    pt_entry_t *l3, oldl3;
-    int md_gen;
+    pt_entry_t oldpte, *pte;
+    int lvl, md_gen;

     KASSERT((m->oflags & VPO_UNMANAGED) == 0,
         ("pmap_remove_write: page %p is not managed", m));
@@ -2856,14 +3144,14 @@
                 goto retry_pv_loop;
             }
         }
-        l3 = pmap_l3(pmap, pv->pv_va);
+        pte = pmap_pte(pmap, pv->pv_va, &lvl);
 retry:
-        oldl3 = pmap_load(l3);
-        if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
-            if (!atomic_cmpset_long(l3, oldl3,
-                oldl3 | ATTR_AP(ATTR_AP_RO)))
+        oldpte = pmap_load(pte);
+        if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
+            if (!atomic_cmpset_long(pte, oldpte,
+                oldpte | ATTR_AP(ATTR_AP_RO)))
                 goto retry;
-            if ((oldl3 & ATTR_AF) != 0)
+            if ((oldpte & ATTR_AF) != 0)
                 vm_page_dirty(m);
             pmap_invalidate_page(pmap, pv->pv_va);
         }
@@ -2901,10 +3189,10 @@
     pv_entry_t pv, pvf;
     pmap_t pmap;
     struct rwlock *lock;
-    pd_entry_t *l2p, l2;
-    pt_entry_t *l3;
+    pd_entry_t *pde, tpde;
+    pt_entry_t *pte, tpte;
     vm_paddr_t pa;
-    int cleared, md_gen, not_cleared;
+    int cleared, md_gen, not_cleared, lvl;
     struct spglist free;

     KASSERT((m->oflags & VPO_UNMANAGED) == 0,
@@ -2934,28 +3222,31 @@
                 goto retry;
             }
         }
-        l2p = pmap_l2(pmap, pv->pv_va);
-        KASSERT(l2p != NULL, ("pmap_ts_referenced: no l2 table found"));
-        l2 = pmap_load(l2p);
-        KASSERT((l2 & ATTR_DESCR_MASK) == L2_TABLE,
+        pde = pmap_pde(pmap, pv->pv_va, &lvl);
+        KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
+        KASSERT(lvl == 2,
+            ("pmap_ts_referenced: invalid pde level %d", lvl));
+        tpde = pmap_load(pde);
+        KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
             ("pmap_ts_referenced: found an invalid l2 table"));
-        l3 = pmap_l2_to_l3(l2p, pv->pv_va);
-        if ((pmap_load(l3) & ATTR_AF) != 0) {
-            if (safe_to_clear_referenced(pmap, pmap_load(l3))) {
+        pte = pmap_l2_to_l3(pde, pv->pv_va);
+        tpte = pmap_load(pte);
+        if ((tpte & ATTR_AF) != 0) {
+            if (safe_to_clear_referenced(pmap, tpte)) {
                 /*
                  * TODO: We don't handle the access flag
                  * at all. We need to be able to set it in
                  * the exception handler.
                  */
                 panic("ARM64TODO: safe_to_clear_referenced\n");
-            } else if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) {
+            } else if ((tpte & ATTR_SW_WIRED) == 0) {
                 /*
                  * Wired pages cannot be paged out so
                  * doing accessed bit emulation for
                  * them is wasted effort. We do the
                  * hard work for unwired pages only.
                  */
-                pmap_remove_l3(pmap, l3, pv->pv_va, l2,
+                pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
                     &free, &lock);
                 pmap_invalidate_page(pmap, pv->pv_va);
                 cleared++;
@@ -3145,7 +3436,7 @@

     critical_enter();
     pmap = vmspace_pmap(td->td_proc->p_vmspace);
-    td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1);
+    td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l0);
     __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr));
     pmap_invalidate_all(pmap);
     critical_exit();

Index: sys/arm64/arm64/vm_machdep.c
===================================================================
--- sys/arm64/arm64/vm_machdep.c
+++ sys/arm64/arm64/vm_machdep.c
@@ -85,7 +85,7 @@
     bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));

     td2->td_pcb->pcb_l1addr =
-        vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1);
+        vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l0);

     tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1);
     bcopy(td1->td_frame, tf, sizeof(*tf));

Index: sys/arm64/include/machdep.h
===================================================================
--- sys/arm64/include/machdep.h
+++ sys/arm64/include/machdep.h
@@ -34,6 +34,7 @@
     vm_offset_t kern_l1pt;  /* L1 page table for the kernel */
     uint64_t kern_delta;
     vm_offset_t kern_stack;
+    vm_offset_t kern_l0pt;  /* L0 page table for the kernel */
 };

 extern vm_paddr_t physmap[];

Index: sys/arm64/include/pmap.h
===================================================================
--- sys/arm64/include/pmap.h
+++ sys/arm64/include/pmap.h
@@ -78,7 +78,7 @@
 struct pmap {
     struct mtx              pm_mtx;
     struct pmap_statistics  pm_stats;       /* pmap statictics */
-    pd_entry_t              *pm_l1;
+    pd_entry_t              *pm_l0;
     TAILQ_HEAD(,pv_chunk)   pm_pvchunk;     /* list of mappings in pmap */
 };

@@ -134,7 +134,7 @@
 #define L1_MAPPABLE_P(va, pa, size)                                     \
     ((((va) | (pa)) & L1_OFFSET) == 0 && (size) >= L1_SIZE)

-void    pmap_bootstrap(vm_offset_t, vm_paddr_t, vm_size_t);
+void    pmap_bootstrap(vm_offset_t, vm_offset_t, vm_paddr_t, vm_size_t);
 void    pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t);
 vm_paddr_t pmap_kextract(vm_offset_t va);
 void    pmap_kremove(vm_offset_t);
@@ -149,7 +149,7 @@
 void    pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);

 bool    pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t **, pd_entry_t **,
-    pt_entry_t **);
+    pd_entry_t **, pt_entry_t **);

 #define pmap_page_is_mapped(m)  (!TAILQ_EMPTY(&(m)->md.pv_list))

Index: sys/arm64/include/pte.h
===================================================================
--- sys/arm64/include/pte.h
+++ sys/arm64/include/pte.h
@@ -73,6 +73,8 @@

 /* Level 0 table, 512GiB per entry */
 #define L0_SHIFT    39
+#define L0_SIZE     (1ul << L0_SHIFT)
+#define L0_OFFSET   (L0_SIZE - 1ul)
 #define L0_INVAL    0x0 /* An invalid address */
     /* 0x1 Level 0 doesn't support block translation */
     /* 0x2 also marks an invalid address */
@@ -106,6 +108,9 @@
     /* 0x2 also marks an invalid address */
 #define L3_PAGE     0x3

+#define L0_ENTRIES  (1 << 9)
+#define L0_ADDR_MASK    (L0_ENTRIES - 1)
+
 #define Ln_ENTRIES  (1 << 9)
 #define Ln_ADDR_MASK    (Ln_ENTRIES - 1)
 #define Ln_TABLE_MASK   ((1 << 12) - 1)

Index: sys/arm64/include/vmparam.h
===================================================================
--- sys/arm64/include/vmparam.h
+++ sys/arm64/include/vmparam.h
@@ -188,7 +188,7 @@
 })

 #define VM_MIN_USER_ADDRESS (0x0000000000000000UL)
-#define VM_MAX_USER_ADDRESS (0x0000008000000000UL)
+#define VM_MAX_USER_ADDRESS (0x0001000000000000UL)

 #define VM_MINUSER_ADDRESS  (VM_MIN_USER_ADDRESS)
 #define VM_MAXUSER_ADDRESS  (VM_MAX_USER_ADDRESS)
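With VIRT_BITS raised to 48 and VM_MAX_USER_ADDRESS moved up to 1 << 48, a user virtual address is now resolved through all four levels: nine index bits per level plus a 12-bit page offset. A self-contained sketch of that decomposition (a standalone program for illustration, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Shifts as defined in sys/arm64/include/pte.h; 512 entries per level. */
    #define L0_SHIFT 39
    #define L1_SHIFT 30
    #define L2_SHIFT 21
    #define L3_SHIFT 12
    #define Ln_MASK  0x1ffULL

    int
    main(void)
    {
        uint64_t va = 0x0000ab12cd345678ULL;    /* below the new VM_MAX_USER_ADDRESS */

        printf("l0 %llu l1 %llu l2 %llu l3 %llu offset 0x%llx\n",
            (unsigned long long)((va >> L0_SHIFT) & Ln_MASK),
            (unsigned long long)((va >> L1_SHIFT) & Ln_MASK),
            (unsigned long long)((va >> L2_SHIFT) & Ln_MASK),
            (unsigned long long)((va >> L3_SHIFT) & Ln_MASK),
            (unsigned long long)(va & 0xfff));
        return (0);
    }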