Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -177,6 +177,8 @@
 #define PV_STAT(x)	do { } while (0)
 #endif
 
+#define	pmap_l0_pindex(v)	(NUL2E + NUL1E + ((v) >> L0_SHIFT))
+#define	pmap_l1_pindex(v)	(NUL2E + ((v) >> L1_SHIFT))
 #define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
 #define	pa_to_pvh(pa)		(&pv_table[pmap_l2_pindex(pa)])
 
@@ -1047,6 +1049,9 @@
 	KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
 	    ("pmap_init: can't assign to pagesizes[1]"));
 	pagesizes[1] = L2_SIZE;
+	KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
+	    ("pmap_init: can't assign to pagesizes[2]"));
+	pagesizes[2] = L1_SIZE;
 }
 
 /*
@@ -1267,7 +1272,7 @@
 			use = true;
 
 		if (use) {
-			switch(lvl) {
+			switch (lvl) {
 			case 1:
 				off = va & L1_OFFSET;
 				break;
@@ -1279,7 +1284,7 @@
 				off = 0;
 			}
 			m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
-			if (!vm_page_wire_mapped(m))
+			if (m != NULL && !vm_page_wire_mapped(m))
 				m = NULL;
 		}
 	}
@@ -2874,6 +2879,7 @@
 	pd_entry_t *l0, *l1, *l2;
 	pt_entry_t l3_paddr;
 	struct spglist free;
+	vm_page_t mt;
 
 	/*
 	 * Perform an unsynchronized read.  This is, however, safe.
@@ -2898,11 +2904,25 @@
 			continue;
 		}
 
+		va_next = (sva + L1_SIZE) & ~L1_OFFSET;
+		if (va_next < sva)
+			va_next = eva;
 		l1 = pmap_l0_to_l1(l0, sva);
-		if (pmap_load(l1) == 0) {
-			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
-			if (va_next < sva)
-				va_next = eva;
+		if (pmap_load(l1) == 0)
+			continue;
+		if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+			KASSERT(va_next <= eva,
+			    ("partial update of non-transparent 1G page "
+			    "l1 %#lx sva %#lx eva %#lx va_next %#lx",
+			    pmap_load(l1), sva, eva, va_next));
+			MPASS(pmap != kernel_pmap);
+			MPASS((pmap_load(l1) & ATTR_SW_MANAGED) == 0);
+			pmap_clear(l1);
+			pmap_invalidate_page(pmap, sva);
+			pmap_resident_count_dec(pmap, L1_SIZE / PAGE_SIZE);
+			mt = PHYS_TO_VM_PAGE(pmap_load(pmap_l0(pmap, sva)) &
+			    ~ATTR_MASK);
+			pmap_unwire_l3(pmap, sva, mt, &free);
 			continue;
 		}
 
@@ -3148,11 +3168,22 @@
 			continue;
 		}
 
+		va_next = (sva + L1_SIZE) & ~L1_OFFSET;
+		if (va_next < sva)
+			va_next = eva;
 		l1 = pmap_l0_to_l1(l0, sva);
-		if (pmap_load(l1) == 0) {
-			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
-			if (va_next < sva)
-				va_next = eva;
+		if (pmap_load(l1) == 0)
+			continue;
+		if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+			KASSERT(va_next <= eva,
+			    ("partial update of non-transparent 1G page "
+			    "l1 %#lx sva %#lx eva %#lx va_next %#lx",
+			    pmap_load(l1), sva, eva, va_next));
+			MPASS((pmap_load(l1) & ATTR_SW_MANAGED) == 0);
+			if ((pmap_load(l1) & mask) != nbits) {
+				pmap_store(l1, (pmap_load(l1) & ~mask) | nbits);
+				pmap_invalidate_page(pmap, sva);
+			}
 			continue;
 		}
 
@@ -3416,6 +3447,116 @@
 }
 #endif /* VM_NRESERVLEVEL > 0 */
 
+/*
+ * Enter the unmanaged, non-transparent large page mapping "newpte" at "va":
+ * an L1 block (1GB) when psind is 2, an L2 block (2MB) when psind is 1.  A
+ * missing page table page is allocated; if that fails, sleep and retry unless
+ * PMAP_ENTER_NOSLEEP is set, in which case KERN_RESOURCE_SHORTAGE is returned.
+ * The pmap lock must be held by the caller.
+ */
+static int
+pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags,
+    int psind)
+{
+	pd_entry_t *l0p, *l1p, *l2p, origpte;
+	vm_page_t mp;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	KASSERT(psind > 0 && psind < MAXPAGESIZES,
+	    ("psind %d unexpected", psind));
+	KASSERT(((newpte & ~ATTR_MASK) & (pagesizes[psind] - 1)) == 0,
+	    ("unaligned phys address %#lx newpte %#lx psind %d",
+	    (newpte & ~ATTR_MASK), newpte, psind));
+
+restart:
+	if (psind == 2) {
+		l0p = pmap_l0(pmap, va);
+		if ((pmap_load(l0p) & ATTR_DESCR_VALID) == 0) {
+			mp = _pmap_alloc_l3(pmap, pmap_l0_pindex(va), NULL);
+			if (mp == NULL) {
+				if ((flags & PMAP_ENTER_NOSLEEP) != 0)
+					return (KERN_RESOURCE_SHORTAGE);
+				PMAP_UNLOCK(pmap);
+				vm_wait(NULL);
+				PMAP_LOCK(pmap);
+				goto restart;
+			}
+			l1p = pmap_l0_to_l1(l0p, va);
+			KASSERT(l1p != NULL, ("va %#lx lost l1 entry", va));
+			origpte = pmap_load(l1p);
+		} else {
+			l1p = pmap_l0_to_l1(l0p, va);
+			KASSERT(l1p != NULL, ("va %#lx lost l1 entry", va));
+			origpte = pmap_load(l1p);
+			if ((origpte & ATTR_DESCR_VALID) == 0) {
+				/* Reference the existing page table page. */
+				mp = PHYS_TO_VM_PAGE(pmap_load(l0p) &
+				    ~ATTR_MASK);
+				mp->ref_count++;
+			}
+		}
+		KASSERT((origpte & ATTR_DESCR_VALID) == 0 ||
+		    ((origpte & ATTR_DESCR_MASK) == L1_BLOCK &&
+		    (origpte & ~ATTR_MASK) == (newpte & ~ATTR_MASK)),
+		    ("va %#lx changing 1G phys page l1 %#lx newpte %#lx",
+		    va, origpte, newpte));
+		pmap_store(l1p, newpte);
+	} else /* (psind == 1) */ {
+		/* pmap_l1() returns NULL when va has no L1 page table page. */
+		l1p = pmap_l1(pmap, va);
+		if (l1p == NULL ||
+		    (pmap_load(l1p) & ATTR_DESCR_VALID) == 0) {
+			mp = _pmap_alloc_l3(pmap, pmap_l1_pindex(va), NULL);
+			if (mp == NULL) {
+				if ((flags & PMAP_ENTER_NOSLEEP) != 0)
+					return (KERN_RESOURCE_SHORTAGE);
+				PMAP_UNLOCK(pmap);
+				vm_wait(NULL);
+				PMAP_LOCK(pmap);
+				goto restart;
+			}
+			/* Look l1p up again; it may have been NULL above. */
+			l1p = pmap_l1(pmap, va);
+			KASSERT(l1p != NULL, ("va %#lx lost l1 entry", va));
+			l2p = pmap_l1_to_l2(l1p, va);
+			KASSERT(l2p != NULL, ("va %#lx lost l2 entry", va));
+			origpte = pmap_load(l2p);
+		} else {
+			l2p = pmap_l1_to_l2(l1p, va);
+			KASSERT(l2p != NULL, ("va %#lx lost l2 entry", va));
+			origpte = pmap_load(l2p);
+			if ((origpte & ATTR_DESCR_VALID) == 0) {
+				/* Reference the existing page table page. */
+				mp = PHYS_TO_VM_PAGE(pmap_load(l1p) &
+				    ~ATTR_MASK);
+				mp->ref_count++;
+			}
+		}
+		KASSERT((origpte & ATTR_DESCR_VALID) == 0 ||
+		    ((origpte & ATTR_DESCR_MASK) == L2_BLOCK &&
+		    (origpte & ~ATTR_MASK) == (newpte & ~ATTR_MASK)),
+		    ("va %#lx changing 2M phys page l2 %#lx newpte %#lx",
+		    va, origpte, newpte));
+		pmap_store(l2p, newpte);
+	}
+	/* Complete the page table store before returning. */
+	dsb(ishst);
+
+	/*
+	 * Do the accounting once for both sizes.  A fresh mapping is counted
+	 * as resident whether or not its page table page was just allocated.
+	 */
+	if ((origpte & ATTR_DESCR_VALID) == 0)
+		pmap_resident_count_inc(pmap, pagesizes[psind] / PAGE_SIZE);
+	if ((newpte & ATTR_SW_WIRED) != 0 && (origpte & ATTR_SW_WIRED) == 0)
+		pmap->pm_stats.wired_count += pagesizes[psind] / PAGE_SIZE;
+	else if ((newpte & ATTR_SW_WIRED) == 0 &&
+	    (origpte & ATTR_SW_WIRED) != 0)
+		pmap->pm_stats.wired_count -= pagesizes[psind] / PAGE_SIZE;
+
+	return (KERN_SUCCESS);
+}
+
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
@@ -3491,6 +3632,17 @@
 
 	lock = NULL;
 	PMAP_LOCK(pmap);
+	if ((flags & PMAP_ENTER_LARGEPAGE) != 0) {
+		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
+		    ("managed largepage va %#lx flags %#x", va, flags));
+		new_l3 &= ~L3_PAGE;
+		if (psind == 2)
+			new_l3 |= L1_BLOCK;
+		else /* (psind == 1) */
+			new_l3 |= L2_BLOCK;
+		rv = pmap_enter_largepage(pmap, va, new_l3, flags, psind);
+		goto out;
+	}
 	if (psind == 1) {
 		/* Assert the required virtual and physical alignment. */
 		KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va unaligned"));
@@ -4140,10 +4292,21 @@
 		}
 
 		l1 = pmap_l0_to_l1(l0, sva);
-		if (pmap_load(l1) == 0) {
-			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
-			if (va_next < sva)
-				va_next = eva;
+		va_next = (sva + L1_SIZE) & ~L1_OFFSET;
+		if (va_next < sva)
+			va_next = eva;
+		if (pmap_load(l1) == 0)
+			continue;
+
+		if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+			KASSERT(va_next <= eva,
+			    ("partial update of non-transparent 1G page "
+			    "l1 %#lx sva %#lx eva %#lx va_next %#lx",
+			    pmap_load(l1), sva, eva, va_next));
+			MPASS(pmap != kernel_pmap);
+			MPASS((pmap_load(l1) & ATTR_SW_MANAGED) == 0);
+			pmap_clear_bits(l1, ATTR_SW_WIRED);
+			pmap->pm_stats.wired_count -= L1_SIZE / PAGE_SIZE;
 			continue;
 		}
 
@@ -4215,7 +4378,7 @@
 	pd_entry_t *l0, *l1, *l2, srcptepaddr;
 	pt_entry_t *dst_pte, mask, nbits, ptetemp, *src_pte;
 	vm_offset_t addr, end_addr, va_next;
-	vm_page_t dst_l2pg, dstmpte, srcmpte;
+	vm_page_t dst_m, dstmpte, srcmpte;
 
 	PMAP_ASSERT_STAGE1(dst_pmap);
 	PMAP_ASSERT_STAGE1(src_pmap);
@@ -4239,13 +4402,40 @@
 				va_next = end_addr;
 			continue;
 		}
+
+		va_next = (addr + L1_SIZE) & ~L1_OFFSET;
+		if (va_next < addr)
+			va_next = end_addr;
 		l1 = pmap_l0_to_l1(l0, addr);
-		if (pmap_load(l1) == 0) {
-			va_next = (addr + L1_SIZE) & ~L1_OFFSET;
-			if (va_next < addr)
-				va_next = end_addr;
+		if (pmap_load(l1) == 0)
+			continue;
+		if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+			KASSERT(va_next <= end_addr,
+			    ("partial update of non-transparent 1G page "
+			    "l1 %#lx addr %#lx end_addr %#lx va_next %#lx",
+			    pmap_load(l1), addr, end_addr, va_next));
+			srcptepaddr = pmap_load(l1);
+			l1 = pmap_l1(dst_pmap, addr);
+			if (l1 == NULL) {
+				if (_pmap_alloc_l3(dst_pmap,
+				    pmap_l0_pindex(addr), NULL) == NULL)
+					break;
+				l1 = pmap_l1(dst_pmap, addr);
+			} else {
+				l0 = pmap_l0(dst_pmap, addr);
+				dst_m = PHYS_TO_VM_PAGE(pmap_load(l0) &
+				    ~ATTR_MASK);
+				dst_m->ref_count++;
+			}
+			KASSERT(pmap_load(l1) == 0,
+			    ("1G mapping present in dst pmap "
+			    "l1 %#lx addr %#lx end_addr %#lx va_next %#lx",
+			    pmap_load(l1), addr, end_addr, va_next));
+			pmap_store(l1, srcptepaddr & ~ATTR_SW_WIRED);
+			pmap_resident_count_inc(dst_pmap, L1_SIZE / PAGE_SIZE);
 			continue;
 		}
+
 		va_next = (addr + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < addr)
 			va_next = end_addr;
@@ -4257,7 +4447,7 @@
 			if ((addr & L2_OFFSET) != 0 ||
 			    addr + L2_SIZE > end_addr)
 				continue;
-			l2 = pmap_alloc_l2(dst_pmap, addr, &dst_l2pg, NULL);
+			l2 = pmap_alloc_l2(dst_pmap, addr, &dst_m, NULL);
 			if (l2 == NULL)
 				break;
 			if (pmap_load(l2) == 0 &&
@@ -4273,7 +4463,7 @@
 				    PAGE_SIZE);
 				atomic_add_long(&pmap_l2_mappings, 1);
 			} else
-				pmap_abort_ptp(dst_pmap, addr, dst_l2pg);
+				pmap_abort_ptp(dst_pmap, addr, dst_m);
 			continue;
 		}
 		KASSERT((srcptepaddr & ATTR_DESCR_MASK) == L2_TABLE,
@@ -5180,13 +5370,21 @@
 				va_next = eva;
 			continue;
 		}
+
+		va_next = (sva + L1_SIZE) & ~L1_OFFSET;
+		if (va_next < sva)
+			va_next = eva;
 		l1 = pmap_l0_to_l1(l0, sva);
-		if (pmap_load(l1) == 0) {
-			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
-			if (va_next < sva)
-				va_next = eva;
+		if (pmap_load(l1) == 0)
+			continue;
+		if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
+			KASSERT(va_next <= eva,
+			    ("partial update of non-transparent 1G page "
+			    "l1 %#lx sva %#lx eva %#lx va_next %#lx",
+			    pmap_load(l1), sva, eva, va_next));
 			continue;
 		}
+
 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < sva)
 			va_next = eva;
Index: sys/arm64/include/param.h
===================================================================
--- sys/arm64/include/param.h
+++ sys/arm64/include/param.h
@@ -95,7 +95,7 @@
 #define	PAGE_SIZE_64K	(1 << PAGE_SHIFT_64K)
 #define	PAGE_MASK_64K	(PAGE_SIZE_64K - 1)
 
-#define	MAXPAGESIZES	2	/* maximum number of supported page sizes */
+#define	MAXPAGESIZES	3	/* maximum number of supported page sizes */
 
 #ifndef KSTACK_PAGES
 #define	KSTACK_PAGES	4	/* pages of kernel stack (with pcb) */