Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -1202,6 +1202,7 @@
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
 
+static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
 static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
     vm_prot_t prot, int mode, int flags);
 static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
@@ -1256,7 +1257,7 @@
 static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
 		struct rwlock **lockp);
-static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va,
+static pd_entry_t *pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
 		struct rwlock **lockp);
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
 		struct rwlock **lockp);
@@ -3599,6 +3600,27 @@
 	return (pmap_unwire_ptp(pmap, va, mpte, free));
 }
 
+/*
+ * Release a page table page reference after a failed attempt to create a
+ * mapping.
+ */
+static void
+pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
+{
+	struct spglist free;
+
+	SLIST_INIT(&free);
+	if (pmap_unwire_ptp(pmap, va, mpte, &free)) {
+		/*
+		 * Although "va" was never mapped, paging-structure caches
+		 * could nonetheless have entries that refer to the freed
+		 * page table pages.  Invalidate those entries.
+		 */
+		pmap_invalidate_page(pmap, va);
+		vm_page_free_pages_toq(&free, true);
+	}
+}
+
 void
 pmap_pinit0(pmap_t pmap)
 {
@@ -3904,30 +3926,44 @@
 	return (m);
 }
 
-static vm_page_t
-pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
+static pd_entry_t *
+pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
+    struct rwlock **lockp)
 {
-	vm_pindex_t pdpindex, ptepindex;
 	pdp_entry_t *pdpe, PG_V;
+	pd_entry_t *pde;
 	vm_page_t pdpg;
+	vm_pindex_t pdpindex;
 
 	PG_V = pmap_valid_bit(pmap);
retry:
 	pdpe = pmap_pdpe(pmap, va);
 	if (pdpe != NULL && (*pdpe & PG_V) != 0) {
-		/* Add a reference to the pd page. */
-		pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME);
-		pdpg->ref_count++;
-	} else {
+		pde = pmap_pdpe_to_pde(pdpe, va);
+		if (va < VM_MAXUSER_ADDRESS) {
+			/* Add a reference to the pd page. */
+			pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME);
+			pdpg->ref_count++;
+		} else
+			pdpg = NULL;
+	} else if (va < VM_MAXUSER_ADDRESS) {
 		/* Allocate a pd page. */
-		ptepindex = pmap_pde_pindex(va);
-		pdpindex = ptepindex >> NPDPEPGSHIFT;
+		pdpindex = pmap_pde_pindex(va) >> NPDPEPGSHIFT;
 		pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, lockp);
-		if (pdpg == NULL && lockp != NULL)
-			goto retry;
-	}
-	return (pdpg);
+		if (pdpg == NULL) {
+			if (lockp != NULL)
+				goto retry;
+			else
+				return (NULL);
+		}
+		pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
+		pde = &pde[pmap_pde_index(va)];
+	} else
+		panic("pmap_alloc_pde: missing page table page for va %#lx",
+		    va);
+	*pdpgp = pdpg;
+	return (pde);
 }
 
 static vm_page_t
@@ -6215,6 +6251,24 @@
 	    KERN_SUCCESS);
 }
 
+/*
+ * Returns true if every page table entry in the specified page table page is
+ * zero.
+ */
+static bool
+pmap_every_pte_zero(vm_paddr_t pa)
+{
+	pt_entry_t *pt_end, *pte;
+
+	KASSERT((pa & PAGE_MASK) == 0, ("pa is misaligned"));
+	pte = (pt_entry_t *)PHYS_TO_DMAP(pa);
+	for (pt_end = pte + NPTEPG; pte < pt_end; pte++) {
+		if (*pte != 0)
+			return (false);
+	}
+	return (true);
+}
+
 /*
  * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
  * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
@@ -6250,8 +6304,8 @@
 		    " in pmap %p", va, pmap);
 		return (KERN_FAILURE);
 	}
-	if ((pdpg = pmap_allocpde(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
-	    NULL : lockp)) == NULL) {
+	if ((pde = pmap_alloc_pde(pmap, va, &pdpg, (flags &
+	    PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return (KERN_RESOURCE_SHORTAGE);
@@ -6263,11 +6317,7 @@
 	 * it could sleep.
 	 */
 	if (!pmap_pkru_same(pmap, va, va + NBPDR)) {
-		SLIST_INIT(&free);
-		if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
-			pmap_invalidate_page(pmap, va);
-			vm_page_free_pages_toq(&free, true);
-		}
+		pmap_abort_ptp(pmap, va, pdpg);
 		return (KERN_FAILURE);
 	}
 	if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86) {
@@ -6275,14 +6325,18 @@
 		newpde |= pmap_pkru_get(pmap, va);
 	}
 
-	pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
-	pde = &pde[pmap_pde_index(va)];
+	/*
+	 * If there are existing mappings, either abort or remove them.
+	 */
 	oldpde = *pde;
 	if ((oldpde & PG_V) != 0) {
-		KASSERT(pdpg->ref_count > 1,
+		KASSERT(pdpg == NULL || pdpg->ref_count > 1,
 		    ("pmap_enter_pde: pdpg's reference count is too low"));
-		if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
-			pdpg->ref_count--;
+		if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (va <
+		    VM_MAXUSER_ADDRESS || (oldpde & PG_PS) != 0 ||
+		    pmap_every_pte_zero(oldpde & PG_FRAME))) {
+			if (pdpg != NULL)
+				pdpg->ref_count--;
 			CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return (KERN_FAILURE);
@@ -6292,7 +6346,7 @@
 		if ((oldpde & PG_PS) != 0) {
 			/*
 			 * The reference to the PD page that was acquired by
-			 * pmap_allocpde() ensures that it won't be freed.
+			 * pmap_alloc_pde() ensures that it won't be freed.
 			 * However, if the PDE resulted from a promotion, then
 			 * a reserved PT page could be freed.
 			 */
@@ -6306,8 +6360,14 @@
 				pmap_invalidate_all(pmap);
 			pmap_delayed_invl_finish();
 		}
-		vm_page_free_pages_toq(&free, true);
-		if (va >= VM_MAXUSER_ADDRESS) {
+		if (va < VM_MAXUSER_ADDRESS) {
+			vm_page_free_pages_toq(&free, true);
+			KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p",
+			    pde));
+		} else {
+			KASSERT(SLIST_EMPTY(&free),
+			    ("pmap_enter_pde: freed kernel page table page"));
+
 			/*
 			 * Both pmap_remove_pde() and pmap_remove_ptes() will
 			 * leave the kernel page table page zero filled.
@@ -6315,26 +6375,16 @@
 			mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
 			if (pmap_insert_pt_page(pmap, mt, false))
 				panic("pmap_enter_pde: trie insert failed");
-		} else
-			KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p",
-			    pde));
+		}
 	}
+
 	if ((newpde & PG_MANAGED) != 0) {
 		/*
 		 * Abort this mapping if its PV entry could not be created.
 		 */
 		if (!pmap_pv_insert_pde(pmap, va, newpde, flags, lockp)) {
-			SLIST_INIT(&free);
-			if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
-				/*
-				 * Although "va" is not mapped, paging-
-				 * structure caches could nonetheless have
-				 * entries that refer to the freed page table
-				 * pages.  Invalidate those entries.
-				 */
-				pmap_invalidate_page(pmap, va);
-				vm_page_free_pages_toq(&free, true);
-			}
+			if (pdpg != NULL)
+				pmap_abort_ptp(pmap, va, pdpg);
 			CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return (KERN_RESOURCE_SHORTAGE);
@@ -6359,8 +6409,8 @@
 	pde_store(pde, newpde);
 
 	atomic_add_long(&pmap_pde_mappings, 1);
-	CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
-	    " in pmap %p", va, pmap);
+	CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx in pmap %p",
+	    va, pmap);
 	return (KERN_SUCCESS);
 }
 
@@ -6435,7 +6485,6 @@
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 {
-	struct spglist free;
 	pt_entry_t newpte, *pte, PG_V;
 
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
@@ -6492,11 +6541,9 @@
 		pte = vtopte(va);
 	}
 	if (*pte) {
-		if (mpte != NULL) {
+		if (mpte != NULL)
 			mpte->ref_count--;
-			mpte = NULL;
-		}
-		return (mpte);
+		return (NULL);
 	}
 
 	/*
@@ -6504,21 +6551,9 @@
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
-		if (mpte != NULL) {
-			SLIST_INIT(&free);
-			if (pmap_unwire_ptp(pmap, va, mpte, &free)) {
-				/*
-				 * Although "va" is not mapped, paging-
-				 * structure caches could nonetheless have
-				 * entries that refer to the freed page table
-				 * pages.  Invalidate those entries.
-				 */
-				pmap_invalidate_page(pmap, va);
-				vm_page_free_pages_toq(&free, true);
-			}
-			mpte = NULL;
-		}
-		return (mpte);
+		if (mpte != NULL)
+			pmap_abort_ptp(pmap, va, mpte);
+		return (NULL);
 	}
 
 	/*
@@ -6618,8 +6653,8 @@
 		PMAP_LOCK(pmap);
 		for (pa = ptepa | pmap_cache_bits(pmap, pat_mode, 1);
 		    pa < ptepa + size; pa += NBPDR) {
-			pdpg = pmap_allocpde(pmap, addr, NULL);
-			if (pdpg == NULL) {
+			pde = pmap_alloc_pde(pmap, addr, &pdpg, NULL);
+			if (pde == NULL) {
 				/*
 				 * The creation of mappings below is only an
 				 * optimization.  If a page directory page
@@ -6630,8 +6665,6 @@
 				addr += NBPDR;
 				continue;
 			}
-			pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
-			pde = &pde[pmap_pde_index(addr)];
 			if ((*pde & PG_V) == 0) {
 				pde_store(pde, pa | PG_PS | PG_M | PG_A |
 				    PG_U | PG_RW | PG_V);
@@ -6745,7 +6778,6 @@
     vm_offset_t src_addr)
 {
 	struct rwlock *lock;
-	struct spglist free;
 	pml4_entry_t *pml4e;
 	pdp_entry_t *pdpe;
 	pd_entry_t *pde, srcptepaddr;
@@ -6816,12 +6848,9 @@
 		if (srcptepaddr & PG_PS) {
 			if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr)
 				continue;
-			dst_pdpg = pmap_allocpde(dst_pmap, addr, NULL);
-			if (dst_pdpg == NULL)
+			pde = pmap_alloc_pde(dst_pmap, addr, &dst_pdpg, NULL);
+			if (pde == NULL)
 				break;
-			pde = (pd_entry_t *)
-			    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dst_pdpg));
-			pde = &pde[pmap_pde_index(addr)];
 			if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 ||
 			    pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr,
 			    PMAP_ENTER_NORECLAIM, &lock))) {
@@ -6830,7 +6859,7 @@
 				    PAGE_SIZE);
 				atomic_add_long(&pmap_pde_mappings, 1);
 			} else
-				dst_pdpg->ref_count--;
+				pmap_abort_ptp(dst_pmap, addr, dst_pdpg);
 			continue;
 		}
 
@@ -6875,19 +6904,7 @@
 				*dst_pte = ptetemp & ~(PG_W | PG_M | PG_A);
 				pmap_resident_count_inc(dst_pmap, 1);
 			} else {
-				SLIST_INIT(&free);
-				if (pmap_unwire_ptp(dst_pmap, addr, dstmpte,
-				    &free)) {
-					/*
-					 * Although "addr" is not mapped,
-					 * paging-structure caches could
-					 * nonetheless have entries that refer
-					 * to the freed page table pages.
-					 * Invalidate those entries.
-					 */
-					pmap_invalidate_page(dst_pmap, addr);
-					vm_page_free_pages_toq(&free, true);
-				}
+				pmap_abort_ptp(dst_pmap, addr, dstmpte);
 				goto out;
 			}
 			/* Have we copied all of the valid mappings? */
Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -331,6 +331,7 @@
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
 
+static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
 static bool pmap_activate_int(pmap_t pmap);
 static void pmap_alloc_asid(pmap_t pmap);
 static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
@@ -1500,6 +1501,29 @@
 	return (pmap_unwire_l3(pmap, va, mpte, free));
 }
 
+/*
+ * Release a page table page reference after a failed attempt to create a
+ * mapping.
+ */
+static void
+pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
+{
+	struct spglist free;
+
+	SLIST_INIT(&free);
+	if (pmap_unwire_l3(pmap, va, mpte, &free)) {
+		/*
+		 * Although "va" was never mapped, the TLB could nonetheless
+		 * have intermediate entries that refer to the freed page
+		 * table pages.  Invalidate those entries.
+		 *
+		 * XXX redundant invalidation (See _pmap_unwire_l3().)
+		 */
+		pmap_invalidate_page(pmap, va);
+		vm_page_free_pages_toq(&free, true);
+	}
+}
+
 void
 pmap_pinit0(pmap_t pmap)
 {
@@ -1677,27 +1701,41 @@
 	return (m);
 }
 
-static vm_page_t
-pmap_alloc_l2(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
+static pd_entry_t *
+pmap_alloc_l2(pmap_t pmap, vm_offset_t va, vm_page_t *l2pgp,
+    struct rwlock **lockp)
 {
-	pd_entry_t *l1;
+	pd_entry_t *l1, *l2;
 	vm_page_t l2pg;
 	vm_pindex_t l2pindex;
 
retry:
 	l1 = pmap_l1(pmap, va);
 	if (l1 != NULL && (pmap_load(l1) & ATTR_DESCR_MASK) == L1_TABLE) {
-		/* Add a reference to the L2 page. */
-		l2pg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK);
-		l2pg->ref_count++;
-	} else {
+		l2 = pmap_l1_to_l2(l1, va);
+		if (va < VM_MAXUSER_ADDRESS) {
+			/* Add a reference to the L2 page. */
+			l2pg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK);
+			l2pg->ref_count++;
+		} else
+			l2pg = NULL;
+	} else if (va < VM_MAXUSER_ADDRESS) {
 		/* Allocate a L2 page. */
 		l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT;
 		l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp);
-		if (l2pg == NULL && lockp != NULL)
-			goto retry;
-	}
-	return (l2pg);
+		if (l2pg == NULL) {
+			if (lockp != NULL)
+				goto retry;
+			else
+				return (NULL);
+		}
+		l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg));
+		l2 = &l2[pmap_l2_index(va)];
+	} else
+		panic("pmap_alloc_l2: missing page table page for va %#lx",
+		    va);
+	*l2pgp = l2pg;
+	return (l2);
 }
 
 static vm_page_t
@@ -3550,6 +3588,24 @@
 	    KERN_SUCCESS);
 }
 
+/*
+ * Returns true if every page table entry in the specified page table is
+ * zero.
+ */
+static bool
+pmap_every_pte_zero(vm_paddr_t pa)
+{
+	pt_entry_t *pt_end, *pte;
+
+	KASSERT((pa & PAGE_MASK) == 0, ("pa is misaligned"));
+	pte = (pt_entry_t *)PHYS_TO_DMAP(pa);
+	for (pt_end = pte + Ln_ENTRIES; pte < pt_end; pte++) {
+		if (*pte != 0)
+			return (false);
+	}
+	return (true);
+}
+
 /*
  * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
  * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
@@ -3571,23 +3627,26 @@
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
-	if ((l2pg = pmap_alloc_l2(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
-	    NULL : lockp)) == NULL) {
+	if ((l2 = pmap_alloc_l2(pmap, va, &l2pg, (flags &
+	    PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p",
 		    va, pmap);
 		return (KERN_RESOURCE_SHORTAGE);
 	}
-	l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg));
-	l2 = &l2[pmap_l2_index(va)];
+
+	/*
+	 * If there are existing mappings, either abort or remove them.
+	 */
 	if ((old_l2 = pmap_load(l2)) != 0) {
-		KASSERT(l2pg->ref_count > 1,
+		KASSERT(l2pg == NULL || l2pg->ref_count > 1,
 		    ("pmap_enter_l2: l2pg's ref count is too low"));
-		if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
-			l2pg->ref_count--;
-			CTR2(KTR_PMAP,
-			    "pmap_enter_l2: failure for va %#lx in pmap %p",
-			    va, pmap);
+		if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (va <
+		    VM_MAXUSER_ADDRESS || (old_l2 & ATTR_DESCR_MASK) ==
+		    L2_BLOCK || pmap_every_pte_zero(old_l2 & ~ATTR_MASK))) {
+			if (l2pg != NULL)
+				l2pg->ref_count--;
+			CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx"
+			    " in pmap %p", va, pmap);
 			return (KERN_FAILURE);
 		}
 		SLIST_INIT(&free);
@@ -3597,8 +3656,14 @@
 		else
 			pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE,
 			    &free, lockp);
-		vm_page_free_pages_toq(&free, true);
-		if (va >= VM_MAXUSER_ADDRESS) {
+		if (va < VM_MAXUSER_ADDRESS) {
+			vm_page_free_pages_toq(&free, true);
+			KASSERT(pmap_load(l2) == 0,
+			    ("pmap_enter_l2: non-zero L2 entry %p", l2));
+		} else {
+			KASSERT(SLIST_EMPTY(&free),
+			    ("pmap_enter_l2: freed kernel page table page"));
+
 			/*
 			 * Both pmap_remove_l2() and pmap_remove_l3_range()
 			 * will leave the kernel page table page zero filled.
@@ -3610,9 +3675,7 @@
 				panic("pmap_enter_l2: trie insert failed");
 			pmap_clear(l2);
 			pmap_invalidate_page(pmap, va);
-		} else
-			KASSERT(pmap_load(l2) == 0,
-			    ("pmap_enter_l2: non-zero L2 entry %p", l2));
+		}
 	}
 
 	if ((new_l2 & ATTR_SW_MANAGED) != 0) {
@@ -3620,20 +3683,8 @@
 		 * Abort this mapping if its PV entry could not be created.
 		 */
 		if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) {
-			SLIST_INIT(&free);
-			if (pmap_unwire_l3(pmap, va, l2pg, &free)) {
-				/*
-				 * Although "va" is not mapped, the TLB could
-				 * nonetheless have intermediate entries that
-				 * refer to the freed page table pages.
-				 * Invalidate those entries.
-				 *
-				 * XXX redundant invalidation (See
-				 * _pmap_unwire_l3().)
-				 */
-				pmap_invalidate_page(pmap, va);
-				vm_page_free_pages_toq(&free, true);
-			}
+			if (l2pg != NULL)
+				pmap_abort_ptp(pmap, va, l2pg);
 			CTR2(KTR_PMAP,
 			    "pmap_enter_l2: failure for va %#lx in pmap %p",
 			    va, pmap);
@@ -3734,7 +3785,6 @@
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 {
-	struct spglist free;
 	pd_entry_t *pde;
 	pt_entry_t *l2, *l3, l3_val;
 	vm_paddr_t pa;
@@ -3808,11 +3858,9 @@
 	 * Abort if a mapping already exists.
 	 */
 	if (pmap_load(l3) != 0) {
-		if (mpte != NULL) {
+		if (mpte != NULL)
 			mpte->ref_count--;
-			mpte = NULL;
-		}
-		return (mpte);
+		return (NULL);
 	}
 
 	/*
@@ -3820,15 +3868,9 @@
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
-		if (mpte != NULL) {
-			SLIST_INIT(&free);
-			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
-				pmap_invalidate_page(pmap, va);
-				vm_page_free_pages_toq(&free, true);
-			}
-			mpte = NULL;
-		}
-		return (mpte);
+		if (mpte != NULL)
+			pmap_abort_ptp(pmap, va, mpte);
+		return (NULL);
 	}
 
 	/*
@@ -3982,7 +4024,6 @@
     vm_offset_t src_addr)
 {
 	struct rwlock *lock;
-	struct spglist free;
 	pd_entry_t *l0, *l1, *l2, srcptepaddr;
 	pt_entry_t *dst_pte, mask, nbits, ptetemp, *src_pte;
 	vm_offset_t addr, end_addr, va_next;
@@ -4025,12 +4066,9 @@
 			if ((addr & L2_OFFSET) != 0 ||
 			    addr + L2_SIZE > end_addr)
 				continue;
-			dst_l2pg = pmap_alloc_l2(dst_pmap, addr, NULL);
-			if (dst_l2pg == NULL)
+			l2 = pmap_alloc_l2(dst_pmap, addr, &dst_l2pg, NULL);
+			if (l2 == NULL)
 				break;
-			l2 = (pd_entry_t *)
-			    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dst_l2pg));
-			l2 = &l2[pmap_l2_index(addr)];
 			if (pmap_load(l2) == 0 &&
 			    ((srcptepaddr & ATTR_SW_MANAGED) == 0 ||
 			    pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr,
@@ -4044,7 +4082,7 @@
 				    PAGE_SIZE);
 				atomic_add_long(&pmap_l2_mappings, 1);
 			} else
-				dst_l2pg->ref_count--;
+				pmap_abort_ptp(dst_pmap, addr, dst_l2pg);
 			continue;
 		}
 		KASSERT((srcptepaddr & ATTR_DESCR_MASK) == L2_TABLE,
@@ -4091,21 +4129,7 @@
 				pmap_store(dst_pte, (ptetemp & ~mask) | nbits);
 				pmap_resident_count_inc(dst_pmap, 1);
 			} else {
-				SLIST_INIT(&free);
-				if (pmap_unwire_l3(dst_pmap, addr, dstmpte,
-				    &free)) {
-					/*
-					 * Although "addr" is not mapped,
-					 * the TLB could nonetheless have
-					 * intermediate entries that refer
-					 * to the freed page table pages.
-					 * Invalidate those entries.
-					 *
-					 * XXX redundant invalidation
-					 */
-					pmap_invalidate_page(dst_pmap, addr);
-					vm_page_free_pages_toq(&free, true);
-				}
+				pmap_abort_ptp(dst_pmap, addr, dstmpte);
 				goto out;
 			}
 			/* Have we copied all of the valid mappings? */
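
Note (not part of the patch above): the shape of the consolidated error handling can be seen in the
sketch below, condensed from the amd64 pmap_enter_pde() hunks.  The identifiers (pmap_alloc_pde(),
pmap_abort_ptp(), PMAP_ENTER_NOSLEEP, PG_MANAGED) are taken from the patch; the surrounding logic is
elided and simplified, so this is illustrative only, not a verbatim excerpt.

	/*
	 * pmap_alloc_pde() now returns a pointer to the PDE and, for user
	 * addresses only, the wired page table page via *pdpgp; for kernel
	 * addresses it sets pdpg to NULL, since kernel page table pages are
	 * never freed.
	 */
	pde = pmap_alloc_pde(pmap, va, &pdpg,
	    (flags & PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp);
	if (pde == NULL)
		return (KERN_RESOURCE_SHORTAGE);

	/* ... handling of existing mappings elided ... */

	if ((newpde & PG_MANAGED) != 0 &&
	    !pmap_pv_insert_pde(pmap, va, newpde, flags, lockp)) {
		/*
		 * On failure, drop the reference taken by pmap_alloc_pde().
		 * pmap_abort_ptp() frees the page table page if that was the
		 * last reference and invalidates any paging-structure cache
		 * entries that still refer to it.
		 */
		if (pdpg != NULL)
			pmap_abort_ptp(pmap, va, pdpg);
		return (KERN_RESOURCE_SHORTAGE);
	}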