diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -313,6 +313,33 @@
 	return (pmap->pm_type == PT_X86 ? X86_PG_PKU_MASK : 0);
 }
 
+static __inline boolean_t
+safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
+{
+
+	if (!pmap_emulate_ad_bits(pmap))
+		return (TRUE);
+
+	KASSERT(pmap->pm_type == PT_EPT, ("invalid pm_type %d", pmap->pm_type));
+
+	/*
+	 * XWR = 010 or 110 will cause an unconditional EPT misconfiguration
+	 * so we don't let the referenced (aka EPT_PG_READ) bit to be cleared
+	 * if the EPT_PG_WRITE bit is set.
+	 */
+	if ((pte & EPT_PG_WRITE) != 0)
+		return (FALSE);
+
+	/*
+	 * XWR = 100 is allowed only if the PMAP_SUPPORTS_EXEC_ONLY is set.
+	 */
+	if ((pte & EPT_PG_EXECUTE) == 0 ||
+	    ((pmap->pm_flags & PMAP_SUPPORTS_EXEC_ONLY) != 0))
+		return (TRUE);
+	else
+		return (FALSE);
+}
+
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
@@ -1279,7 +1306,8 @@
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
-static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+    bool allpte_PG_A_set);
 static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
     vm_offset_t eva);
 static void pmap_invalidate_cache_range_all(vm_offset_t sva,
@@ -2491,7 +2519,7 @@
 		 */
 		if ((i == 0 ||
 		    kernphys + ((vm_paddr_t)(i - 1) << PDRSHIFT) < KERNend) &&
-		    pmap_insert_pt_page(kernel_pmap, mpte, false))
+		    pmap_insert_pt_page(kernel_pmap, mpte, false, false))
 			panic("pmap_init: pmap_insert_pt_page failed");
 	}
 	PMAP_UNLOCK(kernel_pmap);
@@ -4061,14 +4089,26 @@
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  *
- * If "promoted" is false, then the page table page "mpte" must be zero filled.
+ * If "promoted" is false, then the page table page "mpte" must be zero filled;
+ * "mpte"'s valid field will be set to 0.
+ *
+ * If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must
+ * contain valid mappings with identical attributes except for PG_A; "mpte"'s
+ * valid field will be set to 1.
+ *
+ * If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain
+ * valid mappings with identical attributes including PG_A; "mpte"'s valid
+ * field will be set to VM_PAGE_BITS_ALL.
  */
 static __inline int
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+    bool allpte_PG_A_set)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
+	KASSERT(promoted || !allpte_PG_A_set,
+	    ("a zero-filled PTP can't have PG_A set in every PTE"));
+	mpte->valid = promoted ? (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1) : 0;
 	return (vm_radix_insert(&pmap->pm_root, mpte));
 }
 
@@ -6053,17 +6093,17 @@
 	newpte = pmap_swap_pat(pmap, newpte);
 
 	/*
-	 * If the page table page is not leftover from an earlier promotion,
-	 * initialize it.
+	 * If the PTP is not leftover from an earlier promotion or it does not
+	 * have PG_A set in every PTE, then fill it.  The new PTEs will all
+	 * have PG_A set.
 	 */
-	if (vm_page_none_valid(mpte))
+	if (!vm_page_all_valid(mpte))
 		pmap_fill_ptp(firstpte, newpte);
 
 	pmap_demote_pde_check(firstpte, newpte);
 
 	/*
-	 * If the mapping has changed attributes, update the page table
-	 * entries.
+	 * If the mapping has changed attributes, update the PTEs.
 	 */
 	if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
 		pmap_fill_ptp(firstpte, newpte);
@@ -6198,7 +6238,7 @@
 	} else {
 		mpte = pmap_remove_pt_page(pmap, sva);
 		if (mpte != NULL) {
-			KASSERT(vm_page_all_valid(mpte),
+			KASSERT(vm_page_any_valid(mpte),
 			    ("pmap_remove_pde: pte page not promoted"));
 			pmap_pt_page_count_adj(pmap, -1);
 			KASSERT(mpte->ref_count == NPTEPG,
@@ -6822,7 +6862,7 @@
 {
 	pd_entry_t newpde;
 	pt_entry_t *firstpte, oldpte, pa, *pte;
-	pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V, PG_PKU_MASK;
+	pt_entry_t allpte_PG_A, PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
 	int PG_PTE_CACHE;
 
 	PG_A = pmap_accessed_bit(pmap);
@@ -6876,12 +6916,8 @@
 		if (!atomic_fcmpset_long(firstpte, &newpde, newpde & ~PG_RW))
 			goto setpde;
 		newpde &= ~PG_RW;
-	}
-	if ((newpde & PG_A) == 0) {
-		counter_u64_add(pmap_pde_p_failures, 1);
-		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
-		    " in pmap %p", va, pmap);
-		return;
+		CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx"
+		    " in pmap %p", va & ~PDRMASK, pmap);
 	}
 
 	/*
@@ -6889,6 +6925,7 @@
 	 * PTE maps an unexpected 4KB physical page or does not have identical
 	 * characteristics to the first PTE.
 	 */
+	allpte_PG_A = newpde & PG_A;
 	pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE;
 	for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
 		oldpte = *pte;
@@ -6917,13 +6954,30 @@
 			    " in pmap %p", va, pmap);
 			return;
 		}
+		allpte_PG_A &= oldpte;
 		pa -= PAGE_SIZE;
 	}
 
 	/*
-	 * Save the page table page in its current state until the PDE
-	 * mapping the superpage is demoted by pmap_demote_pde() or
-	 * destroyed by pmap_remove_pde().
+	 * Unless all PTEs have PG_A set, clear it from the superpage mapping,
+	 * so that promotions triggered by speculative mappings, such as
+	 * pmap_enter_quick(), don't automatically mark the underlying pages
+	 * as referenced.
+	 */
+	newpde &= ~PG_A | allpte_PG_A;
+
+	/*
+	 * EPT PTEs with PG_M set and PG_A clear are not supported by early
+	 * MMUs supporting EPT.
+	 */
+	KASSERT((newpde & PG_A) != 0 || safe_to_clear_referenced(pmap, newpde),
+	    ("unsupported EPT PTE"));
+
+	/*
+	 * Save the PTP in its current state until the PDE mapping the
+	 * superpage is demoted by pmap_demote_pde() or destroyed by
+	 * pmap_remove_pde().  If PG_A is not set in every PTE, then request
+	 * that the PTP be refilled on demotion.
 	 */
 	if (mpte == NULL)
 		mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
@@ -6934,7 +6988,7 @@
 	    ("pmap_promote_pde: page table page's pindex is wrong "
 	    "mpte %p pidx %#lx va %#lx va pde pidx %#lx",
 	    mpte, mpte->pindex, va, pmap_pde_pindex(va)));
-	if (pmap_insert_pt_page(pmap, mpte, true)) {
+	if (pmap_insert_pt_page(pmap, mpte, true, allpte_PG_A != 0)) {
 		counter_u64_add(pmap_pde_p_failures, 1);
 		CTR2(KTR_PMAP,
 		    "pmap_promote_pde: failure for va %#lx in pmap %p", va,
@@ -7516,7 +7570,7 @@
 			 * leave the kernel page table page zero filled.
 			 */
 			mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
-			if (pmap_insert_pt_page(pmap, mt, false))
+			if (pmap_insert_pt_page(pmap, mt, false, false))
 				panic("pmap_enter_pde: trie insert failed");
 		}
 	}
@@ -7629,6 +7683,7 @@
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 {
+	pd_entry_t *pde;
 	pt_entry_t newpte, *pte, PG_V;
 
 	KASSERT(!VA_IS_CLEANMAP(va) ||
@@ -7636,6 +7691,7 @@
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 	PG_V = pmap_valid_bit(pmap);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	pde = NULL;
 
 	/*
 	 * In the case that a page table page is not
@@ -7643,7 +7699,6 @@
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		pdp_entry_t *pdpe;
-		pd_entry_t *pde;
 		vm_pindex_t ptepindex;
 
 		/*
@@ -7720,6 +7775,28 @@
 	if (va < VM_MAXUSER_ADDRESS)
 		newpte |= PG_U | pmap_pkru_get(pmap, va);
 	pte_store(pte, newpte);
+
+#if VM_NRESERVLEVEL > 0
+	/*
+	 * If both the PTP and the reservation are fully populated, then
+	 * attempt promotion.
+	 */
+	if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
+	    pmap_ps_enabled(pmap) &&
+	    (m->flags & PG_FICTITIOUS) == 0 &&
+	    vm_reserv_level_iffullpop(m) == 0) {
+		if (pde == NULL)
+			pde = pmap_pde(pmap, va);
+		pmap_promote_pde(pmap, pde, va, mpte, lockp);
+
+		/*
+		 * If promotion succeeds, then the next call to this function
+		 * should not be given the unmapped PTP as a hint.
+		 */
+		mpte = NULL;
+	}
+#endif
+
 	return (mpte);
 }
 
@@ -8541,7 +8618,7 @@
 				}
 				mpte = pmap_remove_pt_page(pmap, pv->pv_va);
 				if (mpte != NULL) {
-					KASSERT(vm_page_all_valid(mpte),
+					KASSERT(vm_page_any_valid(mpte),
 					    ("pmap_remove_pages: pte page not promoted"));
 					pmap_pt_page_count_adj(pmap, -1);
 					KASSERT(mpte->ref_count == NPTEPG,
@@ -8820,33 +8897,6 @@
 	pmap_delayed_invl_wait(m);
 }
 
-static __inline boolean_t
-safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
-{
-
-	if (!pmap_emulate_ad_bits(pmap))
-		return (TRUE);
-
-	KASSERT(pmap->pm_type == PT_EPT, ("invalid pm_type %d", pmap->pm_type));
-
-	/*
-	 * XWR = 010 or 110 will cause an unconditional EPT misconfiguration
-	 * so we don't let the referenced (aka EPT_PG_READ) bit to be cleared
-	 * if the EPT_PG_WRITE bit is set.
-	 */
-	if ((pte & EPT_PG_WRITE) != 0)
-		return (FALSE);
-
-	/*
-	 * XWR = 100 is allowed only if the PMAP_SUPPORTS_EXEC_ONLY is set.
-	 */
-	if ((pte & EPT_PG_EXECUTE) == 0 ||
-	    ((pmap->pm_flags & PMAP_SUPPORTS_EXEC_ONLY) != 0))
-		return (TRUE);
-	else
-		return (FALSE);
-}
-
 /*
  * pmap_ts_referenced:
  *
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -130,7 +130,7 @@
  * (PTE) page mappings have identical settings for the following fields:
  */
 #define	PG_PTE_PROMOTE	(PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \
-	    PG_M | PG_A | PG_U | PG_RW | PG_V | PG_PKU_MASK)
+	    PG_M | PG_U | PG_RW | PG_V | PG_PKU_MASK)
 
 /*
  * Page Protection Exception bits
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -3394,7 +3394,7 @@
 	 * If this page table page was unmapped by a promotion, then it
 	 * contains valid mappings.  Zero it to invalidate those mappings.
 	 */
-	if (ml3->valid != 0)
+	if (vm_page_any_valid(ml3))
 		pagezero((void *)PHYS_TO_DMAP(ml3pa));
 
 	/*
@@ -3452,7 +3452,7 @@
 	} else {
 		ml3 = pmap_remove_pt_page(pmap, sva);
 		if (ml3 != NULL) {
-			KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
+			KASSERT(vm_page_any_valid(ml3),
 			    ("pmap_remove_l2: l3 page not promoted"));
 			pmap_resident_count_dec(pmap, 1);
 			KASSERT(ml3->ref_count == NL3PG,
@@ -4015,14 +4015,26 @@
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
 *
- * If "promoted" is false, then the page table page "mpte" must be zero filled.
+ * If "promoted" is false, then the page table page "mpte" must be zero filled;
+ * "mpte"'s valid field will be set to 0.
+ *
+ * If "promoted" is true and "all_l3e_AF_set" is false, then "mpte" must
+ * contain valid mappings with identical attributes except for ATTR_AF;
+ * "mpte"'s valid field will be set to 1.
+ *
+ * If "promoted" and "all_l3e_AF_set" are both true, then "mpte" must contain
+ * valid mappings with identical attributes including ATTR_AF; "mpte"'s valid
+ * field will be set to VM_PAGE_BITS_ALL.
  */
 static __inline int
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+    bool all_l3e_AF_set)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
+	KASSERT(promoted || !all_l3e_AF_set,
+	    ("a zero-filled PTP can't have ATTR_AF set in every PTE"));
+	mpte->valid = promoted ? (all_l3e_AF_set ? VM_PAGE_BITS_ALL : 1) : 0;
 	return (vm_radix_insert(&pmap->pm_root, mpte));
 }
 
@@ -4137,7 +4149,7 @@
 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
     struct rwlock **lockp)
 {
-	pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
+	pt_entry_t all_l3e_AF, *firstl3, *l3, newl2, oldl3, pa;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PMAP_ASSERT_STAGE1(pmap);
@@ -4185,19 +4197,17 @@
 		if (!atomic_fcmpset_64(firstl3, &newl2, newl2 & ~ATTR_SW_DBM))
 			goto setl2;
 		newl2 &= ~ATTR_SW_DBM;
-	}
-	if ((newl2 & ATTR_AF) == 0) {
-		atomic_add_long(&pmap_l2_p_failures, 1);
-		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
-		    " in pmap %p", va, pmap);
-		return;
+		CTR2(KTR_PMAP, "pmap_promote_l2: protect for va %#lx"
+		    " in pmap %p", va & ~L2_OFFSET, pmap);
 	}
 
 	/*
 	 * Examine each of the other L3Es in the specified PTP.  Abort if this
 	 * L3E maps an unexpected 4KB physical page or does not have identical
-	 * characteristics to the first L3E.
+	 * characteristics to the first L3E.  If ATTR_AF is not set in every
+	 * PTE, then request that the PTP be refilled on demotion.
 	 */
+	all_l3e_AF = newl2 & ATTR_AF;
 	pa = (PTE_TO_PHYS(newl2) | (newl2 & ATTR_DESCR_MASK)) + L2_SIZE -
 	    PAGE_SIZE;
 	for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
@@ -4221,15 +4231,25 @@
 				goto setl3;
 			oldl3 &= ~ATTR_SW_DBM;
 		}
-		if ((oldl3 & ATTR_MASK) != (newl2 & ATTR_MASK)) {
+		if ((oldl3 & (ATTR_MASK & ~ATTR_AF)) != (newl2 & (ATTR_MASK &
+		    ~ATTR_AF))) {
 			atomic_add_long(&pmap_l2_p_failures, 1);
 			CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return;
 		}
+		all_l3e_AF &= oldl3;
 		pa -= PAGE_SIZE;
 	}
 
+	/*
+	 * Unless all PTEs have ATTR_AF set, clear it from the superpage
+	 * mapping, so that promotions triggered by speculative mappings,
+	 * such as pmap_enter_quick(), don't automatically mark the
+	 * underlying pages as referenced.
+	 */
+	newl2 &= ~ATTR_AF | all_l3e_AF;
+
 	/*
 	 * Save the page table page in its current state until the L2
 	 * mapping the superpage is demoted by pmap_demote_l2() or
@@ -4242,7 +4262,7 @@
 	    ("pmap_promote_l2: page table page is out of range"));
 	KASSERT(mpte->pindex == pmap_l2_pindex(va),
 	    ("pmap_promote_l2: page table page's pindex is wrong"));
-	if (pmap_insert_pt_page(pmap, mpte, true)) {
+	if (pmap_insert_pt_page(pmap, mpte, true, all_l3e_AF != 0)) {
 		atomic_add_long(&pmap_l2_p_failures, 1);
 		CTR2(KTR_PMAP,
 		    "pmap_promote_l2: failure for va %#lx in pmap %p", va,
@@ -4820,7 +4840,7 @@
 			 * the L2_TABLE entry.
 			 */
 			mt = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
-			if (pmap_insert_pt_page(pmap, mt, false))
+			if (pmap_insert_pt_page(pmap, mt, false, false))
 				panic("pmap_enter_l2: trie insert failed");
 			pmap_clear(l2);
 			pmap_s1_invalidate_page(pmap, va, false);
@@ -4958,6 +4978,7 @@
 	PMAP_ASSERT_STAGE1(pmap);
 	KASSERT(ADDR_IS_CANONICAL(va),
 	    ("%s: Address not in canonical form: %lx", __func__, va));
+	l2 = NULL;
 	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
 
 	/*
@@ -5073,6 +5094,27 @@
 	pmap_store(l3, l3_val);
 	dsb(ishst);
 
+#if VM_NRESERVLEVEL > 0
+	/*
+	 * If both the PTP and the reservation are fully populated, then
+	 * attempt promotion.
+	 */
+	if ((mpte == NULL || mpte->ref_count == NL3PG) &&
+	    pmap_ps_enabled(pmap) && pmap->pm_stage == PM_STAGE1 &&
+	    (m->flags & PG_FICTITIOUS) == 0 &&
+	    vm_reserv_level_iffullpop(m) == 0) {
+		if (l2 == NULL)
+			l2 = pmap_pde(pmap, va, &lvl);
+		pmap_promote_l2(pmap, l2, va, mpte, lockp);
+
+		/*
+		 * If promotion succeeds, then the next call to this function
+		 * should not be given the unmapped PTP as a hint.
+		 */
+		mpte = NULL;
+	}
+#endif
+
 	return (mpte);
 }
 
@@ -5739,7 +5781,7 @@
 
 				ml3 = pmap_remove_pt_page(pmap, pv->pv_va);
 				if (ml3 != NULL) {
-					KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
+					KASSERT(vm_page_any_valid(ml3),
 					    ("pmap_remove_pages: l3 page not promoted"));
 					pmap_resident_count_dec(pmap, 1);
 					KASSERT(ml3->ref_count == NL3PG,
@@ -6875,6 +6917,36 @@
 	}
 }
 
+static void
+pmap_demote_l2_check(pt_entry_t *firstl3p __unused, pt_entry_t newl3e __unused)
+{
+#ifdef INVARIANTS
+#ifdef DIAGNOSTIC
+	pt_entry_t *xl3p, *yl3p;
+
+	for (xl3p = firstl3p; xl3p < firstl3p + Ln_ENTRIES;
+	    xl3p++, newl3e += PAGE_SIZE) {
+		if (PTE_TO_PHYS(pmap_load(xl3p)) != PTE_TO_PHYS(newl3e)) {
+			printf("pmap_demote_l2: xl3e %zd and newl3e map "
+			    "different pages: found %#lx, expected %#lx\n",
+			    xl3p - firstl3p, pmap_load(xl3p), newl3e);
+			printf("page table dump\n");
+			for (yl3p = firstl3p; yl3p < firstl3p + Ln_ENTRIES;
+			    yl3p++) {
+				printf("%zd %#lx\n", yl3p - firstl3p,
+				    pmap_load(yl3p));
+			}
+			panic("firstpte");
+		}
+	}
+#else
+	KASSERT(PTE_TO_PHYS(pmap_load(firstl3p)) == PTE_TO_PHYS(newl3e),
+	    ("pmap_demote_l2: firstl3 and newl3e map different physical"
+	    " addresses"));
+#endif
+#endif
+}
+
 static void
 pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2,
     struct rwlock **lockp)
@@ -6986,14 +7058,24 @@
 	    ("pmap_demote_l2: L2 entry is writeable but not dirty"));
 
 	/*
-	 * If the page table page is not leftover from an earlier promotion,
-	 * or the mapping attributes have changed, (re)initialize the L3 table.
+	 * If the PTP is not leftover from an earlier promotion or it does not
+	 * have ATTR_AF set in every L3E, then fill it.  The new L3Es will all
+	 * have ATTR_AF set.
 	 *
 	 * When pmap_update_entry() clears the old L2 mapping, it (indirectly)
 	 * performs a dsb().  That dsb() ensures that the stores for filling
 	 * "l3" are visible before "l3" is added to the page table.
 	 */
-	if (ml3->valid == 0 || (l3[0] & ATTR_MASK) != (newl3 & ATTR_MASK))
+	if (!vm_page_all_valid(ml3))
+		pmap_fill_l3(l3, newl3);
+
+	pmap_demote_l2_check(l3, newl3);
+
+	/*
+	 * If the mapping has changed attributes, update the L3Es.
+	 */
+	if ((pmap_load(l3) & (ATTR_MASK & ~ATTR_AF)) != (newl3 & (ATTR_MASK &
+	    ~ATTR_AF)))
 		pmap_fill_l3(l3, newl3);
 
 	/*