Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -317,7 +317,8 @@
 		    u_int flags, vm_page_t m);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 		    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
-static int	pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
+static int	pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+		    bool allpte_PG_A_set);
 static void	pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
 		    pd_entry_t pde);
 static void	pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@@ -327,7 +328,8 @@
 static void	pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
 static void	pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 #if VM_NRESERVLEVEL > 0
-static void	pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static bool	pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+		    vm_page_t mpte);
 #endif
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde,
 		    vm_offset_t sva, vm_prot_t prot);
@@ -993,7 +995,7 @@
 		 */
 		if (pseflag != 0 &&
 		    KERNBASE <= i << PDRSHIFT && i << PDRSHIFT < KERNend &&
-		    pmap_insert_pt_page(kernel_pmap, mpte, true))
+		    pmap_insert_pt_page(kernel_pmap, mpte, true, true))
 			panic("pmap_init: pmap_insert_pt_page failed");
 	}
 	PMAP_UNLOCK(kernel_pmap);
@@ -1928,14 +1930,26 @@
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  *
- * If "promoted" is false, then the page table page "mpte" must be zero filled.
+ * If "promoted" is false, then the page table page "mpte" must be zero filled;
+ * "mpte"'s valid field will be set to 0.
+ *
+ * If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must
+ * contain valid mappings with identical attributes except for PG_A; "mpte"'s
+ * valid field will be set to 1.
+ *
+ * If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain
+ * valid mappings with identical attributes including PG_A; "mpte"'s valid
+ * field will be set to VM_PAGE_BITS_ALL.
  */
 static __inline int
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+    bool allpte_PG_A_set)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
+	KASSERT(promoted || !allpte_PG_A_set,
+	    ("a zero-filled PTP can't have PG_A set in every PTE"));
+	mpte->valid = promoted ? (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1) : 0;
 	return (vm_radix_insert(&pmap->pm_root, mpte));
 }
 
@@ -2843,10 +2857,11 @@
 		newpte ^= PG_PDE_PAT | PG_PTE_PAT;
 
 	/*
-	 * If the page table page is not leftover from an earlier promotion,
-	 * initialize it.
+	 * If the PTP is not leftover from an earlier promotion or it does not
+	 * have PG_A set in every PTE, then fill it.  The new PTEs will all
+	 * have PG_A set.
 	 */
-	if (vm_page_none_valid(mpte))
+	if (!vm_page_all_valid(mpte))
 		pmap_fill_ptp(firstpte, newpte);
 
 	KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
@@ -2854,8 +2869,7 @@
 	    " addresses"));
 
 	/*
-	 * If the mapping has changed attributes, update the page table
-	 * entries.
+	 * If the mapping has changed attributes, update the PTEs.
 	 */
 	if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
 		pmap_fill_ptp(firstpte, newpte);
@@ -2985,7 +2999,7 @@
 	} else {
 		mpte = pmap_remove_pt_page(pmap, sva);
 		if (mpte != NULL) {
-			KASSERT(vm_page_all_valid(mpte),
+			KASSERT(vm_page_any_valid(mpte),
 			    ("pmap_remove_pde: pte page not promoted"));
 			pmap->pm_stats.resident_count--;
 			KASSERT(mpte->ref_count == NPTEPG,
@@ -3469,38 +3483,56 @@
  * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel
  * pmap.
  */
-static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+static bool
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte)
 {
 	pd_entry_t newpde;
-	pt_entry_t *firstpte, oldpte, pa, *pte;
+	pt_entry_t allpte_PG_A, *firstpte, oldpte, pa, *pte;
 #ifdef KTR
 	vm_offset_t oldpteva;
 #endif
-	vm_page_t mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	if (!pg_ps_enabled)
+		return (false);
 
 	/*
 	 * Examine the first PTE in the specified PTP.  Abort if this PTE is
-	 * either invalid, unused, or does not map the first 4KB physical page
+	 * either invalid or does not map the first 4KB physical page
 	 * within a 2- or 4MB page.
 	 */
 	firstpte = pmap_pte_quick(pmap, trunc_4mpage(va));
 setpde:
 	newpde = *firstpte;
-	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
+	if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) {
 		pmap_pde_p_failures++;
 		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 		    " in pmap %p", va, pmap);
-		return;
+		return (false);
 	}
 	if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) {
 		pmap_pde_p_failures++;
 		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 		    " in pmap %p", va, pmap);
-		return;
+		return (false);
 	}
+
+	/*
+	 * Both here and in the below "for" loop, to allow for repromotion
+	 * after MADV_FREE, conditionally write protect a clean PTE before
+	 * possibly aborting the promotion due to other PTE attributes.  Why?
+	 * Suppose that MADV_FREE is applied to a part of a superpage, the
+	 * address range [S, E).  pmap_advise() will demote the superpage
+	 * mapping, destroy the 4KB page mapping at the end of [S, E), and
+	 * clear PG_M and PG_A in the PTEs for the rest of [S, E).  Later,
+	 * imagine that the memory in [S, E) is recycled, but the last 4KB
+	 * page in [S, E) is not the last to be rewritten, or simply accessed.
+	 * In other words, there is still a 4KB page in [S, E), call it P,
+	 * that is writeable but PG_M and PG_A are clear in P's PTE.  Unless
+	 * we write protect P before aborting the promotion, if and when P is
+	 * finally rewritten, there won't be a page fault to trigger
+	 * repromotion.
+	 */
 	if ((newpde & (PG_M | PG_RW)) == PG_RW) {
 		/*
 		 * When PG_M is already clear, PG_RW can be cleared without
@@ -3510,6 +3542,8 @@
 		    ~PG_RW))
 			goto setpde;
 		newpde &= ~PG_RW;
+		CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x"
+		    " in pmap %p", va & ~PDRMASK, pmap);
 	}
 
 	/*
@@ -3517,15 +3551,16 @@
 	 * PTE maps an unexpected 4KB physical page or does not have identical
 	 * characteristics to the first PTE.
 	 */
-	pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
+	allpte_PG_A = newpde & PG_A;
+	pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE;
 	for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
 setpte:
 		oldpte = *pte;
-		if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
+		if ((oldpte & (PG_FRAME | PG_V)) != pa) {
 			pmap_pde_p_failures++;
 			CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 			    " in pmap %p", va, pmap);
-			return;
+			return (false);
 		}
 		if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
 			/*
@@ -3547,28 +3582,39 @@
 			pmap_pde_p_failures++;
 			CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 			    " in pmap %p", va, pmap);
-			return;
+			return (false);
 		}
+		allpte_PG_A &= oldpte;
 		pa -= PAGE_SIZE;
 	}
 
 	/*
-	 * Save the page table page in its current state until the PDE
-	 * mapping the superpage is demoted by pmap_demote_pde() or
-	 * destroyed by pmap_remove_pde().
+	 * Unless all PTEs have PG_A set, clear it from the superpage mapping,
+	 * so that promotions triggered by speculative mappings, such as
+	 * pmap_enter_quick(), don't automatically mark the underlying pages
+	 * as referenced.
+	 */
+	newpde &= ~PG_A | allpte_PG_A;
+
+	/*
+	 * Save the PTP in its current state until the PDE mapping the
+	 * superpage is demoted by pmap_demote_pde() or destroyed by
+	 * pmap_remove_pde().  If PG_A is not set in every PTE, then request
+	 * that the PTP be refilled on demotion.
 	 */
-	mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+	if (mpte == NULL)
+		mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
 	KASSERT(mpte >= vm_page_array &&
 	    mpte < &vm_page_array[vm_page_array_size],
 	    ("pmap_promote_pde: page table page is out of range"));
 	KASSERT(mpte->pindex == va >> PDRSHIFT,
 	    ("pmap_promote_pde: page table page's pindex is wrong"));
-	if (pmap_insert_pt_page(pmap, mpte, true)) {
+	if (pmap_insert_pt_page(pmap, mpte, true, allpte_PG_A != 0)) {
 		pmap_pde_p_failures++;
 		CTR2(KTR_PMAP,
 		    "pmap_promote_pde: failure for va %#x in pmap %p", va,
 		    pmap);
-		return;
+		return (false);
 	}
 
 	/*
@@ -3596,6 +3642,7 @@
 	pmap_pde_promotions++;
 	CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x"
 	    " in pmap %p", va, pmap);
+	return (true);
 }
 #endif /* VM_NRESERVLEVEL > 0 */
 
@@ -3859,9 +3906,9 @@
 	 * populated, then attempt promotion.
 	 */
 	if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
-	    pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
+	    (m->flags & PG_FICTITIOUS) == 0 &&
 	    vm_reserv_level_iffullpop(m) == 0)
-		pmap_promote_pde(pmap, pde, va);
+		(void)pmap_promote_pde(pmap, pde, va, mpte);
 #endif
 
 	rv = KERN_SUCCESS;
@@ -3978,7 +4025,7 @@
 			 * leave the kernel page table page zero filled.
 			 */
 			mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
-			if (pmap_insert_pt_page(pmap, mt, false))
+			if (pmap_insert_pt_page(pmap, mt, false, false))
 				panic("pmap_enter_pde: trie insert failed");
 		}
 	}
@@ -4084,12 +4131,14 @@
     vm_prot_t prot, vm_page_t mpte)
 {
 	pt_entry_t newpte, *pte;
+	pd_entry_t *pde;
 
 	KASSERT(pmap != kernel_pmap || !VA_IS_CLEANMAP(va) ||
 	    (m->oflags & VPO_UNMANAGED) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	pde = NULL;
 
 	/*
 	 * In the case that a page table page is not
@@ -4109,7 +4158,8 @@
 		/*
 		 * Get the page directory entry
 		 */
-		ptepa = pmap->pm_pdir[ptepindex];
+		pde = &pmap->pm_pdir[ptepindex];
+		ptepa = *pde;
 
 		/*
 		 * If the page table page is mapped, we just increment
@@ -4167,6 +4217,27 @@
 	if (pmap != kernel_pmap)
 		newpte |= PG_U;
 	pte_store_zero(pte, newpte);
+
+#if VM_NRESERVLEVEL > 0
+	/*
+	 * If both the PTP and the reservation are fully populated, then
+	 * attempt promotion.
+	 */
+	if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
+	    (m->flags & PG_FICTITIOUS) == 0 &&
+	    vm_reserv_level_iffullpop(m) == 0) {
+		if (pde == NULL)
+			pde = pmap_pde(pmap, va);
+
+		/*
+		 * If promotion succeeds, then the next call to this function
+		 * should not be given the unmapped PTP as a hint.
+		 */
+		if (pmap_promote_pde(pmap, pde, va, mpte))
+			mpte = NULL;
+	}
+#endif
+
 	sched_unpin();
 	return (mpte);
 }
@@ -4836,7 +4907,7 @@
 				}
 				mpte = pmap_remove_pt_page(pmap, pv->pv_va);
 				if (mpte != NULL) {
-					KASSERT(vm_page_all_valid(mpte),
+					KASSERT(vm_page_any_valid(mpte),
 					    ("pmap_remove_pages: pte page not promoted"));
 					pmap->pm_stats.resident_count--;
 					KASSERT(mpte->ref_count == NPTEPG,
Index: sys/i386/include/pmap.h
===================================================================
--- sys/i386/include/pmap.h
+++ sys/i386/include/pmap.h
@@ -83,7 +83,7 @@
  * 4KB (PTE) page mappings have identical settings for the following fields:
  */
 #define	PG_PTE_PROMOTE	(PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \
-			PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V)
+			PG_M | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V)
 
 /*
  * Page Protection Exception bits
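
Reviewer note: the sketch below is not kernel code; it is a small standalone C
program that mirrors the PG_A accumulation idea this patch introduces in
pmap_promote_pde() and the valid-field encoding used by pmap_insert_pt_page().
The constants, the ptp[] array, and scan_ptp() are invented for illustration
only; bit values and NPTEPG are assumptions, not taken from pmap.h.

/*
 * Standalone sketch: scan every PTE in a candidate PTP, keep PG_A in the
 * would-be superpage mapping only if every PTE has PG_A set, and report the
 * valid-field encoding that would be recorded for the saved PTP.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PG_V	0x001u		/* valid (assumed bit value) */
#define PG_A	0x020u		/* accessed (assumed bit value) */
#define NPTEPG	1024		/* assumed PTEs per i386 PTP without PAE */

/* Returns true if the promoted mapping may keep PG_A. */
static bool
scan_ptp(const uint32_t ptp[NPTEPG], uint32_t *newpde)
{
	uint32_t allpte_PG_A = ptp[0] & PG_A;

	for (int i = 1; i < NPTEPG; i++)
		allpte_PG_A &= ptp[i];

	/*
	 * Unless every PTE has PG_A set, clear it from the superpage
	 * mapping so that speculative mappings don't mark the underlying
	 * pages as referenced (mirrors "newpde &= ~PG_A | allpte_PG_A").
	 */
	*newpde = (ptp[0] & ~PG_A) | allpte_PG_A;
	return (allpte_PG_A != 0);
}

int
main(void)
{
	uint32_t ptp[NPTEPG], newpde;

	for (int i = 0; i < NPTEPG; i++)
		ptp[i] = PG_V | PG_A;
	ptp[7] &= ~PG_A;	/* one speculative, never-accessed PTE */

	bool all_PG_A = scan_ptp(ptp, &newpde);

	/* Mirrors "valid = promoted ? (allpte_PG_A_set ? ALL : 1) : 0". */
	printf("promoted PDE keeps PG_A: %d, saved PTP valid encoding: %s\n",
	    (newpde & PG_A) != 0, all_PG_A ? "VM_PAGE_BITS_ALL" : "1");
	return (0);
}

In this example the single clean PTE forces PG_A off in the promoted mapping
and the PTP is saved with the "refill on demotion" encoding, which is why
pmap_demote_pde() above now refills unless vm_page_all_valid() is true.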