/*
 * Review notes (text ahead of the "Index:" header is skipped by patch(1)).
 *
 * Summary of the change to sys/amd64/amd64/pmap.c:
 *  - KERNend is removed; kern_promoted_end records, as a page directory
 *    index, where the boot-time 2M kernel mappings stop.  pmap_init()
 *    compares against that index when deciding which KPTphys pages to
 *    pass to pmap_insert_pt_page().
 *  - create_pagetables() fills KPTphys entries up to
 *    round_2mpage(*firstaddr) rather than *firstaddr.
 *  - pmap_demote_pde_check() is new.  With INVARIANTS and DIAGNOSTIC it
 *    walks all NPTEPG entries starting at firstpte, and if an entry's
 *    PG_FRAME differs from newpte's PG_FRAME plus that entry's ptoa()
 *    offset it prints the whole table and panics.  With INVARIANTS alone
 *    it KASSERTs only that the first entry's frame matches.  Its body is
 *    empty when INVARIANTS is not defined.
 *  - pmap_demote_pde_fail() is new and holds the failure path formerly
 *    open-coded in pmap_demote_pde_locked(): pmap_remove_pde() on the
 *    2M-truncated va, pmap_invalidate_pde_page() when oldpde lacks the
 *    pmap's global bit, vm_page_free_pages_toq() on the collected pages,
 *    then a KTR_PMAP trace record.
 *  - pmap_demote_pde_locked() now tests PG_A before calling
 *    pmap_remove_pt_page(), sets wire_count to NPTEPG only for a freshly
 *    allocated non-kernel page table page, and calls pmap_fill_ptp()
 *    whenever oldpde lacks PG_PROMOTED.
 *
 * NOTE(review): in create_pagetables() kern_promoted_end (an int) is
 * first loaded with *firstaddr (a vm_paddr_t) as the loop bound and only
 * afterwards set to the index.  That is an implicit narrowing; presumably
 * harmless while bootstrap allocations end below 2G -- confirm.
 *
 * NOTE(review): the vm_page_alloc() class is now chosen by in_kernel
 * instead of the explicit DMAP range test.  The new KASSERT states the two
 * agree; confirm that also holds for kernels built without INVARIANTS.
 */
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -383,8 +383,6 @@
 static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */
 static int		ndmpdpphys;	/* number of DMPDPphys pages */
 
-static vm_paddr_t	KERNend;	/* phys addr of end of bootstrap data */
-
 /*
  * pmap_mapdev support pre initialization (i.e. console)
  */
@@ -1331,6 +1329,9 @@
 	return (pg_nx);
 }
 
+static int kern_promoted_end;	/* One past the last kernel page table page
+				   index shadowed by a pre-promoted pde. */
+
 static void
 create_pagetables(vm_paddr_t *firstaddr)
 {
@@ -1396,10 +1397,12 @@
 	KPTphys = allocpages(firstaddr, nkpt);
 	KPDphys = allocpages(firstaddr, nkpdpe);
 
-	/* Fill in the underlying page table pages */
-	/* XXX not fully used, underneath 2M pages */
+	/*
+	 * Fill in the underlying page table pages.  Only used when
+	 * PTmap mappings are demoted.
+	 */
 	pt_p = (pt_entry_t *)KPTphys;
-	for (i = 0; ptoa(i) < *firstaddr; i++)
+	for (i = 0; ptoa(i) < round_2mpage(*firstaddr); i++)
 		pt_p[i] = ptoa(i) | X86_PG_V | pg_g | bootaddr_rwx(ptoa(i));
 
 	/* Now map the page tables at their location within PTmap */
@@ -1407,13 +1410,16 @@
 	for (i = 0; i < nkpt; i++)
 		pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V;
 
-	/* Map from zero to end of allocations under 2M pages */
-	/* This replaces some of the KPTphys entries above */
-	for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
+	/*
+	 * Map from zero to end of allocations under 2M pages.
+	 * This replaces some of the KPTphys entries above.
+	 */
+	kern_promoted_end = *firstaddr;
+	for (i = 0; (i << PDRSHIFT) < kern_promoted_end; i++)
 		/* Preset PG_M and PG_A because demotion expects it. */
 		pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g |
 		    X86_PG_M | X86_PG_A | bootaddr_rwx(i << PDRSHIFT);
-
+	kern_promoted_end = i;
 	/*
 	 * Because we map the physical blocks in 2M pages, adjust firstaddr
 	 * to record the physical blocks we've actually mapped into kernel
@@ -1506,8 +1512,7 @@
 	u_long res;
 	int i;
 
-	KERNend = *firstaddr;
-	res = atop(KERNend - (vm_paddr_t)kernphys);
+	res = atop(*firstaddr - (vm_paddr_t)kernphys);
 
 	if (!pti)
 		pg_g = X86_PG_G;
@@ -1750,7 +1755,7 @@
 		mpte->pindex = pmap_pde_pindex(KERNBASE) + i;
 		mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
 		mpte->wire_count = 1;
-		if (i << PDRSHIFT < KERNend &&
+		if (i < kern_promoted_end &&
 		    pmap_insert_pt_page(kernel_pmap, mpte))
 			panic("pmap_init: pmap_insert_pt_page failed");
 	}
@@ -4480,6 +4485,51 @@
 	return (rv);
 }
 
+static void
+pmap_demote_pde_check(pt_entry_t *firstpte __unused, pt_entry_t newpte __unused)
+{
+#ifdef INVARIANTS
+#ifdef DIAGNOSTIC
+	pt_entry_t *xpte, *ypte;
+
+	for (xpte = firstpte; xpte < firstpte + NPTEPG; xpte++) {
+		if ((*xpte & PG_FRAME) != (newpte & PG_FRAME) +
+		    ptoa(xpte - firstpte)) {
+			printf("pmap_demote_pde: xpte %zd and newpte map "
+			    "different pages %#lx %#lx\n",
+			    xpte - firstpte, *xpte, newpte);
+			printf("XX %lx %lx\n", (*xpte & PG_FRAME),
+			    (newpte & PG_FRAME) + ptoa(xpte - firstpte));
+			for (ypte = firstpte; ypte < firstpte + NPTEPG; ypte++)
+				printf("%zd %#lx\n", ypte - firstpte, *ypte);
+			panic("firstpte");
+		}
+	}
+#else
+	KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
+	    ("pmap_demote_pde: firstpte and newpte map different physical"
+	    " addresses"));
+#endif
+#endif
+}
+
+static void
+pmap_demote_pde_fail(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+    pd_entry_t oldpde, struct rwlock **lockp)
+{
+	struct spglist free;
+	vm_offset_t sva;
+
+	SLIST_INIT(&free);
+	sva = trunc_2mpage(va);
+	pmap_remove_pde(pmap, pde, sva, &free, lockp);
+	if ((oldpde & pmap_global_bit(pmap)) == 0)
+		pmap_invalidate_pde_page(pmap, sva, oldpde);
+	vm_page_free_pages_toq(&free, true);
+	CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
+	    " in pmap %p", va, pmap);
+}
+
 static boolean_t
 pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
     struct rwlock **lockp)
@@ -4489,12 +4539,11 @@
 	pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
 	vm_paddr_t mptepa;
 	vm_page_t mpte;
-	struct spglist free;
-	vm_offset_t sva;
 	int PG_PTE_CACHE;
+	bool in_kernel;
 
-	PG_G = pmap_global_bit(pmap);
 	PG_A = pmap_accessed_bit(pmap);
+	PG_G = pmap_global_bit(pmap);
 	PG_M = pmap_modified_bit(pmap);
 	PG_RW = pmap_rw_bit(pmap);
 	PG_V = pmap_valid_bit(pmap);
@@ -4502,14 +4551,22 @@
 	PG_PKU_MASK = pmap_pku_mask_bit(pmap);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
+	in_kernel = va >= VM_MAXUSER_ADDRESS;
 	oldpde = *pde;
 	KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
 	    ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
-	if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) ==
-	    NULL) {
+	if ((oldpde & PG_A) == 0) {
+		pmap_demote_pde_fail(pmap, va, pde, oldpde, lockp);
+		return (FALSE);
+	}
+	mpte = pmap_remove_pt_page(pmap, va);
+	if (mpte == NULL) {
 		KASSERT((oldpde & PG_W) == 0,
 		    ("pmap_demote_pde: page table page for a wired mapping"
 		    " is missing"));
+		KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS &&
+		    va < DMAP_MAX_ADDRESS),
+		    ("pmap_demote_pde: No saved mpte for va %#lx", va));
 
 		/*
 		 * Invalidate the 2MB page mapping and return "failure" if the
@@ -4523,22 +4580,17 @@
 		 * is the only part of the kernel address space that must be
 		 * handled here.
 		 */
-		if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
-		    pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va <
-		    DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
-		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
-			SLIST_INIT(&free);
-			sva = trunc_2mpage(va);
-			pmap_remove_pde(pmap, pde, sva, &free, lockp);
-			if ((oldpde & PG_G) == 0)
-				pmap_invalidate_pde_page(pmap, sva, oldpde);
-			vm_page_free_pages_toq(&free, true);
-			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx"
-			    " in pmap %p", va, pmap);
+		mpte = vm_page_alloc(NULL, pmap_pde_pindex(va),
+		    (in_kernel ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
+		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+		if (mpte == NULL) {
+			pmap_demote_pde_fail(pmap, va, pde, oldpde, lockp);
 			return (FALSE);
 		}
-		if (va < VM_MAXUSER_ADDRESS)
+		if (!in_kernel) {
+			mpte->wire_count = NPTEPG;
 			pmap_resident_count_inc(pmap, 1);
+		}
 	}
 	mptepa = VM_PAGE_TO_PHYS(mpte);
 	firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
@@ -4553,13 +4605,9 @@
 	/*
 	 * If the page table page is new, initialize it.
 	 */
-	if (mpte->wire_count == 1) {
-		mpte->wire_count = NPTEPG;
+	if ((oldpde & PG_PROMOTED) == 0)
 		pmap_fill_ptp(firstpte, newpte);
-	}
-	KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
-	    ("pmap_demote_pde: firstpte and newpte map different physical"
-	    " addresses"));
+	pmap_demote_pde_check(firstpte, newpte);
 
 	/*
 	 * If the mapping has changed attributes, update the page table
@@ -4594,7 +4642,7 @@
 	/*
 	 * Invalidate a stale recursive mapping of the page table page.
 	 */
-	if (va >= VM_MAXUSER_ADDRESS)
+	if (in_kernel)
 		pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
 
 	/*