diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1276,7 +1276,7 @@
 static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde);
 
 static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
-		struct rwlock **lockp, vm_offset_t va);
+		struct rwlock **lockp, vm_offset_t va, bool first_level);
 static pd_entry_t *pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
     struct rwlock **lockp);
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
@@ -4167,8 +4167,8 @@
 	pml5index = pmap_pml5e_index(va);
 	pml5 = &pmap->pm_pmltop[pml5index];
 	if ((*pml5 & PG_V) == 0) {
-		if (_pmap_allocpte(pmap, pmap_pml5e_pindex(va), lockp, va) ==
-		    NULL)
+		if (_pmap_allocpte(pmap, pmap_pml5e_pindex(va), lockp, va,
+		    false) == NULL)
 			return (NULL);
 		allocated = true;
 	} else {
@@ -4204,8 +4204,8 @@
 
 	if ((*pml4 & PG_V) == 0) {
 		/* Have to allocate a new pdp, recurse */
-		if (_pmap_allocpte(pmap, pmap_pml4e_pindex(va), lockp, va) ==
-		    NULL) {
+		if (_pmap_allocpte(pmap, pmap_pml4e_pindex(va), lockp, va,
+		    false) == NULL) {
 			if (pmap_is_la57(pmap))
 				pmap_allocpte_free_unref(pmap, va,
 				    pmap_pml5e(pmap, va));
@@ -4231,12 +4231,11 @@
  * This routine is called if the desired page table page does not exist.
  *
  * If page table page allocation fails, this routine may sleep before
- * returning NULL.  It sleeps only if a lock pointer was given.
- *
- * Note: If a page allocation fails at page table level two, three, or four,
- * up to three pages may be held during the wait, only to be released
- * afterwards.  This conservative approach is easily argued to avoid
- * race conditions.
+ * returning NULL.  It sleeps only if a lock pointer was given.  Sleep
+ * occurs right before returning from the top-level call, which is
+ * indicated by the first_level parameter.  This way, we never drop
+ * pmap lock to sleep while a page table page has ref_count == 0,
+ * which prevents the page from being freed under us.
  *
  * The ptepindexes, i.e. page indices, of the page table pages encountered
  * while translating virtual address va are defined as follows:
@@ -4272,7 +4271,7 @@
  */
 static vm_page_t
 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp,
-    vm_offset_t va __unused)
+    vm_offset_t va, bool first_level)
 {
 	vm_pindex_t pml5index, pml4index;
 	pml5_entry_t *pml5, *pml5u;
@@ -4293,21 +4292,8 @@
 	 * Allocate a page table page.
 	 */
 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
-	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
-		if (lockp != NULL) {
-			RELEASE_PV_LIST_LOCK(lockp);
-			PMAP_UNLOCK(pmap);
-			PMAP_ASSERT_NOT_IN_DI();
-			vm_wait(NULL);
-			PMAP_LOCK(pmap);
-		}
-
-		/*
-		 * Indicate the need to retry.  While waiting, the page table
-		 * page may have been allocated.
-		 */
-		return (NULL);
-	}
+	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
+		goto wait;
 
 	if ((m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
@@ -4339,7 +4325,7 @@
 		if (pml4 == NULL) {
 			vm_page_unwire_noq(m);
 			vm_page_free_zero(m);
-			return (NULL);
+			goto wait;
 		}
 		KASSERT((*pml4 & PG_V) == 0, ("pmap %p va %#lx pml4 %#lx",
 		    pmap, va, *pml4));
@@ -4367,7 +4353,7 @@
 		if (pdp == NULL) {
 			vm_page_unwire_noq(m);
 			vm_page_free_zero(m);
-			return (NULL);
+			goto wait;
 		}
 		KASSERT((*pdp & PG_V) == 0, ("pmap %p va %#lx pdp %#lx",
 		    pmap, va, *pdp));
@@ -4378,17 +4364,17 @@
 		if (pdp == NULL) {
 			vm_page_unwire_noq(m);
 			vm_page_free_zero(m);
-			return (NULL);
+			goto wait;
 		}
 		if ((*pdp & PG_V) == 0) {
 			/* Have to allocate a new pd, recurse */
 			if (_pmap_allocpte(pmap, pmap_pdpe_pindex(va),
-			    lockp, va) == NULL) {
+			    lockp, va, false) == NULL) {
 				pmap_allocpte_free_unref(pmap, va,
 				    pmap_pml4e(pmap, va));
 				vm_page_unwire_noq(m);
 				vm_page_free_zero(m);
-				return (NULL);
+				goto wait;
 			}
 		} else {
 			/* Add reference to the pd page */
@@ -4405,8 +4391,22 @@
 	}
 	pmap_resident_count_inc(pmap, 1);
-
 	return (m);
+
+wait:
+	if (first_level && lockp != NULL) {
+		RELEASE_PV_LIST_LOCK(lockp);
+		PMAP_UNLOCK(pmap);
+		PMAP_ASSERT_NOT_IN_DI();
+		vm_wait(NULL);
+		PMAP_LOCK(pmap);
+	}
+
+	/*
+	 * Indicate the need to retry.  While waiting, the page table
+	 * page may have been allocated.
+	 */
+	return (NULL);
 }
 
 static pd_entry_t *
@@ -4433,7 +4433,7 @@
 	} else if (va < VM_MAXUSER_ADDRESS) {
 		/* Allocate a pd page. */
 		pdpindex = pmap_pde_pindex(va) >> NPDPEPGSHIFT;
-		pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, lockp, va);
+		pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, lockp, va, true);
 		if (pdpg == NULL) {
 			if (lockp != NULL)
 				goto retry;
@@ -4494,7 +4494,7 @@
 		 * Here if the pte page isn't mapped, or if it has been
 		 * deallocated.
 		 */
-		m = _pmap_allocpte(pmap, ptepindex, lockp, va);
+		m = _pmap_allocpte(pmap, ptepindex, lockp, va, true);
 		if (m == NULL && lockp != NULL)
 			goto retry;
 	}
@@ -6514,7 +6514,7 @@
 		pml4e = pmap_pml4e(pmap, va);
 		if (pml4e == NULL || (*pml4e & PG_V) == 0) {
 			mp = _pmap_allocpte(pmap, pmap_pml4e_pindex(va),
-			    NULL, va);
+			    NULL, va, true);
 			if (mp == NULL)
 				goto allocf;
 			pdpe = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp));
@@ -6535,7 +6535,7 @@
 		pde = pmap_pde(pmap, va);
 		if (pde == NULL) {
 			mp = _pmap_allocpte(pmap, pmap_pdpe_pindex(va),
-			    NULL, va);
+			    NULL, va, true);
 			if (mp == NULL)
 				goto allocf;
 			pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp));
@@ -6690,7 +6690,7 @@
 		 */
 		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
 		mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va),
-		    nosleep ? NULL : &lock, va);
+		    nosleep ? NULL : &lock, va, true);
 		if (mpte == NULL && nosleep) {
 			rv = KERN_RESOURCE_SHORTAGE;
 			goto out;
 		}
@@ -7174,7 +7174,7 @@
 			 * pointer, because we don't intend to sleep.
 			 */
 			mpte = _pmap_allocpte(pmap, ptepindex, NULL,
-			    va);
+			    va, true);
 			if (mpte == NULL)
 				return (mpte);
 		}
@@ -7503,8 +7503,8 @@
 		pdpe = pmap_pdpe(dst_pmap, addr);
 		if (pdpe == NULL) {
 			if (_pmap_allocpte(dst_pmap,
-			    pmap_pml4e_pindex(addr), NULL, addr) ==
-			    NULL)
+			    pmap_pml4e_pindex(addr), NULL, addr,
+			    true) == NULL)
 				break;
 			pdpe = pmap_pdpe(dst_pmap, addr);
 		} else {
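
For context on the control-flow change above: intermediate recursion levels of _pmap_allocpte() never sleep; each level unwinds what it took and returns NULL, and only the outermost call (first_level == true) drops the pmap lock, waits for memory, and lets its caller retry via the existing goto retry loops in pmap_alloc_pde() and pmap_allocpte(). The sketch below is a minimal userland model of that policy, not kernel code; every name in it (try_get_page, alloc_chain, map_lock, fail_budget) is made up for illustration, and usleep() merely stands in for vm_wait().

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
static int fail_budget = 2;	/* force two simulated allocation failures */

/* Pretend page allocator: fails a couple of times, then succeeds. */
static bool
try_get_page(void)
{
	if (fail_budget > 0) {
		fail_budget--;
		return (false);
	}
	return (true);
}

/*
 * Take one "page" per level, recursing toward the leaf level.  Deeper
 * levels (first_level == false) never sleep: they simply report failure,
 * so nothing is left half-referenced while the lock is dropped.  In the
 * real code the page taken at the failing level is released before the
 * goto wait; here there is nothing to release, so only the control flow
 * is modeled.
 */
static bool
alloc_chain(int level, bool first_level)
{
	if (!try_get_page())
		goto wait;
	if (level > 1 && !alloc_chain(level - 1, false))
		goto wait;
	return (true);

wait:
	if (first_level) {
		/* Only the top-level call drops the lock and waits. */
		pthread_mutex_unlock(&map_lock);
		usleep(1000);		/* stand-in for vm_wait() */
		pthread_mutex_lock(&map_lock);
	}
	return (false);
}

int
main(void)
{
	pthread_mutex_lock(&map_lock);
	/* Caller-side retry loop, like the goto retry in pmap_alloc_pde(). */
	while (!alloc_chain(3, true))
		printf("allocation failed, retrying after wait\n");
	printf("chain built\n");
	pthread_mutex_unlock(&map_lock);
	return (0);
}

Built with cc -pthread, this prints the retry message twice (once per simulated failure) and then "chain built", mirroring how a caller of _pmap_allocpte() retries only after the top-level wait has released and reacquired the lock.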