diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1275,6 +1275,8 @@
 	    pd_entry_t newpde);
 static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde);
 
+static vm_page_t pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex,
+		struct rwlock **lockp, vm_offset_t va);
 static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
 		struct rwlock **lockp, vm_offset_t va);
 static pd_entry_t *pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
@@ -4167,8 +4169,8 @@
 	pml5index = pmap_pml5e_index(va);
 	pml5 = &pmap->pm_pmltop[pml5index];
 	if ((*pml5 & PG_V) == 0) {
-		if (_pmap_allocpte(pmap, pmap_pml5e_pindex(va), lockp, va) ==
-		    NULL)
+		if (pmap_allocpte_nosleep(pmap, pmap_pml5e_pindex(va), lockp,
+		    va) == NULL)
 			return (NULL);
 		allocated = true;
 	} else {
@@ -4204,8 +4206,8 @@
 
 	if ((*pml4 & PG_V) == 0) {
 		/* Have to allocate a new pdp, recurse */
-		if (_pmap_allocpte(pmap, pmap_pml4e_pindex(va), lockp, va) ==
-		    NULL) {
+		if (pmap_allocpte_nosleep(pmap, pmap_pml4e_pindex(va), lockp,
+		    va) == NULL) {
 			if (pmap_is_la57(pmap))
 				pmap_allocpte_free_unref(pmap, va,
 				    pmap_pml5e(pmap, va));
@@ -4231,12 +4233,11 @@
  * This routine is called if the desired page table page does not exist.
  *
  * If page table page allocation fails, this routine may sleep before
- * returning NULL.  It sleeps only if a lock pointer was given.
- *
- * Note: If a page allocation fails at page table level two, three, or four,
- * up to three pages may be held during the wait, only to be released
- * afterwards.  This conservative approach is easily argued to avoid
- * race conditions.
+ * returning NULL.  It sleeps only if a lock pointer was given.  Sleep
+ * occurs right before returning from the top-level call, which is
+ * indicated by the first_level parameter.  This way, we never drop
+ * pmap lock to sleep while a page table page has ref_count == 0,
+ * which prevents the page from being freed under us.
  *
  * The ptepindexes, i.e. page indices, of the page table pages encountered
  * while translating virtual address va are defined as follows:
@@ -4271,8 +4272,8 @@
  * since it is statically allocated by pmap_pinit() and not by _pmap_allocpte().
  */
 static vm_page_t
-_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp,
-    vm_offset_t va __unused)
+pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp,
+    vm_offset_t va)
 {
 	vm_pindex_t pml5index, pml4index;
 	pml5_entry_t *pml5, *pml5u;
@@ -4293,21 +4294,8 @@
 	/*
 	 * Allocate a page table page.
 	 */
 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
-	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
-		if (lockp != NULL) {
-			RELEASE_PV_LIST_LOCK(lockp);
-			PMAP_UNLOCK(pmap);
-			PMAP_ASSERT_NOT_IN_DI();
-			vm_wait(NULL);
-			PMAP_LOCK(pmap);
-		}
-
-		/*
-		 * Indicate the need to retry.  While waiting, the page table
-		 * page may have been allocated.
-		 */
+	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
 		return (NULL);
-	}
 	if ((m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
@@ -4382,7 +4370,7 @@
 		}
 		if ((*pdp & PG_V) == 0) {
 			/* Have to allocate a new pd, recurse */
-			if (_pmap_allocpte(pmap, pmap_pdpe_pindex(va),
+			if (pmap_allocpte_nosleep(pmap, pmap_pdpe_pindex(va),
 			    lockp, va) == NULL) {
 				pmap_allocpte_free_unref(pmap, va,
 				    pmap_pml4e(pmap, va));
@@ -4405,7 +4393,23 @@
 	}
 
 	pmap_resident_count_inc(pmap, 1);
+	return (m);
+}
+static vm_page_t
+_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp,
+    vm_offset_t va)
+{
+	vm_page_t m;
+
+	m = pmap_allocpte_nosleep(pmap, ptepindex, lockp, va);
+	if (m == NULL && lockp != NULL) {
+		RELEASE_PV_LIST_LOCK(lockp);
+		PMAP_UNLOCK(pmap);
+		PMAP_ASSERT_NOT_IN_DI();
+		vm_wait(NULL);
+		PMAP_LOCK(pmap);
+	}
 
 	return (m);
 }
 
@@ -7503,8 +7507,8 @@
 		pdpe = pmap_pdpe(dst_pmap, addr);
 		if (pdpe == NULL) {
 			if (_pmap_allocpte(dst_pmap,
-			    pmap_pml4e_pindex(addr), NULL, addr) ==
-			    NULL)
+			    pmap_pml4e_pindex(addr), NULL,
+			    addr) == NULL)
 				break;
 			pdpe = pmap_pdpe(dst_pmap, addr);
 		} else {
@@ -9363,6 +9367,8 @@
 	pa = 0;
 	val = 0;
 	pdpe = pmap_pdpe(pmap, addr);
+	if (pdpe == NULL)
+		goto out;
 	if ((*pdpe & PG_V) != 0) {
 		if ((*pdpe & PG_PS) != 0) {
 			pte = *pdpe;
@@ -9398,6 +9404,7 @@
 	    (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
 		*pap = pa;
 	}
+out:
 	PMAP_UNLOCK(pmap);
 	return (val);
 }
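
Note (not part of the patch): the in-line sleep that this change removes from the allocation path moves into the new _pmap_allocpte() wrapper, and callers keep the old retry convention the deleted comment described: a NULL return with a non-NULL lock pointer means the pmap lock was dropped, vm_wait() slept, and the lock was reacquired, so the wanted page table page may have been populated by another thread in the meantime. Below is a minimal caller-side sketch of that convention. It loosely mirrors the existing pmap_allocpte() wrapper in pmap.c; the function name example_allocpte_retry, the omission of the 2M-page (PG_PS) case, and the exact lookup details are illustrative assumptions, not code from the patch.

/*
 * Hypothetical sketch only: retry after _pmap_allocpte() slept and
 * returned NULL.  While the pmap lock was dropped, another thread may
 * have installed the page table page, so re-check before allocating.
 */
static vm_page_t
example_allocpte_retry(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
	vm_pindex_t ptepindex;
	pd_entry_t *pd;
	pt_entry_t PG_V;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PG_V = pmap_valid_bit(pmap);
	ptepindex = pmap_pde_pindex(va);
retry:
	pd = pmap_pde(pmap, va);
	if (pd != NULL && (*pd & PG_V) != 0) {
		/* The page table page already exists; take a reference. */
		m = PHYS_TO_VM_PAGE(*pd & PG_FRAME);
		m->ref_count++;
	} else {
		/*
		 * Allocate it.  A NULL return with lockp != NULL means the
		 * pmap lock was dropped and reacquired, so look again.
		 */
		m = _pmap_allocpte(pmap, ptepindex, lockp, va);
		if (m == NULL && lockp != NULL)
			goto retry;
	}
	return (m);
}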