Changeset View
Standalone View
sys/amd64/amd64/pmap.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||||
* SUCH DAMAGE. | * SUCH DAMAGE. | ||||
* | * | ||||
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 | * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 | ||||
*/ | */ | ||||
/*- | /*- | ||||
* Copyright (c) 2003 Networks Associates Technology, Inc. | * Copyright (c) 2003 Networks Associates Technology, Inc. | ||||
* Copyright (c) 2014-2019 The FreeBSD Foundation | * Copyright (c) 2014-2020 The FreeBSD Foundation | ||||
* All rights reserved. | * All rights reserved. | ||||
* | * | ||||
* This software was developed for the FreeBSD Project by Jake Burkholder, | * This software was developed for the FreeBSD Project by Jake Burkholder, | ||||
* Safeport Network Services, and Network Associates Laboratories, the | * Safeport Network Services, and Network Associates Laboratories, the | ||||
* Security Research Division of Network Associates, Inc. under | * Security Research Division of Network Associates, Inc. under | ||||
* DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA | * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA | ||||
* CHATS research program. | * CHATS research program. | ||||
* | * | ||||
▲ Show 20 Lines • Show All 1,269 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
pdp_entry_t *pdpe; | pdp_entry_t *pdpe; | ||||
pt_entry_t PG_V; | pt_entry_t PG_V; | ||||
PG_V = pmap_valid_bit(pmap); | PG_V = pmap_valid_bit(pmap); | ||||
pdpe = pmap_pdpe(pmap, va); | pdpe = pmap_pdpe(pmap, va); | ||||
if (pdpe == NULL || (*pdpe & PG_V) == 0) | if (pdpe == NULL || (*pdpe & PG_V) == 0) | ||||
return (NULL); | return (NULL); | ||||
KASSERT((*pdpe & PG_PS) == 0, | |||||
("pmap_pde for 1G page, pmap %p va %#lx", pmap, va)); | |||||
return (pmap_pdpe_to_pde(pdpe, va)); | return (pmap_pdpe_to_pde(pdpe, va)); | ||||
} | } | ||||
/* Return a pointer to the PT slot that corresponds to a VA */ | /* Return a pointer to the PT slot that corresponds to a VA */ | ||||
static __inline pt_entry_t * | static __inline pt_entry_t * | ||||
pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) | pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) | ||||
{ | { | ||||
pt_entry_t *pte; | pt_entry_t *pte; | ||||
▲ Show 20 Lines • Show All 791 Lines • ▼ Show 20 Lines | pmap_init(void) | ||||
/* | /* | ||||
* Are large page mappings enabled? | * Are large page mappings enabled? | ||||
*/ | */ | ||||
TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); | TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); | ||||
if (pg_ps_enabled) { | if (pg_ps_enabled) { | ||||
KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, | KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, | ||||
("pmap_init: can't assign to pagesizes[1]")); | ("pmap_init: can't assign to pagesizes[1]")); | ||||
pagesizes[1] = NBPDR; | pagesizes[1] = NBPDR; | ||||
if ((amd_feature & AMDID_PAGE1GB) != 0) { | |||||
KASSERT(MAXPAGESIZES > 1 && pagesizes[2] == 0, | |||||
("pmap_init: can't assign to pagesizes[2]")); | |||||
pagesizes[2] = NBPDP; | |||||
} | } | ||||
} | |||||
/* | /* | ||||
* Initialize pv chunk lists. | * Initialize pv chunk lists. | ||||
*/ | */ | ||||
for (i = 0; i < PMAP_MEMDOM; i++) { | for (i = 0; i < PMAP_MEMDOM; i++) { | ||||
mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL, MTX_DEF); | mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL, MTX_DEF); | ||||
TAILQ_INIT(&pv_chunks[i].pvc_list); | TAILQ_INIT(&pv_chunks[i].pvc_list); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,185 Lines • ▼ Show 20 Lines | pmap_kextract(vm_offset_t va) | ||||
} else if (PMAP_ADDRESS_IN_LARGEMAP(va)) { | } else if (PMAP_ADDRESS_IN_LARGEMAP(va)) { | ||||
pa = pmap_large_map_kextract(va); | pa = pmap_large_map_kextract(va); | ||||
} else { | } else { | ||||
pde = *vtopde(va); | pde = *vtopde(va); | ||||
if (pde & PG_PS) { | if (pde & PG_PS) { | ||||
pa = (pde & PG_PS_FRAME) | (va & PDRMASK); | pa = (pde & PG_PS_FRAME) | (va & PDRMASK); | ||||
} else { | } else { | ||||
/* | /* | ||||
* Beware of a concurrent promotion that changes the | * Beware of a concurrent promotion that changes the | ||||
markj: This could be restructured a bit to avoid testing `m != NULL` after the check_page label:
```… | |||||
Done Inline Actions — kib: I restructured as you suggested, but I do not believe that the m != NULL check can be dropped. PHYS_TO_VM_PAGE() can return NULL; for instance, imagine that userspace mapped a PCI BAR outside the DMAP. | |||||
Not Done Inline Actions — markj: I see, thanks. I didn't know about PCIOCBARMMAP. | |||||
* PDE at this point! For example, vtopte() must not | * PDE at this point! For example, vtopte() must not | ||||
* be used to access the PTE because it would use the | * be used to access the PTE because it would use the | ||||
* new PDE. It is, however, safe to use the old PDE | * new PDE. It is, however, safe to use the old PDE | ||||
* because the page table page is preserved by the | * because the page table page is preserved by the | ||||
* promotion. | * promotion. | ||||
*/ | */ | ||||
pa = *pmap_pde_to_pte(&pde, va); | pa = *pmap_pde_to_pte(&pde, va); | ||||
pa = (pa & PG_FRAME) | (va & PAGE_MASK); | pa = (pa & PG_FRAME) | (va & PAGE_MASK); | ||||
▲ Show 20 Lines • Show All 425 Lines • ▼ Show 20 Lines | |||||
* | * | ||||
* If page table page allocation fails, this routine may sleep before | * If page table page allocation fails, this routine may sleep before | ||||
* returning NULL. It sleeps only if a lock pointer was given. | * returning NULL. It sleeps only if a lock pointer was given. | ||||
* | * | ||||
* Note: If a page allocation fails at page table level two or three, | * Note: If a page allocation fails at page table level two or three, | ||||
* one or two pages may be held during the wait, only to be released | * one or two pages may be held during the wait, only to be released | ||||
* afterwards. This conservative approach is easily argued to avoid | * afterwards. This conservative approach is easily argued to avoid | ||||
* race conditions. | * race conditions. | ||||
* | |||||
* The ptepindex of each paging-structure page used to translate va is | |||||
* defined as follows: | |||||
* - for the page table page (last level), ptepindex = pmap_pde_pindex(va) | |||||
* = va >> PDRSHIFT; in other words, it is just the index of the PDE; | |||||
* - for the page directory page, ptepindex = NUPDE (number of userland PD | |||||
* entries) + (pmap_pde_pindex(va) >> NPDEPGSHIFT), | |||||
* i.e. the indices of PDPEs are placed after the last index of a PDE; | |||||
* - for the page directory pointer page, ptepindex = NUPDE + NUPDPE + | |||||
* (pmap_pde_pindex(va) >> (NPDEPGSHIFT + NPML4EPGSHIFT)), | |||||
* i.e. the indices of PML4Es are placed after the last index of a PDPE. | |||||
* In other words, ptepindex is the sequential number of the corresponding | |||||
* paging entry when all entries of the same rank are grouped together and | |||||
* the ranks are ordered from deepest to root. | |||||
*/ | */ | ||||
static vm_page_t | static vm_page_t | ||||
_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) | _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) | ||||
{ | { | ||||
vm_page_t m, pdppg, pdpg; | vm_page_t m, pdppg, pdpg; | ||||
pt_entry_t PG_A, PG_M, PG_RW, PG_V; | pt_entry_t PG_A, PG_M, PG_RW, PG_V; | ||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | PMAP_LOCK_ASSERT(pmap, MA_OWNED); | ||||
▲ Show 20 Lines • Show All 1,599 Lines • ▼ Show 20 Lines | |||||
* | * | ||||
* It is assumed that the start and end are properly | * It is assumed that the start and end are properly | ||||
* rounded to the page size. | * rounded to the page size. | ||||
*/ | */ | ||||
void | void | ||||
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) | pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) | ||||
{ | { | ||||
struct rwlock *lock; | struct rwlock *lock; | ||||
vm_page_t mt; | |||||
vm_offset_t va_next; | vm_offset_t va_next; | ||||
pml4_entry_t *pml4e; | pml4_entry_t *pml4e; | ||||
pdp_entry_t *pdpe; | pdp_entry_t *pdpe; | ||||
pd_entry_t ptpaddr, *pde; | pd_entry_t ptpaddr, *pde; | ||||
pt_entry_t PG_G, PG_V; | pt_entry_t PG_G, PG_V; | ||||
struct spglist free; | struct spglist free; | ||||
int anyvalid; | int anyvalid; | ||||
Show All 36 Lines | for (; sva < eva; sva = va_next) { | ||||
if ((*pml4e & PG_V) == 0) { | if ((*pml4e & PG_V) == 0) { | ||||
va_next = (sva + NBPML4) & ~PML4MASK; | va_next = (sva + NBPML4) & ~PML4MASK; | ||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
continue; | continue; | ||||
} | } | ||||
pdpe = pmap_pml4e_to_pdpe(pml4e, sva); | pdpe = pmap_pml4e_to_pdpe(pml4e, sva); | ||||
if ((*pdpe & PG_V) == 0) { | |||||
va_next = (sva + NBPDP) & ~PDPMASK; | va_next = (sva + NBPDP) & ~PDPMASK; | ||||
if ((*pdpe & PG_V) == 0) { | |||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
continue; | continue; | ||||
} | } | ||||
KASSERT((*pdpe & PG_PS) == 0 || va_next <= eva, | |||||
("pmap_remove of non-transient 1G page " | |||||
"pdpe %#lx sva %#lx eva %#lx va_next %#lx", | |||||
*pdpe, sva, eva, va_next)); | |||||
if ((*pdpe & PG_PS) != 0) { | |||||
MPASS(pmap != kernel_pmap); /* XXXKIB */ | |||||
MPASS((*pdpe & (PG_MANAGED | PG_G)) == 0); | |||||
anyvalid = 1; | |||||
*pdpe = 0; | |||||
pmap_resident_count_dec(pmap, NBPDP / PAGE_SIZE); | |||||
mt = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, sva) & PG_FRAME); | |||||
pmap_unwire_ptp(pmap, sva, mt, &free); | |||||
continue; | |||||
} | |||||
/* | /* | ||||
* Calculate index for next page table. | * Calculate index for next page table. | ||||
*/ | */ | ||||
va_next = (sva + NBPDR) & ~PDRMASK; | va_next = (sva + NBPDR) & ~PDRMASK; | ||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
pde = pmap_pdpe_to_pde(pdpe, sva); | pde = pmap_pdpe_to_pde(pdpe, sva); | ||||
▲ Show 20 Lines • Show All 199 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* Set the physical protection on the | * Set the physical protection on the | ||||
* specified range of this map as requested. | * specified range of this map as requested. | ||||
*/ | */ | ||||
void | void | ||||
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) | pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) | ||||
{ | { | ||||
vm_page_t m; | |||||
vm_offset_t va_next; | vm_offset_t va_next; | ||||
pml4_entry_t *pml4e; | pml4_entry_t *pml4e; | ||||
pdp_entry_t *pdpe; | pdp_entry_t *pdpe; | ||||
pd_entry_t ptpaddr, *pde; | pd_entry_t ptpaddr, *pde; | ||||
pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V; | pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V; | ||||
pt_entry_t obits, pbits; | |||||
boolean_t anychanged; | boolean_t anychanged; | ||||
KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); | KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); | ||||
if (prot == VM_PROT_NONE) { | if (prot == VM_PROT_NONE) { | ||||
pmap_remove(pmap, sva, eva); | pmap_remove(pmap, sva, eva); | ||||
return; | return; | ||||
} | } | ||||
Show All 34 Lines | for (; sva < eva; sva = va_next) { | ||||
if ((*pml4e & PG_V) == 0) { | if ((*pml4e & PG_V) == 0) { | ||||
va_next = (sva + NBPML4) & ~PML4MASK; | va_next = (sva + NBPML4) & ~PML4MASK; | ||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
continue; | continue; | ||||
} | } | ||||
pdpe = pmap_pml4e_to_pdpe(pml4e, sva); | pdpe = pmap_pml4e_to_pdpe(pml4e, sva); | ||||
if ((*pdpe & PG_V) == 0) { | |||||
va_next = (sva + NBPDP) & ~PDPMASK; | va_next = (sva + NBPDP) & ~PDPMASK; | ||||
if ((*pdpe & PG_V) == 0) { | |||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
continue; | continue; | ||||
} | } | ||||
KASSERT((*pdpe & PG_PS) == 0 || va_next <= eva, | |||||
("pmap_remove of non-transient 1G page " | |||||
"pdpe %#lx sva %#lx eva %#lx va_next %#lx", | |||||
*pdpe, sva, eva, va_next)); | |||||
if ((*pdpe & PG_PS) != 0) { | |||||
retry_pdpe: | |||||
obits = pbits = *pdpe; | |||||
MPASS((pbits & (PG_MANAGED | PG_G)) == 0); | |||||
MPASS(pmap != kernel_pmap); /* XXXKIB */ | |||||
Done Inline ActionsThe MI layer only calls pmap_protect() when restricting permissions. I believe this change is not sufficient to avoid soft faults after a protection change. markj: The MI layer only calls pmap_protect() when restricting permissions. I believe this change is… | |||||
Done Inline ActionsRight, I remembered it as a thing to do during the vm_map.c but failed. In fact, I also should disable changing the inheritance mode for largepage entries, because CoW cannot work there. kib: Right, I remembered it as a thing to do during the vm_map.c but failed. In fact, I also should… | |||||
if ((prot & VM_PROT_WRITE) == 0) | |||||
pbits &= ~(PG_RW | PG_M); | |||||
if ((prot & VM_PROT_EXECUTE) == 0) | |||||
pbits |= pg_nx; | |||||
if (pbits != obits) { | |||||
if (!atomic_cmpset_long(pdpe, obits, pbits)) | |||||
/* PG_PS cannot be cleared under us, */ | |||||
goto retry_pdpe; | |||||
anychanged = TRUE; | |||||
} | |||||
continue; | |||||
} | |||||
va_next = (sva + NBPDR) & ~PDRMASK; | va_next = (sva + NBPDR) & ~PDRMASK; | ||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
pde = pmap_pdpe_to_pde(pdpe, sva); | pde = pmap_pdpe_to_pde(pdpe, sva); | ||||
ptpaddr = *pde; | ptpaddr = *pde; | ||||
/* | /* | ||||
Show All 26 Lines | if ((ptpaddr & PG_PS) != 0) { | ||||
} | } | ||||
} | } | ||||
if (va_next > eva) | if (va_next > eva) | ||||
va_next = eva; | va_next = eva; | ||||
for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, | for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, | ||||
sva += PAGE_SIZE) { | sva += PAGE_SIZE) { | ||||
pt_entry_t obits, pbits; | |||||
vm_page_t m; | |||||
retry: | retry: | ||||
obits = pbits = *pte; | obits = pbits = *pte; | ||||
if ((pbits & PG_V) == 0) | if ((pbits & PG_V) == 0) | ||||
continue; | continue; | ||||
if ((prot & VM_PROT_WRITE) == 0) { | if ((prot & VM_PROT_WRITE) == 0) { | ||||
if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == | if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == | ||||
(PG_MANAGED | PG_M | PG_RW)) { | (PG_MANAGED | PG_M | PG_RW)) { | ||||
▲ Show 20 Lines • Show All 158 Lines • ▼ Show 20 Lines | else | ||||
pde_store(pde, PG_PROMOTED | PG_PS | newpde); | pde_store(pde, PG_PROMOTED | PG_PS | newpde); | ||||
atomic_add_long(&pmap_pde_promotions, 1); | atomic_add_long(&pmap_pde_promotions, 1); | ||||
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" | CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" | ||||
" in pmap %p", va, pmap); | " in pmap %p", va, pmap); | ||||
} | } | ||||
#endif /* VM_NRESERVLEVEL > 0 */ | #endif /* VM_NRESERVLEVEL > 0 */ | ||||
static int | |||||
pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, | |||||
int psind) | |||||
{ | |||||
vm_page_t mp; | |||||
pt_entry_t origpte, *pml4e, *pdpe, *pde, pten, PG_V; | |||||
vm_pindex_t ptepindex; | |||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | |||||
KASSERT(psind > 0 && psind < MAXPAGESIZES, | |||||
("psind %d unexpected", psind)); | |||||
KASSERT(((newpte & PG_FRAME) & (pagesizes[psind] - 1)) == 0, | |||||
("unaligned phys address %#lx newpte %#lx psind %d", | |||||
newpte & PG_FRAME, newpte, psind)); | |||||
KASSERT((va & (pagesizes[psind] - 1)) == 0, | |||||
("unaligned va %#lx psind %d", va, psind)); | |||||
KASSERT(va < VM_MAXUSER_ADDRESS, | |||||
("kernel mode non-transparent superpage")); /* XXXKIB */ | |||||
KASSERT(va + pagesizes[psind] < VM_MAXUSER_ADDRESS, | |||||
("overflowing user map va %#lx psind %d", va, psind)); /* XXXKIB */ | |||||
Done Inline ActionsShould we assert that PG_W is not set on the new entry, and simplify pm_stats.wired_count update conditions below? markj: Should we assert that PG_W is not set on the new entry, and simplify pm_stats.wired_count… | |||||
Done Inline ActionsI do not think so, this would break pmap_unwire(). I just forgot to pass PMAP_WIRED, that was the intent. kib: I do not think so, this would break pmap_unwire(). I just forgot to pass PMAP_WIRED, that was… | |||||
Done Inline ActionsI am confused, earlier you wrote:
and vm_map_entry_unwire() still skips pmap_unwire() for large pages. markj: I am confused, earlier you wrote:
> I just kept the wiring code as is, it bumps the entry user… | |||||
Done Inline ActionsThis is still not done, I will integrate your tests with my patch, then I want to think about this some more. kib: This is still not done, I will integrate your tests with my patch, then I want to think about… | |||||
Done Inline ActionsI will convert the tests to use the new interface and push some new tests, but it might not be done before tomorrow. markj: I will convert the tests to use the new interface and push some new tests, but it might not be… | |||||
PG_V = pmap_valid_bit(pmap); | |||||
restart: | |||||
pten = newpte; | |||||
if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86) | |||||
pten |= pmap_pkru_get(pmap, va); | |||||
ptepindex = pmap_pde_pindex(va); | |||||
if (psind == 2) { /* 1G */ | |||||
if (!pmap_pkru_same(pmap, va, va + NBPDP)) | |||||
return (KERN_PROTECTION_FAILURE); | |||||
pml4e = pmap_pml4e(pmap, va); | |||||
if ((*pml4e & PG_V) == 0) { | |||||
mp = _pmap_allocpte(pmap, NUPDE + NUPDPE + | |||||
((ptepindex - NUPDE) >> NPML4EPGSHIFT), NULL); | |||||
if (mp == NULL) { | |||||
if ((flags & PMAP_ENTER_NOSLEEP) != 0) | |||||
return (KERN_RESOURCE_SHORTAGE); | |||||
PMAP_UNLOCK(pmap); | |||||
vm_wait(NULL); | |||||
PMAP_LOCK(pmap); | |||||
/* | /* | ||||
* Restart at least to recalcuate the pkru | |||||
* key. Our caller must keep the map locked | |||||
* so no paging structure can be validated | |||||
* under us. | |||||
*/ | |||||
goto restart; | |||||
} | |||||
} else { | |||||
mp = PHYS_TO_VM_PAGE(*pml4e & PG_FRAME); | |||||
mp->ref_count++; | |||||
} | |||||
pdpe = pmap_pdpe(pmap, va); | |||||
KASSERT(pdpe != NULL, ("va %#lx lost pdpe", va)); | |||||
origpte = *pdpe; | |||||
KASSERT((origpte & PG_V) == 0 || ((origpte & PG_PS) != 0 && | |||||
(origpte & PG_FRAME) == (newpte & PG_FRAME)), | |||||
("va %#lx changing 1G phys page pdpe %#lx newpte %#lx", | |||||
va, origpte, newpte)); | |||||
if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0) | |||||
pmap->pm_stats.wired_count += NBPDP / PAGE_SIZE; | |||||
else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0) | |||||
pmap->pm_stats.wired_count -= NBPDP / PAGE_SIZE; | |||||
*pdpe = newpte; | |||||
} else /* (psind == 1) */ { /* 2M */ | |||||
if (!pmap_pkru_same(pmap, va, va + NBPDR)) | |||||
return (KERN_PROTECTION_FAILURE); | |||||
pde = pmap_pde(pmap, va); | |||||
if (pde == NULL) { | |||||
mp = _pmap_allocpte(pmap, NUPDE + | |||||
(ptepindex >> NPDPEPGSHIFT), NULL); | |||||
if (mp == NULL) { | |||||
if ((flags & PMAP_ENTER_NOSLEEP) != 0) | |||||
return (KERN_RESOURCE_SHORTAGE); | |||||
PMAP_UNLOCK(pmap); | |||||
vm_wait(NULL); | |||||
PMAP_LOCK(pmap); | |||||
goto restart; | |||||
} | |||||
pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp)); | |||||
pde = &pde[pmap_pde_index(va)]; | |||||
} else { | |||||
pdpe = pmap_pdpe(pmap, va); | |||||
MPASS(pdpe != NULL && (*pdpe & PG_V) != 0); | |||||
mp = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME); | |||||
mp->ref_count++; | |||||
Done Inline ActionsI believe the increment must be conditional on (*pde & PG_V) == 0. markj: I believe the increment must be conditional on `(*pde & PG_V) == 0`. | |||||
Done Inline ActionsAnd for 1G too. kib: And for 1G too. | |||||
} | |||||
KASSERT(pde != NULL, ("va %#lx lost pde", va)); | |||||
origpte = *pde; | |||||
KASSERT((origpte & PG_V) == 0 || ((origpte & PG_PS) != 0 && | |||||
(origpte & PG_FRAME) == (newpte & PG_FRAME)), | |||||
("va %#lx changing 2M phys page pde %#lx newpte %#lx", | |||||
va, origpte, newpte)); | |||||
if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0) | |||||
pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE; | |||||
else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0) | |||||
pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; | |||||
*pde = newpte; | |||||
} | |||||
if ((origpte & PG_V) != 0) | |||||
pmap_resident_count_inc(pmap, pagesizes[psind] / PAGE_SIZE); | |||||
return (KERN_SUCCESS); | |||||
} | |||||
/* | |||||
* Insert the given physical page (p) at | * Insert the given physical page (p) at | ||||
* the specified virtual address (v) in the | * the specified virtual address (v) in the | ||||
* target physical map with the protection requested. | * target physical map with the protection requested. | ||||
* | * | ||||
* If specified, the page will be wired down, meaning | * If specified, the page will be wired down, meaning | ||||
* that the related pte can not be reclaimed. | * that the related pte can not be reclaimed. | ||||
* | * | ||||
* NB: This is the only routine which MAY NOT lazy-evaluate | * NB: This is the only routine which MAY NOT lazy-evaluate | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, | ||||
if ((m->oflags & VPO_UNMANAGED) != 0) { | if ((m->oflags & VPO_UNMANAGED) != 0) { | ||||
if ((newpte & PG_RW) != 0) | if ((newpte & PG_RW) != 0) | ||||
newpte |= PG_M; | newpte |= PG_M; | ||||
} else | } else | ||||
newpte |= PG_MANAGED; | newpte |= PG_MANAGED; | ||||
lock = NULL; | lock = NULL; | ||||
PMAP_LOCK(pmap); | PMAP_LOCK(pmap); | ||||
if ((flags & PMAP_ENTER_LARGEPAGE) != 0) { | |||||
KASSERT((m->oflags & VPO_UNMANAGED) != 0, | |||||
("managed largepage va %#lx flags %#x", va, flags)); | |||||
rv = pmap_enter_largepage(pmap, va, newpte | PG_PS, flags, | |||||
psind); | |||||
goto out; | |||||
} | |||||
if (psind == 1) { | if (psind == 1) { | ||||
/* Assert the required virtual and physical alignment. */ | /* Assert the required virtual and physical alignment. */ | ||||
KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned")); | KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned")); | ||||
KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); | KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); | ||||
rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m, &lock); | rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m, &lock); | ||||
goto out; | goto out; | ||||
} | } | ||||
mpte = NULL; | mpte = NULL; | ||||
▲ Show 20 Lines • Show All 669 Lines • ▼ Show 20 Lines | |||||
*/ | */ | ||||
void | void | ||||
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) | pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) | ||||
{ | { | ||||
vm_offset_t va_next; | vm_offset_t va_next; | ||||
pml4_entry_t *pml4e; | pml4_entry_t *pml4e; | ||||
pdp_entry_t *pdpe; | pdp_entry_t *pdpe; | ||||
pd_entry_t *pde; | pd_entry_t *pde; | ||||
pt_entry_t *pte, PG_V; | pt_entry_t *pte, PG_V, PG_G; | ||||
PG_V = pmap_valid_bit(pmap); | PG_V = pmap_valid_bit(pmap); | ||||
PG_G = pmap_global_bit(pmap); | |||||
PMAP_LOCK(pmap); | PMAP_LOCK(pmap); | ||||
for (; sva < eva; sva = va_next) { | for (; sva < eva; sva = va_next) { | ||||
pml4e = pmap_pml4e(pmap, sva); | pml4e = pmap_pml4e(pmap, sva); | ||||
if ((*pml4e & PG_V) == 0) { | if ((*pml4e & PG_V) == 0) { | ||||
va_next = (sva + NBPML4) & ~PML4MASK; | va_next = (sva + NBPML4) & ~PML4MASK; | ||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
continue; | continue; | ||||
} | } | ||||
pdpe = pmap_pml4e_to_pdpe(pml4e, sva); | pdpe = pmap_pml4e_to_pdpe(pml4e, sva); | ||||
if ((*pdpe & PG_V) == 0) { | if ((*pdpe & PG_V) == 0) { | ||||
va_next = (sva + NBPDP) & ~PDPMASK; | va_next = (sva + NBPDP) & ~PDPMASK; | ||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
continue; | continue; | ||||
} | } | ||||
KASSERT((*pdpe & PG_PS) == 0 || va_next <= eva, | |||||
("pmap_unwire of non-transient 1G page " | |||||
"pdpe %#lx sva %#lx eva %#lx va_next %#lx", | |||||
*pdpe, sva, eva, va_next)); | |||||
if ((*pdpe & PG_PS) != 0) { | |||||
MPASS(pmap != kernel_pmap); /* XXXKIB */ | |||||
MPASS((*pdpe & (PG_MANAGED | PG_G)) == 0); | |||||
atomic_clear_long(pdpe, PG_W); | |||||
pmap->pm_stats.wired_count -= NBPDP / PAGE_SIZE; | |||||
continue; | |||||
} | |||||
va_next = (sva + NBPDR) & ~PDRMASK; | va_next = (sva + NBPDR) & ~PDRMASK; | ||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
pde = pmap_pdpe_to_pde(pdpe, sva); | pde = pmap_pdpe_to_pde(pdpe, sva); | ||||
if ((*pde & PG_V) == 0) | if ((*pde & PG_V) == 0) | ||||
continue; | continue; | ||||
if ((*pde & PG_PS) != 0) { | if ((*pde & PG_PS) != 0) { | ||||
if ((*pde & PG_W) == 0) | if ((*pde & PG_W) == 0) | ||||
▲ Show 20 Lines • Show All 100 Lines • ▼ Show 20 Lines | for (addr = src_addr; addr < end_addr; addr = va_next) { | ||||
if ((*pdpe & PG_V) == 0) { | if ((*pdpe & PG_V) == 0) { | ||||
va_next = (addr + NBPDP) & ~PDPMASK; | va_next = (addr + NBPDP) & ~PDPMASK; | ||||
if (va_next < addr) | if (va_next < addr) | ||||
va_next = end_addr; | va_next = end_addr; | ||||
continue; | continue; | ||||
} | } | ||||
va_next = (addr + NBPDR) & ~PDRMASK; | va_next = (addr + NBPDR) & ~PDRMASK; | ||||
KASSERT((*pdpe & PG_PS) == 0 || va_next <= end_addr, | |||||
("pmap_copy of partial non-transient 1G page " | |||||
"pdpe %#lx sva %#lx eva %#lx va_next %#lx", | |||||
*pdpe, addr, end_addr, va_next)); | |||||
if ((*pdpe & PG_PS) != 0) | |||||
continue; | |||||
if (va_next < addr) | if (va_next < addr) | ||||
va_next = end_addr; | va_next = end_addr; | ||||
pde = pmap_pdpe_to_pde(pdpe, addr); | pde = pmap_pdpe_to_pde(pdpe, addr); | ||||
srcptepaddr = *pde; | srcptepaddr = *pde; | ||||
if (srcptepaddr == 0) | if (srcptepaddr == 0) | ||||
continue; | continue; | ||||
▲ Show 20 Lines • Show All 1,040 Lines • ▼ Show 20 Lines | if ((*pdpe & PG_V) == 0) { | ||||
va_next = (sva + NBPDP) & ~PDPMASK; | va_next = (sva + NBPDP) & ~PDPMASK; | ||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
continue; | continue; | ||||
} | } | ||||
va_next = (sva + NBPDR) & ~PDRMASK; | va_next = (sva + NBPDR) & ~PDRMASK; | ||||
if (va_next < sva) | if (va_next < sva) | ||||
va_next = eva; | va_next = eva; | ||||
KASSERT((*pdpe & PG_PS) == 0 || va_next <= eva, | |||||
("pmap_advise of non-transient 1G page " | |||||
"pdpe %#lx sva %#lx eva %#lx va_next %#lx", | |||||
*pdpe, sva, eva, va_next)); | |||||
if ((*pdpe & PG_PS) != 0) | |||||
continue; | |||||
pde = pmap_pdpe_to_pde(pdpe, sva); | pde = pmap_pdpe_to_pde(pdpe, sva); | ||||
oldpde = *pde; | oldpde = *pde; | ||||
if ((oldpde & PG_V) == 0) | if ((oldpde & PG_V) == 0) | ||||
continue; | continue; | ||||
else if ((oldpde & PG_PS) != 0) { | else if ((oldpde & PG_PS) != 0) { | ||||
if ((oldpde & PG_MANAGED) == 0) | if ((oldpde & PG_MANAGED) == 0) | ||||
continue; | continue; | ||||
lock = NULL; | lock = NULL; | ||||
▲ Show 20 Lines • Show All 743 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* Perform the pmap work for mincore(2). If the page is not both referenced and | * Perform the pmap work for mincore(2). If the page is not both referenced and | ||||
* modified by this pmap, returns its physical address so that the caller can | * modified by this pmap, returns its physical address so that the caller can | ||||
* find other mappings. | * find other mappings. | ||||
*/ | */ | ||||
int | int | ||||
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap) | pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap) | ||||
{ | { | ||||
pdp_entry_t *pdpe; | |||||
pd_entry_t *pdep; | pd_entry_t *pdep; | ||||
pt_entry_t pte, PG_A, PG_M, PG_RW, PG_V; | pt_entry_t pte, PG_A, PG_M, PG_RW, PG_V; | ||||
vm_paddr_t pa; | vm_paddr_t pa; | ||||
int val; | int val; | ||||
PG_A = pmap_accessed_bit(pmap); | PG_A = pmap_accessed_bit(pmap); | ||||
PG_M = pmap_modified_bit(pmap); | PG_M = pmap_modified_bit(pmap); | ||||
PG_V = pmap_valid_bit(pmap); | PG_V = pmap_valid_bit(pmap); | ||||
PG_RW = pmap_rw_bit(pmap); | PG_RW = pmap_rw_bit(pmap); | ||||
PMAP_LOCK(pmap); | PMAP_LOCK(pmap); | ||||
pte = 0; | |||||
pa = 0; | |||||
val = 0; | |||||
pdpe = pmap_pdpe(pmap, addr); | |||||
if ((*pdpe & PG_V) != 0) { | |||||
if ((*pdpe & PG_PS) != 0) { | |||||
pte = *pdpe; | |||||
pa = ((pte & PG_PS_PDP_FRAME) | (addr & PDPMASK)) & | |||||
PG_FRAME; | |||||
val = MINCORE_SUPER; | |||||
} else { | |||||
pdep = pmap_pde(pmap, addr); | pdep = pmap_pde(pmap, addr); | ||||
if (pdep != NULL && (*pdep & PG_V)) { | if (pdep != NULL && (*pdep & PG_V) != 0) { | ||||
if (*pdep & PG_PS) { | if ((*pdep & PG_PS) != 0) { | ||||
pte = *pdep; | pte = *pdep; | ||||
/* Compute the physical address of the 4KB page. */ | /* Compute the physical address of the 4KB page. */ | ||||
pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & | pa = ((pte & PG_PS_FRAME) | (addr & | ||||
PG_FRAME; | PDRMASK)) & PG_FRAME; | ||||
val = MINCORE_SUPER; | val = MINCORE_SUPER; | ||||
} else { | } else { | ||||
pte = *pmap_pde_to_pte(pdep, addr); | pte = *pmap_pde_to_pte(pdep, addr); | ||||
pa = pte & PG_FRAME; | pa = pte & PG_FRAME; | ||||
val = 0; | val = 0; | ||||
} | } | ||||
} else { | } | ||||
pte = 0; | } | ||||
pa = 0; | |||||
val = 0; | |||||
} | } | ||||
if ((pte & PG_V) != 0) { | if ((pte & PG_V) != 0) { | ||||
val |= MINCORE_INCORE; | val |= MINCORE_INCORE; | ||||
if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) | if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) | ||||
val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; | val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; | ||||
if ((pte & PG_A) != 0) | if ((pte & PG_A) != 0) | ||||
val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; | val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,969 Lines • Show Last 20 Lines |
This could be restructured a bit to avoid testing m != NULL after the check_page label: