Changeset View
Standalone View
sys/amd64/amd64/pmap.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 1,325 Lines • ▼ Show 20 Lines | bootaddr_rwx(vm_paddr_t pa) | ||||
* Note that fixups to the .text section will still work until we | * Note that fixups to the .text section will still work until we | ||||
* set CR0.WP. | * set CR0.WP. | ||||
*/ | */ | ||||
if (pa < round_2mpage(etext - KERNBASE)) | if (pa < round_2mpage(etext - KERNBASE)) | ||||
return (0); | return (0); | ||||
return (pg_nx); | return (pg_nx); | ||||
} | } | ||||
static void | static void | ||||
alc: Before introducing a new variable, please take a look at how KERNend is initialized and used. | |||||
Done Inline ActionsWe probably can remove KERNend if going ahead with my change. I think you mean that KERNend is de-facto unused in the patched pmap, and kern_promoted_end can be int, i.e. 4 bytes shorter. kib: We probably can remove KERNend if going ahead with my change. I think you mean that KERNend is… | |||||
create_pagetables(vm_paddr_t *firstaddr) | create_pagetables(vm_paddr_t *firstaddr) | ||||
{ | { | ||||
int i, j, ndm1g, nkpdpe, nkdmpde; | int i, j, ndm1g, nkpdpe, nkdmpde; | ||||
pt_entry_t *pt_p; | pt_entry_t *pt_p; | ||||
pd_entry_t *pd_p; | pd_entry_t *pd_p; | ||||
pdp_entry_t *pdp_p; | pdp_entry_t *pdp_p; | ||||
pml4_entry_t *p4_p; | pml4_entry_t *p4_p; | ||||
uint64_t DMPDkernphys; | uint64_t DMPDkernphys; | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | create_pagetables(vm_paddr_t *firstaddr) | ||||
nkpdpe = NKPDPE(nkpt); | nkpdpe = NKPDPE(nkpt); | ||||
KPTphys = allocpages(firstaddr, nkpt); | KPTphys = allocpages(firstaddr, nkpt); | ||||
KPDphys = allocpages(firstaddr, nkpdpe); | KPDphys = allocpages(firstaddr, nkpdpe); | ||||
/* Fill in the underlying page table pages */ | /* Fill in the underlying page table pages */ | ||||
/* XXX not fully used, underneath 2M pages */ | /* XXX not fully used, underneath 2M pages */ | ||||
pt_p = (pt_entry_t *)KPTphys; | pt_p = (pt_entry_t *)KPTphys; | ||||
for (i = 0; ptoa(i) < *firstaddr; i++) | for (i = 0; ptoa(i) < *firstaddr; i++) | ||||
Not Done Inline ActionsIf you're using rounded-up *firstaddr for this loop, you should just move the loop to after the point where you've rounded up *firstaddr in line 1045. dougm: If you're using rounded-up *firstaddr for this loop, you should just move the loop to after the… | |||||
Done Inline ActionsThis operation is boot-time only, so micro-optimizing it is not too important. The current sequence of initialization actually makes some sense from the hierarchical view on the page tables, and I want to keep it as is. kib: This operation is boot-time only, so micro-optimizing it is not too important. The current… | |||||
Not Done Inline ActionsUnless you want to set PG_PROMOTED on the PDEs below, this loop is pointless (and has been for quite some time). If we actually want to support demotion on the virtual address range being mapped here, we should be inserting these page table pages into the pmap's radix tree. alc: Unless you want to set PG_PROMOTED on the PDEs below, this loop is pointless (and has been for… | |||||
Not Done Inline Actions... and, in fact, pmap_init() is doing the insertion. alc: ... and, in fact, pmap_init() is doing the insertion. | |||||
pt_p[i] = ptoa(i) | X86_PG_V | pg_g | bootaddr_rwx(ptoa(i)); | pt_p[i] = ptoa(i) | X86_PG_V | pg_g | bootaddr_rwx(ptoa(i)); | ||||
/* Now map the page tables at their location within PTmap */ | /* Now map the page tables at their location within PTmap */ | ||||
pd_p = (pd_entry_t *)KPDphys; | pd_p = (pd_entry_t *)KPDphys; | ||||
for (i = 0; i < nkpt; i++) | for (i = 0; i < nkpt; i++) | ||||
pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V; | pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V; | ||||
/* Map from zero to end of allocations under 2M pages */ | /* Map from zero to end of allocations under 2M pages */ | ||||
▲ Show 20 Lines • Show All 333 Lines • ▼ Show 20 Lines | pmap_init(void) | ||||
PMAP_LOCK(kernel_pmap); | PMAP_LOCK(kernel_pmap); | ||||
for (i = 0; i < nkpt; i++) { | for (i = 0; i < nkpt; i++) { | ||||
mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); | mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); | ||||
KASSERT(mpte >= vm_page_array && | KASSERT(mpte >= vm_page_array && | ||||
mpte < &vm_page_array[vm_page_array_size], | mpte < &vm_page_array[vm_page_array_size], | ||||
("pmap_init: page table page is out of range")); | ("pmap_init: page table page is out of range")); | ||||
mpte->pindex = pmap_pde_pindex(KERNBASE) + i; | mpte->pindex = pmap_pde_pindex(KERNBASE) + i; | ||||
mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); | mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); | ||||
mpte->wire_count = 1; | mpte->wire_count = 1; | ||||
Not Done Inline ActionsThe kernel page table pages that are dynamically allocated by pmap_growkernel() will have a wire count of 1. alc: The kernel page table pages that are dynamically allocated by pmap_growkernel() will have a… | |||||
if (i << PDRSHIFT < KERNend && | if (i << PDRSHIFT < KERNend && | ||||
Not Done Inline ActionsI'm not convinced that KERNend is the correct value to use here. I think that it is too small. alc: I'm not convinced that KERNend is the correct value to use here. I think that it is too small. | |||||
pmap_insert_pt_page(kernel_pmap, mpte)) | pmap_insert_pt_page(kernel_pmap, mpte)) | ||||
panic("pmap_init: pmap_insert_pt_page failed"); | panic("pmap_init: pmap_insert_pt_page failed"); | ||||
} | } | ||||
PMAP_UNLOCK(kernel_pmap); | PMAP_UNLOCK(kernel_pmap); | ||||
vm_wire_add(nkpt); | vm_wire_add(nkpt); | ||||
/* | /* | ||||
* If the kernel is running on a virtual machine, then it must assume | * If the kernel is running on a virtual machine, then it must assume | ||||
▲ Show 20 Lines • Show All 2,713 Lines • ▼ Show 20 Lines | pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) | ||||
lock = NULL; | lock = NULL; | ||||
rv = pmap_demote_pde_locked(pmap, pde, va, &lock); | rv = pmap_demote_pde_locked(pmap, pde, va, &lock); | ||||
if (lock != NULL) | if (lock != NULL) | ||||
rw_wunlock(lock); | rw_wunlock(lock); | ||||
return (rv); | return (rv); | ||||
} | } | ||||
static void | |||||
pmap_demote_pde_check(pt_entry_t *firstpte __unused, pt_entry_t newpte __unused) | |||||
{ | |||||
#ifdef INVARIANTS | |||||
#ifdef DIAGNOSTIC | |||||
pt_entry_t *xpte, *ypte; | |||||
for (xpte = firstpte; xpte < firstpte + NPTEPG; xpte++) { | |||||
if ((*xpte & PG_FRAME) != (newpte & PG_FRAME) + | |||||
ptoa(xpte - firstpte)) { | |||||
Not Done Inline ActionsYou could simply add PAGE_SIZE to newpte at the end of each iteration. alc: You could simply add PAGE_SIZE to newpte at the end of each iteration. | |||||
printf("pmap_demote_pde: xpte %zd and newpte map " | |||||
"different pages %#lx %#lx\n", | |||||
Not Done Inline ActionsI suggest: "different pages: found %#lx, expected %#lx\n" alc: I suggest: "different pages: found %#lx, expected %#lx\n" | |||||
xpte - firstpte, *xpte, newpte); | |||||
printf("XX %lx %lx\n", (*xpte & PG_FRAME), | |||||
Not Done Inline ActionsThe "XX" should be replaced. alc: The "XX" should be replaced. | |||||
(newpte & PG_FRAME) + ptoa(xpte - firstpte)); | |||||
for (ypte = firstpte; ypte < firstpte + NPTEPG; ypte++) | |||||
printf("%zd %#lx\n", ypte - firstpte, *ypte); | |||||
panic("firstpte"); | |||||
} | |||||
} | |||||
#else | |||||
KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), | |||||
("pmap_demote_pde: firstpte and newpte map different physical" | |||||
" addresses")); | |||||
#endif | |||||
#endif | |||||
} | |||||
static void | |||||
pmap_demote_pde_fail(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, | |||||
pd_entry_t oldpde, struct rwlock **lockp) | |||||
{ | |||||
struct spglist free; | |||||
vm_offset_t sva; | |||||
SLIST_INIT(&free); | |||||
sva = trunc_2mpage(va); | |||||
pmap_remove_pde(pmap, pde, sva, &free, lockp); | |||||
if ((oldpde & pmap_global_bit(pmap)) == 0) | |||||
pmap_invalidate_pde_page(pmap, sva, oldpde); | |||||
vm_page_free_pages_toq(&free, true); | |||||
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" | |||||
" in pmap %p", va, pmap); | |||||
Not Done Inline ActionsYou should be able to unwrap the format string. alc: You should be able to unwrap the format string. | |||||
} | |||||
static boolean_t | static boolean_t | ||||
pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | ||||
struct rwlock **lockp) | struct rwlock **lockp) | ||||
{ | { | ||||
pd_entry_t newpde, oldpde; | pd_entry_t newpde, oldpde; | ||||
pt_entry_t *firstpte, newpte; | pt_entry_t *firstpte, newpte; | ||||
pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V; | pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V; | ||||
vm_paddr_t mptepa; | vm_paddr_t mptepa; | ||||
vm_page_t mpte; | vm_page_t mpte; | ||||
struct spglist free; | |||||
vm_offset_t sva; | |||||
int PG_PTE_CACHE; | int PG_PTE_CACHE; | ||||
bool in_kernel; | |||||
PG_G = pmap_global_bit(pmap); | |||||
PG_A = pmap_accessed_bit(pmap); | PG_A = pmap_accessed_bit(pmap); | ||||
PG_G = pmap_global_bit(pmap); | |||||
alcUnsubmitted Not Done Inline ActionsYou've eliminated the only use of PG_G in this function. alc: You've eliminated the only use of PG_G in this function. | |||||
kibAuthorUnsubmitted Done Inline ActionsIt is used by PG_PTE_PROMOTED macro. I tried that in initial version. kib: It is used by PG_PTE_PROMOTED macro. I tried that in initial version. | |||||
PG_M = pmap_modified_bit(pmap); | PG_M = pmap_modified_bit(pmap); | ||||
PG_RW = pmap_rw_bit(pmap); | PG_RW = pmap_rw_bit(pmap); | ||||
PG_V = pmap_valid_bit(pmap); | PG_V = pmap_valid_bit(pmap); | ||||
PG_PTE_CACHE = pmap_cache_mask(pmap, 0); | PG_PTE_CACHE = pmap_cache_mask(pmap, 0); | ||||
PG_PKU_MASK = pmap_pku_mask_bit(pmap); | PG_PKU_MASK = pmap_pku_mask_bit(pmap); | ||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | PMAP_LOCK_ASSERT(pmap, MA_OWNED); | ||||
in_kernel = va >= VM_MAXUSER_ADDRESS; | |||||
Not Done Inline ActionsI guess it's supposed to be MAXUSER_ADDRESS. Technically this is a functional change since before we were only allocating with interrupt priority for demotions in the direct map. I think it's fine, just contradicts the review description. markj: I guess it's supposed to be MAXUSER_ADDRESS. Technically this is a functional change since… | |||||
Not Done Inline ActionsThe only situation in which this code should ever allocate a kernel page table page is when it is called upon to demote a direct map mapping, so this doesn't really represent a functional change. alc: The only situation in which this code should ever allocate a kernel page table page is when it… | |||||
oldpde = *pde; | oldpde = *pde; | ||||
KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), | KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), | ||||
("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); | ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); | ||||
if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == | if ((oldpde & PG_A) == 0) { | ||||
NULL) { | pmap_demote_pde_fail(pmap, va, pde, oldpde, lockp); | ||||
markjUnsubmitted Done Inline Actions_fail is a bit misleading here. Maybe _abort? markj: _fail is a bit misleading here. Maybe _abort? | |||||
return (FALSE); | |||||
} | |||||
mpte = pmap_remove_pt_page(pmap, va); | |||||
if (mpte == NULL) { | |||||
KASSERT((oldpde & PG_W) == 0, | KASSERT((oldpde & PG_W) == 0, | ||||
("pmap_demote_pde: page table page for a wired mapping" | ("pmap_demote_pde: page table page for a wired mapping" | ||||
" is missing")); | " is missing")); | ||||
KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS && | |||||
va < DMAP_MAX_ADDRESS), | |||||
("pmap_demote_pde: No saved mpte for va %#lx", va)); | |||||
/* | /* | ||||
* Invalidate the 2MB page mapping and return "failure" if the | * Invalidate the 2MB page mapping and return "failure" if the | ||||
* mapping was never accessed or the allocation of the new | * mapping was never accessed or the allocation of the new | ||||
* page table page fails. If the 2MB page mapping belongs to | * page table page fails. If the 2MB page mapping belongs to | ||||
* the direct map region of the kernel's address space, then | * the direct map region of the kernel's address space, then | ||||
* the page allocation request specifies the highest possible | * the page allocation request specifies the highest possible | ||||
* priority (VM_ALLOC_INTERRUPT). Otherwise, the priority is | * priority (VM_ALLOC_INTERRUPT). Otherwise, the priority is | ||||
* normal. Page table pages are preallocated for every other | * normal. Page table pages are preallocated for every other | ||||
* part of the kernel address space, so the direct map region | * part of the kernel address space, so the direct map region | ||||
* is the only part of the kernel address space that must be | * is the only part of the kernel address space that must be | ||||
* handled here. | * handled here. | ||||
*/ | */ | ||||
alcUnsubmitted Done Inline ActionsThe first part of this comment no longer belongs here. alc: The first part of this comment no longer belongs here. | |||||
Not Done Inline Actions"is for a kernel address, ..." alc: "is for a kernel address, ..." | |||||
if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, | mpte = vm_page_alloc(NULL, pmap_pde_pindex(va), | ||||
pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va < | (in_kernel ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | | ||||
DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); | ||||
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { | if (mpte == NULL) { | ||||
SLIST_INIT(&free); | pmap_demote_pde_fail(pmap, va, pde, oldpde, lockp); | ||||
sva = trunc_2mpage(va); | |||||
pmap_remove_pde(pmap, pde, sva, &free, lockp); | |||||
if ((oldpde & PG_G) == 0) | |||||
pmap_invalidate_pde_page(pmap, sva, oldpde); | |||||
vm_page_free_pages_toq(&free, true); | |||||
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" | |||||
" in pmap %p", va, pmap); | |||||
return (FALSE); | return (FALSE); | ||||
} | } | ||||
if (va < VM_MAXUSER_ADDRESS) { | if (!in_kernel) { | ||||
mpte->wire_count = NPTEPG; | mpte->wire_count = NPTEPG; | ||||
pmap_resident_count_inc(pmap, 1); | pmap_resident_count_inc(pmap, 1); | ||||
} | } | ||||
} | } | ||||
mptepa = VM_PAGE_TO_PHYS(mpte); | mptepa = VM_PAGE_TO_PHYS(mpte); | ||||
firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); | firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); | ||||
newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; | newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; | ||||
KASSERT((oldpde & PG_A) != 0, | KASSERT((oldpde & PG_A) != 0, | ||||
("pmap_demote_pde: oldpde is missing PG_A")); | ("pmap_demote_pde: oldpde is missing PG_A")); | ||||
KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, | KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, | ||||
Not Done Inline Actions"abort" could reasonably be interpreted as leave the mapping in place. Instead, I would explicitly say, "invalidate the 2MB page mapping". alc: "abort" could reasonably be interpreted as leave the mapping in place. Instead, I would… | |||||
("pmap_demote_pde: oldpde is missing PG_M")); | ("pmap_demote_pde: oldpde is missing PG_M")); | ||||
newpte = oldpde & ~PG_PS; | newpte = oldpde & ~PG_PS; | ||||
newpte = pmap_swap_pat(pmap, newpte); | newpte = pmap_swap_pat(pmap, newpte); | ||||
/* | /* | ||||
* If the page table page is not leftover from an earlier promotion, | * If the page table page is not leftover from an earlier promotion, | ||||
* initialize it. | * initialize it. | ||||
*/ | */ | ||||
if ((oldpde & PG_PROMOTED) == 0) | if ((oldpde & PG_PROMOTED) == 0) | ||||
pmap_fill_ptp(firstpte, newpte); | pmap_fill_ptp(firstpte, newpte); | ||||
KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), | pmap_demote_pde_check(firstpte, newpte); | ||||
Not Done Inline ActionsWhen I wrote this block of code, the PG_PROMOTED flag did not yet exist. If, today, I were writing this block for the first time, I would test (oldpte & PG_PROMOTED) == 0 instead of mpte->wire_count == 1. alc: When I wrote this block of code, the PG_PROMOTED flag did not yet exist. If, today, I were… | |||||
("pmap_demote_pde: firstpte and newpte map different physical" | |||||
" addresses")); | |||||
/* | /* | ||||
* If the mapping has changed attributes, update the page table | * If the mapping has changed attributes, update the page table | ||||
* entries. | * entries. | ||||
*/ | */ | ||||
if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) | if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) | ||||
pmap_fill_ptp(firstpte, newpte); | pmap_fill_ptp(firstpte, newpte); | ||||
Not Done Inline ActionsI speculate that this block would no longer be needed if we test (oldpte & PG_PROMOTED) == 0 above. In particular, pmap_protect_pde() clears PG_PROMOTED if it removes access rights from the PDE. alc: I speculate that this block would no longer be needed if we test (oldpte & PG_PROMOTED) == 0… | |||||
/* | /* | ||||
* The spare PV entries must be reserved prior to demoting the | * The spare PV entries must be reserved prior to demoting the | ||||
* mapping, that is, prior to changing the PDE. Otherwise, the state | * mapping, that is, prior to changing the PDE. Otherwise, the state | ||||
* of the PDE and the PV lists will be inconsistent, which can result | * of the PDE and the PV lists will be inconsistent, which can result | ||||
* in reclaim_pv_chunk() attempting to remove a PV entry from the | * in reclaim_pv_chunk() attempting to remove a PV entry from the | ||||
* wrong PV list and pmap_pv_demote_pde() failing to find the expected | * wrong PV list and pmap_pv_demote_pde() failing to find the expected | ||||
* PV entry for the 2MB page mapping that is being demoted. | * PV entry for the 2MB page mapping that is being demoted. | ||||
Show All 11 Lines | pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | ||||
if (workaround_erratum383) | if (workaround_erratum383) | ||||
pmap_update_pde(pmap, va, pde, newpde); | pmap_update_pde(pmap, va, pde, newpde); | ||||
else | else | ||||
pde_store(pde, newpde); | pde_store(pde, newpde); | ||||
/* | /* | ||||
* Invalidate a stale recursive mapping of the page table page. | * Invalidate a stale recursive mapping of the page table page. | ||||
*/ | */ | ||||
if (va >= VM_MAXUSER_ADDRESS) | if (in_kernel) | ||||
pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); | pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); | ||||
/* | /* | ||||
* Demote the PV entry. | * Demote the PV entry. | ||||
*/ | */ | ||||
if ((oldpde & PG_MANAGED) != 0) | if ((oldpde & PG_MANAGED) != 0) | ||||
pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); | pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); | ||||
atomic_add_long(&pmap_pde_demotions, 1); | atomic_add_long(&pmap_pde_demotions, 1); | ||||
CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx" | CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx" | ||||
" in pmap %p", va, pmap); | " in pmap %p", va, pmap); | ||||
Not Done Inline ActionsThis format string may be unwrapped as well. markj: This format string may be unwrapped as well. | |||||
return (TRUE); | return (TRUE); | ||||
} | } | ||||
/* | /* | ||||
* pmap_remove_kernel_pde: Remove a kernel superpage mapping. | * pmap_remove_kernel_pde: Remove a kernel superpage mapping. | ||||
*/ | */ | ||||
static void | static void | ||||
pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) | pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) | ||||
▲ Show 20 Lines • Show All 5,149 Lines • Show Last 20 Lines |
Before introducing a new variable, please take a look at how KERNend is initialized and used. I don't think that we need both KERNend and kern_promoted_end.