Changeset View
Standalone View
sys/amd64/amd64/pmap.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 1,328 Lines • ▼ Show 20 Lines | bootaddr_rwx(vm_paddr_t pa) | ||||
* Note that fixups to the .text section will still work until we | * Note that fixups to the .text section will still work until we | ||||
* set CR0.WP. | * set CR0.WP. | ||||
*/ | */ | ||||
if (pa < round_2mpage(etext - KERNBASE)) | if (pa < round_2mpage(etext - KERNBASE)) | ||||
return (0); | return (0); | ||||
return (pg_nx); | return (pg_nx); | ||||
} | } | ||||
static void | static void | ||||
alc: Before introducing a new variable, please take a look at how KERNend is initialized and used. | |||||
Done Inline Actions — kib: We probably can remove KERNend if going ahead with my change. I think you mean that KERNend is de-facto unused in the patched pmap, and kern_promoted_end can be int, i.e. 4 bytes shorter.
create_pagetables(vm_paddr_t *firstaddr) | create_pagetables(vm_paddr_t *firstaddr) | ||||
{ | { | ||||
int i, j, ndm1g, nkpdpe, nkdmpde; | int i, j, ndm1g, nkpdpe, nkdmpde; | ||||
pd_entry_t *pd_p; | pd_entry_t *pd_p; | ||||
pdp_entry_t *pdp_p; | pdp_entry_t *pdp_p; | ||||
pml4_entry_t *p4_p; | pml4_entry_t *p4_p; | ||||
uint64_t DMPDkernphys; | uint64_t DMPDkernphys; | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | create_pagetables(vm_paddr_t *firstaddr) | ||||
* all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed) | * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed) | ||||
* pages. (pmap_enter requires a PD page to exist for each KPML4E.) | * pages. (pmap_enter requires a PD page to exist for each KPML4E.) | ||||
*/ | */ | ||||
nkpt_init(*firstaddr); | nkpt_init(*firstaddr); | ||||
nkpdpe = NKPDPE(nkpt); | nkpdpe = NKPDPE(nkpt); | ||||
KPTphys = allocpages(firstaddr, nkpt); | KPTphys = allocpages(firstaddr, nkpt); | ||||
KPDphys = allocpages(firstaddr, nkpdpe); | KPDphys = allocpages(firstaddr, nkpdpe); | ||||
/* | /* | ||||
Not Done Inline ActionsIf you're using rounded-up *firstaddr for this loop, you should just move the loop to after the point where you've rounded up *firstaddr in line 1045. dougm: If you're using rounded-up *firstaddr for this loop, you should just move the loop to after the… | |||||
Done Inline ActionsThis operation is boot-time only, so micro-optimizing it is not too important. The current sequence of initialization actually makes some sense from the hierarchical view on the page tables, and I want to keep it as is. kib: This operation is boot-time only, so micro-optimizing it is not too important. The current… | |||||
Not Done Inline ActionsUnless you want to set PG_PROMOTED on the PDEs below, this loop is pointless (and has been for quite some time). If we actually want to support demotion on the virtual address range being mapped here, we should be inserting these page table pages into the pmap's radix tree. alc: Unless you want to set PG_PROMOTED on the PDEs below, this loop is pointless (and has been for… | |||||
Not Done Inline Actions... and, in fact, pmap_init() is doing the insertion. alc: ... and, in fact, pmap_init() is doing the insertion. | |||||
* Connect the zero-filled PT pages to their PD entries. This | * Connect the zero-filled PT pages to their PD entries. This | ||||
* implicitly maps the PT pages at their correct locations within | * implicitly maps the PT pages at their correct locations within | ||||
* the PTmap. | * the PTmap. | ||||
*/ | */ | ||||
pd_p = (pd_entry_t *)KPDphys; | pd_p = (pd_entry_t *)KPDphys; | ||||
for (i = 0; i < nkpt; i++) | for (i = 0; i < nkpt; i++) | ||||
pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V; | pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V; | ||||
▲ Show 20 Lines • Show All 341 Lines • ▼ Show 20 Lines | pmap_init(void) | ||||
PMAP_LOCK(kernel_pmap); | PMAP_LOCK(kernel_pmap); | ||||
for (i = 0; i < nkpt; i++) { | for (i = 0; i < nkpt; i++) { | ||||
mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); | mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); | ||||
KASSERT(mpte >= vm_page_array && | KASSERT(mpte >= vm_page_array && | ||||
mpte < &vm_page_array[vm_page_array_size], | mpte < &vm_page_array[vm_page_array_size], | ||||
("pmap_init: page table page is out of range")); | ("pmap_init: page table page is out of range")); | ||||
mpte->pindex = pmap_pde_pindex(KERNBASE) + i; | mpte->pindex = pmap_pde_pindex(KERNBASE) + i; | ||||
mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); | mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); | ||||
mpte->wire_count = 1; | mpte->wire_count = 1; | ||||
Not Done Inline ActionsThe kernel page table pages that are dynamically allocated by pmap_growkernel() will have a wire count of 1. alc: The kernel page table pages that are dynamically allocated by pmap_growkernel() will have a… | |||||
if (i << PDRSHIFT < KERNend && | if (i << PDRSHIFT < KERNend && | ||||
Not Done Inline ActionsI'm not convinced that KERNend is the correct value to use here. I think that it is too small. alc: I'm not convinced that KERNend is the correct value to use here. I think that it is too small. | |||||
pmap_insert_pt_page(kernel_pmap, mpte)) | pmap_insert_pt_page(kernel_pmap, mpte)) | ||||
panic("pmap_init: pmap_insert_pt_page failed"); | panic("pmap_init: pmap_insert_pt_page failed"); | ||||
} | } | ||||
PMAP_UNLOCK(kernel_pmap); | PMAP_UNLOCK(kernel_pmap); | ||||
vm_wire_add(nkpt); | vm_wire_add(nkpt); | ||||
/* | /* | ||||
* If the kernel is running on a virtual machine, then it must assume | * If the kernel is running on a virtual machine, then it must assume | ||||
▲ Show 20 Lines • Show All 2,716 Lines • ▼ Show 20 Lines | pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) | ||||
lock = NULL; | lock = NULL; | ||||
rv = pmap_demote_pde_locked(pmap, pde, va, &lock); | rv = pmap_demote_pde_locked(pmap, pde, va, &lock); | ||||
if (lock != NULL) | if (lock != NULL) | ||||
rw_wunlock(lock); | rw_wunlock(lock); | ||||
return (rv); | return (rv); | ||||
} | } | ||||
static void | |||||
pmap_demote_pde_check(pt_entry_t *firstpte __unused, pt_entry_t newpte __unused) | |||||
{ | |||||
#ifdef INVARIANTS | |||||
#ifdef DIAGNOSTIC | |||||
pt_entry_t *xpte, *ypte; | |||||
for (xpte = firstpte; xpte < firstpte + NPTEPG; | |||||
xpte++, newpte += PAGE_SIZE) { | |||||
if ((*xpte & PG_FRAME) != (newpte & PG_FRAME)) { | |||||
Not Done Inline ActionsYou could simply add PAGE_SIZE to newpte at the end of each iteration. alc: You could simply add PAGE_SIZE to newpte at the end of each iteration. | |||||
printf("pmap_demote_pde: xpte %zd and newpte map " | |||||
"different pages table %#lx expected %#lx\n", | |||||
alcUnsubmitted Not Done Inline ActionsI suggest: "different pages: found %#lx, expected %#lx\n" alc: I suggest: "different pages: found %#lx, expected %#lx\n" | |||||
xpte - firstpte, *xpte, newpte); | |||||
printf("page table dump\n"); | |||||
Not Done Inline ActionsThe "XX" should be replaced. alc: The "XX" should be replaced. | |||||
for (ypte = firstpte; ypte < firstpte + NPTEPG; ypte++) | |||||
printf("%zd %#lx\n", ypte - firstpte, *ypte); | |||||
panic("firstpte"); | |||||
} | |||||
} | |||||
#else | |||||
KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), | |||||
("pmap_demote_pde: firstpte and newpte map different physical" | |||||
" addresses")); | |||||
#endif | |||||
#endif | |||||
} | |||||
static void | |||||
pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, | |||||
pd_entry_t oldpde, struct rwlock **lockp) | |||||
{ | |||||
struct spglist free; | |||||
vm_offset_t sva; | |||||
SLIST_INIT(&free); | |||||
sva = trunc_2mpage(va); | |||||
pmap_remove_pde(pmap, pde, sva, &free, lockp); | |||||
if ((oldpde & pmap_global_bit(pmap)) == 0) | |||||
pmap_invalidate_pde_page(pmap, sva, oldpde); | |||||
vm_page_free_pages_toq(&free, true); | |||||
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx in pmap %p", | |||||
va, pmap); | |||||
} | |||||
Not Done Inline ActionsYou should be able to unwrap the format string. alc: You should be able to unwrap the format string. | |||||
static boolean_t | static boolean_t | ||||
pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | ||||
struct rwlock **lockp) | struct rwlock **lockp) | ||||
{ | { | ||||
pd_entry_t newpde, oldpde; | pd_entry_t newpde, oldpde; | ||||
pt_entry_t *firstpte, newpte; | pt_entry_t *firstpte, newpte; | ||||
pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V; | pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V; | ||||
vm_paddr_t mptepa; | vm_paddr_t mptepa; | ||||
vm_page_t mpte; | vm_page_t mpte; | ||||
struct spglist free; | |||||
vm_offset_t sva; | |||||
int PG_PTE_CACHE; | int PG_PTE_CACHE; | ||||
bool in_kernel; | |||||
PG_G = pmap_global_bit(pmap); | |||||
PG_A = pmap_accessed_bit(pmap); | PG_A = pmap_accessed_bit(pmap); | ||||
PG_G = pmap_global_bit(pmap); | |||||
Not Done Inline ActionsYou've eliminated the only use of PG_G in this function. alc: You've eliminated the only use of PG_G in this function. | |||||
Done Inline ActionsIt is used by PG_PTE_PROMOTED macro. I tried that in initial version. kib: It is used by PG_PTE_PROMOTED macro. I tried that in initial version. | |||||
PG_M = pmap_modified_bit(pmap); | PG_M = pmap_modified_bit(pmap); | ||||
PG_RW = pmap_rw_bit(pmap); | PG_RW = pmap_rw_bit(pmap); | ||||
PG_V = pmap_valid_bit(pmap); | PG_V = pmap_valid_bit(pmap); | ||||
PG_PTE_CACHE = pmap_cache_mask(pmap, 0); | PG_PTE_CACHE = pmap_cache_mask(pmap, 0); | ||||
PG_PKU_MASK = pmap_pku_mask_bit(pmap); | PG_PKU_MASK = pmap_pku_mask_bit(pmap); | ||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | PMAP_LOCK_ASSERT(pmap, MA_OWNED); | ||||
in_kernel = va >= VM_MAXUSER_ADDRESS; | |||||
Not Done Inline Actions — markj: I guess it's supposed to be MAXUSER_ADDRESS. Technically this is a functional change, since before we were only allocating with interrupt priority for demotions in the direct map. I think it's fine; it just contradicts the review description.
Not Done Inline ActionsThe only situation in which this code should ever allocate a kernel page table page is when it is called upon to demote a direct map mapping, so this doesn't really represent a functional change. alc: The only situation in which this code should ever allocate a kernel page table page is when it… | |||||
oldpde = *pde; | oldpde = *pde; | ||||
KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), | KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), | ||||
("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); | ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); | ||||
if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == | |||||
NULL) { | /* | ||||
Done Inline Actions_fail is a bit misleading here. Maybe _abort? markj: _fail is a bit misleading here. Maybe _abort? | |||||
* Invalidate the 2MB page mapping and return "failure" if the | |||||
* mapping was never accessed. | |||||
*/ | |||||
if ((oldpde & PG_A) == 0) { | |||||
pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp); | |||||
return (FALSE); | |||||
} | |||||
mpte = pmap_remove_pt_page(pmap, va); | |||||
if (mpte == NULL) { | |||||
KASSERT((oldpde & PG_W) == 0, | KASSERT((oldpde & PG_W) == 0, | ||||
("pmap_demote_pde: page table page for a wired mapping" | ("pmap_demote_pde: page table page for a wired mapping" | ||||
" is missing")); | " is missing")); | ||||
/* | /* | ||||
* Invalidate the 2MB page mapping and return "failure" if the | * If the page table page is missing and the mapping | ||||
* mapping was never accessed or the allocation of the new | * is for a kernel address, the mapping must belong to | ||||
Not Done Inline Actions"is for a kernel address, ..." alc: "is for a kernel address, ..." | |||||
* page table page fails. If the 2MB page mapping belongs to | * the direct map. Page table pages are preallocated | ||||
* the direct map region of the kernel's address space, then | * for every other part of the kernel address space, | ||||
* the page allocation request specifies the highest possible | * so the direct map region is the only part of the | ||||
* priority (VM_ALLOC_INTERRUPT). Otherwise, the priority is | * kernel address space that must be handled here. | ||||
* normal. Page table pages are preallocated for every other | |||||
* part of the kernel address space, so the direct map region | |||||
* is the only part of the kernel address space that must be | |||||
* handled here. | |||||
*/ | */ | ||||
if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, | KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS && | ||||
Done Inline ActionsThe first part of this comment no longer belongs here. alc: The first part of this comment no longer belongs here. | |||||
pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va < | va < DMAP_MAX_ADDRESS), | ||||
DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | | ("pmap_demote_pde: No saved mpte for va %#lx", va)); | ||||
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { | |||||
SLIST_INIT(&free); | /* | ||||
sva = trunc_2mpage(va); | * If the 2MB page mapping belongs to the direct map | ||||
pmap_remove_pde(pmap, pde, sva, &free, lockp); | * region of the kernel's address space, then the page | ||||
if ((oldpde & PG_G) == 0) | * allocation request specifies the highest possible | ||||
pmap_invalidate_pde_page(pmap, sva, oldpde); | * priority (VM_ALLOC_INTERRUPT). Otherwise, the | ||||
vm_page_free_pages_toq(&free, true); | * priority is normal. | ||||
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" | */ | ||||
" in pmap %p", va, pmap); | mpte = vm_page_alloc(NULL, pmap_pde_pindex(va), | ||||
(in_kernel ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | | |||||
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); | |||||
/* | |||||
* If the allocation of the new page table page fails, | |||||
* invalidate the 2MB page mapping and return "failure". | |||||
Not Done Inline Actions"abort" could reasonably be interpreted as leave the mapping in place. Instead, I would explicitly say, "invalidate the 2MB page mapping". alc: "abort" could reasonably be interpreted as leave the mapping in place. Instead, I would… | |||||
*/ | |||||
if (mpte == NULL) { | |||||
pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp); | |||||
return (FALSE); | return (FALSE); | ||||
} | } | ||||
if (va < VM_MAXUSER_ADDRESS) { | |||||
if (!in_kernel) { | |||||
mpte->wire_count = NPTEPG; | mpte->wire_count = NPTEPG; | ||||
pmap_resident_count_inc(pmap, 1); | pmap_resident_count_inc(pmap, 1); | ||||
} | } | ||||
} | } | ||||
mptepa = VM_PAGE_TO_PHYS(mpte); | mptepa = VM_PAGE_TO_PHYS(mpte); | ||||
firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); | firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); | ||||
newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; | newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; | ||||
KASSERT((oldpde & PG_A) != 0, | KASSERT((oldpde & PG_A) != 0, | ||||
("pmap_demote_pde: oldpde is missing PG_A")); | ("pmap_demote_pde: oldpde is missing PG_A")); | ||||
KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, | KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, | ||||
("pmap_demote_pde: oldpde is missing PG_M")); | ("pmap_demote_pde: oldpde is missing PG_M")); | ||||
newpte = oldpde & ~PG_PS; | newpte = oldpde & ~PG_PS; | ||||
newpte = pmap_swap_pat(pmap, newpte); | newpte = pmap_swap_pat(pmap, newpte); | ||||
/* | /* | ||||
* If the page table page is not leftover from an earlier promotion, | * If the page table page is not leftover from an earlier promotion, | ||||
* initialize it. | * initialize it. | ||||
*/ | */ | ||||
if ((oldpde & PG_PROMOTED) == 0) | if ((oldpde & PG_PROMOTED) == 0) | ||||
pmap_fill_ptp(firstpte, newpte); | pmap_fill_ptp(firstpte, newpte); | ||||
KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), | pmap_demote_pde_check(firstpte, newpte); | ||||
Not Done Inline ActionsWhen I wrote this block of code, the PG_PROMOTED flag did not yet exist. If, today, I were writing this block for the first time, I would test (oldpte & PG_PROMOTED) == 0 instead of mpte->wire_count == 1. alc: When I wrote this block of code, the PG_PROMOTED flag did not yet exist. If, today, I were… | |||||
("pmap_demote_pde: firstpte and newpte map different physical" | |||||
" addresses")); | |||||
/* | /* | ||||
* If the mapping has changed attributes, update the page table | * If the mapping has changed attributes, update the page table | ||||
* entries. | * entries. | ||||
*/ | */ | ||||
if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) | if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) | ||||
pmap_fill_ptp(firstpte, newpte); | pmap_fill_ptp(firstpte, newpte); | ||||
Not Done Inline ActionsI speculate that this block would no longer be needed if we test (oldpte & PG_PROMOTED) == 0 above. In particular, pmap_protect_pde() clears PG_PROMOTED if it removes access rights from the PDE. alc: I speculate that this block would no longer be needed if we test (oldpte & PG_PROMOTED) == 0… | |||||
/* | /* | ||||
* The spare PV entries must be reserved prior to demoting the | * The spare PV entries must be reserved prior to demoting the | ||||
* mapping, that is, prior to changing the PDE. Otherwise, the state | * mapping, that is, prior to changing the PDE. Otherwise, the state | ||||
* of the PDE and the PV lists will be inconsistent, which can result | * of the PDE and the PV lists will be inconsistent, which can result | ||||
* in reclaim_pv_chunk() attempting to remove a PV entry from the | * in reclaim_pv_chunk() attempting to remove a PV entry from the | ||||
* wrong PV list and pmap_pv_demote_pde() failing to find the expected | * wrong PV list and pmap_pv_demote_pde() failing to find the expected | ||||
* PV entry for the 2MB page mapping that is being demoted. | * PV entry for the 2MB page mapping that is being demoted. | ||||
Show All 11 Lines | pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | ||||
if (workaround_erratum383) | if (workaround_erratum383) | ||||
pmap_update_pde(pmap, va, pde, newpde); | pmap_update_pde(pmap, va, pde, newpde); | ||||
else | else | ||||
pde_store(pde, newpde); | pde_store(pde, newpde); | ||||
/* | /* | ||||
* Invalidate a stale recursive mapping of the page table page. | * Invalidate a stale recursive mapping of the page table page. | ||||
*/ | */ | ||||
if (va >= VM_MAXUSER_ADDRESS) | if (in_kernel) | ||||
pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); | pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); | ||||
/* | /* | ||||
* Demote the PV entry. | * Demote the PV entry. | ||||
*/ | */ | ||||
if ((oldpde & PG_MANAGED) != 0) | if ((oldpde & PG_MANAGED) != 0) | ||||
pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); | pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); | ||||
atomic_add_long(&pmap_pde_demotions, 1); | atomic_add_long(&pmap_pde_demotions, 1); | ||||
CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx" | CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx in pmap %p", | ||||
" in pmap %p", va, pmap); | va, pmap); | ||||
Not Done Inline ActionsThis format string may be unwrapped as well. markj: This format string may be unwrapped as well. | |||||
return (TRUE); | return (TRUE); | ||||
} | } | ||||
/* | /* | ||||
* pmap_remove_kernel_pde: Remove a kernel superpage mapping. | * pmap_remove_kernel_pde: Remove a kernel superpage mapping. | ||||
*/ | */ | ||||
static void | static void | ||||
pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) | pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) | ||||
▲ Show 20 Lines • Show All 5,182 Lines • Show Last 20 Lines |
Before introducing a new variable, please take a look at how KERNend is initialized and used. I don't think that we need both KERNend and kern_promoted_end.