amd64/amd64/pmap.c
Context not available.
  */
 static caddr_t crashdumpmap;
 
+/*
+ * Internal flags for pmap_enter()'s helper functions.
+ */
+#define PMAP_ENTER_NORECLAIM	0x1000000	/* Don't reclaim PV entries. */
+#define PMAP_ENTER_NOREPLACE	0x2000000	/* Don't replace mappings. */
+
 static void	free_pv_chunk(struct pv_chunk *pc);
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
Context not available.
 		    struct rwlock **lockp);
 static void	pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
 		    struct rwlock **lockp);
-static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
-		    struct rwlock **lockp);
+static bool	pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde,
+		    u_int flags, struct rwlock **lockp);
 static void	pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
 		    struct rwlock **lockp);
 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
Context not available.
 		    vm_offset_t va, struct rwlock **lockp);
 static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
     vm_offset_t va);
-static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
+static bool	pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, struct rwlock **lockp);
+static int	pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
+    u_int flags, vm_page_t m, struct rwlock **lockp);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
Context not available.
 	return (mask);
 }
 
-static __inline boolean_t
+bool
 pmap_ps_enabled(pmap_t pmap)
 {
Context not available.
 }
 
 /*
- * Conditionally create the PV entry for a 2MB page mapping if the required
- * memory can be allocated without resorting to reclamation.
+ * Create the PV entry for a 2MB page mapping.  Always returns true unless the
+ * flag PMAP_ENTER_NORECLAIM is specified.  If that flag is specified, returns
+ * false if the PV entry cannot be allocated without resorting to reclamation.
  */
-static boolean_t
-pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
-    struct rwlock **lockp)
+static bool
+pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags,
+    struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
+	vm_paddr_t pa;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/* Pass NULL instead of the lock pointer to disable reclamation. */
-	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
-		pv->pv_va = va;
-		CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
-		pvh = pa_to_pvh(pa);
-		TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
-		pvh->pv_gen++;
-		return (TRUE);
-	} else
-		return (FALSE);
+	if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ?
+	    NULL : lockp)) == NULL)
+		return (false);
+	pv->pv_va = va;
+	pa = pde & PG_PS_FRAME;
+	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+	pvh = pa_to_pvh(pa);
+	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+	pvh->pv_gen++;
+	return (true);
 }
 
 /*
markj: Just for my own elucidation: the only reason for handling global mappings differently is that pmap_invalidate_all(kernel_pmap) used to invalidate PG_G entries only if invpcid was used?

Also, I don't quite understand why we're careful to avoid invalidating unmapped subranges of the requested range. The SDM says that invpcid will raise #PF when invalidating a single address and an access of that address would cause a page fault, but we currently don't use invpcid to invalidate a single address. invlpg doesn't have the same restriction. Is there some unrelated reason for doing things this way?

alc: In regards to the first question, the answer is "Yes." Historically, pmap_invalidate_all() was never used on the kernel pmap, because it didn't invalidate PG_G mappings.

In regards to the second question, the answer is that invlpg instructions have been used only when necessary because they have been quite costly in some x86 implementations. For example, my vague recollection is that they took 500 to 1000 cycles on the Pentium 4, whereas contemporaneous AMD processors were about 100 cycles. I don't know what their current cost is, but they are serializing instructions. That said, I do think that we are underutilizing pmap_invalidate_range(). For example, …

kib: I actually do not understand this break.

kib: Ignore me, it is fine.
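[Editorial note, not from the review: for readers following this thread, these are the three amd64 TLB-invalidation primitives being weighed. The prototypes match the pmap; the one-line glosses are a rough summary, not text from the patch.]

    void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
            /* shoot down a single 4KB mapping, typically one invlpg */
    void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
            /* invlpg each page in [sva, eva) */
    void pmap_invalidate_all(pmap_t pmap);
            /* flush the entire TLB (%cr3 reload or invpcid) */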
Context not available.
  */
 int
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
-    u_int flags, int8_t psind __unused)
+    u_int flags, int8_t psind)
 {
 	struct rwlock *lock;
 	pd_entry_t *pde;
Context not available.
 	    ("pmap_enter: managed mapping within the clean submap"));
 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
+	KASSERT((flags & PMAP_ENTER_RESERVED) == 0,
+	    ("pmap_enter: flags %u has reserved bits set", flags));
 	pa = VM_PAGE_TO_PHYS(m);
 	newpte = (pt_entry_t)(pa | PG_A | PG_V);
 	if ((flags & VM_PROT_WRITE) != 0)
Context not available.
 		newpte |= PG_U;
 	if (pmap == kernel_pmap)
 		newpte |= PG_G;
-	newpte |= pmap_cache_bits(pmap, m->md.pat_mode, 0);
+	newpte |= pmap_cache_bits(pmap, m->md.pat_mode, psind > 0);
 
 	/*
 	 * Set modified bit gratuitously for writeable mappings if
Context not available.
 	} else
 		newpte |= PG_MANAGED;
 
-	mpte = NULL;
-
 	lock = NULL;
 	PMAP_LOCK(pmap);
+	if (psind == 1) {
+		/* Assert the required virtual and physical alignment. */
+		KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned"));
+		KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
alc: I think that a KASSERT(m->psind == 1, ...) here would be appropriate. pmap_enter_pde() asserts that the virtual address is superpage aligned, but nowhere is there an assert on the physical address.

kib: Maybe the assert on the alignment of the phys address is most useful then?

alc: My rationale for using m->psind was that the physical address could be "accidentally" aligned, whereas m->psind != 0 guarantees that the entire superpage starting at m is allocated as well as aligned.

kib: I would check both facts then.

alc: The above KASSERT()s check the exact same conditions that pmap_enter_object() has long used to verify both virtual and physical alignment before attempting a superpage mapping. m->psind > 0 is simultaneously guaranteeing alignment and that the physical superpage is fully allocated.
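[Editorial note, not from the review: for concreteness, a hypothetical sketch of the stricter check kib is suggesting, asserting the physical alignment explicitly in addition to relying on m->psind. This is not part of the patch.]

	KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
	KASSERT((VM_PAGE_TO_PHYS(m) & PDRMASK) == 0,
	    ("pmap_enter: pa %#lx is not 2MB aligned",
	    (u_long)VM_PAGE_TO_PHYS(m)));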
+		rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m, &lock);
+		goto out;
+	}
+	mpte = NULL;
 
 	/*
 	 * In the case that a page table page is not
Context not available.
 }
 /*
- * Tries to create a 2MB page mapping.  Returns TRUE if successful and FALSE
- * otherwise.  Fails if (1) a page table page cannot be allocated without
- * blocking, (2) a mapping already exists at the specified virtual address, or
- * (3) a pv entry cannot be allocated without reclaiming another pv entry.
+ * Tries to create a read- and/or execute-only 2MB page mapping.  Returns true
+ * if successful.  Returns false if (1) a page table page cannot be allocated
+ * without sleeping, (2) a mapping already exists at the specified virtual
+ * address, or (3) a PV entry cannot be allocated without reclaiming another
+ * PV entry.
  */
-static boolean_t
-pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+static bool
+pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     struct rwlock **lockp)
 {
-	pd_entry_t *pde, newpde;
+	pd_entry_t newpde;
 	pt_entry_t PG_V;
-	vm_page_t pdpg;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	PG_V = pmap_valid_bit(pmap);
+	newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) |
+	    PG_PS | PG_V;
+	if ((m->oflags & VPO_UNMANAGED) == 0)
+		newpde |= PG_MANAGED;
+	if ((prot & VM_PROT_EXECUTE) == 0)
+		newpde |= pg_nx;
+	if (va < VM_MAXUSER_ADDRESS)
+		newpde |= PG_U;
+	return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP |
+	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
+	    KERN_SUCCESS);
+}
+
+/*
+ * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
+ * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
+ * otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
+ * a mapping already exists at the specified virtual address.  Returns
+ * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table
+ * page allocation failed.  Returns KERN_RESOURCE_SHORTAGE if
+ * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
+ *
+ * The parameter "m" is only used when creating a managed, writeable mapping.
+ */
+static int
+pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
+    vm_page_t m, struct rwlock **lockp)
+{
 	struct spglist free;
+	pd_entry_t oldpde, *pde;
+	pt_entry_t PG_G, PG_RW, PG_V;
+	vm_page_t mt, pdpg;
 
+	PG_G = pmap_global_bit(pmap);
+	PG_RW = pmap_rw_bit(pmap);
+	KASSERT((newpde & (pmap_modified_bit(pmap) | PG_RW)) != PG_RW,
+	    ("pmap_enter_pde: newpde is missing PG_M"));
 	PG_V = pmap_valid_bit(pmap);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	if ((pdpg = pmap_allocpde(pmap, va, NULL)) == NULL) {
+
+	if ((pdpg = pmap_allocpde(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
+	    NULL : lockp)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
-		return (FALSE);
+		return (KERN_RESOURCE_SHORTAGE);
 	}
 	pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
 	pde = &pde[pmap_pde_index(va)];
-	if ((*pde & PG_V) != 0) {
+	oldpde = *pde;
+	if ((oldpde & PG_V) != 0) {
 		KASSERT(pdpg->wire_count > 1,
 		    ("pmap_enter_pde: pdpg's wire count is too low"));
-		pdpg->wire_count--;
-		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
-		    " in pmap %p", va, pmap);
-		return (FALSE);
+		if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
+			pdpg->wire_count--;
+			CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
+			    " in pmap %p", va, pmap);
+			return (KERN_FAILURE);
+		}
+		/* Break the existing mapping(s). */
+		SLIST_INIT(&free);
+		if ((oldpde & PG_PS) != 0) {
+			/*
+			 * The reference to the PD page that was acquired by
+			 * pmap_allocpde() ensures that it won't be freed.
+			 * However, if the PDE resulted from a promotion, then
+			 * a reserved PT page could be freed.
+			 */
+			(void)pmap_remove_pde(pmap, pde, va, &free, lockp);
+			if ((oldpde & PG_G) == 0)
+				pmap_invalidate_pde_page(pmap, va, oldpde);
+		} else {
+			pmap_delayed_invl_started();
+			if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free,
+			    lockp))
+				pmap_invalidate_all(pmap);
+			pmap_delayed_invl_finished();
+		}
+		pmap_free_zero_pages(&free);
+		if (va >= VM_MAXUSER_ADDRESS) {
+			mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+			if (pmap_insert_pt_page(pmap, mt)) {
+				/*
+				 * XXX Currently, this can't happen because
+				 * we do not perform pmap_enter(psind == 1)
+				 * on the kernel pmap.
+				 */
+				panic("pmap_enter_pde: trie insert failed");
+			}
+		} else
+			KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p",
+			    pde));
 	}
-	newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) |
-	    PG_PS | PG_V;
-	if ((m->oflags & VPO_UNMANAGED) == 0) {
-		newpde |= PG_MANAGED;
-
+	if ((newpde & PG_MANAGED) != 0) {
 		/*
 		 * Abort this mapping if its PV entry could not be created.
 		 */
-		if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m),
-		    lockp)) {
+		if (!pmap_pv_insert_pde(pmap, va, newpde, flags, lockp)) {
 			SLIST_INIT(&free);
 			if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
 				/*
Context not available.
 			}
 			CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
-			return (FALSE);
+			return (KERN_RESOURCE_SHORTAGE);
 		}
+		if ((newpde & PG_RW) != 0) {
+			for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
+				vm_page_aflag_set(mt, PGA_WRITEABLE);
alc: I really wish we had a more superpage-friendly optimization than this simple per-page flag for tracking the possible existence of writeable mappings. This is the only part of pmap_enter_pde() that does 512 per-page operations. This is also why I didn't revise pmap_copy() to use pmap_enter_pde() as a helper function. If pmap_copy() is copying a PG_RW PDE, it can assume that the PGA_WRITEABLE flag is already set.
+		}
 	}
-	if ((prot & VM_PROT_EXECUTE) == 0)
-		newpde |= pg_nx;
-	if (va < VM_MAXUSER_ADDRESS)
-		newpde |= PG_U;
 
 	/*
 	 * Increment counters.
 	 */
+	if ((newpde & PG_W) != 0)
+		pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE;
 	pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE);
 
 	/*
Context not available.
 	atomic_add_long(&pmap_pde_mappings, 1);
 	CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
 	    " in pmap %p", va, pmap);
-	return (TRUE);
+	return (KERN_SUCCESS);
 }
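[Editorial note, not from the review: for orientation, the two call sites of the reworked pmap_enter_pde() elsewhere in this diff use it quite differently. The juxtaposition below is abridged from the patch itself, not new code.]

	/*
	 * pmap_enter_2mpage(), used by pmap_enter_object(): speculative, so it
	 * never sleeps, never replaces an existing mapping, and never reclaims
	 * a PV entry; the caller only cares whether the mapping was created.
	 */
	return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP |
	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
	    KERN_SUCCESS);

	/*
	 * pmap_enter(..., psind == 1): the caller asked for a 2MB mapping
	 * explicitly, so the caller's flags are passed through and the exact
	 * KERN_* value is propagated.
	 */
	rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m, &lock);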
 
 /*
Context not available.
 		va = start + ptoa(diff);
 		if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
 		    m->psind == 1 && pmap_ps_enabled(pmap) &&
-		    pmap_enter_pde(pmap, va, m, prot, &lock))
+		    pmap_enter_2mpage(pmap, va, m, prot, &lock))
 			m = &m[NBPDR / PAGE_SIZE - 1];
 		else
 			mpte = pmap_enter_quick_locked(pmap, va, m, prot,
Context not available.
 		    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dst_pdpg));
 		pde = &pde[pmap_pde_index(addr)];
 		if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 ||
-		    pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
-		    PG_PS_FRAME, &lock))) {
+		    pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr,
+		    PMAP_ENTER_NORECLAIM, &lock))) {
 			*pde = srcptepaddr & ~PG_W;
 			pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE);
 			atomic_add_long(&pmap_pde_mappings, 1);
Context not available.