Changeset View
Standalone View
sys/riscv/riscv/pmap.c
Show First 20 Lines • Show All 3,114 Lines • ▼ Show 20 Lines | out: | ||||||||
if (lock != NULL) | if (lock != NULL) | ||||||||
rw_wunlock(lock); | rw_wunlock(lock); | ||||||||
rw_runlock(&pvh_global_lock); | rw_runlock(&pvh_global_lock); | ||||||||
PMAP_UNLOCK(pmap); | PMAP_UNLOCK(pmap); | ||||||||
return (rv); | return (rv); | ||||||||
} | } | ||||||||
/* | /* | ||||||||
* Tries to create a read- and/or execute-only 2MB page mapping. Returns true | * Tries to create a read- and/or execute-only 2MB page mapping. Returns | ||||||||
* if successful. Returns false if (1) a page table page cannot be allocated | * KERN_SUCCESS if the mapping was created. Otherwise, returns an error | ||||||||
* without sleeping, (2) a mapping already exists at the specified virtual | * value. See pmap_enter_l2() for the possible error values when "no sleep", | ||||||||
* address, or (3) a PV entry cannot be allocated without reclaiming another | * "no replace", and "no reclaim" are specified. | ||||||||
* PV entry. | |||||||||
*/ | */ | ||||||||
static bool | static int | ||||||||
pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, | pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, | ||||||||
struct rwlock **lockp) | struct rwlock **lockp) | ||||||||
{ | { | ||||||||
pd_entry_t new_l2; | pd_entry_t new_l2; | ||||||||
pn_t pn; | pn_t pn; | ||||||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | PMAP_LOCK_ASSERT(pmap, MA_OWNED); | ||||||||
pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE; | pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE; | ||||||||
new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V); | new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V); | ||||||||
if ((m->oflags & VPO_UNMANAGED) == 0) | if ((m->oflags & VPO_UNMANAGED) == 0) | ||||||||
new_l2 |= PTE_SW_MANAGED; | new_l2 |= PTE_SW_MANAGED; | ||||||||
if ((prot & VM_PROT_EXECUTE) != 0) | if ((prot & VM_PROT_EXECUTE) != 0) | ||||||||
new_l2 |= PTE_X; | new_l2 |= PTE_X; | ||||||||
if (va < VM_MAXUSER_ADDRESS) | if (va < VM_MAXUSER_ADDRESS) | ||||||||
new_l2 |= PTE_U; | new_l2 |= PTE_U; | ||||||||
return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP | | return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP | | ||||||||
PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == | PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp)); | ||||||||
KERN_SUCCESS); | |||||||||
} | } | ||||||||
/* | /* | ||||||||
* Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if | * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if | ||||||||
* the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE | * the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE, or | ||||||||
markj: This comment doesn't quite reflect reality anymore. | |||||||||
* otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and | * KERN_RESOURCE_SHORTAGE otherwise. Returns KERN_FAILURE if | ||||||||
* a mapping already exists at the specified virtual address. Returns | * PMAP_ENTER_NOREPLACE was specified and a 4KB page mapping already exists | ||||||||
* KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table | * within the 2MB virtual address range starting at the specified virtual | ||||||||
* page allocation failed. Returns KERN_RESOURCE_SHORTAGE if | * address. Returns KERN_NO_SPACE if PMAP_ENTER_NOREPLACE was specified and a | ||||||||
* PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. | * 2MB page mapping already exists at the specified virtual address. Returns | ||||||||
* KERN_RESOURCE_SHORTAGE if either (1) PMAP_ENTER_NOSLEEP was specified and a | |||||||||
* page table page allocation failed or (2) PMAP_ENTER_NORECLAIM was specified | |||||||||
* and a PV entry allocation failed. | |||||||||
* | * | ||||||||
* The parameter "m" is only used when creating a managed, writeable mapping. | * The parameter "m" is only used when creating a managed, writeable mapping. | ||||||||
*/ | */ | ||||||||
static int | static int | ||||||||
pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, | pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, | ||||||||
vm_page_t m, struct rwlock **lockp) | vm_page_t m, struct rwlock **lockp) | ||||||||
{ | { | ||||||||
struct spglist free; | struct spglist free; | ||||||||
Show All 11 Lines | pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, | ||||||||
} | } | ||||||||
l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg)); | l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg)); | ||||||||
l2 = &l2[pmap_l2_index(va)]; | l2 = &l2[pmap_l2_index(va)]; | ||||||||
if ((oldl2 = pmap_load(l2)) != 0) { | if ((oldl2 = pmap_load(l2)) != 0) { | ||||||||
KASSERT(l2pg->ref_count > 1, | KASSERT(l2pg->ref_count > 1, | ||||||||
("pmap_enter_l2: l2pg's ref count is too low")); | ("pmap_enter_l2: l2pg's ref count is too low")); | ||||||||
if ((flags & PMAP_ENTER_NOREPLACE) != 0) { | if ((flags & PMAP_ENTER_NOREPLACE) != 0) { | ||||||||
if ((oldl2 & PTE_RWX) != 0) { | |||||||||
l2pg->ref_count--; | l2pg->ref_count--; | ||||||||
CTR2(KTR_PMAP, | CTR2(KTR_PMAP, | ||||||||
"pmap_enter_l2: failed to replace existing mapping" | "pmap_enter_l2: no space for va %#lx" | ||||||||
" in pmap %p", va, pmap); | |||||||||
Done Inline Actions
markj: | |||||||||
return (KERN_NO_SPACE); | |||||||||
Done Inline Actions — markj: Don't we need to take into account the possibility that the L3 table is empty? This can happen in the kernel map, where page table pages are never freed. | ||||||||
Done Inline Actions — mhorne: Is this the !pmap_every_pte_is_zero() check performed here by the other pmaps? I was wondering what the purpose of that was. | ||||||||
Done Inline Actions — markj: Yes. It arises only for kernel pmaps, hence the va < VM_MAXUSER_ADDRESS check. I believe this would apply to riscv as well. Though, I think it should be quite rare for the kernel to map 2MB pages this way. | ||||||||
Done Inline Actions — mhorne: See D36885. | ||||||||
} else { | |||||||||
l2pg->ref_count--; | |||||||||
CTR2(KTR_PMAP, "pmap_enter_l2:" | |||||||||
" failed to replace existing mapping" | |||||||||
" for va %#lx in pmap %p", va, pmap); | " for va %#lx in pmap %p", va, pmap); | ||||||||
return (KERN_FAILURE); | return (KERN_FAILURE); | ||||||||
} | } | ||||||||
} | |||||||||
SLIST_INIT(&free); | SLIST_INIT(&free); | ||||||||
if ((oldl2 & PTE_RWX) != 0) | if ((oldl2 & PTE_RWX) != 0) | ||||||||
(void)pmap_remove_l2(pmap, l2, va, | (void)pmap_remove_l2(pmap, l2, va, | ||||||||
pmap_load(pmap_l1(pmap, va)), &free, lockp); | pmap_load(pmap_l1(pmap, va)), &free, lockp); | ||||||||
else | else | ||||||||
for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) { | for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) { | ||||||||
l3 = pmap_l2_to_l3(l2, sva); | l3 = pmap_l2_to_l3(l2, sva); | ||||||||
if ((pmap_load(l3) & PTE_V) != 0 && | if ((pmap_load(l3) & PTE_V) != 0 && | ||||||||
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines | |||||||||
void | void | ||||||||
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, | pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, | ||||||||
vm_page_t m_start, vm_prot_t prot) | vm_page_t m_start, vm_prot_t prot) | ||||||||
{ | { | ||||||||
struct rwlock *lock; | struct rwlock *lock; | ||||||||
vm_offset_t va; | vm_offset_t va; | ||||||||
vm_page_t m, mpte; | vm_page_t m, mpte; | ||||||||
vm_pindex_t diff, psize; | vm_pindex_t diff, psize; | ||||||||
int rv; | |||||||||
VM_OBJECT_ASSERT_LOCKED(m_start->object); | VM_OBJECT_ASSERT_LOCKED(m_start->object); | ||||||||
psize = atop(end - start); | psize = atop(end - start); | ||||||||
mpte = NULL; | mpte = NULL; | ||||||||
m = m_start; | m = m_start; | ||||||||
lock = NULL; | lock = NULL; | ||||||||
rw_rlock(&pvh_global_lock); | rw_rlock(&pvh_global_lock); | ||||||||
PMAP_LOCK(pmap); | PMAP_LOCK(pmap); | ||||||||
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { | while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { | ||||||||
va = start + ptoa(diff); | va = start + ptoa(diff); | ||||||||
if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && | if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && | ||||||||
m->psind == 1 && pmap_ps_enabled(pmap) && | m->psind == 1 && pmap_ps_enabled(pmap) && | ||||||||
pmap_enter_2mpage(pmap, va, m, prot, &lock)) | ((rv = pmap_enter_2mpage(pmap, va, m, prot, &lock)) == | ||||||||
KERN_SUCCESS || rv == KERN_NO_SPACE)) | |||||||||
m = &m[L2_SIZE / PAGE_SIZE - 1]; | m = &m[L2_SIZE / PAGE_SIZE - 1]; | ||||||||
else | else | ||||||||
mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, | mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, | ||||||||
&lock); | &lock); | ||||||||
m = TAILQ_NEXT(m, listq); | m = TAILQ_NEXT(m, listq); | ||||||||
} | } | ||||||||
if (lock != NULL) | if (lock != NULL) | ||||||||
rw_wunlock(lock); | rw_wunlock(lock); | ||||||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | if (mpte && (mpte->pindex == l2pindex)) { | ||||||||
/* | /* | ||||||||
* If the page table page is mapped, we just increment | * If the page table page is mapped, we just increment | ||||||||
* the hold count, and activate it. Otherwise, we | * the hold count, and activate it. Otherwise, we | ||||||||
* attempt to allocate a page table page. If this | * attempt to allocate a page table page. If this | ||||||||
* attempt fails, we don't retry. Instead, we give up. | * attempt fails, we don't retry. Instead, we give up. | ||||||||
*/ | */ | ||||||||
if (l2 != NULL && pmap_load(l2) != 0) { | if (l2 != NULL && pmap_load(l2) != 0) { | ||||||||
phys = PTE_TO_PHYS(pmap_load(l2)); | phys = PTE_TO_PHYS(pmap_load(l2)); | ||||||||
mpte = PHYS_TO_VM_PAGE(phys); | mpte = PHYS_TO_VM_PAGE(phys); | ||||||||
mpte->ref_count++; | mpte->ref_count++; | ||||||||
Done Inline Actions — mhorne: This is where I've departed from how amd64 and arm64 handle this edge-case. Here, they perform a check like: if ((pmap_load(l2) & PTE_RWX) != 0) return (NULL); This eliminates the potential panic, but we still end up (uselessly) calling pmap_enter_quick_locked() for each 4K page in the superpage mapping -- see the KTR trace output. | ||||||||
Done Inline Actions — markj: This code is reachable via pmap_enter_quick(). I think the assertion you added cannot be triggered by its sole caller today, if only because pmap_is_prefaultable() will return false if the address is mapped as a superpage. But, today we permit callers of pmap_enter_quick() to "overwrite" existing L3 mappings (the function simply does nothing in that case), so it seems inconsistent to require that there is no pre-existing L2 mapping. So, I think amd64 and arm64's behaviour makes more sense, and their pmap_enter_object() implementations should instead be smarter in this scenario. | ||||||||
Not Done Inline Actions — alc: Yes, they could be smarter, but only sometimes. Specifically, if the old mapping is a superpage. On the other hand, hypothetically, the physical page could now be a complete, valid superpage, hence you find yourself in pmap_enter_2mpage(), but only a subset of the base pages are mapped. In that case, the code should continue to behave as it does now, trying pmap_enter_quick_locked() on every base page. | ||||||||
} else { | } else { | ||||||||
/* | /* | ||||||||
* Pass NULL instead of the PV list lock | * Pass NULL instead of the PV list lock | ||||||||
* pointer, because we don't intend to sleep. | * pointer, because we don't intend to sleep. | ||||||||
*/ | */ | ||||||||
mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); | mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); | ||||||||
if (mpte == NULL) | if (mpte == NULL) | ||||||||
return (mpte); | return (mpte); | ||||||||
▲ Show 20 Lines • Show All 1,610 Lines • Show Last 20 Lines |
This comment doesn't quite reflect reality anymore.