sys/riscv/riscv/pmap.c
@@ pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, ... @@
 	return (true);
 }

 #if VM_NRESERVLEVEL > 0
 static void
 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
     struct rwlock **lockp)
 {
-	pt_entry_t *firstl3, *l3;
+	pt_entry_t *firstl3, firstl3e, *l3, l3e;
 	vm_paddr_t pa;
 	vm_page_t ml3;

 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

 	va &= ~L2_OFFSET;
 	KASSERT((pmap_load(l2) & PTE_RWX) == 0,
 	    ("pmap_promote_l2: invalid l2 entry %p", l2));

 	firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2)));
-	pa = PTE_TO_PHYS(pmap_load(firstl3));
+	firstl3e = pmap_load(firstl3);
+	pa = PTE_TO_PHYS(firstl3e);
 	if ((pa & L2_OFFSET) != 0) {
 		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p",
 		    va, pmap);
 		atomic_add_long(&pmap_l2_p_failures, 1);
 		return;
 	}
+	/*
+	 * Downgrade a clean, writable mapping to read-only to ensure that the
+	 * hardware does not set PTE_D while we are comparing PTEs. We do not
+	 * issue sfence.vma under the assumption that any cached translations
+	 * will not be used to modify a clean mapping. A subsequent write will
+	 * trigger a page fault, at which point we lazily issue sfence.vma in
+	 * pmap_fault().
+	 */
+	while ((firstl3e & (PTE_W | PTE_D)) == PTE_W) {
+		if (atomic_fcmpset_64(firstl3, &firstl3e, firstl3e & ~PTE_W)) {
+			firstl3e &= ~PTE_W;
+			break;
+		}
+	}

Inline comments:

jrtc27: Hm, PTE_W is an int not a pt_entry_t, does that not cause issues for the high bits? I guess it's fine because bitwise negation on signed integers is implementation-defined, and in this case two's complement means the int -> unsigned long conversion does the right thing... and we use this pattern elsewhere. Seems dangerous, but I think this is all well-defined by a combination of the C spec and knowing the implementation is two's complement, though it would probably make sense for all the PTE_* constants to be the same type as pt_entry_t, both in width and signedness...

markj: That assumption is made in pretty much all of the pmaps, it seems. We could perhaps specify a type suffix of ul in the literal value for all of the constants; I think that would do the right thing were we ever to implement riscv32.

jrtc27: Yeah, I started writing the comment thinking you had a bug there and would trash the upper bits, but then it turned into general musings. As you say, it's a pretty common pattern, and not just for pmap code. As for riscv32, well, that seems like a bit of a waste of time, but hey, if someone wants to do it (properly) I won't stop them...

freebsdphab-AX9_cmx.ietfng.org: Just checking, but: there's no possibility that these cached translations will persist past the lifetime of the page table page now being updated? That is, by the time the page containing these l3 PTEs is recycled, we certainly will have done a global TLB invalidation? (Otherwise it seems like there's a risk that the PTW might occasionally corrupt the PTE_D bit in bit patterns that happen to look like the PTEs?)

jrtc27: See arm64's version of this code; the pmap_insert_pt_page call ensures that.

markj (author): Right, during promotion we don't discard the page containing the L3 PTEs. Instead it is saved in a per-pmap radix tree. During demotion we restore it. So it is ok to let stale TLB entries reference the old L3 entries so long as they are not clean and writable.

jrtc27: I think the atomic compare-and-swap requirement for hardware dirty tracking is important to mention in here (and maybe also that software dirty tracking will fault on the cached translations already anyway due to missing PTE_D)?
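The width concern in the first thread above is easy to demonstrate outside the kernel. Below is a standalone C sketch: PTE_W matches the RISC-V W bit (0x04), but PTE_W_UINT and PTE_W_UL are hypothetical contrasts invented here, not names from pte.h, and the behavior shown assumes an LP64, two's-complement target.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_W		0x04	/* int, as in the kernel headers */
#define PTE_W_UINT	0x04u	/* hypothetical unsigned-int variant */
#define PTE_W_UL	0x04ul	/* markj's suggested ul-suffixed variant */

int
main(void)
{
	uint64_t pte = 0xffffffff00000004ull;	/* high bits + PTE_W set */

	/*
	 * ~PTE_W is the int -5 (0xfffffffb); converting it to uint64_t
	 * sign-extends, so the mask becomes 0xfffffffffffffffb and the
	 * high bits of the PTE survive the AND.
	 */
	assert((pte & ~PTE_W) == 0xffffffff00000000ull);

	/*
	 * With an unsigned 32-bit constant, ~PTE_W_UINT zero-extends to
	 * 0x00000000fffffffb and silently clears the upper 32 bits.
	 */
	assert((pte & ~PTE_W_UINT) == 0);

	/* A 64-bit constant needs no sign-extension subtlety at all. */
	assert((pte & ~PTE_W_UL) == 0xffffffff00000000ull);

	printf("masking behaved as described\n");
	return (0);
}

On LP64 the ul suffix yields a 64-bit constant, which is why the suggested suffix sidesteps the sign-extension reasoning entirely.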
 	pa += PAGE_SIZE;
 	for (l3 = firstl3 + 1; l3 < firstl3 + Ln_ENTRIES; l3++) {
-		if (PTE_TO_PHYS(pmap_load(l3)) != pa) {
+		l3e = pmap_load(l3);
+		if (PTE_TO_PHYS(l3e) != pa) {
 			CTR2(KTR_PMAP,
 			    "pmap_promote_l2: failure for va %#lx pmap %p",
 			    va, pmap);
 			atomic_add_long(&pmap_l2_p_failures, 1);
 			return;
 		}
-		if ((pmap_load(l3) & PTE_PROMOTE) !=
-		    (pmap_load(firstl3) & PTE_PROMOTE)) {
+		while ((l3e & (PTE_W | PTE_D)) == PTE_W) {
+			if (atomic_fcmpset_64(l3, &l3e, l3e & ~PTE_W)) {
+				l3e &= ~PTE_W;
+				break;
+			}
+		}
+		if ((l3e & PTE_PROMOTE) != (firstl3e & PTE_PROMOTE)) {
 			CTR2(KTR_PMAP,
 			    "pmap_promote_l2: failure for va %#lx pmap %p",
 			    va, pmap);
 			atomic_add_long(&pmap_l2_p_failures, 1);
 			return;
 		}
 		pa += PAGE_SIZE;
 	}
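jrtc27's point about the compare-and-swap requirement applies to both downgrade loops above. The following standalone sketch uses C11 atomics: the PTE_W/PTE_D values are hypothetical stand-ins, and atomic_compare_exchange_weak plays the role of FreeBSD's atomic_fcmpset_64. It shows the race a plain read-modify-write would have with a page-table walker that sets PTE_D concurrently.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_W	0x04ul
#define PTE_D	0x80ul

static _Atomic uint64_t pte = PTE_W;	/* a clean, writable mapping */

/*
 * Racy version: plain load/modify/store. If the walker sets PTE_D in
 * the window between the load and the store (model it as another
 * thread doing an atomic OR), the store silently discards the D bit.
 */
static void
clear_w_racy(void)
{
	uint64_t v = atomic_load(&pte);

	/* ...window in which hardware may set PTE_D... */
	atomic_store(&pte, v & ~PTE_W);
}

/*
 * Safe version, mirroring the fcmpset loops in the diff: the update
 * succeeds only if the PTE is unchanged; a concurrently set PTE_D
 * forces a reload and a re-check of the clean-and-writable condition.
 */
static void
clear_w_safe(void)
{
	uint64_t v = atomic_load(&pte);

	while ((v & (PTE_W | PTE_D)) == PTE_W) {
		/* C11 analogue of atomic_fcmpset_64(). */
		if (atomic_compare_exchange_weak(&pte, &v, v & ~PTE_W))
			break;
	}
}

int
main(void)
{
	clear_w_racy();		/* harmless single-threaded, racy with a PTW */
	atomic_store(&pte, PTE_W | PTE_D);
	clear_w_safe();		/* already dirty: the loop leaves it alone */
	printf("pte = %#lx\n", (unsigned long)atomic_load(&pte));
	return (0);
}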
 	ml3 = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
 	KASSERT(ml3->pindex == pmap_l2_pindex(va),
 	    ("pmap_promote_l2: page table page's pindex is wrong"));
 	if (pmap_insert_pt_page(pmap, ml3, true)) {
 		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p",
 		    va, pmap);
 		atomic_add_long(&pmap_l2_p_failures, 1);
 		return;
 	}

-	if ((pmap_load(firstl3) & PTE_SW_MANAGED) != 0)
-		pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(pmap_load(firstl3)),
-		    lockp);
+	if ((firstl3e & PTE_SW_MANAGED) != 0)
+		pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(firstl3e), lockp);

-	pmap_store(l2, pmap_load(firstl3));
+	pmap_store(l2, firstl3e);

 	atomic_add_long(&pmap_l2_promotions, 1);
 	CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
 	    pmap);
 }
 #endif
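markj's description of the promotion/demotion lifecycle is the key to why stale translations are tolerable here. As a rough illustration, below is a toy model in plain C: toy_insert_pt_page and toy_remove_pt_page are invented stand-ins for the real pmap_insert_pt_page and its demotion-side counterpart, and the fixed-size table stands in for the per-pmap radix tree. It is a sketch of the invariant, not kernel code.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define NSLOTS	8

struct toy_pmap {
	uintptr_t	pindex[NSLOTS];		/* key: which 2MB region */
	void		*ptpage[NSLOTS];	/* saved L3 page-table page */
};

/* Promotion: stash the now-unused L3 page instead of freeing it. */
static int
toy_insert_pt_page(struct toy_pmap *pm, uintptr_t pindex, void *page)
{
	for (size_t i = 0; i < NSLOTS; i++) {
		if (pm->ptpage[i] == NULL) {
			pm->pindex[i] = pindex;
			pm->ptpage[i] = page;
			return (0);
		}
	}
	return (1);	/* insertion failed: promotion must be abandoned */
}

/* Demotion: recover the saved page, so no allocation is needed. */
static void *
toy_remove_pt_page(struct toy_pmap *pm, uintptr_t pindex)
{
	for (size_t i = 0; i < NSLOTS; i++) {
		if (pm->ptpage[i] != NULL && pm->pindex[i] == pindex) {
			void *page = pm->ptpage[i];

			pm->ptpage[i] = NULL;
			return (page);
		}
	}
	return (NULL);
}

int
main(void)
{
	struct toy_pmap pm = {0};
	uint64_t l3page[512] = {0};	/* stand-in for the L3 PT page */

	assert(toy_insert_pt_page(&pm, 42, l3page) == 0);	/* promote */
	assert(toy_remove_pt_page(&pm, 42) == l3page);		/* demote */
	return (0);
}

Because the saved page is reused only by demotion, a stale TLB entry that still walks it sees L3 PTEs that are read-only or already dirty, so the hardware never needs to write them back; that is exactly the condition the W-bit downgrade loops establish before the comparison.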