sys/arm64/arm64/pmap.c
(376 lines elided)
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
    vm_offset_t va);
static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
static bool pmap_activate_int(pmap_t pmap);
static void pmap_alloc_asid(pmap_t pmap);
-static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
+static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
+    vm_prot_t prot, int mode);
static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
    vm_offset_t va, struct rwlock **lockp);
static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
    u_int flags, vm_page_t m, struct rwlock **lockp);
(5,550 lines elided)
 * virtual address range or the direct map.
 */
int
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
        int error;

        PMAP_LOCK(kernel_pmap);
-       error = pmap_change_attr_locked(va, size, mode);
+       error = pmap_change_props_locked(va, size, PROT_NONE, mode);
        PMAP_UNLOCK(kernel_pmap);
        return (error);
}
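For orientation, a minimal usage sketch (editorial addition; "buf_va" and "buf_len" are hypothetical placeholders, not from this change):

        int error;

        /* Hypothetical caller: remap an already-mapped kernel buffer as
         * uncacheable, e.g. before sharing it with a non-coherent device. */
        error = pmap_change_attr(buf_va, buf_len, VM_MEMATTR_UNCACHEABLE);
        if (error != 0)
                printf("pmap_change_attr failed: %d\n", error);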
+/*
+ * Changes the specified virtual address range's protections to those
+ * specified by "prot".  Like pmap_change_attr(), protections for aliases
+ * in the direct map are updated as well.  Protections on aliasing mappings may
+ * be a subset of the requested protections; for example, mappings in the direct
+ * map are never executable.
+ */
+int
+pmap_change_prot(vm_offset_t va, vm_size_t size, vm_prot_t prot)
+{
+       int error;
+
+       /* Only supported within the kernel map. */
+       if (va < VM_MIN_KERNEL_ADDRESS)
+               return (EINVAL);
+
+       PMAP_LOCK(kernel_pmap);
+       error = pmap_change_props_locked(va, size, prot, -1);
+       PMAP_UNLOCK(kernel_pmap);
+       return (error);
+}
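A hedged usage sketch for the new entry point (hypothetical caller; "text_va" and "text_len" are placeholders, not part of this diff): a subsystem that has finished patching kernel text could drop write access while keeping execute:

        /* Hypothetical: write-protect patched kernel text. */
        error = pmap_change_prot(text_va, text_len,
            VM_PROT_READ | VM_PROT_EXECUTE);
        if (error != 0)
                printf("pmap_change_prot failed: %d\n", error);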
static int
-pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
+pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot,
+    int mode)
{
        vm_offset_t base, offset, tmpva;
        pt_entry_t l3, *pte, *newpte;
-       int lvl;
+       pt_entry_t bits, mask;
+       int lvl, rv;

        PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
        base = trunc_page(va);
        offset = va & PAGE_MASK;
        size = round_page(offset + size);
        if (!VIRT_IN_DMAP(base) &&
            !(base >= VM_MIN_KERNEL_ADDRESS && base < VM_MAX_KERNEL_ADDRESS))
                return (EINVAL);
+       bits = 0;
+       mask = 0;
+       if (mode != -1) {
+               bits = ATTR_S1_IDX(mode);
+               mask = ATTR_S1_IDX_MASK;
+               if (mode == VM_MEMATTR_DEVICE) {
+                       mask |= ATTR_S1_XN;
+                       bits |= ATTR_S1_XN;
+               }
+       }
+       if (prot != VM_PROT_NONE) {
+               /* Don't mark the DMAP as executable. It never is on arm64. */
+               if (VIRT_IN_DMAP(base)) {
+                       prot &= ~VM_PROT_EXECUTE;
+                       /*
+                        * XXX Mark the DMAP as writable for now. We rely
+                        * on this in ddb & dtrace to insert breakpoint
+                        * instructions.
+                        */
+                       prot |= VM_PROT_WRITE;
+               }

markj: I would make this an XXX comment. Presumably we should have ddb and dtrace temporarily modify the mapping in order for them to do their work? How do we know that ddb and dtrace always write through the direct map?

andrew (Done): I've committed D32053 that uses the DMAP region to get a RW address. I'm planning on having ddb and dtrace temporarily upgrade the DMAP memory to RW if needed.
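A minimal sketch of the plan andrew describes (editorial and hypothetical: "pa" and "patched_insn" are placeholders, and a real implementation would also need an I-cache sync and error handling):

        /* Hypothetical: patch an instruction via the DMAP alias of
         * physical address "pa", temporarily upgrading it to RW. */
        vm_offset_t dva = PHYS_TO_DMAP(pa);

        pmap_change_prot(dva, sizeof(uint32_t), VM_PROT_READ | VM_PROT_WRITE);
        *(uint32_t *)dva = patched_insn;
        pmap_change_prot(dva, sizeof(uint32_t), VM_PROT_READ);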
+               if ((prot & VM_PROT_WRITE) == 0) {
+                       bits |= ATTR_S1_AP(ATTR_S1_AP_RO);
+               }
+               if ((prot & VM_PROT_EXECUTE) == 0) {
+                       bits |= ATTR_S1_PXN;
+               }
+               bits |= ATTR_S1_UXN;
+               mask |= ATTR_S1_AP_MASK | ATTR_S1_XN;
+       }
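A worked example of the encoding (editorial note, derived from the setup above): with prot == VM_PROT_READ on a non-DMAP range and mode == -1, the function computes

        /*
         * bits == ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_S1_PXN | ATTR_S1_UXN
         * mask == ATTR_S1_AP_MASK | ATTR_S1_XN
         *
         * so the loop below rewrites each PTE to be read-only and
         * non-executable at both EL1 and EL0, while leaving its memory
         * attribute index (ATTR_S1_IDX) untouched.
         */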
        for (tmpva = base; tmpva < base + size; ) {
                pte = pmap_pte(kernel_pmap, tmpva, &lvl);
                if (pte == NULL)
                        return (EINVAL);
-               if ((pmap_load(pte) & ATTR_S1_IDX_MASK) == ATTR_S1_IDX(mode)) {
+               if ((pmap_load(pte) & mask) == bits) {
                        /*
                         * We already have the correct attribute,
                         * ignore this entry.
                         */
                        switch (lvl) {
                        default:
                                panic("Invalid DMAP table level: %d\n", lvl);
                        case 1:
(15 lines elided)
                        default:
                                panic("Invalid DMAP table level: %d\n", lvl);
                        case 1:
                                newpte = pmap_demote_l1(kernel_pmap, pte,
                                    tmpva & ~L1_OFFSET);
                                if (newpte == NULL)
                                        return (EINVAL);
                                pte = pmap_l1_to_l2(pte, tmpva);
                        case 2:
alc: A related observation but not strictly speaking an issue with this change: Consider a case like a large loadable kernel module, e.g., ZFS, where the code could be mapped as 2MB pages. Here, we are unconditionally demoting the mapping even if the protection change could be applied to the 2MB mapping.

andrew (Done): I already had a work-in-progress patch for this, although it still needs some polish.
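A hedged sketch of the optimization alc suggests (hypothetical; not necessarily what andrew's work-in-progress patch does): apply the change at the 2MB level when the range covers the whole superpage, and demote only otherwise:

                                /* Hypothetical sketch: skip demotion when
                                 * the whole 2MB block is being changed. */
                                if ((tmpva & L2_OFFSET) == 0 &&
                                    base + size - tmpva >= L2_SIZE) {
                                        pmap_update_entry(kernel_pmap, pte,
                                            (pmap_load(pte) & ~mask) | bits,
                                            tmpva, L2_SIZE);
                                        tmpva += L2_SIZE;
                                        continue;
                                }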
                                newpte = pmap_demote_l2(kernel_pmap, pte,
                                    tmpva);
                                if (newpte == NULL)
                                        return (EINVAL);
                                pte = pmap_l2_to_l3(pte, tmpva);
                        case 3:
                                /* Update the entry */
                                l3 = pmap_load(pte);
-                               l3 &= ~ATTR_S1_IDX_MASK;
-                               l3 |= ATTR_S1_IDX(mode);
-                               if (mode == VM_MEMATTR_DEVICE)
-                                       l3 |= ATTR_S1_XN;
+                               l3 &= ~mask;
+                               l3 |= bits;
                                pmap_update_entry(kernel_pmap, pte, l3, tmpva,
                                    PAGE_SIZE);
+                               if (!VIRT_IN_DMAP(tmpva)) {
+                                       /*
+                                        * Keep the DMAP memory in sync.
+                                        */
+                                       rv = pmap_change_props_locked(
+                                           PHYS_TO_DMAP(l3 & ~ATTR_MASK),
+                                           L3_SIZE, prot, mode);
+                                       if (rv != 0)
+                                               return (rv);
+                               }

mmel: I think it violates ARM by (even if only short-term) mapping the same physical page multiple times with different cache attributes. This is explicitly marked as undefined behavior in ARM. I'm afraid that the only conforming approach is the break-before-make approach - so first break all mappings and then recreate them with new attributes.

markj: Shouldn't we try to update the direct map all at once, like amd64's version does? Otherwise, even if this function avoids demoting large mappings as Alan suggests above, we will end up demoting anyway since the direct map alias is updated a page at a time.

andrew (Done): It shouldn't be too bad for the DMAP as we always map it RW, so it wouldn't be split when changing permissions. I can have a look at adding a variant of pmap_update_entry that takes multiple entries. We could then use it to change the permissions/attributes in a single operation & only demote when needed.
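For reference against mmel's concern, a paraphrased sketch of what pmap_update_entry() (used above) does for the one entry it rewrites; this is break-before-make for that entry, but aliasing mappings of the same physical page stay live in the meantime (names approximate to this file at the time of the review):

        intr = intr_disable();  /* no interrupts mid-update */
        pmap_clear(pte);        /* break: invalidate the entry */
        pmap_invalidate_range(pmap, va, va + size); /* flush stale TLB */
        pmap_store(pte, newpte);        /* make: install the new entry */
        dsb(ishst);             /* order the PTE store */
        intr_restore(intr);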
                                /*
                                 * If moving to a non-cacheable entry flush
                                 * the cache.
                                 */
                                if (mode == VM_MEMATTR_UNCACHEABLE)
                                        cpu_dcache_wbinv_range(tmpva, L3_SIZE);
                                break;
                        }
(144 lines elided; the code below is in pmap_demote_l2_locked())
        if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
                KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
                    ("pmap_demote_l2: page table page for a wired mapping"
                    " is missing"));

                /*
                 * If the page table page is missing and the mapping
                 * is for a kernel address, the mapping must belong to
-                * the direct map.  Page table pages are preallocated
-                * for every other part of the kernel address space,
-                * so the direct map region is the only part of the
+                * either the direct map or the early kernel memory.
+                * Page table pages are preallocated for every other
+                * part of the kernel address space, so the direct map
+                * region and early kernel memory are the only parts of the
                 * kernel address space that must be handled here.
                 */
-               KASSERT(!ADDR_IS_KERNEL(va) || VIRT_IN_DMAP(va),
+               KASSERT(!ADDR_IS_KERNEL(va) || VIRT_IN_DMAP(va) ||
+                   (va >= VM_MIN_KERNEL_ADDRESS && va < kernel_vm_end),
                    ("pmap_demote_l2: No saved mpte for va %#lx", va));

markj: Isn't kernel_vm_end the appropriate upper bound there? BTW, on amd64 we avoid this by populating the radix tree with bootstrap PTPs in pmap_init().
                /*
                 * If the 2MB page mapping belongs to the direct map
                 * region of the kernel's address space, then the page
                 * allocation request specifies the highest possible
                 * priority (VM_ALLOC_INTERRUPT).  Otherwise, the
                 * priority is normal.
(890 lines elided)