sys/arm64/arm64/pmap.c
[... 211 lines elided; context: #define RELEASE_PV_LIST_LOCK(lockp) do { ...]
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
	PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))

struct pmap kernel_pmap_store;
/* Used for mapping ACPI memory before VM is initialized */
#define	PMAP_PREINIT_MAPPING_COUNT	32
#define	PMAP_PREINIT_MAPPING_SIZE	(PMAP_PREINIT_MAPPING_COUNT * L2_SIZE)
static vm_offset_t preinit_map_va;	/* Start VA of pre-init mapping space */
static int vm_initialized = 0;	/* No need to use pre-init maps when set */

/*
 * Reserve a few L2 blocks starting from 'preinit_map_va' pointer.
 * Always map entire L2 block for simplicity.
 * VA of L2 block = preinit_map_va + i * L2_SIZE
 */
static struct pmap_preinit_mapping {
	vm_paddr_t	pa;
	vm_offset_t	va;
	vm_size_t	size;
} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
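/*
 * Sizing note (derived from the constants above): with
 * PMAP_PREINIT_MAPPING_COUNT = 32 and L2_SIZE = 2 MiB, the reserved
 * pre-init window is 32 * 2 MiB = 64 MiB of VA, and a request for
 * 'size' bytes starting at 'pa' consumes every 2 MiB slot that the
 * range [pa, pa + size) overlaps.
 */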
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

/*
 * Data for the pv entry allocation mechanism.
 * Updates to pv_invl_gen are protected by the pv_list_locks[]
 * elements, but reads are not.
[... 555 lines elided; context: #define alloc_pages(var, np) ...]
	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;
	/* Reserve some VA space for early BIOS/ACPI mapping */
	preinit_map_va = roundup2(freemempos, L2_SIZE);

	virtual_avail = preinit_map_va + PMAP_PREINIT_MAPPING_SIZE;
	virtual_avail = roundup2(virtual_avail, L1_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	/* Finish initialising physmap */
	map_slot = used_map_slot;
	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
[... 79 lines elided; context: pmap_init(void) ...]
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);
	TAILQ_INIT(&pv_dummy.pv_list);
	vm_initialized = 1;
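	/*
	 * From this point on, pmap_mapbios() and pmap_unmapbios() use
	 * kva_alloc()/kva_free() instead of the pre-init mapping slots.
	 */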
}

static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0,
    "2MB page mapping counters");

static u_long pmap_l2_demotions;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
    &pmap_l2_demotions, 0, "2MB page demotions");
[... 3,351 lines elided; context: if ((m->aflags & PGA_WRITEABLE) == 0) ...]
		return;

	/* ARM64TODO: We lack support for tracking if a page is modified */
}
void *
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
{
	struct pmap_preinit_mapping *ppim;
	vm_offset_t va, offset;
	pd_entry_t *pde;
	pt_entry_t *l2;
	int i, lvl, l2_blocks, free_l2_count, start_idx;
	if (!vm_initialized) {
		/*
		 * No L3 ptables so map entire L2 blocks where start VA is:
		 *	preinit_map_va + start_idx * L2_SIZE
		 * There may be duplicate mappings (multiple VA -> same PA) but
		 * ARM64 dcache is always PIPT so that's acceptable.
		 */
		if (size == 0)
			return (NULL);

		/* Calculate how many full L2 blocks are needed for the mapping */
		l2_blocks = (roundup2(pa + size, L2_SIZE) -
		    rounddown2(pa, L2_SIZE)) >> L2_SHIFT;
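		/*
		 * Worked example (values assumed for illustration): with
		 * L2_SIZE = 2 MiB, pa = 0x1f0000 and size = 0x20000 give
		 * rounddown2(pa) = 0 and roundup2(pa + size) = 0x400000,
		 * so l2_blocks = 2: a 128 KiB region that straddles an L2
		 * boundary needs two 2 MiB slots.
		 */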
		offset = pa & L2_OFFSET;

		if (preinit_map_va == 0)
			return (NULL);

		/* Map 2MiB L2 blocks from reserved VA space */

		free_l2_count = 0;
		start_idx = -1;
		/* Find enough free contiguous VA space */
		for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
			ppim = pmap_preinit_mapping + i;
			if (free_l2_count > 0 && ppim->pa != 0) {
				/* Not enough space here */
				free_l2_count = 0;
				start_idx = -1;
				continue;
			}

			if (ppim->pa == 0) {
				/* Free L2 block */
				if (start_idx == -1)
					start_idx = i;
				free_l2_count++;
				if (free_l2_count == l2_blocks)
					break;
			}
		}
		if (free_l2_count != l2_blocks)
			panic("%s: too many preinit mappings", __func__);

		va = preinit_map_va + (start_idx * L2_SIZE);
		for (i = start_idx; i < start_idx + l2_blocks; i++) {
			/* Mark entries as allocated */
			ppim = pmap_preinit_mapping + i;
			ppim->pa = pa;
			ppim->va = va + offset;
			ppim->size = size;
		}

		/* Map L2 blocks */
		pa = rounddown2(pa, L2_SIZE);
		for (i = 0; i < l2_blocks; i++) {
			pde = pmap_pde(kernel_pmap, va, &lvl);
			KASSERT(pde != NULL,
			    ("pmap_mapbios: Invalid page entry, va: 0x%lx", va));
			KASSERT(lvl == 1, ("pmap_mapbios: Invalid level %d", lvl));
andrew: Does this rely on the change to `pmap_bootstrap_dmap`? If not we should split this into two patches. One for pmap_mapbios/pmap_unmapbios and any related changes, and another to further restrict the DMAP region. I'd prefer to handle the DMAP change separately, as it will need acknowledgements to the funding agency the work was carried out under.

mst_semihalf.com: Yes, I think we can split it. I'll remove the pmap_bootstrap_dmap changes from this patch so that you can commit them separately after the mapbios/unmapbios changes have gone in.
			/* Insert L2_BLOCK */
			l2 = pmap_l1_to_l2(pde, va);
			pmap_load_store(l2,
			    pa | ATTR_DEFAULT | ATTR_XN |
			    ATTR_IDX(CACHED_MEMORY) | L2_BLOCK);
			pmap_invalidate_range(kernel_pmap, va, va + L2_SIZE);

			va += L2_SIZE;
			pa += L2_SIZE;
		}
		va = preinit_map_va + (start_idx * L2_SIZE);
	} else {
		/* kva_alloc may be used to map the pages */
		offset = pa & PAGE_MASK;
		size = round_page(offset + size);

		va = kva_alloc(size);
		if (va == 0)
			panic("%s: Couldn't allocate KVA", __func__);

		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(lvl == 2, ("pmap_mapbios: Invalid level %d", lvl));

		/* L3 table is linked */
		va = trunc_page(va);
		pa = trunc_page(pa);
		pmap_kenter(va, size, pa, CACHED_MEMORY);
	}

	return ((void *)(va + offset));
}
void
pmap_unmapbios(vm_offset_t va, vm_size_t size)
{
	struct pmap_preinit_mapping *ppim;
	vm_offset_t offset, tmpsize;
	pd_entry_t *pde;
	int i, lvl, l2_blocks;

	l2_blocks = (roundup2(va + size, L2_SIZE) -
	    rounddown2(va, L2_SIZE)) >> L2_SHIFT;
	KASSERT(l2_blocks > 0, ("pmap_unmapbios: invalid size %lx", size));
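	/*
	 * For a pre-init mapping, each slot recorded with this va
	 * decrements l2_blocks below; once all of its 2 MiB slots have
	 * been released the function returns.  Mappings made after
	 * pmap_init() match no slot and fall through to the
	 * vm_initialized path instead.
	 */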
	for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
		ppim = pmap_preinit_mapping + i;
		if (ppim->va == va) {
			KASSERT(ppim->size == size,
			    ("pmap_unmapbios: size mismatch"));
andrew: I'd prefer we remove the mapping here. It's undefined behaviour to map the same physical address twice with different attributes.
			ppim->va = 0;
			ppim->pa = 0;
			ppim->size = 0;
			l2_blocks--;
			if (l2_blocks == 0)
				return;
		}
	}
	/* Unmap the pages reserved with kva_alloc. */
	if (vm_initialized) {
		offset = va & PAGE_MASK;
		size = round_page(offset + size);
		va = trunc_page(va);

		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2, ("pmap_unmapbios: Invalid level %d", lvl));

		/* Unmap and invalidate the pages */
		for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
			pmap_kremove(va + tmpsize);

		kva_free(va, size);
	}
}
/*
 * Sets the memory attribute for the specified page.
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
	m->md.pv_memattr = ma;
	/*
	 * If "m" is a normal page, update its direct mapping.  This update
	 * can be relied upon to perform any cache operations that are
	 * required for data coherence.
	 */
	if ((m->flags & PG_FICTITIOUS) == 0 &&
	    pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
	    m->md.pv_memattr) != 0)
andrew: This doesn't look right. What will happen if we have a mapping open across setting vm_initialized?

mst_semihalf.com: That's a caveat I forgot to mention - it wouldn't work if the mapping is held for so long. I wanted to keep the code simple and didn't see such usage of mapbios currently in the kernel, so I concluded it would be invalid to use this interface in this way.

Also, there's one more problem with holding a mapping across setting vm_initialized. If the mapped physical memory is included in phys_avail (which shouldn't be the case for ACPI tables, but that's solely dependent on what EFI provides), then 'vm_initialized = 1' means the VM has already added it to its pool and may overwrite data in those pages at any time. The pre-init mapping is no longer valid in such a case, once vm_initialized is set - which is done in pmap_init.

To add support for mappings held across VM initialization while keeping it simple, I suppose it would be enough to store the original PA and size in the pre-init array, maybe increasing the size of the array, and always map a new L2 block each time. This could lead to duplicate mappings (two VA blocks -> same PA block), but I think arm64 mandates PIPT d-caches so it shouldn't be a problem.
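A minimal sketch of the usage pattern this discussion assumes (the consumer function and its arguments are hypothetical): the mapping is created and released entirely before pmap_init() sets vm_initialized.

	/* Hypothetical early-boot consumer; must run before pmap_init(). */
	static void
	early_parse_table(vm_paddr_t table_pa, vm_size_t table_len)
	{
		void *p;

		/* !vm_initialized, so this takes the pre-init L2-block path. */
		p = pmap_mapbios(table_pa, table_len);

		/* ... read the table contents through 'p' ... */

		/* Release the pre-init slots before the VM takes over. */
		pmap_unmapbios((vm_offset_t)p, table_len);
	}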
panic("memory attribute change on the direct map failed"); | panic("memory attribute change on the direct map failed"); | ||||
} | } | ||||
/*
 * Changes the specified virtual address range's memory type to that given by
andrew: You should call kva_free after removing the maps; another CPU may allocate the same virtual address, creating a race.

mst_semihalf.com: Good point, I'll reverse the order. I wonder if it's necessary to remove the maps at all - on amd64, for example, there's only kva_free() and return.
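A sketch of the interleaving being described (CPU labels hypothetical):

	/*
	 * CPU0: kva_free(va, size);          <- va returned to the allocator
	 * CPU1: va2 = kva_alloc(size);       <- may hand back the same range
	 * CPU1: pmap_kenter(va2, ...);       <- installs new PTEs at va
	 * CPU0: pmap_kremove(va + tmpsize);  <- tears down CPU1's mappings
	 *
	 * Removing the mappings before calling kva_free(), as the updated
	 * pmap_unmapbios() above does, closes this window.
	 */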
 * the parameter "mode".  The specified virtual address range must be
 * completely contained within either the direct map or the kernel map.  If
 * the virtual address range is contained within the kernel map, then the
 * memory type for each of the corresponding ranges of the direct map is also
 * changed.  (The corresponding ranges of the direct map are those ranges that
 * map the same physical pages as the specified virtual address range.)  These
 * changes to the direct map are necessary because Intel describes the
 * behavior of their processors as "undefined" if two or more mappings to the
[... 616 lines elided ...]