Changeset View
Changeset View
Standalone View
Standalone View
sys/amd64/amd64/pmap.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 118 Lines • ▼ Show 20 Lines | |||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/rangeset.h> | #include <sys/rangeset.h> | ||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/sbuf.h> | |||||
#include <sys/sx.h> | #include <sys/sx.h> | ||||
#include <sys/turnstile.h> | #include <sys/turnstile.h> | ||||
#include <sys/vmem.h> | #include <sys/vmem.h> | ||||
#include <sys/vmmeter.h> | #include <sys/vmmeter.h> | ||||
#include <sys/sched.h> | #include <sys/sched.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
#ifdef DDB | #ifdef DDB | ||||
▲ Show 20 Lines • Show All 1,250 Lines • ▼ Show 20 Lines | |||||
* way. | * way. | ||||
* | * | ||||
* Note that this doesn't currently provide any protection for modules. | * Note that this doesn't currently provide any protection for modules. | ||||
*/ | */ | ||||
static inline pt_entry_t | static inline pt_entry_t | ||||
bootaddr_rwx(vm_paddr_t pa) | bootaddr_rwx(vm_paddr_t pa) | ||||
{ | { | ||||
printf("%s: pa %#lx\n", __func__, pa); | |||||
/* | /* | ||||
* Everything in the same 2M page as the start of the kernel | * Everything in the same 2M page as the start of the kernel | ||||
* should be static. On the other hand, things in the same 2M | * should be static. On the other hand, things in the same 2M | ||||
* page as the end of the kernel could be read-write/executable, | * page as the end of the kernel could be read-write/executable, | ||||
* as the kernel image is not guaranteed to end on a 2M boundary. | * as the kernel image is not guaranteed to end on a 2M boundary. | ||||
*/ | */ | ||||
if (pa < trunc_2mpage(btext - KERNBASE) || | if (pa < trunc_2mpage(btext - KERNBASE)) | ||||
pa >= trunc_2mpage(_end - KERNBASE)) | return (X86_PG_RW | pg_nx); | ||||
if (pa >= trunc_2mpage(_end - KERNBASE)) | |||||
return (X86_PG_RW); | return (X86_PG_RW); | ||||
/* | /* | ||||
* The linker should ensure that the read-only and read-write | * The linker should ensure that the read-only and read-write | ||||
* portions don't share the same 2M page, so this shouldn't | * portions don't share the same 2M page, so this shouldn't | ||||
* impact read-only data. However, in any case, any page with | * impact read-only data. However, in any case, any page with | ||||
* read-write data needs to be read-write. | * read-write data needs to be read-write. | ||||
*/ | */ | ||||
if (pa >= trunc_2mpage(brwsection - KERNBASE)) | if (pa >= trunc_2mpage(brwsection - KERNBASE)) | ||||
return (X86_PG_RW | pg_nx); | return (X86_PG_RW | pg_nx); | ||||
/* | /* | ||||
* Mark any 2M page containing kernel text as read-only. Mark | * Mark any 2M page containing kernel text as read-only. Mark | ||||
* other pages with read-only data as read-only and not executable. | * other pages with read-only data as read-only and not executable. | ||||
* (It is likely a small portion of the read-only data section will | * (It is likely a small portion of the read-only data section will | ||||
* be marked as read-only, but executable. This should be acceptable | * be marked as read-only, but executable. This should be acceptable | ||||
* since the read-only protection will keep the data from changing.) | * since the read-only protection will keep the data from changing.) | ||||
* Note that fixups to the .text section will still work until we | * Note that fixups to the .text section will still work until we | ||||
* set CR0.WP. | * set CR0.WP. | ||||
▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines | create_pagetables(vm_paddr_t *firstaddr) | ||||
/* And recursively map PML4 to itself in order to get PTmap */ | /* And recursively map PML4 to itself in order to get PTmap */ | ||||
p4_p = (pml4_entry_t *)KPML4phys; | p4_p = (pml4_entry_t *)KPML4phys; | ||||
p4_p[PML4PML4I] = KPML4phys; | p4_p[PML4PML4I] = KPML4phys; | ||||
p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx; | p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx; | ||||
/* Connect the Direct Map slot(s) up to the PML4. */ | /* Connect the Direct Map slot(s) up to the PML4. */ | ||||
for (i = 0; i < ndmpdpphys; i++) { | for (i = 0; i < ndmpdpphys; i++) { | ||||
p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); | p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); | ||||
p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V; | p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx; | ||||
} | } | ||||
/* Connect the KVA slots up to the PML4 */ | /* Connect the KVA slots up to the PML4 */ | ||||
for (i = 0; i < NKPML4E; i++) { | for (i = 0; i < NKPML4E; i++) { | ||||
p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); | p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); | ||||
p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V; | p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 502 Lines • ▼ Show 20 Lines | case PT_EPT: | ||||
break; | break; | ||||
default: | default: | ||||
panic("pmap_cache_mask: invalid pm_type %d", pmap->pm_type); | panic("pmap_cache_mask: invalid pm_type %d", pmap->pm_type); | ||||
} | } | ||||
return (mask); | return (mask); | ||||
} | } | ||||
static int | |||||
pmap_pat_index(pmap_t pmap, pt_entry_t pte, bool is_pde) | |||||
{ | |||||
int pat_flag, pat_idx; | |||||
pat_idx = 0; | |||||
switch (pmap->pm_type) { | |||||
case PT_X86: | |||||
case PT_RVI: | |||||
/* The PAT bit is different for PTE's and PDE's. */ | |||||
pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT; | |||||
if ((pte & pat_flag) != 0) | |||||
pat_idx |= 0x4; | |||||
if ((pte & PG_NC_PCD) != 0) | |||||
pat_idx |= 0x2; | |||||
if ((pte & PG_NC_PWT) != 0) | |||||
pat_idx |= 0x1; | |||||
break; | |||||
case PT_EPT: | |||||
if ((pte & EPT_PG_IGNORE_PAT) != 0) | |||||
panic("EPT PTE %#lx has no PAT memory type", pte); | |||||
pat_idx = (pte & EPT_PG_MEMORY_TYPE(0x7)) >> 3; | |||||
break; | |||||
} | |||||
/* See pmap_init_pat(). */ | |||||
if (pat_idx == 4) | |||||
pat_idx = 0; | |||||
if (pat_idx == 7) | |||||
pat_idx = 3; | |||||
return (pat_idx); | |||||
} | |||||
bool | bool | ||||
pmap_ps_enabled(pmap_t pmap) | pmap_ps_enabled(pmap_t pmap) | ||||
{ | { | ||||
return (pg_ps_enabled && (pmap->pm_flags & PMAP_PDE_SUPERPAGE) != 0); | return (pg_ps_enabled && (pmap->pm_flags & PMAP_PDE_SUPERPAGE) != 0); | ||||
} | } | ||||
static void | static void | ||||
▲ Show 20 Lines • Show All 7,853 Lines • ▼ Show 20 Lines | if (error == 0) | ||||
pmap_pkru_update_range(pmap, sva, eva, 0); | pmap_pkru_update_range(pmap, sva, eva, 0); | ||||
PMAP_UNLOCK(pmap); | PMAP_UNLOCK(pmap); | ||||
if (error != ENOMEM) | if (error != ENOMEM) | ||||
break; | break; | ||||
vm_wait(NULL); | vm_wait(NULL); | ||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
/*
 * Track a range of the kernel's virtual address space that is contiguous
 * in various mapping attributes.
 */
struct pmap_kernel_map_range {
	vm_offset_t sva;	/* start VA of the current range */
	pt_entry_t attrs;	/* attributes, PAT bit in PDE position */
	int ptes;		/* 4KB (PTE) mappings seen in the range */
	int pdes;		/* 2MB (PDE) mappings seen in the range */
	int pdpes;		/* 1GB (PDPE) mappings seen in the range */
};
static void | |||||
sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range, | |||||
vm_offset_t eva) | |||||
{ | |||||
const char *mode; | |||||
int i, pat_idx; | |||||
if (eva <= range->sva) | |||||
return; | |||||
pat_idx = pmap_pat_index(kernel_pmap, range->attrs, true); | |||||
for (i = 0; i < PAT_INDEX_SIZE; i++) | |||||
if (pat_index[i] == pat_idx) | |||||
break; | |||||
switch (i) { | |||||
case PAT_WRITE_BACK: | |||||
mode = "WB"; | |||||
break; | |||||
case PAT_WRITE_THROUGH: | |||||
mode = "WT"; | |||||
break; | |||||
case PAT_UNCACHEABLE: | |||||
mode = "UC"; | |||||
break; | |||||
case PAT_WRITE_PROTECTED: | |||||
mode = "WP"; | |||||
break; | |||||
case PAT_WRITE_COMBINING: | |||||
mode = "WC"; | |||||
break; | |||||
default: | |||||
panic("sysctl_kmaps_dump: unknown PAT mode %d", i); | |||||
} | |||||
sbuf_printf(sb, "%#016lx-%#016lx r%c%c%c%c %s %d %d %d\n", | |||||
range->sva, eva, | |||||
(range->attrs & X86_PG_RW) != 0 ? 'w' : '-', | |||||
(range->attrs & pg_nx) != 0 ? '-' : 'x', | |||||
(range->attrs & X86_PG_U) != 0 ? 'u' : 's', | |||||
(range->attrs & X86_PG_G) != 0 ? 'g' : '-', | |||||
mode, range->pdpes, range->pdes, range->ptes); | |||||
/* Reset to sentinel value. */ | |||||
range->sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1); | |||||
} | |||||
/* | |||||
* Determine whether the attributes specified by a page table entry match those | |||||
* being tracked by the current range. This is not quite as simple as a direct | |||||
* flag comparison since some PAT modes have multiple representations. | |||||
*/ | |||||
static bool | |||||
sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs, | |||||
pt_entry_t mask) | |||||
{ | |||||
pt_entry_t diff; | |||||
diff = (range->attrs ^ attrs) & mask; | |||||
if (diff == 0) | |||||
return (true); | |||||
if ((diff & ~X86_PG_PDE_PAT) == 0 && | |||||
pmap_pat_index(kernel_pmap, range->attrs, true) == | |||||
pmap_pat_index(kernel_pmap, attrs, true)) | |||||
return (true); | |||||
return (false); | |||||
} | |||||
static void | |||||
sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va, | |||||
pt_entry_t attrs) | |||||
{ | |||||
memset(range, 0, sizeof(*range)); | |||||
range->sva = va; | |||||
range->attrs = attrs; | |||||
} | |||||
/*
 * Called for every kernel page table entry.  Maintain a range of kernel virtual
 * addresses that is contiguous with respect to access permissions and cache
 * mode.
 *
 * Returns true if the caller should descend to the next page table level
 * (the entry at the deepest level supplied is valid and not a leaf), and
 * false otherwise.  A NULL pdp/pd/pt pointer means "stop at this level".
 */
static bool
sysctl_kmaps_update(struct sbuf *sb, struct pmap_kernel_map_range *range,
    vm_offset_t va, pml4_entry_t *pml4, pdp_entry_t *pdp, pd_entry_t *pd,
    pt_entry_t *pt)
{
	pt_entry_t attrs, mask;

	/* An invalid entry ends any range in progress. */
	if ((*pml4 & X86_PG_V) == 0) {
		sysctl_kmaps_dump(sb, range, va);
		return (false);
	}
	attrs = *pml4 & (X86_PG_RW | X86_PG_U | pg_nx);
	mask = X86_PG_RW | X86_PG_U | pg_nx;
	if (pdp == NULL)
		return (true);
	if ((*pdp & X86_PG_V) == 0) {
		sysctl_kmaps_dump(sb, range, va);
		return (false);
	}
	/*
	 * NX accumulates (set at any level makes the mapping NX); RW and
	 * U must be granted at every level to take effect.
	 */
	attrs |= *pdp & pg_nx;
	attrs &= pg_nx | (*pdp & (X86_PG_RW | X86_PG_U));
	if ((*pdp & X86_PG_PS) != 0) {
		/* 1GB leaf: cache/global bits come from this entry. */
		attrs |= *pdp & (X86_PG_G | X86_PG_PDE_CACHE);
		mask |= X86_PG_G | X86_PG_PDE_CACHE;
		/*
		 * Restart the range if "va" is not contiguous with it
		 * (range->sva is the sentinel or attributes differ).
		 */
		if (range->sva > va ||
		    !sysctl_kmaps_match(range, attrs, mask)) {
			sysctl_kmaps_dump(sb, range, va);
			sysctl_kmaps_reinit(range, va, attrs);
		}
		range->pdpes++;
		return (false);
	}
	if (pd == NULL)
		return (true);
	if ((*pd & X86_PG_V) == 0) {
		sysctl_kmaps_dump(sb, range, va);
		return (false);
	}
	attrs |= *pd & pg_nx;
	attrs &= pg_nx | (*pd & (X86_PG_RW | X86_PG_U));
	if ((*pd & X86_PG_PS) != 0) {
		/* 2MB leaf mapping. */
		attrs |= *pd & (X86_PG_G | X86_PG_PDE_CACHE);
		mask |= X86_PG_G | X86_PG_PDE_CACHE;
		if (range->sva > va ||
		    !sysctl_kmaps_match(range, attrs, mask)) {
			sysctl_kmaps_dump(sb, range, va);
			sysctl_kmaps_reinit(range, va, attrs);
		}
		range->pdes++;
		return (false);
	}
	if (pt == NULL)
		return (true);
	if ((*pt & X86_PG_V) == 0) {
		sysctl_kmaps_dump(sb, range, va);
		return (false);
	}
	attrs |= *pt & pg_nx;
	attrs &= pg_nx | (*pt & (X86_PG_RW | X86_PG_U));
	attrs |= (*pt & (X86_PG_G | X86_PG_PTE_CACHE));
	/*
	 * PTEs use different bits for the PAT index; canonicalize by using the
	 * PDE format.
	 */
	if ((*pt & X86_PG_PTE_PAT) != 0) {
		attrs &= ~X86_PG_PTE_PAT;
		attrs |= X86_PG_PDE_PAT;
	}
	mask |= X86_PG_G | X86_PG_PDE_CACHE;
	if (range->sva > va || !sysctl_kmaps_match(range, attrs, mask)) {
		sysctl_kmaps_dump(sb, range, va);
		sysctl_kmaps_reinit(range, va, attrs);
	}
	range->ptes++;
	return (true);
}
/*
 * Sysctl handler for vm.pmap.kernel_maps: walk the entire kernel page
 * table hierarchy and print one line per range of kernel virtual
 * addresses that is contiguous in access permissions and cache mode.
 */
static int
sysctl_kmaps(SYSCTL_HANDLER_ARGS)
{
	struct pmap_kernel_map_range range;
	struct sbuf sbuf, *sb;
	pml4_entry_t *pml4;
	pdp_entry_t *pdp;
	pd_entry_t *pd;
	pt_entry_t *pt;
	vm_offset_t va;
	int error, i, j, k, l;

	/* Wire the user buffer so we can fill it while holding the lock. */
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sb = &sbuf;
	sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);

	/* Sentinel: the maximal KVA, so the first update starts a range. */
	range.sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);

	PMAP_LOCK(kernel_pmap);
	for (i = 0; i < NPML4EPG; i++) {
		/* Label well-known top-level regions as we reach them. */
		switch (i) {
		case PML4PML4I:
			sbuf_printf(sb, "\nRecursive map:\n");
			break;
		case DMPML4I:
			sbuf_printf(sb, "\nDirect map:\n");
			break;
		case KPML4BASE:
			sbuf_printf(sb, "\nKernel map:\n");
			break;
		case LMSPML4I:
			sbuf_printf(sb, "\nLarge map:\n");
			break;
		}

		va = KVADDR(i, 0, 0, 0);
		pml4 = &kernel_pmap->pm_pml4[i];
		/* Each update returns false when there is nothing below. */
		if (!sysctl_kmaps_update(sb, &range, va, pml4, NULL, NULL,
		    NULL))
			continue;
		for (j = 0; j < NPDPEPG; j++) {
			va = KVADDR(i, j, 0, 0);
			pdp = pmap_pml4e_to_pdpe(pml4, va);
			if (!sysctl_kmaps_update(sb, &range, va, pml4, pdp,
			    NULL, NULL))
				continue;
			for (k = 0; k < NPDEPG; k++) {
				va = KVADDR(i, j, k, 0);
				pd = pmap_pdpe_to_pde(pdp, va);
				if (!sysctl_kmaps_update(sb, &range, va, pml4,
				    pdp, pd, NULL))
					continue;
				for (l = 0; l < NPTEPG; l++) {
					va = KVADDR(i, j, k, l);
					pt = pmap_pde_to_pte(pd, va);
					(void)sysctl_kmaps_update(sb, &range,
					    va, pml4, pdp, pd, pt);
				}
			}
		}
	}
	PMAP_UNLOCK(kernel_pmap);

	error = sbuf_finish(sb);
	sbuf_delete(sb);
	return (error);
}
/* Expose the kernel address-space layout as vm.pmap.kernel_maps. */
SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_kmaps, "A",
    "Dump kernel address layout");
#ifdef DDB | #ifdef DDB | ||||
DB_SHOW_COMMAND(pte, pmap_print_pte) | DB_SHOW_COMMAND(pte, pmap_print_pte) | ||||
{ | { | ||||
pmap_t pmap; | pmap_t pmap; | ||||
pml4_entry_t *pml4; | pml4_entry_t *pml4; | ||||
pdp_entry_t *pdp; | pdp_entry_t *pdp; | ||||
pd_entry_t *pde; | pd_entry_t *pde; | ||||
▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines |