Changeset View
Changeset View
Standalone View
Standalone View
head/sys/amd64/amd64/pmap.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 118 Lines • ▼ Show 20 Lines | |||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/rangeset.h> | #include <sys/rangeset.h> | ||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/sbuf.h> | |||||
#include <sys/sx.h> | #include <sys/sx.h> | ||||
#include <sys/turnstile.h> | #include <sys/turnstile.h> | ||||
#include <sys/vmem.h> | #include <sys/vmem.h> | ||||
#include <sys/vmmeter.h> | #include <sys/vmmeter.h> | ||||
#include <sys/sched.h> | #include <sys/sched.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
#ifdef DDB | #ifdef DDB | ||||
▲ Show 20 Lines • Show All 1,972 Lines • ▼ Show 20 Lines | case PT_EPT: | ||||
break; | break; | ||||
default: | default: | ||||
panic("pmap_cache_mask: invalid pm_type %d", pmap->pm_type); | panic("pmap_cache_mask: invalid pm_type %d", pmap->pm_type); | ||||
} | } | ||||
return (mask); | return (mask); | ||||
} | } | ||||
static int | |||||
pmap_pat_index(pmap_t pmap, pt_entry_t pte, bool is_pde) | |||||
{ | |||||
int pat_flag, pat_idx; | |||||
pat_idx = 0; | |||||
switch (pmap->pm_type) { | |||||
case PT_X86: | |||||
case PT_RVI: | |||||
/* The PAT bit is different for PTE's and PDE's. */ | |||||
pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT; | |||||
if ((pte & pat_flag) != 0) | |||||
pat_idx |= 0x4; | |||||
if ((pte & PG_NC_PCD) != 0) | |||||
pat_idx |= 0x2; | |||||
if ((pte & PG_NC_PWT) != 0) | |||||
pat_idx |= 0x1; | |||||
break; | |||||
case PT_EPT: | |||||
if ((pte & EPT_PG_IGNORE_PAT) != 0) | |||||
panic("EPT PTE %#lx has no PAT memory type", pte); | |||||
pat_idx = (pte & EPT_PG_MEMORY_TYPE(0x7)) >> 3; | |||||
break; | |||||
} | |||||
/* See pmap_init_pat(). */ | |||||
if (pat_idx == 4) | |||||
pat_idx = 0; | |||||
if (pat_idx == 7) | |||||
pat_idx = 3; | |||||
return (pat_idx); | |||||
} | |||||
bool | bool | ||||
pmap_ps_enabled(pmap_t pmap) | pmap_ps_enabled(pmap_t pmap) | ||||
{ | { | ||||
return (pg_ps_enabled && (pmap->pm_flags & PMAP_PDE_SUPERPAGE) != 0); | return (pg_ps_enabled && (pmap->pm_flags & PMAP_PDE_SUPERPAGE) != 0); | ||||
} | } | ||||
static void | static void | ||||
▲ Show 20 Lines • Show All 7,852 Lines • ▼ Show 20 Lines | if (error == 0) | ||||
pmap_pkru_update_range(pmap, sva, eva, 0); | pmap_pkru_update_range(pmap, sva, eva, 0); | ||||
PMAP_UNLOCK(pmap); | PMAP_UNLOCK(pmap); | ||||
if (error != ENOMEM) | if (error != ENOMEM) | ||||
break; | break; | ||||
vm_wait(NULL); | vm_wait(NULL); | ||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
/*
 * Track a range of the kernel's virtual address space that is contiguous
 * in various mapping attributes.
 */
struct pmap_kernel_map_range {
	vm_offset_t sva;	/* start of the current run */
	pt_entry_t attrs;	/* canonicalized attribute bits for the run */
	int ptes;		/* number of leaf PTE mappings in the run */
	int pdes;		/* number of PG_PS PDE mappings in the run */
	int pdpes;		/* number of PG_PS PDPE mappings in the run */
};
/*
 * Emit one line for the completed run [range->sva, eva): the address range,
 * its protection and global bits, its cache mode, and the number of leaf
 * mappings of each size.  The range is then reset to the sentinel value so
 * that a new run can begin.  Empty ranges produce no output.
 */
static void
sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
    vm_offset_t eva)
{
	const char *mode;
	int i, pat_idx;

	/* Nothing accumulated yet. */
	if (eva <= range->sva)
		return;

	/*
	 * Translate the hardware PAT index back to the logical caching
	 * mode by searching the pat_index[] table.
	 */
	pat_idx = pmap_pat_index(kernel_pmap, range->attrs, true);
	for (i = 0; i < PAT_INDEX_SIZE; i++)
		if (pat_index[i] == pat_idx)
			break;
	switch (i) {
	case PAT_WRITE_BACK:
		mode = "WB";
		break;
	case PAT_WRITE_THROUGH:
		mode = "WT";
		break;
	case PAT_UNCACHEABLE:
		mode = "UC";
		break;
	case PAT_WRITE_PROTECTED:
		mode = "WP";
		break;
	case PAT_WRITE_COMBINING:
		mode = "WC";
		break;
	default:
		printf("%s: unknown PAT mode %#x for range %#016lx-%#016lx\n",
		    __func__, i, range->sva, eva);
		mode = "??";
		break;
	}

	/* Flags: r (always), w, x, u/s (user/supervisor), g (global). */
	sbuf_printf(sb, "%#016lx-%#016lx r%c%c%c%c %s %d %d %d\n",
	    range->sva, eva,
	    (range->attrs & X86_PG_RW) != 0 ? 'w' : '-',
	    (range->attrs & pg_nx) != 0 ? '-' : 'x',
	    (range->attrs & X86_PG_U) != 0 ? 'u' : 's',
	    (range->attrs & X86_PG_G) != 0 ? 'g' : '-',
	    mode, range->pdpes, range->pdes, range->ptes);

	/* Reset to sentinel value. */
	range->sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
}
/* | |||||
* Determine whether the attributes specified by a page table entry match those | |||||
* being tracked by the current range. This is not quite as simple as a direct | |||||
* flag comparison since some PAT modes have multiple representations. | |||||
*/ | |||||
static bool | |||||
sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs) | |||||
{ | |||||
pt_entry_t diff, mask; | |||||
mask = X86_PG_G | X86_PG_RW | X86_PG_U | X86_PG_PDE_CACHE | pg_nx; | |||||
diff = (range->attrs ^ attrs) & mask; | |||||
if (diff == 0) | |||||
return (true); | |||||
if ((diff & ~X86_PG_PDE_PAT) == 0 && | |||||
pmap_pat_index(kernel_pmap, range->attrs, true) == | |||||
pmap_pat_index(kernel_pmap, attrs, true)) | |||||
return (true); | |||||
return (false); | |||||
} | |||||
static void | |||||
sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va, | |||||
pt_entry_t attrs) | |||||
{ | |||||
memset(range, 0, sizeof(*range)); | |||||
range->sva = va; | |||||
range->attrs = attrs; | |||||
} | |||||
/*
 * Given a leaf PTE, derive the mapping's attributes.  If they do not match
 * those of the current run, dump the address range and its attributes, and
 * begin a new run.
 */
static void
sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range,
    vm_offset_t va, pml4_entry_t pml4e, pdp_entry_t pdpe, pd_entry_t pde,
    pt_entry_t pte)
{
	pt_entry_t attrs;

	/*
	 * Accumulate the effective attributes level by level: RW and U are
	 * intersected (a mapping is writable/user-accessible only if every
	 * level grants it), while pg_nx is unioned (any level can revoke
	 * execute permission).
	 */
	attrs = pml4e & (X86_PG_RW | X86_PG_U | pg_nx);

	attrs |= pdpe & pg_nx;
	attrs &= pg_nx | (pdpe & (X86_PG_RW | X86_PG_U));
	if ((pdpe & PG_PS) != 0) {
		/* PG_PS PDPE leaf: global and cache bits come from it. */
		attrs |= pdpe & (X86_PG_G | X86_PG_PDE_CACHE);
	} else if (pde != 0) {
		attrs |= pde & pg_nx;
		attrs &= pg_nx | (pde & (X86_PG_RW | X86_PG_U));
	}

	if ((pde & PG_PS) != 0) {
		/* PG_PS PDE leaf: global and cache bits come from it. */
		attrs |= pde & (X86_PG_G | X86_PG_PDE_CACHE);
	} else if (pte != 0) {
		attrs |= pte & pg_nx;
		attrs &= pg_nx | (pte & (X86_PG_RW | X86_PG_U));
		attrs |= pte & (X86_PG_G | X86_PG_PTE_CACHE);

		/* Canonicalize by always using the PDE PAT bit. */
		if ((attrs & X86_PG_PTE_PAT) != 0)
			attrs ^= X86_PG_PDE_PAT | X86_PG_PTE_PAT;
	}

	/*
	 * Start a new run when the address is discontiguous with the current
	 * run (sva past va means the run is at its sentinel) or the
	 * attributes differ.
	 */
	if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
		sysctl_kmaps_dump(sb, range, va);
		sysctl_kmaps_reinit(range, va, attrs);
	}
}
/*
 * Sysctl handler that dumps the layout of the kernel address space: one
 * line per contiguous run of mappings with identical attributes, giving
 * the range, protection, cache mode, and per-size mapping counts.
 */
static int
sysctl_kmaps(SYSCTL_HANDLER_ARGS)
{
	struct pmap_kernel_map_range range;
	struct sbuf sbuf, *sb;
	pml4_entry_t pml4e;
	pdp_entry_t *pdp, pdpe;
	pd_entry_t *pd, pde;
	pt_entry_t *pt, pte;
	vm_offset_t sva;
	vm_paddr_t pa;
	int error, i, j, k, l;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sb = &sbuf;
	sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);

	/* Sentinel value. */
	range.sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);

	/*
	 * Iterate over the kernel page tables without holding the kernel pmap
	 * lock.  Outside of the large map, kernel page table pages are never
	 * freed, so at worst we will observe inconsistencies in the output.
	 * Within the large map, ensure that PDP and PD page addresses are
	 * valid before descending.
	 */
	for (sva = 0, i = pmap_pml4e_index(sva); i < NPML4EPG; i++) {
		switch (i) {
		case PML4PML4I:
			sbuf_printf(sb, "\nRecursive map:\n");
			break;
		case DMPML4I:
			sbuf_printf(sb, "\nDirect map:\n");
			break;
		case KPML4BASE:
			sbuf_printf(sb, "\nKernel map:\n");
			break;
		case LMSPML4I:
			sbuf_printf(sb, "\nLarge map:\n");
			break;
		}

		/* Convert to canonical form. */
		if (sva == 1ul << 47)
			sva |= -1ul << 48;

		/*
		 * Within the large map a page table page may have been freed
		 * while we examined it, so the validity checks below jump
		 * back here to re-read this PML4E.
		 */
restart:
		pml4e = kernel_pmap->pm_pml4[i];
		if ((pml4e & X86_PG_V) == 0) {
			/* Unmapped: close out any open run and skip ahead. */
			sva = rounddown2(sva, NBPML4);
			sysctl_kmaps_dump(sb, &range, sva);
			sva += NBPML4;
			continue;
		}
		pa = pml4e & PG_FRAME;
		pdp = (pdp_entry_t *)PHYS_TO_DMAP(pa);

		for (j = pmap_pdpe_index(sva); j < NPDPEPG; j++) {
			pdpe = pdp[j];
			if ((pdpe & X86_PG_V) == 0) {
				sva = rounddown2(sva, NBPDP);
				sysctl_kmaps_dump(sb, &range, sva);
				sva += NBPDP;
				continue;
			}
			pa = pdpe & PG_FRAME;
			if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
			    vm_phys_paddr_to_vm_page(pa) == NULL)
				goto restart;
			if ((pdpe & PG_PS) != 0) {
				/* PG_PS PDPE leaf mapping. */
				sva = rounddown2(sva, NBPDP);
				sysctl_kmaps_check(sb, &range, sva, pml4e, pdpe,
				    0, 0);
				range.pdpes++;
				sva += NBPDP;
				continue;
			}
			pd = (pd_entry_t *)PHYS_TO_DMAP(pa);

			for (k = pmap_pde_index(sva); k < NPDEPG; k++) {
				pde = pd[k];
				if ((pde & X86_PG_V) == 0) {
					sva = rounddown2(sva, NBPDR);
					sysctl_kmaps_dump(sb, &range, sva);
					sva += NBPDR;
					continue;
				}
				pa = pde & PG_FRAME;
				if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
				    vm_phys_paddr_to_vm_page(pa) == NULL)
					goto restart;
				if ((pde & PG_PS) != 0) {
					/* PG_PS PDE leaf mapping. */
					sva = rounddown2(sva, NBPDR);
					sysctl_kmaps_check(sb, &range, sva,
					    pml4e, pdpe, pde, 0);
					range.pdes++;
					sva += NBPDR;
					continue;
				}
				pt = (pt_entry_t *)PHYS_TO_DMAP(pa);

				for (l = pmap_pte_index(sva); l < NPTEPG; l++,
				    sva += PAGE_SIZE) {
					pte = pt[l];
					if ((pte & X86_PG_V) == 0) {
						sysctl_kmaps_dump(sb, &range,
						    sva);
						continue;
					}
					sysctl_kmaps_check(sb, &range, sva,
					    pml4e, pdpe, pde, pte);
					range.ptes++;
				}
			}
		}
	}

	error = sbuf_finish(sb);
	sbuf_delete(sb);
	return (error);
}
/* vm.pmap.kernel_maps: read-only string dump produced by sysctl_kmaps(). */
SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_kmaps, "A",
    "Dump kernel address layout");
#ifdef DDB | #ifdef DDB | ||||
DB_SHOW_COMMAND(pte, pmap_print_pte) | DB_SHOW_COMMAND(pte, pmap_print_pte) | ||||
{ | { | ||||
pmap_t pmap; | pmap_t pmap; | ||||
pml4_entry_t *pml4; | pml4_entry_t *pml4; | ||||
pdp_entry_t *pdp; | pdp_entry_t *pdp; | ||||
pd_entry_t *pde; | pd_entry_t *pde; | ||||
▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines |