Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -124,6 +124,7 @@
 #include
 #include
 #include
+#include <sys/sbuf.h>
 #include
 #include
 #include
@@ -2097,6 +2098,41 @@
 	return (mask);
 }
 
+static int
+pmap_pat_index(pmap_t pmap, pt_entry_t pte, bool is_pde)
+{
+	int pat_flag, pat_idx;
+
+	pat_idx = 0;
+	switch (pmap->pm_type) {
+	case PT_X86:
+	case PT_RVI:
+		/* The PAT bit is different for PTE's and PDE's. */
+		pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT;
+
+		if ((pte & pat_flag) != 0)
+			pat_idx |= 0x4;
+		if ((pte & PG_NC_PCD) != 0)
+			pat_idx |= 0x2;
+		if ((pte & PG_NC_PWT) != 0)
+			pat_idx |= 0x1;
+		break;
+	case PT_EPT:
+		if ((pte & EPT_PG_IGNORE_PAT) != 0)
+			panic("EPT PTE %#lx has no PAT memory type", pte);
+		pat_idx = (pte & EPT_PG_MEMORY_TYPE(0x7)) >> 3;
+		break;
+	}
+
+	/* See pmap_init_pat(). */
+	if (pat_idx == 4)
+		pat_idx = 0;
+	if (pat_idx == 7)
+		pat_idx = 3;
+
+	return (pat_idx);
+}
+
 bool
 pmap_ps_enabled(pmap_t pmap)
 {
@@ -9967,6 +10003,260 @@
 	return (error);
 }
 
+/*
+ * Track a range of the kernel's virtual address space that is contiguous
+ * in various mapping attributes.
+ */
+struct pmap_kernel_map_range {
+	vm_offset_t sva;
+	pt_entry_t attrs;
+	int ptes;
+	int pdes;
+	int pdpes;
+};
+
+static void
+sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
+    vm_offset_t eva)
+{
+	const char *mode;
+	int i, pat_idx;
+
+	if (eva <= range->sva)
+		return;
+
+	pat_idx = pmap_pat_index(kernel_pmap, range->attrs, true);
+	for (i = 0; i < PAT_INDEX_SIZE; i++)
+		if (pat_index[i] == pat_idx)
+			break;
+
+	switch (i) {
+	case PAT_WRITE_BACK:
+		mode = "WB";
+		break;
+	case PAT_WRITE_THROUGH:
+		mode = "WT";
+		break;
+	case PAT_UNCACHEABLE:
+		mode = "UC";
+		break;
+	case PAT_WRITE_PROTECTED:
+		mode = "WP";
+		break;
+	case PAT_WRITE_COMBINING:
+		mode = "WC";
+		break;
+	default:
+		panic("sysctl_kmaps_dump: unknown PAT mode %d", i);
+	}
+
+	sbuf_printf(sb, "%#016lx-%#016lx r%c%c%c%c %s %d %d %d\n",
+	    range->sva, eva,
+	    (range->attrs & X86_PG_RW) != 0 ? 'w' : '-',
+	    (range->attrs & pg_nx) != 0 ? '-' : 'x',
+	    (range->attrs & X86_PG_U) != 0 ? 'u' : 's',
+	    (range->attrs & X86_PG_G) != 0 ? 'g' : '-',
+	    mode, range->pdpes, range->pdes, range->ptes);
+
+	/* Reset to sentinel value. */
+	range->sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
+}
+
+/*
+ * Determine whether the attributes specified by a page table entry match those
+ * being tracked by the current range. This is not quite as simple as a direct
+ * flag comparison since some PAT modes have multiple representations.
+ */
+static bool
+sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs,
+    pt_entry_t mask)
+{
+	pt_entry_t diff;
+
+	diff = (range->attrs ^ attrs) & mask;
+	if (diff == 0)
+		return (true);
+	if ((diff & ~X86_PG_PDE_PAT) == 0 &&
+	    pmap_pat_index(kernel_pmap, range->attrs, true) ==
+	    pmap_pat_index(kernel_pmap, attrs, true))
+		return (true);
+	return (false);
+}
+
+static void
+sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va,
+    pt_entry_t attrs)
+{
+
+	memset(range, 0, sizeof(*range));
+	range->sva = va;
+	range->attrs = attrs;
+}
+
+/*
+ * Called for every kernel page table entry. Maintain a range of kernel virtual
+ * addresses that is contiguous with respect to access permissions and cache
+ * mode.
+ */
+static bool
+sysctl_kmaps_update(struct sbuf *sb, struct pmap_kernel_map_range *range,
+    vm_offset_t va, pml4_entry_t *pml4, pdp_entry_t *pdp, pd_entry_t *pd,
+    pt_entry_t *pt)
+{
+	pt_entry_t attrs, mask;
+
+	if ((*pml4 & X86_PG_V) == 0) {
+		sysctl_kmaps_dump(sb, range, va);
+		return (false);
+	}
+	attrs = *pml4 & (X86_PG_RW | X86_PG_U | pg_nx);
+	mask = X86_PG_RW | X86_PG_U | pg_nx;
+
+	if (pdp == NULL)
+		return (true);
+	if ((*pdp & X86_PG_V) == 0) {
+		sysctl_kmaps_dump(sb, range, va);
+		return (false);
+	}
+	attrs |= *pdp & pg_nx;
+	attrs &= pg_nx | (*pdp & (X86_PG_RW | X86_PG_U));
+	if ((*pdp & X86_PG_PS) != 0) {
+		attrs |= *pdp & (X86_PG_G | X86_PG_PDE_CACHE);
+		mask |= X86_PG_G | X86_PG_PDE_CACHE;
+
+		if (range->sva > va ||
+		    !sysctl_kmaps_match(range, attrs, mask)) {
+			sysctl_kmaps_dump(sb, range, va);
+			sysctl_kmaps_reinit(range, va, attrs);
+		}
+		range->pdpes++;
+		return (false);
+	}
+
+	if (pd == NULL)
+		return (true);
+	if ((*pd & X86_PG_V) == 0) {
+		sysctl_kmaps_dump(sb, range, va);
+		return (false);
+	}
+	attrs |= *pd & pg_nx;
+	attrs &= pg_nx | (*pd & (X86_PG_RW | X86_PG_U));
+	if ((*pd & X86_PG_PS) != 0) {
+		attrs |= *pd & (X86_PG_G | X86_PG_PDE_CACHE);
+		mask |= X86_PG_G | X86_PG_PDE_CACHE;
+
+		if (range->sva > va ||
+		    !sysctl_kmaps_match(range, attrs, mask)) {
+			sysctl_kmaps_dump(sb, range, va);
+			sysctl_kmaps_reinit(range, va, attrs);
+		}
+		range->pdes++;
+		return (false);
+	}
+
+	if (pt == NULL)
+		return (true);
+	if ((*pt & X86_PG_V) == 0) {
+		sysctl_kmaps_dump(sb, range, va);
+		return (false);
+	}
+	attrs |= *pt & pg_nx;
+	attrs &= pg_nx | (*pt & (X86_PG_RW | X86_PG_U));
+	attrs |= (*pt & (X86_PG_G | X86_PG_PTE_CACHE));
+
+	/*
+	 * PTEs use different bits for the PAT index; canonicalize by using the
+	 * PDE format.
+	 */
+	if ((*pt & X86_PG_PTE_PAT) != 0) {
+		attrs &= ~X86_PG_PTE_PAT;
+		attrs |= X86_PG_PDE_PAT;
+	}
+	mask |= X86_PG_G | X86_PG_PDE_CACHE;
+
+	if (range->sva > va || !sysctl_kmaps_match(range, attrs, mask)) {
+		sysctl_kmaps_dump(sb, range, va);
+		sysctl_kmaps_reinit(range, va, attrs);
+	}
+	range->ptes++;
+
+	return (true);
+}
+
+static int
+sysctl_kmaps(SYSCTL_HANDLER_ARGS)
+{
+	struct pmap_kernel_map_range range;
+	struct sbuf sbuf, *sb;
+	pml4_entry_t *pml4;
+	pdp_entry_t *pdp;
+	pd_entry_t *pd;
+	pt_entry_t *pt;
+	vm_offset_t va;
+	int error, i, j, k, l;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+	sb = &sbuf;
+	sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);
+
+	range.sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
+
+	PMAP_LOCK(kernel_pmap);
+	for (i = 0; i < NPML4EPG; i++) {
+		switch (i) {
+		case PML4PML4I:
+			sbuf_printf(sb, "\nRecursive map:\n");
+			break;
+		case DMPML4I:
+			sbuf_printf(sb, "\nDirect map:\n");
+			break;
+		case KPML4BASE:
+			sbuf_printf(sb, "\nKernel map:\n");
+			break;
+		case LMSPML4I:
+			sbuf_printf(sb, "\nLarge map:\n");
+			break;
+		}
+
+		va = KVADDR(i, 0, 0, 0);
+		pml4 = &kernel_pmap->pm_pml4[i];
+		if (!sysctl_kmaps_update(sb, &range, va, pml4, NULL, NULL,
+		    NULL))
+			continue;
+		for (j = 0; j < NPDPEPG; j++) {
+			va = KVADDR(i, j, 0, 0);
+			pdp = pmap_pml4e_to_pdpe(pml4, va);
+			if (!sysctl_kmaps_update(sb, &range, va, pml4, pdp,
+			    NULL, NULL))
+				continue;
+			for (k = 0; k < NPDEPG; k++) {
+				va = KVADDR(i, j, k, 0);
+				pd = pmap_pdpe_to_pde(pdp, va);
+				if (!sysctl_kmaps_update(sb, &range, va, pml4,
+				    pdp, pd, NULL))
+					continue;
+				for (l = 0; l < NPTEPG; l++) {
+					va = KVADDR(i, j, k, l);
+					pt = pmap_pde_to_pte(pd, va);
+					(void)sysctl_kmaps_update(sb, &range,
+					    va, pml4, pdp, pd, pt);
+				}
+			}
+		}
+	}
+	PMAP_UNLOCK(kernel_pmap);
+
+	error = sbuf_finish(sb);
+	sbuf_delete(sb);
+	return (error);
+}
+SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
+    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_kmaps, "A",
+    "Dump kernel address layout");
+
 #ifdef DDB
 DB_SHOW_COMMAND(pte, pmap_print_pte)
 {
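
Usage note (not part of the patch): on a kernel built with this change, the new
node can be read from the command line with "sysctl vm.pmap.kernel_maps"; each
output line gives a contiguous range, its rwxu/g permission flags, the PAT
cache mode, and the number of 1GB, 2MB and 4KB mappings backing it, as emitted
by sysctl_kmaps_dump() above. The sketch below is a minimal userland example of
fetching the same string via sysctlbyname(3); the only name it assumes beyond
libc is the vm.pmap.kernel_maps OID defined in the patch, and it omits retry
handling for the case where the output grows between the two calls.

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char *buf;
	size_t len;

	/* First call sizes the buffer; second call fetches the text. */
	if (sysctlbyname("vm.pmap.kernel_maps", NULL, &len, NULL, 0) != 0)
		err(1, "sysctlbyname");
	buf = malloc(len);
	if (buf == NULL)
		err(1, "malloc");
	if (sysctlbyname("vm.pmap.kernel_maps", buf, &len, NULL, 0) != 0)
		err(1, "sysctlbyname");
	printf("%s", buf);
	free(buf);
	return (0);
}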