diff --git a/sys/riscv/include/md_var.h b/sys/riscv/include/md_var.h
--- a/sys/riscv/include/md_var.h
+++ b/sys/riscv/include/md_var.h
@@ -44,6 +44,7 @@
 /* Supervisor-mode extension support */
 extern bool has_sstc;
 extern bool has_sscofpmf;
+extern bool has_svpbmt;
 
 struct dumperinfo;
 struct minidumpstate;
diff --git a/sys/riscv/include/pte.h b/sys/riscv/include/pte.h
--- a/sys/riscv/include/pte.h
+++ b/sys/riscv/include/pte.h
@@ -83,6 +83,25 @@
 #define	PTE_PROMOTE	(PTE_V | PTE_RWX | PTE_D | PTE_G | PTE_U | \
 			 PTE_SW_MANAGED | PTE_SW_WIRED)
 
+/*
+ * Svpbmt Memory Attribute (MA) bits [62:61].
+ *
+ * +------+-------+------------------------------------------------------------+
+ * | Mode | Value | Requested Memory Attributes                                |
+ * +------+-------+------------------------------------------------------------+
+ * | PMA  |  00   | None, inherited from Physical Memory Attributes (firmware) |
+ * | NC   |  01   | Non-cacheable, idempotent, weakly-ordered (RVWMO),         |
+ * |      |       | main memory                                                |
+ * | IO   |  10   | Non-cacheable, non-idempotent, strongly-ordered, I/O       |
+ * | --   |  11   | Reserved                                                   |
+ * +------+-------+------------------------------------------------------------+
+ */
+#define	PTE_MA_SHIFT	61
+#define	PTE_MA_MASK	(0x3ul << PTE_MA_SHIFT)
+#define	PTE_MA_NONE	(0ul)
+#define	PTE_MA_NC	(1ul << PTE_MA_SHIFT)
+#define	PTE_MA_IO	(2ul << PTE_MA_SHIFT)
+
 /* Bits 63 - 54 are reserved for future use. */
 #define PTE_HI_MASK	0xFFC0000000000000ULL
diff --git a/sys/riscv/include/vm.h b/sys/riscv/include/vm.h
--- a/sys/riscv/include/vm.h
+++ b/sys/riscv/include/vm.h
@@ -28,10 +28,14 @@
 #define	_MACHINE_VM_H_
 
 /* Memory attribute configuration. */
-#define	VM_MEMATTR_DEVICE	0
+#define	VM_MEMATTR_PMA		0
 #define	VM_MEMATTR_UNCACHEABLE	1
-#define	VM_MEMATTR_WRITE_BACK	2
+#define	VM_MEMATTR_DEVICE	2
 
-#define	VM_MEMATTR_DEFAULT	VM_MEMATTR_WRITE_BACK
+#define	VM_MEMATTR_WRITE_BACK	VM_MEMATTR_PMA
+#define	VM_MEMATTR_DEFAULT	VM_MEMATTR_PMA
+
+#define	VM_MEMATTR_LAST		VM_MEMATTR_DEVICE
+#define	VM_MEMATTR_TOTAL	(VM_MEMATTR_LAST + 1)
 
 #endif /* !_MACHINE_VM_H_ */
diff --git a/sys/riscv/riscv/identcpu.c b/sys/riscv/riscv/identcpu.c
--- a/sys/riscv/riscv/identcpu.c
+++ b/sys/riscv/riscv/identcpu.c
@@ -74,6 +74,7 @@
 /* Supervisor-mode extension support. */
 bool __read_frequently has_sstc;
 bool __read_frequently has_sscofpmf;
+bool __read_frequently has_svpbmt;
 
 struct cpu_desc {
 	const char *cpu_mvendor_name;
@@ -414,6 +415,7 @@
 	/* Supervisor-mode extension support. */
 	UPDATE_CAP(has_sstc, (desc->smode_extensions & SV_SSTC) != 0);
 	UPDATE_CAP(has_sscofpmf, (desc->smode_extensions & SV_SSCOFPMF) != 0);
+	UPDATE_CAP(has_svpbmt, (desc->smode_extensions & SV_SVPBMT) != 0);
 
 #undef UPDATE_CAP
 }
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -369,6 +369,8 @@
 	((((pte) & ~PTE_HI_MASK) >> PTE_PPN0_S) * PAGE_SIZE)
 #define	L2PTE_TO_PHYS(l2) \
 	((((l2) & ~PTE_HI_MASK) >> PTE_PPN1_S) << L2_SHIFT)
+#define	L1PTE_TO_PHYS(l1) \
+	((((l1) & ~PTE_HI_MASK) >> PTE_PPN2_S) << L1_SHIFT)
 #define	PTE_TO_VM_PAGE(pte) PHYS_TO_VM_PAGE(PTE_TO_PHYS(pte))
 
 /*
@@ -533,6 +535,25 @@
 	mtx_unlock(&allpmaps_lock);
 }
 
+/*
+ * Holds the PTE mode bits (defined in pte.h) for defining e.g. cacheability.
+ *
+ * The indices correspond to the VM_MEMATTR_* defines in riscv/include/vm.h.
+ *
+ * The array will be empty if no mode bits are supported by the CPU, e.g. when
+ * lacking the Svpbmt extension.
+ */
+static __read_frequently pt_entry_t memattr_bits[VM_MEMATTR_TOTAL];
+static __read_frequently pt_entry_t memattr_mask;
+
+static __inline pt_entry_t
+pmap_memattr_bits(vm_memattr_t mode)
+{
+	KASSERT(pmap_is_valid_memattr(kernel_pmap, mode),
+	    ("invalid memory mode %u\n", mode));
+	return (memattr_bits[(int)mode]);
+}
+
 /*
  * This should only be used during pmap bootstrap e.g. by
  * pmap_create_pagetables().
@@ -568,6 +589,7 @@
 	vm_offset_t va;
 	vm_paddr_t min_pa, max_pa, pa, endpa;
 	pd_entry_t *l2;
+	pt_entry_t memattr;
 	u_int l1slot, l2slot;
 	int physmap_idx;
 
@@ -583,6 +605,8 @@
 	dmap_phys_base = rounddown(min_pa, L1_SIZE);
 	dmap_phys_max = max_pa;
 
+	memattr = pmap_memattr_bits(VM_MEMATTR_DEFAULT);
+
 	/* Walk the physmap table. */
 	l2 = NULL;
 	l1slot = Ln_ENTRIES; /* sentinel value */
@@ -611,7 +635,7 @@
 
 			/* map l2 pages */
 			l2slot = pmap_l2_index(va);
-			pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN));
+			pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN | memattr));
 
 			pa += L2_SIZE;
 			va += L2_SIZE;
@@ -621,7 +645,7 @@
 		while (pa + L1_SIZE - 1 < endpa) {
 			/* map l1 pages */
 			l1slot = pmap_l1_index(va);
-			pmap_store(&l1[l1slot], L1_PTE(pa, PTE_KERN));
+			pmap_store(&l1[l1slot], L1_PTE(pa, PTE_KERN | memattr));
 
 			pa += L1_SIZE;
 			va += L1_SIZE;
@@ -641,7 +665,7 @@
 
 			/* map l2 pages */
 			l2slot = pmap_l2_index(va);
-			pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN));
+			pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN | memattr));
 
 			pa += L2_SIZE;
 			va += L2_SIZE;
@@ -673,6 +697,7 @@
 	vm_paddr_t *root_pt_phys)
 {
 	pt_entry_t *l0, *l1, *kern_l2, *kern_l3, *devmap_l3;
+	pt_entry_t memattr;
 	pd_entry_t *devmap_l2;
 	vm_paddr_t kernend, freemempos, pa;
 	int nkernl2, nkernl3, ndevmapl3;
@@ -745,6 +770,9 @@
 	if (freemempos < roundup2(kernend, L2_SIZE))
 		freemempos = roundup2(kernend, L2_SIZE);
 
+	/* Memory attributes for standard/main memory. */
+	memattr = pmap_memattr_bits(VM_MEMATTR_DEFAULT);
+
 	/*
 	 * Map the kernel (and preloaded modules or data) using L2 superpages.
 	 *
@@ -757,7 +785,8 @@
 	 */
 	slot = pmap_l2_index(KERNBASE);
 	for (pa = kernstart; pa < kernend; pa += L2_SIZE, slot++) {
-		pmap_store(&kern_l2[slot], L2_PTE(pa, PTE_KERN | PTE_X));
+		pmap_store(&kern_l2[slot],
+		    L2_PTE(pa, PTE_KERN | PTE_X | memattr));
 	}
 
 	/*
@@ -830,6 +859,16 @@
 	 */
 	CPU_SET(PCPU_GET(hart), &kernel_pmap->pm_active);
 
+	/*
+	 * Set up the memory attribute bits.
+	 */
+	if (has_svpbmt) {
+		memattr_bits[VM_MEMATTR_PMA] = PTE_MA_NONE;
+		memattr_bits[VM_MEMATTR_UNCACHEABLE] = PTE_MA_NC;
+		memattr_bits[VM_MEMATTR_DEVICE] = PTE_MA_IO;
+		memattr_mask = PTE_MA_MASK;
+	}
+
 	/* Create a new set of pagetables to run the kernel in. */
 	freemempos = pmap_create_pagetables(kernstart, kernlen, &root_pt_phys);
@@ -896,7 +935,7 @@
 {
 
 	TAILQ_INIT(&m->md.pv_list);
-	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
+	m->md.pv_memattr = VM_MEMATTR_DEFAULT;
 }
 
 /*
@@ -1149,10 +1188,11 @@
 ***************************************************/
 
 void
-pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode __unused)
+pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
 {
 	pt_entry_t entry;
 	pt_entry_t *l3;
+	pt_entry_t memattr;
 	vm_offset_t va;
 	pn_t pn;
 
@@ -1163,6 +1203,7 @@
 	KASSERT((size & PAGE_MASK) == 0,
 	    ("pmap_kenter_device: Mapping is not page-sized"));
 
+	memattr = pmap_memattr_bits(mode);
 	va = sva;
 	while (size != 0) {
 		l3 = pmap_l3(kernel_pmap, va);
@@ -1170,6 +1211,7 @@
 
 		pn = (pa / PAGE_SIZE);
 		entry = PTE_KERN;
+		entry |= memattr;
 		entry |= (pn << PTE_PPN0_S);
 		pmap_store(l3, entry);
 
@@ -1257,7 +1299,8 @@
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
-	pt_entry_t *l3, pa;
+	pt_entry_t *l3;
+	vm_paddr_t pa;
 	vm_offset_t va;
 	vm_page_t m;
 	pt_entry_t entry;
@@ -1272,6 +1315,7 @@
 		l3 = pmap_l3(kernel_pmap, va);
 
 		entry = PTE_KERN;
+		entry |= pmap_memattr_bits(m->md.pv_memattr);
 		entry |= (pn << PTE_PPN0_S);
 		pmap_store(l3, entry);
 
@@ -3132,6 +3176,7 @@
 	new_l3 |= (pn << PTE_PPN0_S);
 	if ((flags & PMAP_ENTER_WIRED) != 0)
 		new_l3 |= PTE_SW_WIRED;
+	new_l3 |= pmap_memattr_bits(m->md.pv_memattr);
 
 	/*
 	 * Set modified bit gratuitously for writeable mappings if
@@ -3345,7 +3390,8 @@
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE;
-	new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V);
+	new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V |
+	    pmap_memattr_bits(m->md.pv_memattr));
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		new_l2 |= PTE_SW_MANAGED;
 	if ((prot & VM_PROT_EXECUTE) != 0)
@@ -3688,7 +3734,7 @@
 	pmap_resident_count_inc(pmap, 1);
 
 	newl3 = ((VM_PAGE_TO_PHYS(m) / PAGE_SIZE) << PTE_PPN0_S) |
-	    PTE_V | PTE_R;
+	    PTE_V | PTE_R | pmap_memattr_bits(m->md.pv_memattr);
 	if ((prot & VM_PROT_EXECUTE) != 0)
 		newl3 |= PTE_X;
 	if ((m->oflags & VPO_UNMANAGED) == 0)
@@ -4793,9 +4839,13 @@
 pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
 {
 	vm_offset_t base, offset, tmpva;
+	vm_paddr_t phys;
 	pd_entry_t *l1, l1e;
 	pd_entry_t *l2, l2e;
 	pt_entry_t *l3, l3e;
+	pt_entry_t bits, mask;
+	bool anychanged = false;
+	int error = 0;
 
 	PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
 	base = trunc_page(va);
@@ -4806,46 +4856,155 @@
 	    !(base >= VM_MIN_KERNEL_ADDRESS && base < VM_MAX_KERNEL_ADDRESS))
 		return (EINVAL);
 
+	bits = pmap_memattr_bits(mode);
+	mask = memattr_mask;
+
+	/* First loop: perform PTE validation and demotions as necessary. */
 	for (tmpva = base; tmpva < base + size; ) {
 		l1 = pmap_l1(kernel_pmap, tmpva);
 		if (l1 == NULL || ((l1e = pmap_load(l1)) & PTE_V) == 0)
 			return (EINVAL);
 		if ((l1e & PTE_RWX) != 0) {
 			/*
-			 * TODO: Demote if attributes don't match and there
-			 * isn't an L1 page left in the range, and update the
-			 * L1 entry if the attributes don't match but there is
-			 * an L1 page left in the range, once we support the
-			 * upcoming Svpbmt extension.
+			 * If the existing PTE has the correct attributes, then
+			 * no need to demote.
 			 */
-			tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
-			continue;
+			if ((l1e & mask) == bits) {
+				tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
+				continue;
+			}
+
+			/*
+			 * If the 1GB page fits in the remaining range, we
+			 * don't need to demote.
+			 */
+			if ((tmpva & L1_OFFSET) == 0 &&
+			    tmpva + L1_SIZE - 1 < base + size) {
+				tmpva += L1_SIZE;
+				continue;
+			}
+
+			if (!pmap_demote_l1(kernel_pmap, l1, tmpva))
+				return (EINVAL);
 		}
 		l2 = pmap_l1_to_l2(l1, tmpva);
 		if (l2 == NULL || ((l2e = pmap_load(l2)) & PTE_V) == 0)
 			return (EINVAL);
 		if ((l2e & PTE_RWX) != 0) {
 			/*
-			 * TODO: Demote if attributes don't match and there
-			 * isn't an L2 page left in the range, and update the
-			 * L2 entry if the attributes don't match but there is
-			 * an L2 page left in the range, once we support the
-			 * upcoming Svpbmt extension.
+			 * If the existing PTE has the correct attributes, then
+			 * no need to demote.
 			 */
-			tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
-			continue;
+			if ((l2e & mask) == bits) {
+				tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
+				continue;
+			}
+
+			/*
+			 * If the 2MB page fits in the remaining range, we
+			 * don't need to demote.
+			 */
+			if ((tmpva & L2_OFFSET) == 0 &&
+			    tmpva + L2_SIZE - 1 < base + size) {
+				tmpva += L2_SIZE;
+				continue;
+			}
+
+			if (!pmap_demote_l2(kernel_pmap, l2, tmpva))
+				panic("l2 demotion failed");
 		}
 		l3 = pmap_l2_to_l3(l2, tmpva);
 		if (l3 == NULL || ((l3e = pmap_load(l3)) & PTE_V) == 0)
 			return (EINVAL);
-
-		/*
-		 * TODO: Update the L3 entry if the attributes don't match once
-		 * we support the upcoming Svpbmt extension.
-		 */
+
+		tmpva += PAGE_SIZE;
+	}
+
+	/* Second loop: perform PTE updates. */
+	for (tmpva = base; tmpva < base + size; ) {
+		l1 = pmap_l1(kernel_pmap, tmpva);
+		l1e = pmap_load(l1);
+		if ((l1e & PTE_RWX) != 0) {
+			/* Unchanged. */
+			if ((l1e & mask) == bits) {
+				tmpva += L1_SIZE;
+				continue;
+			}
+
+			l1e &= ~mask;
+			l1e |= bits;
+			pmap_store(l1, l1e);
+			anychanged = true;
+
+			/* Update corresponding DMAP entry */
+			phys = L1PTE_TO_PHYS(l1e);
+			if (!VIRT_IN_DMAP(tmpva) && PHYS_IN_DMAP(phys)) {
+				error = pmap_change_attr_locked(
+				    PHYS_TO_DMAP(phys), L1_SIZE, mode);
+				if (error != 0)
+					break;
+			}
+			tmpva += L1_SIZE;
+			continue;
+		}
+
+		l2 = pmap_l1_to_l2(l1, tmpva);
+		l2e = pmap_load(l2);
+		if ((l2e & PTE_RWX) != 0) {
+			/* Unchanged. */
+			if ((l2e & mask) == bits) {
+				tmpva += L2_SIZE;
+				continue;
+			}
+
+			l2e &= ~mask;
+			l2e |= bits;
+			pmap_store(l2, l2e);
+			anychanged = true;
+
+			/* Update corresponding DMAP entry */
+			phys = L2PTE_TO_PHYS(l2e);
+			if (!VIRT_IN_DMAP(tmpva) && PHYS_IN_DMAP(phys)) {
+				error = pmap_change_attr_locked(
+				    PHYS_TO_DMAP(phys), L2_SIZE, mode);
+				if (error != 0)
+					break;
+			}
+			tmpva += L2_SIZE;
+			continue;
+		}
+
+		l3 = pmap_l2_to_l3(l2, tmpva);
+		l3e = pmap_load(l3);
+
+		/* Unchanged. */
+		if ((l3e & mask) == bits) {
+			tmpva += PAGE_SIZE;
+			continue;
+		}
+
+		l3e &= ~mask;
+		l3e |= bits;
+		pmap_store(l3, l3e);
+		anychanged = true;
+
+		phys = PTE_TO_PHYS(l3e);
+		if (!VIRT_IN_DMAP(tmpva) && PHYS_IN_DMAP(phys)) {
+			error = pmap_change_attr_locked(PHYS_TO_DMAP(phys),
+			    L3_SIZE, mode);
+			if (error != 0)
+				break;
+		}
 		tmpva += PAGE_SIZE;
 	}
 
-	return (0);
+	if (anychanged) {
+		pmap_invalidate_range(kernel_pmap, base, tmpva);
+		if (mode == VM_MEMATTR_UNCACHEABLE)
+			cpu_dcache_wbinv_range((void *)base, size);
+	}
+
+	return (error);
 }
 
 /*
@@ -5082,7 +5241,7 @@
 pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
 {
 
-	return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_BACK);
+	return (mode >= VM_MEMATTR_DEFAULT && mode <= VM_MEMATTR_LAST);
 }
 
 bool
@@ -5138,17 +5297,38 @@
 sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
     vm_offset_t eva)
 {
+	char *mode;
+	int i;
 
 	if (eva <= range->sva)
 		return;
 
-	sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c%c %d %d %d\n",
+	for (i = 0; i < nitems(memattr_bits); i++)
+		if ((range->attrs & memattr_mask) == memattr_bits[i])
+			break;
+
+	switch (i) {
+	case VM_MEMATTR_PMA:
+		mode = "PMA";
+		break;
+	case VM_MEMATTR_UNCACHEABLE:
+		mode = "NC ";
+		break;
+	case VM_MEMATTR_DEVICE:
+		mode = "IO ";
+		break;
+	default:
+		mode = "???";
+		break;
+	}
+
+	sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c%c %s %d %d %d\n",
 	    range->sva, eva,
 	    (range->attrs & PTE_W) == PTE_W ? 'w' : '-',
 	    (range->attrs & PTE_X) == PTE_X ? 'x' : '-',
 	    (range->attrs & PTE_U) == PTE_U ? 'u' : 's',
 	    (range->attrs & PTE_G) == PTE_G ? 'g' : '-',
-	    range->l1pages, range->l2pages, range->l3pages);
+	    mode, range->l1pages, range->l2pages, range->l3pages);
 
 	/* Reset to sentinel value. */
 	range->sva = 0xfffffffffffffffful;
@@ -5188,14 +5368,19 @@
 
 	/* The PTE global bit is inherited by lower levels. */
 	attrs = l1e & PTE_G;
-	if ((l1e & PTE_RWX) != 0)
+	if ((l1e & PTE_RWX) != 0) {
 		attrs |= l1e & (PTE_RWX | PTE_U);
-	else if (l2e != 0)
+		attrs |= l1e & memattr_mask;
+	} else if (l2e != 0)
 		attrs |= l2e & PTE_G;
-	if ((l2e & PTE_RWX) != 0)
+
+	if ((l2e & PTE_RWX) != 0) {
 		attrs |= l2e & (PTE_RWX | PTE_U);
-	else if (l3e != 0)
+		attrs |= l2e & memattr_mask;
+	} else if (l3e != 0) {
 		attrs |= l3e & (PTE_RWX | PTE_U | PTE_G);
+		attrs |= l3e & memattr_mask;
+	}
 
 	if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
 		sysctl_kmaps_dump(sb, range, va);
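As an aside for review, the core of the second loop in pmap_change_attr_locked() is the read-modify-write of the MA field: clear the two Svpbmt bits with memattr_mask, then OR in the requested mode's bits. The standalone C sketch below (not part of the patch) reproduces that update in userspace so it can be inspected in isolation. The constants mirror sys/riscv/include/pte.h; the sample PPN and permission bits are made up for illustration.

/*
 * Standalone sketch of the Svpbmt MA-field update performed by the
 * second loop above; build with "cc -o ma_demo ma_demo.c". The example
 * PTE is hypothetical, not read from a real page table.
 */
#include <stdint.h>
#include <stdio.h>

#define	PTE_MA_SHIFT	61
#define	PTE_MA_MASK	(0x3ull << PTE_MA_SHIFT)
#define	PTE_MA_NONE	(0ull)
#define	PTE_MA_NC	(1ull << PTE_MA_SHIFT)
#define	PTE_MA_IO	(2ull << PTE_MA_SHIFT)

#define	PTE_PPN0_S	10		/* PPN field starts at bit 10. */
#define	PTE_V		(1ull << 0)
#define	PTE_R		(1ull << 1)
#define	PTE_W		(1ull << 2)

/* Replace only the MA field, preserving PPN and permission bits. */
static uint64_t
pte_set_memattr(uint64_t pte, uint64_t bits)
{
	return ((pte & ~PTE_MA_MASK) | bits);
}

int
main(void)
{
	/* A hypothetical valid, writable leaf PTE with MA = PMA (00). */
	uint64_t pte = (0x12345ull << PTE_PPN0_S) | PTE_V | PTE_R | PTE_W;

	printf("PMA: %#018llx (MA=%llu)\n", (unsigned long long)pte,
	    (unsigned long long)(pte >> PTE_MA_SHIFT));
	pte = pte_set_memattr(pte, PTE_MA_NC);	/* VM_MEMATTR_UNCACHEABLE */
	printf("NC:  %#018llx (MA=%llu)\n", (unsigned long long)pte,
	    (unsigned long long)(pte >> PTE_MA_SHIFT));
	pte = pte_set_memattr(pte, PTE_MA_IO);	/* VM_MEMATTR_DEVICE */
	printf("IO:  %#018llx (MA=%llu)\n", (unsigned long long)pte,
	    (unsigned long long)(pte >> PTE_MA_SHIFT));
	return (0);
}

Note that because the update touches only bits 62:61, the PPN and the V/R/W/X permission bits survive unchanged, which is exactly why the kernel can rewrite live kernel mappings this way (followed by the pmap_invalidate_range() at the end of the function).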