D23875.id69136.diff

Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -151,6 +151,7 @@
#include <arm/include/physmem.h>
#define PMAP_ASSERT_STAGE1(pmap) MPASS((pmap)->pm_stage == PM_STAGE1)
+#define PMAP_ASSERT_STAGE2(pmap) MPASS((pmap)->pm_stage == PM_STAGE2)
#define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t)))
#define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t)))
@@ -294,6 +295,7 @@
};
static struct asid_set asids;
+static struct asid_set vmids;
static SYSCTL_NODE(_vm_pmap, OID_AUTO, asid, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"ASID allocator");
@@ -304,6 +306,17 @@
SYSCTL_INT(_vm_pmap_asid, OID_AUTO, epoch, CTLFLAG_RD, &asids.asid_epoch, 0,
"The current epoch number");
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, vmid, CTLFLAG_RD, 0, "VMID allocator");
+SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, bits, CTLFLAG_RD, &vmids.asid_bits, 0,
+ "The number of bits in an VMID");
+SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, next, CTLFLAG_RD, &vmids.asid_next, 0,
+ "The last allocated VMID plus one");
+SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, epoch, CTLFLAG_RD, &vmids.asid_epoch, 0,
+ "The current epoch number");
+
+void (*pmap_clean_stage2_tlbi)(void);
+void (*pmap_invalidate_vpipt_icache)(void);
+
/*
* A pmap's cookie encodes an ASID and epoch number. Cookies for reserved
* ASIDs have a negative epoch number, specifically, INT_MIN. Cookies for
@@ -591,6 +604,60 @@
CTASSERT(L1_BLOCK == L2_BLOCK);
+static pt_entry_t
+pmap_pte_memattr(pmap_t pmap, vm_memattr_t memattr)
+{
+ pt_entry_t val;
+
+ if (pmap->pm_stage == PM_STAGE1) {
+ val = ATTR_S1_IDX(memattr);
+ if (memattr == VM_MEMATTR_DEVICE)
+ val |= ATTR_S1_XN;
+ return (val);
+ } else {
+ val = 0;
+
+ switch (memattr) {
+ case VM_MEMATTR_DEVICE:
+ return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_DEVICE_nGnRnE) |
+ ATTR_S2_XN(ATTR_S2_XN_ALL));
+ case VM_MEMATTR_UNCACHEABLE:
+ return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_NC));
+ case VM_MEMATTR_WRITE_BACK:
+ return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WB));
+ case VM_MEMATTR_WRITE_THROUGH:
+ return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WT));
+ }
+ }
+
+ /* We shouldn't get here */
+ MPASS(false);
+ return (0);
+}
+
+static pt_entry_t
+pmap_pte_prot(pmap_t pmap, vm_prot_t prot)
+{
+ pt_entry_t val;
+
+ val = 0;
+ if (pmap->pm_stage == PM_STAGE1) {
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ val |= ATTR_S1_XN;
+ if ((prot & VM_PROT_WRITE) == 0)
+ val |= ATTR_S1_AP(ATTR_S1_AP_RO);
+ } else {
+ if ((prot & VM_PROT_WRITE) != 0)
+ val |= ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
+ if ((prot & VM_PROT_READ) != 0)
+ val |= ATTR_S2_S2AP(ATTR_S2_S2AP_READ);
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ val |= ATTR_S2_XN(ATTR_S2_XN_ALL);
+ }
+
+ return (val);
+}
+
/*
* Checks if the PTE is dirty.
*/
@@ -987,7 +1054,8 @@
pmap_init(void)
{
vm_size_t s;
- int i, pv_npg;
+ uint64_t mmfr1;
+ int i, pv_npg, vmid_bits;
/*
* Are large page mappings enabled?
@@ -1005,6 +1073,16 @@
pmap_init_asids(&asids,
(READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8);
+ if (has_hyp()) {
+ mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
+ vmid_bits = 8;
+
+ if (ID_AA64MMFR1_VMIDBits_VAL(mmfr1) ==
+ ID_AA64MMFR1_VMIDBits_16)
+ vmid_bits = 16;
+ pmap_init_asids(&vmids, vmid_bits);
+ }
+
/*
* Initialize the pv chunk list mutex.
*/
@@ -1575,7 +1653,7 @@
}
int
-pmap_pinit(pmap_t pmap)
+pmap_pinit_stage(pmap_t pmap, enum pmap_stage stage)
{
vm_page_t l0pt;
@@ -1595,14 +1673,33 @@
pmap->pm_root.rt_root = 0;
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX);
- pmap->pm_stage = PM_STAGE1;
- pmap->pm_asid_set = &asids;
+
+ pmap->pm_stage = stage;
+ switch (stage) {
+ case PM_STAGE1:
+ pmap->pm_asid_set = &asids;
+ break;
+ case PM_STAGE2:
+ pmap->pm_asid_set = &vmids;
+ break;
+ default:
+ panic("%s: Invalid pmap type %d", __func__, stage);
+ break;
+ }
+
/* XXX Temporarily disable deferred ASID allocation. */
pmap_alloc_asid(pmap);
return (1);
}
+int
+pmap_pinit(pmap_t pmap)
+{
+
+ return (pmap_pinit_stage(pmap, PM_STAGE1));
+}
+
/*
* This routine is called if the desired page table page does not exist.
*
@@ -3350,33 +3447,40 @@
boolean_t nosleep;
int lvl, rv;
- PMAP_ASSERT_STAGE1(pmap);
-
va = trunc_page(va);
if ((m->oflags & VPO_UNMANAGED) == 0)
VM_PAGE_OBJECT_BUSY_ASSERT(m);
pa = VM_PAGE_TO_PHYS(m);
- new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_S1_IDX(m->md.pv_memattr) |
- L3_PAGE);
- if ((prot & VM_PROT_WRITE) == 0)
- new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
- if ((prot & VM_PROT_EXECUTE) == 0 ||
- m->md.pv_memattr == VM_MEMATTR_DEVICE)
- new_l3 |= ATTR_S1_XN;
+ new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | L3_PAGE);
+ new_l3 |= pmap_pte_memattr(pmap, m->md.pv_memattr);
+ new_l3 |= pmap_pte_prot(pmap, prot);
+
if ((flags & PMAP_ENTER_WIRED) != 0)
new_l3 |= ATTR_SW_WIRED;
- if (va < VM_MAXUSER_ADDRESS)
- new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
- else
- new_l3 |= ATTR_S1_UXN;
- if (pmap != kernel_pmap)
- new_l3 |= ATTR_S1_nG;
+ if (pmap->pm_stage == PM_STAGE1) {
+ if (va < VM_MAXUSER_ADDRESS)
+ new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
+ else
+ new_l3 |= ATTR_S1_UXN;
+ if (pmap != kernel_pmap)
+ new_l3 |= ATTR_S1_nG;
+ } else {
+ /*
+ * Clear the access flag on executable mappings, these will be
+ * set later when the page is accessed. The fault handler is
+ * required to invalidate the I-cache.
+ */
+ if (prot & VM_PROT_EXECUTE)
+ new_l3 &= ~ATTR_AF;
+ }
if ((m->oflags & VPO_UNMANAGED) == 0) {
new_l3 |= ATTR_SW_MANAGED;
if ((prot & VM_PROT_WRITE) != 0) {
new_l3 |= ATTR_SW_DBM;
- if ((flags & VM_PROT_WRITE) == 0)
+ if ((flags & VM_PROT_WRITE) == 0) {
+ PMAP_ASSERT_STAGE1(pmap);
new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
+ }
}
}
@@ -3449,6 +3553,12 @@
* Is the specified virtual address already mapped?
*/
if (pmap_l3_valid(orig_l3)) {
+ /*
+ * Only allow adding new entries on stage 2 tables for now.
+ * This simplifies cache invalidation as we may need to call
+ * into EL2 to perform such actions.
+ */
+ PMAP_ASSERT_STAGE1(pmap);
/*
* Wiring change, just update stats. We don't worry about
* wiring PT pages as they remain resident as long as there
@@ -3545,26 +3655,33 @@
}
validate:
- /*
- * Sync icache if exec permission and attribute VM_MEMATTR_WRITE_BACK
- * is set. Do it now, before the mapping is stored and made
- * valid for hardware table walk. If done later, then other can
- * access this page before caches are properly synced.
- * Don't do it for kernel memory which is mapped with exec
- * permission even if the memory isn't going to hold executable
- * code. The only time when icache sync is needed is after
- * kernel module is loaded and the relocation info is processed.
- * And it's done in elf_cpu_load_file().
- */
- if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
- m->md.pv_memattr == VM_MEMATTR_WRITE_BACK &&
- (opa != pa || (orig_l3 & ATTR_S1_XN)))
- cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+ if (pmap->pm_stage == PM_STAGE1) {
+ /*
+ * Sync icache if exec permission and attribute
+ * VM_MEMATTR_WRITE_BACK is set. Do it now, before the mapping
+ * is stored and made valid for hardware table walk. If done
+ * later, then other can access this page before caches are
+ * properly synced. Don't do it for kernel memory which is
+ * mapped with exec permission even if the memory isn't going
+ * to hold executable code. The only time when icache sync is
+ * needed is after kernel module is loaded and the relocation
+ * info is processed. And it's done in elf_cpu_load_file().
+ */
+ if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
+ m->md.pv_memattr == VM_MEMATTR_WRITE_BACK &&
+ (opa != pa || (orig_l3 & ATTR_S1_XN))) {
+ PMAP_ASSERT_STAGE1(pmap);
+ cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+ }
+ } else {
+ cpu_dcache_wb_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+ }
/*
* Update the L3 entry
*/
if (pmap_l3_valid(orig_l3)) {
+ PMAP_ASSERT_STAGE1(pmap);
KASSERT(opa == pa, ("pmap_enter: invalid update"));
if ((orig_l3 & ~ATTR_AF) != (new_l3 & ~ATTR_AF)) {
/* same PA, different attributes */
@@ -3597,7 +3714,7 @@
#if VM_NRESERVLEVEL > 0
if ((mpte == NULL || mpte->ref_count == NL3PG) &&
- pmap_ps_enabled(pmap) &&
+ pmap_ps_enabled(pmap) && pmap->pm_stage == PM_STAGE1 &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
pmap_promote_l2(pmap, pde, va, &lock);
@@ -5868,8 +5985,10 @@
pmap_t curpmap;
int asid, cpuid, epoch;
struct asid_set *set;
+ enum pmap_stage stage;
- PMAP_ASSERT_STAGE1(pmap);
+ set = pmap->pm_asid_set;
+ stage = pmap->pm_stage;
set = pmap->pm_asid_set;
KASSERT(set != NULL, ("%s: NULL asid set", __func__));
@@ -5884,14 +6003,29 @@
epoch = 0;
set->asid_epoch = epoch;
dsb(ishst);
- __asm __volatile("tlbi vmalle1is");
+ if (stage == PM_STAGE1) {
+ __asm __volatile("tlbi vmalle1is");
+ } else {
+ KASSERT(pmap_clean_stage2_tlbi != NULL,
+ ("%s: Unset stage 2 tlb invalidation callback\n",
+ __func__));
+ pmap_clean_stage2_tlbi();
+ }
dsb(ish);
bit_nclear(set->asid_set, ASID_FIRST_AVAILABLE,
set->asid_set_size - 1);
CPU_FOREACH(cpuid) {
if (cpuid == curcpu)
continue;
- curpmap = pcpu_find(cpuid)->pc_curpmap;
+ if (stage == PM_STAGE1) {
+ curpmap = pcpu_find(cpuid)->pc_curpmap;
+ PMAP_ASSERT_STAGE1(pmap);
+ } else {
+ curpmap = pcpu_find(cpuid)->pc_curvmpmap;
+ if (curpmap == NULL)
+ continue;
+ PMAP_ASSERT_STAGE2(pmap);
+ }
KASSERT(curpmap->pm_asid_set == set, ("Incorrect set"));
asid = COOKIE_TO_ASID(curpmap->pm_cookie);
if (asid == -1)
@@ -5910,7 +6044,6 @@
struct asid_set *set;
int new_asid;
- PMAP_ASSERT_STAGE1(pmap);
set = pmap->pm_asid_set;
KASSERT(set != NULL, ("%s: NULL asid set", __func__));
@@ -5952,7 +6085,6 @@
pmap_to_ttbr0(pmap_t pmap)
{
- PMAP_ASSERT_STAGE1(pmap);
return (ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) |
pmap->pm_l0_paddr);
}
@@ -5963,10 +6095,11 @@
struct asid_set *set;
int epoch;
- PMAP_ASSERT_STAGE1(pmap);
KASSERT(PCPU_GET(curpmap) != NULL, ("no active pmap"));
KASSERT(pmap != kernel_pmap, ("kernel pmap activation"));
- if (pmap == PCPU_GET(curpmap)) {
+
+ if ((pmap->pm_stage == PM_STAGE1 && pmap == PCPU_GET(curpmap)) ||
+ (pmap->pm_stage == PM_STAGE2 && pmap == PCPU_GET(curvmpmap))) {
/*
* Handle the possibility that the old thread was preempted
* after an "ic" or "tlbi" instruction but before it performed
@@ -5986,18 +6119,32 @@
* Ensure that the store to curpmap is globally visible before the
* load from asid_epoch is performed.
*/
- PCPU_SET(curpmap, pmap);
+ if (pmap->pm_stage == PM_STAGE1)
+ PCPU_SET(curpmap, pmap);
+ else
+ PCPU_SET(curvmpmap, pmap);
dsb(ish);
epoch = COOKIE_TO_EPOCH(pmap->pm_cookie);
if (epoch >= 0 && epoch != set->asid_epoch)
pmap_alloc_asid(pmap);
- set_ttbr0(pmap_to_ttbr0(pmap));
- if (PCPU_GET(bcast_tlbi_workaround) != 0)
- invalidate_local_icache();
+ if (pmap->pm_stage == PM_STAGE1) {
+ set_ttbr0(pmap_to_ttbr0(pmap));
+ if (PCPU_GET(bcast_tlbi_workaround) != 0)
+ invalidate_local_icache();
+ }
return (true);
}
+void
+pmap_activate_vm(pmap_t pmap)
+{
+
+ PMAP_ASSERT_STAGE2(pmap);
+
+ (void)pmap_activate_int(pmap);
+}
+
void
pmap_activate(struct thread *td)
{
@@ -6076,6 +6223,52 @@
}
}
+static int
+pmap_stage2_fault(pmap_t pmap, uint64_t esr, uint64_t far)
+{
+ pt_entry_t *ptep;
+ int rv, lvl;
+
+ PMAP_ASSERT_STAGE2(pmap);
+ rv = KERN_FAILURE;
+
+ /* Data and insn aborts use same encoding for FSC field. */
+ switch (esr & ISS_DATA_DFSC_MASK) {
+ case ISS_DATA_DFSC_AFF_L1:
+ case ISS_DATA_DFSC_AFF_L2:
+ case ISS_DATA_DFSC_AFF_L3:
+ PMAP_LOCK(pmap);
+ ptep = pmap_pte(pmap, far, &lvl);
+ if (ptep != NULL) {
+ if (icache_vmid) {
+ pmap_invalidate_vpipt_icache();
+ } else {
+ /*
+ * If accessing an executable page invalidate
+ * the I-cache so it will be valid when we
+ * continue execution in the guest. The D-cache
+ * is assumed to already be clean to the Point
+ * of Coherency.
+ */
+ if ((pmap_load(ptep) & ATTR_S2_XN_MASK) !=
+ ATTR_S2_XN(ATTR_S2_XN_NONE)) {
+ invalidate_icache();
+ }
+ }
+ pmap_set_bits(ptep, ATTR_AF);
+ rv = KERN_SUCCESS;
+ /*
+ * XXXMJ as an optimization we could mark the entry
+ * dirty if this is a write fault.
+ */
+ }
+ PMAP_UNLOCK(pmap);
+ break;
+ }
+
+ return (rv);
+}
+
int
pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
{
@@ -6084,7 +6277,6 @@
uint64_t ec, par;
int lvl, rv;
- PMAP_ASSERT_STAGE1(pmap);
rv = KERN_FAILURE;
ec = ESR_ELx_EXCEPTION(esr);
@@ -6098,6 +6290,9 @@
return (rv);
}
+ if (pmap->pm_stage == PM_STAGE2)
+ return (pmap_stage2_fault(pmap, esr, far));
+
/* Data and insn aborts use same encoding for FSC field. */
switch (esr & ISS_DATA_DFSC_MASK) {
case ISS_DATA_DFSC_AFF_L1:
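The pmap.c changes above only provide the stage 2 plumbing; the consumer (the arm64 hypervisor code) lives outside this diff. A minimal sketch of how such a consumer might drive the new interface, using hypothetical vmm-side function names:

/*
 * Sketch only: vm_pmap_init(), vm_world_switch() and vm_handle_guest_abort()
 * are assumed names, not part of this change.  They show the intended call
 * order for a stage 2 pmap.
 */
static int
vm_pmap_init(pmap_t vm_pmap)
{

        /* Create a stage 2 pmap; its ID is allocated from the new vmids set. */
        if (pmap_pinit_stage(vm_pmap, PM_STAGE2) == 0)
                return (ENOMEM);
        return (0);
}

static void
vm_world_switch(pmap_t vm_pmap)
{

        /* Publish the pmap in pc_curvmpmap and refresh its VMID if stale. */
        pmap_activate_vm(vm_pmap);
        /* ... program VTTBR from the pmap and enter the guest at EL2 ... */
}

static int
vm_handle_guest_abort(pmap_t vm_pmap, uint64_t esr, uint64_t far)
{

        /*
         * pmap_fault() now dispatches PM_STAGE2 pmaps to pmap_stage2_fault(),
         * which resolves access-flag faults and does I-cache maintenance.
         */
        return (pmap_fault(vm_pmap, esr, far));
}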
Index: sys/arm64/include/cpufunc.h
===================================================================
--- sys/arm64/include/cpufunc.h
+++ sys/arm64/include/cpufunc.h
@@ -189,6 +189,16 @@
: "r" (ttbr0));
}
+static __inline void
+invalidate_icache(void)
+{
+
+ __asm __volatile(
+ "ic ialluis \n"
+ "dsb ish \n"
+ "isb \n");
+}
+
static __inline void
invalidate_local_icache(void)
{
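invalidate_icache() is the broadcast counterpart of the existing invalidate_local_icache(): "ic ialluis" invalidates the instruction caches of every CPU in the Inner Shareable domain, which is what the stage 2 fault path needs because a guest vCPU may be rescheduled onto another CPU before it executes freshly written code. A hedged usage sketch (the caller below is assumed, not part of the diff):

static void
publish_guest_code(vm_offset_t va, vm_size_t size)
{

        /* Clean the D-cache so the new instructions are visible to fetch. */
        cpu_dcache_wb_range(va, size);
        /* Broadcast I-cache invalidate ("ic ialluis") plus dsb ish/isb. */
        invalidate_icache();
}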
Index: sys/arm64/include/pcpu.h
===================================================================
--- sys/arm64/include/pcpu.h
+++ sys/arm64/include/pcpu.h
@@ -46,8 +46,9 @@
pcpu_bp_harden pc_bp_harden; \
pcpu_ssbd pc_ssbd; \
struct pmap *pc_curpmap; \
+ struct pmap *pc_curvmpmap; \
u_int pc_bcast_tlbi_workaround; \
- char __pad[213]
+ char __pad[205]
#ifdef _KERNEL
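The per-CPU padding shrinks by exactly the size of the new pc_curvmpmap pointer, so the overall size of struct pcpu is unchanged. The arithmetic, as an illustrative compile-time check only (not part of the diff):

/* 8-byte pointer on arm64: the old pad equals the new pad plus the pointer. */
_Static_assert(213 == 205 + sizeof(struct pmap *),
    "pc_curvmpmap is accounted for by shrinking __pad");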
Index: sys/arm64/include/pmap.h
===================================================================
--- sys/arm64/include/pmap.h
+++ sys/arm64/include/pmap.h
@@ -161,6 +161,8 @@
((((va) | (pa)) & L1_OFFSET) == 0 && (size) >= L1_SIZE)
void pmap_bootstrap(vm_offset_t, vm_offset_t, vm_paddr_t, vm_size_t);
+int pmap_pinit_stage(pmap_t, enum pmap_stage);
+void pmap_activate_vm(pmap_t);
int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode);
void pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode);
void pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t);
@@ -187,6 +189,8 @@
struct pcb *pmap_switch(struct thread *, struct thread *);
+extern void (*pmap_invalidate_vpipt_icache)(void);
+
static inline int
pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
{
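pmap.c leaves pmap_clean_stage2_tlbi and pmap_invalidate_vpipt_icache as function pointers so that the TLB and VPIPT I-cache maintenance that must run at EL2 can be supplied by the hypervisor module. A sketch of the expected wiring, with assumed vmm-side names and placeholder bodies:

extern void (*pmap_clean_stage2_tlbi)(void);    /* declared in pmap.c */

/* Placeholder bodies; the real helpers would trap to EL2. */
static void
vmm_el2_tlbi_all(void)
{
}

static void
vmm_el2_ic_vpipt(void)
{
}

static void
vmm_pmap_hooks_init(void)
{

        pmap_clean_stage2_tlbi = vmm_el2_tlbi_all;
        pmap_invalidate_vpipt_icache = vmm_el2_ic_vpipt;
}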
Index: sys/arm64/include/pte.h
===================================================================
--- sys/arm64/include/pte.h
+++ sys/arm64/include/pte.h
@@ -53,11 +53,11 @@
#define ATTR_S1_XN (ATTR_S1_PXN | ATTR_S1_UXN)
#define ATTR_S2_XN(x) ((x) << 53)
-#define ATTR_S2_XN_MASK ATTR_S2_XN(3)
-#define ATTR_S2_XN_NONE 0 /* Allow execution at EL0 & EL1 */
-#define ATTR_S2_XN_EL1 1 /* Allow execution at EL0 */
-#define ATTR_S2_XN_ALL 2 /* No execution */
-#define ATTR_S2_XN_EL0 3 /* Allow execution at EL1 */
+#define ATTR_S2_XN_MASK ATTR_S2_XN(3UL)
+#define ATTR_S2_XN_NONE 0UL /* Allow execution at EL0 & EL1 */
+#define ATTR_S2_XN_EL1 1UL /* Allow execution at EL0 */
+#define ATTR_S2_XN_ALL 2UL /* No execution */
+#define ATTR_S2_XN_EL0 3UL /* Allow execution at EL1 */
#define ATTR_CONTIGUOUS (1UL << 52)
#define ATTR_DBM (1UL << 51)
@@ -80,9 +80,16 @@
#define ATTR_S1_IDX_MASK (7 << 2)
#define ATTR_S2_S2AP(x) ((x) << 6)
-#define ATTR_S1_S2AP_MASK ATTR_S2_S2AP(3)
-#define ATTR_S2_MEMATTR(x) ((x) << 2)
-#define ATTR_S2_MEMATTR_MASK ATTR_S2_MEMATTR(0xf)
+#define ATTR_S2_S2AP_MASK 3
+#define ATTR_S2_S2AP_READ 1
+#define ATTR_S2_S2AP_WRITE 2
+
+#define ATTR_S2_MEMATTR(x) ((x) << 2)
+#define ATTR_S2_MEMATTR_MASK ATTR_S2_MEMATTR(0xf)
+#define ATTR_S2_MEMATTR_DEVICE_nGnRnE 0x0
+#define ATTR_S2_MEMATTR_NC 0xf
+#define ATTR_S2_MEMATTR_WT 0xa
+#define ATTR_S2_MEMATTR_WB 0xf
#define ATTR_DEFAULT (ATTR_AF | ATTR_SH(ATTR_SH_IS))
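With the stage 2 field definitions above, a leaf entry is composed the same way the new pmap_pte_memattr() and pmap_pte_prot() helpers do it in pmap_enter(). A sketch (mirroring, not copying, those helpers) for a read/write/execute, write-back normal-memory page:

static pt_entry_t
stage2_rwx_wb_l3e(vm_paddr_t pa)
{
        pt_entry_t l3e;

        l3e = pa | ATTR_DEFAULT | L3_PAGE;
        l3e |= ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WB);     /* pmap_pte_memattr() */
        l3e |= ATTR_S2_S2AP(ATTR_S2_S2AP_READ) |        /* pmap_pte_prot() */
            ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
        /*
         * Executable: ATTR_S2_XN stays ATTR_S2_XN_NONE and pmap_enter()
         * clears ATTR_AF, so the first guest access faults into
         * pmap_stage2_fault(), which invalidates the I-cache and sets AF.
         */
        l3e &= ~ATTR_AF;
        return (l3e);
}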
