Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -151,6 +151,7 @@
 #include
 
 #define	PMAP_ASSERT_STAGE1(pmap)	MPASS((pmap)->pm_stage == PM_STAGE1)
+#define	PMAP_ASSERT_STAGE2(pmap)	MPASS((pmap)->pm_stage == PM_STAGE2)
 
 #define	NL0PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
 #define	NL1PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
@@ -304,6 +305,8 @@
 SYSCTL_INT(_vm_pmap_asid, OID_AUTO, epoch, CTLFLAG_RD, &asids.asid_epoch, 0,
     "The current epoch number");
 
+void (*pmap_invalidate_vpipt_icache)(void);
+
 /*
  * A pmap's cookie encodes an ASID and epoch number. Cookies for reserved
  * ASIDs have a negative epoch number, specifically, INT_MIN. Cookies for
@@ -591,6 +594,60 @@
 
 CTASSERT(L1_BLOCK == L2_BLOCK);
 
+static pt_entry_t
+pmap_pte_memattr(pmap_t pmap, vm_memattr_t memattr)
+{
+	pt_entry_t val;
+
+	if (pmap->pm_stage == PM_STAGE1) {
+		val = ATTR_S1_IDX(memattr);
+		if (memattr == VM_MEMATTR_DEVICE)
+			val |= ATTR_S1_XN;
+		return (val);
+	} else {
+		val = 0;
+
+		switch (memattr) {
+		case VM_MEMATTR_DEVICE:
+			return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_DEVICE_nGnRnE) |
+			    ATTR_S2_XN(ATTR_S2_XN_ALL));
+		case VM_MEMATTR_UNCACHEABLE:
+			return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_NC));
+		case VM_MEMATTR_WRITE_BACK:
+			return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WB));
+		case VM_MEMATTR_WRITE_THROUGH:
+			return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WT));
+		}
+	}
+
+	/* We shouldn't get here */
+	MPASS(false);
+	return (0);
+}
+
+static pt_entry_t
+pmap_pte_prot(pmap_t pmap, vm_prot_t prot)
+{
+	pt_entry_t val;
+
+	val = 0;
+	if (pmap->pm_stage == PM_STAGE1) {
+		if ((prot & VM_PROT_EXECUTE) == 0)
+			val |= ATTR_S1_XN;
+		if ((prot & VM_PROT_WRITE) == 0)
+			val |= ATTR_S1_AP(ATTR_S1_AP_RO);
+	} else {
+		if ((prot & VM_PROT_WRITE) != 0)
+			val |= ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
+		if ((prot & VM_PROT_READ) != 0)
+			val |= ATTR_S2_S2AP(ATTR_S2_S2AP_READ);
+		if ((prot & VM_PROT_EXECUTE) == 0)
+			val |= ATTR_S2_XN(ATTR_S2_XN_ALL);
+	}
+
+	return (val);
+}
+
 /*
  * Checks if the PTE is dirty.
  */
@@ -3350,33 +3407,40 @@
 	boolean_t nosleep;
 	int lvl, rv;
 
-	PMAP_ASSERT_STAGE1(pmap);
-
 	va = trunc_page(va);
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	pa = VM_PAGE_TO_PHYS(m);
-	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_S1_IDX(m->md.pv_memattr) |
-	    L3_PAGE);
-	if ((prot & VM_PROT_WRITE) == 0)
-		new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
-	if ((prot & VM_PROT_EXECUTE) == 0 ||
-	    m->md.pv_memattr == VM_MEMATTR_DEVICE)
-		new_l3 |= ATTR_S1_XN;
+	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | L3_PAGE);
+	new_l3 |= pmap_pte_memattr(pmap, m->md.pv_memattr);
+	new_l3 |= pmap_pte_prot(pmap, prot);
+
 	if ((flags & PMAP_ENTER_WIRED) != 0)
 		new_l3 |= ATTR_SW_WIRED;
-	if (va < VM_MAXUSER_ADDRESS)
-		new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
-	else
-		new_l3 |= ATTR_S1_UXN;
-	if (pmap != kernel_pmap)
-		new_l3 |= ATTR_S1_nG;
+	if (pmap->pm_stage == PM_STAGE1) {
+		if (va < VM_MAXUSER_ADDRESS)
+			new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
+		else
+			new_l3 |= ATTR_S1_UXN;
+		if (pmap != kernel_pmap)
+			new_l3 |= ATTR_S1_nG;
+	} else {
+		/*
+		 * Clear the access flag on executable mappings, this will be
+		 * set later when the page is accessed. The fault handler is
+		 * required to invalidate the I-cache.
+		 */
+		if (prot & VM_PROT_EXECUTE)
+			new_l3 &= ~ATTR_AF;
+	}
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		new_l3 |= ATTR_SW_MANAGED;
 		if ((prot & VM_PROT_WRITE) != 0) {
 			new_l3 |= ATTR_SW_DBM;
-			if ((flags & VM_PROT_WRITE) == 0)
+			if ((flags & VM_PROT_WRITE) == 0) {
+				PMAP_ASSERT_STAGE1(pmap);
 				new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
+			}
 		}
 	}
 
@@ -3545,21 +3609,27 @@
 	}
 
 validate:
-	/*
-	 * Sync icache if exec permission and attribute VM_MEMATTR_WRITE_BACK
-	 * is set. Do it now, before the mapping is stored and made
-	 * valid for hardware table walk. If done later, then other can
-	 * access this page before caches are properly synced.
-	 * Don't do it for kernel memory which is mapped with exec
-	 * permission even if the memory isn't going to hold executable
-	 * code. The only time when icache sync is needed is after
-	 * kernel module is loaded and the relocation info is processed.
-	 * And it's done in elf_cpu_load_file().
-	 */
-	if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
-	    m->md.pv_memattr == VM_MEMATTR_WRITE_BACK &&
-	    (opa != pa || (orig_l3 & ATTR_S1_XN)))
-		cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+	if (pmap->pm_stage == PM_STAGE1) {
+		/*
+		 * Sync icache if exec permission and attribute
+		 * VM_MEMATTR_WRITE_BACK is set. Do it now, before the mapping
+		 * is stored and made valid for hardware table walk. If done
+		 * later, then other can access this page before caches are
+		 * properly synced. Don't do it for kernel memory which is
+		 * mapped with exec permission even if the memory isn't going
+		 * to hold executable code. The only time when icache sync is
+		 * needed is after kernel module is loaded and the relocation
+		 * info is processed. And it's done in elf_cpu_load_file().
+		 */
+		if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
+		    m->md.pv_memattr == VM_MEMATTR_WRITE_BACK &&
+		    (opa != pa || (orig_l3 & ATTR_S1_XN))) {
+			PMAP_ASSERT_STAGE1(pmap);
+			cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+		}
+	} else {
+		cpu_dcache_wb_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
+	}
 
 	/*
 	 * Update the L3 entry
@@ -6076,6 +6146,52 @@
 	}
 }
 
+static int
+pmap_stage2_fault(pmap_t pmap, uint64_t esr, uint64_t far)
+{
+	pt_entry_t *ptep;
+	int rv, lvl;
+
+	PMAP_ASSERT_STAGE2(pmap);
+	rv = KERN_FAILURE;
+
+	/* Data and insn aborts use same encoding for FSC field. */
+	switch (esr & ISS_DATA_DFSC_MASK) {
+	case ISS_DATA_DFSC_AFF_L1:
+	case ISS_DATA_DFSC_AFF_L2:
+	case ISS_DATA_DFSC_AFF_L3:
+		PMAP_LOCK(pmap);
+		ptep = pmap_pte(pmap, far, &lvl);
+		if (ptep != NULL) {
+			if (icache_vmid) {
+				pmap_invalidate_vpipt_icache();
+			} else {
+				/*
+				 * If accessing an executable page invalidate
+				 * the I-cache so it will be valid when we
+				 * continue execution in the guest. The D-cache
+				 * is assumed to already be clean to the Point
+				 * of Coherency.
+				 */
+				if ((pmap_load(ptep) & ATTR_S2_XN_MASK) !=
+				    ATTR_S2_XN(ATTR_S2_XN_NONE)) {
+					invalidate_icache();
+				}
+			}
+			pmap_set_bits(ptep, ATTR_AF);
+			rv = KERN_SUCCESS;
+			/*
+			 * XXXMJ as an optimization we could mark the entry
+			 * dirty if this is a write fault.
+			 */
+		}
+		PMAP_UNLOCK(pmap);
+		break;
+	}
+
+	return (rv);
+}
+
 int
 pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
 {
@@ -6084,7 +6200,6 @@
 	uint64_t ec, par;
 	int lvl, rv;
 
-	PMAP_ASSERT_STAGE1(pmap);
 	rv = KERN_FAILURE;
 
 	ec = ESR_ELx_EXCEPTION(esr);
@@ -6098,6 +6213,9 @@
 		return (rv);
 	}
 
+	if (pmap->pm_stage == PM_STAGE2)
+		return (pmap_stage2_fault(pmap, esr, far));
+
 	/* Data and insn aborts use same encoding for FSC field. */
 	switch (esr & ISS_DATA_DFSC_MASK) {
 	case ISS_DATA_DFSC_AFF_L1:
Index: sys/arm64/include/cpufunc.h
===================================================================
--- sys/arm64/include/cpufunc.h
+++ sys/arm64/include/cpufunc.h
@@ -189,6 +189,16 @@
 	    : "r" (ttbr0));
 }
 
+static __inline void
+invalidate_icache(void)
+{
+
+	__asm __volatile(
+	    "ic		ialluis	\n"
+	    "dsb	ish	\n"
+	    "isb	\n");
+}
+
 static __inline void
 invalidate_local_icache(void)
 {
Index: sys/arm64/include/pmap.h
===================================================================
--- sys/arm64/include/pmap.h
+++ sys/arm64/include/pmap.h
@@ -187,6 +187,8 @@
 
 struct pcb *pmap_switch(struct thread *, struct thread *);
 
+extern void (*pmap_invalidate_vpipt_icache)(void);
+
 static inline int
 pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
 {
Index: sys/arm64/include/pte.h
===================================================================
--- sys/arm64/include/pte.h
+++ sys/arm64/include/pte.h
@@ -53,11 +53,11 @@
 #define	ATTR_S1_XN		(ATTR_S1_PXN | ATTR_S1_UXN)
 
 #define	ATTR_S2_XN(x)		((x) << 53)
-#define	 ATTR_S2_XN_MASK	ATTR_S2_XN(3)
-#define	 ATTR_S2_XN_NONE	0	/* Allow execution at EL0 & EL1 */
-#define	 ATTR_S2_XN_EL1		1	/* Allow execution at EL0 */
-#define	 ATTR_S2_XN_ALL		2	/* No execution */
-#define	 ATTR_S2_XN_EL0		3	/* Allow execution at EL1 */
+#define	 ATTR_S2_XN_MASK	ATTR_S2_XN(3UL)
+#define	 ATTR_S2_XN_NONE	0UL	/* Allow execution at EL0 & EL1 */
+#define	 ATTR_S2_XN_EL1		1UL	/* Allow execution at EL0 */
+#define	 ATTR_S2_XN_ALL		2UL	/* No execution */
+#define	 ATTR_S2_XN_EL0		3UL	/* Allow execution at EL1 */
 
 #define	ATTR_CONTIGUOUS	(1UL << 52)
 #define	ATTR_DBM	(1UL << 51)
@@ -80,9 +80,16 @@
 #define	ATTR_S1_IDX_MASK	(7 << 2)
 
 #define	ATTR_S2_S2AP(x)		((x) << 6)
-#define	 ATTR_S1_S2AP_MASK	ATTR_S2_S2AP(3)
-#define	ATTR_S2_MEMATTR(x)	((x) << 2)
-#define	 ATTR_S2_MEMATTR_MASK	ATTR_S2_MEMATTR(0xf)
+#define	 ATTR_S2_S2AP_MASK	3
+#define	 ATTR_S2_S2AP_READ	1
+#define	 ATTR_S2_S2AP_WRITE	2
+
+#define	ATTR_S2_MEMATTR(x)	((x) << 2)
+#define	 ATTR_S2_MEMATTR_MASK	ATTR_S2_MEMATTR(0xf)
+#define	 ATTR_S2_MEMATTR_DEVICE_nGnRnE	0x0
+#define	 ATTR_S2_MEMATTR_NC	0x5
+#define	 ATTR_S2_MEMATTR_WT	0xa
+#define	 ATTR_S2_MEMATTR_WB	0xf
 
 #define	ATTR_DEFAULT	(ATTR_AF | ATTR_SH(ATTR_SH_IS))