Index: arm64/arm64/cpufunc_asm.S
===================================================================
--- arm64/arm64/cpufunc_asm.S
+++ arm64/arm64/cpufunc_asm.S
@@ -93,14 +93,6 @@
 /*
  * Generic functions to read/modify/write the internal coprocessor registers
  */
-ENTRY(arm64_setttb)
-	dsb	ish
-	msr	ttbr0_el1, x0
-	dsb	ish
-	isb
-	ret
-END(arm64_setttb)
-
 ENTRY(arm64_tlb_flushID)
 	dsb	ishst
 #ifdef SMP
Index: arm64/arm64/efirt_machdep.c
===================================================================
--- arm64/arm64/efirt_machdep.c
+++ arm64/arm64/efirt_machdep.c
@@ -47,6 +47,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -62,9 +63,9 @@
 #include
 
 static vm_object_t obj_1t1_pt;
-static vm_page_t efi_l0_page;
+static vm_pindex_t efi_1t1_idx;
 static pd_entry_t *efi_l0;
-static vm_pindex_t efi_1t1_idx;
+static uint64_t efi_ttbr0;
 
 void
 efi_destroy_1t1_map(void)
@@ -81,8 +82,9 @@
 	}
 
 	obj_1t1_pt = NULL;
+	efi_1t1_idx = 0;
 	efi_l0 = NULL;
-	efi_l0_page = NULL;
+	efi_ttbr0 = 0;
 }
 
 static vm_page_t
@@ -164,6 +166,7 @@
 	struct efi_md *p;
 	pt_entry_t *l3, l3_attr;
 	vm_offset_t va;
+	vm_page_t efi_l0_page;
 	uint64_t idx;
 	int i, mode;
 
@@ -172,10 +175,11 @@
 	    L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES * Ln_ENTRIES,
 	    VM_PROT_ALL, 0, NULL);
 	VM_OBJECT_WLOCK(obj_1t1_pt);
-	efi_1t1_idx = 0;
 	efi_l0_page = efi_1t1_page();
 	VM_OBJECT_WUNLOCK(obj_1t1_pt);
 	efi_l0 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_l0_page));
+	efi_ttbr0 = ASID_TO_OPERAND(ASID_RESERVED_FOR_EFI) |
+	    VM_PAGE_TO_PHYS(efi_l0_page);
 
 	for (i = 0, p = map; i < ndesc;
 	    i++, p = efi_next_descriptor(p, descsz)) {
@@ -213,7 +217,7 @@
 		printf("MAP %lx mode %x pages %lu\n", p->md_phys, mode, p->md_pages);
 
 		l3_attr = ATTR_DEFAULT | ATTR_IDX(mode) | ATTR_AP(ATTR_AP_RW) |
-		    L3_PAGE;
+		    ATTR_nG | L3_PAGE;
 		if (mode == VM_MEMATTR_DEVICE || p->md_attr & EFI_MD_ATTR_XP)
 			l3_attr |= ATTR_UXN | ATTR_PXN;
@@ -236,15 +240,10 @@
 efi_arch_enter(void)
 {
 
-	__asm __volatile(
-	    "msr ttbr0_el1, %0	\n"
-	    "isb		\n"
-	    "dsb  ishst		\n"
-	    "tlbi vmalle1is	\n"
-	    "dsb  ish		\n"
-	    "isb		\n"
-	    : : "r"(VM_PAGE_TO_PHYS(efi_l0_page)));
+	CTR1(KTR_SPARE5, "%s: xxx", __func__);
+	set_ttbr0(efi_ttbr0);
+
 	return (0);
 }
@@ -264,14 +263,11 @@
 	    "mrs x18, tpidr_el1	\n"
 	    );
 	td = curthread;
-	__asm __volatile(
-	    "msr ttbr0_el1, %0	\n"
-	    "isb		\n"
-	    "dsb  ishst		\n"
-	    "tlbi vmalle1is	\n"
-	    "dsb  ish		\n"
-	    "isb		\n"
-	    : : "r"(td->td_proc->p_md.md_l0addr));
+	set_ttbr0(td->td_proc->p_md.md_ttbr0);
+
+	CTR3(KTR_SPARE5, "%s: pid=%d, ttbr0=%lx", __func__,
+	    td->td_proc->p_pid,
+	    td->td_proc->p_md.md_ttbr0);
 }
 
 int
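For reference: TTBR0_EL1 carries an ASID in bits 63:48 alongside the
translation table base address, which is why efi_ttbr0 above can simply OR
ASID_TO_OPERAND(ASID_RESERVED_FOR_EFI) with the physical address of the L0
page, and why the EFI runtime mappings now also get ATTR_nG so they are
tagged with that reserved ASID.  A minimal userland-style sketch of the
composition; the helper name make_ttbr0 is hypothetical, not part of the
patch:

    #include <stdint.h>

    #define	ASID_TO_OPERAND_SHIFT	48	/* as in the pmap.h hunk below */

    /*
     * Hypothetical helper: compose a TTBR0_EL1 value from an ASID and the
     * page-aligned physical address of an L0 translation table.
     */
    static inline uint64_t
    make_ttbr0(uint16_t asid, uint64_t l0_pa)
    {
    	return ((uint64_t)asid << ASID_TO_OPERAND_SHIFT | l0_pa);
    }
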
Index: arm64/arm64/genassym.c
===================================================================
--- arm64/arm64/genassym.c
+++ arm64/arm64/genassym.c
@@ -35,7 +35,6 @@
 #include
 #include
 
-#include
 
 ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
 ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
@@ -54,9 +53,6 @@
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
 
-ASSYM(P_MD, offsetof(struct proc, p_md));
-ASSYM(MD_L0ADDR, offsetof(struct mdproc, md_l0addr));
-
 ASSYM(SF_UC, offsetof(struct sigframe, sf_uc));
 
 ASSYM(TD_PROC, offsetof(struct thread, td_proc));
Index: arm64/arm64/locore.S
===================================================================
--- arm64/arm64/locore.S
+++ arm64/arm64/locore.S
@@ -501,6 +501,9 @@
  *  x9  = PA start (trashed)
  *  x10 = Entry count
  *  x11, x12 and x13 are trashed
+ *
+ * XXX Currently, this function is only used to create mappings for the
+ * ttbr0 table, so we unconditionally set ATTR_nG.
  */
 build_l1_block_pagetable:
 	/*
@@ -517,6 +520,7 @@
 #ifdef SMP
 	orr	x12, x12, ATTR_SH(ATTR_SH_IS)
 #endif
+	orr	x12, x12, #(ATTR_nG)
 
 	/* Only use the output address bits */
 	lsr	x9, x9, #L1_SHIFT
@@ -597,11 +601,17 @@
 	msr	mair_el1, x2
 
 	/*
-	 * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1.
+	 * Setup TCR according to the PARange and ASIDBits fields
+	 * from ID_AA64MMFR0_EL1.  More precisely, set TCR_EL1.AS
+	 * to 1 only if the ASIDBits field equals 0b0010.
	 */
 	ldr	x2, tcr
 	mrs	x3, id_aa64mmfr0_el1
 	bfi	x2, x3, #32, #3
+	and	x3, x3, #0xF0
+	cmp	x3, #0x20
+	cset	x3, eq
+	bfi	x2, x3, #36, #1
 	msr	tcr_el1, x2
 
 	/* Setup SCTLR */
@@ -622,7 +632,7 @@
 		MAIR_ATTR(MAIR_NORMAL_WB, 2) |	\
 		MAIR_ATTR(MAIR_NORMAL_WT, 3)
 tcr:
-	.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \
+	.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG1_4K | \
	    TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
 sctlr_set:
	/* Bits to set */
Index: arm64/arm64/machdep.c
===================================================================
--- arm64/arm64/machdep.c
+++ arm64/arm64/machdep.c
@@ -753,7 +753,7 @@
 	pcpup->pc_curpcb = thread0.td_pcb;
 
 	/* Set the base address of translation table 0. */
-	thread0.td_proc->p_md.md_l0addr = READ_SPECIALREG(ttbr0_el1);
+	thread0.td_proc->p_md.md_ttbr0 = READ_SPECIALREG(ttbr0_el1);
 }
 
 typedef struct {
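The new locore.S sequence (and/cmp/cset/bfi) inspects
ID_AA64MMFR0_EL1.ASIDBits, the four-bit field at bits 7:4, where 0b0000
means the CPU implements 8 ASID bits and 0b0010 means 16.  TCR_EL1.AS
(bit 36) is set only in the 16-bit case, replacing the unconditional
TCR_ASID_16 that was dropped from the static tcr template.  An
illustrative C equivalent of the same computation:

    #include <stdint.h>

    #define	TCR_ASID_16	(0x1UL << 36)	/* TCR_EL1.AS */

    /* Mirror of: and x3, x3, #0xF0 / cmp x3, #0x20 / cset x3, eq /
     * bfi x2, x3, #36, #1 */
    static inline uint64_t
    tcr_set_asid_size(uint64_t tcr, uint64_t id_aa64mmfr0)
    {
    	if ((id_aa64mmfr0 & 0xF0) == 0x20)	/* ASIDBits == 0b0010 */
    		tcr |= TCR_ASID_16;		/* 16-bit ASIDs */
    	else
    		tcr &= ~TCR_ASID_16;		/* 8-bit ASIDs */
    	return (tcr);
    }

pmap_init() later reads this bit back (READ_SPECIALREG(tcr_el1) &
TCR_ASID_16) to size the ASID allocator.
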
Index: arm64/arm64/pmap.c
===================================================================
--- arm64/arm64/pmap.c
+++ arm64/arm64/pmap.c
@@ -276,6 +276,13 @@
 
 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
 
+/* ASID allocator */
+static struct unrhdr asid_unr;
+static struct mtx asid_mtx;
+static int asid_bits;
+SYSCTL_INT(_vm_pmap, OID_AUTO, asid_bits, CTLFLAG_RD, &asid_bits, 0,
+    "The number of bits in an ASID");
+
 static int superpages_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
@@ -786,6 +793,10 @@
 	uint64_t kern_delta;
 	int i;
 
+	/* Verify that the ASID is set through TTBR0. */
+	KASSERT((READ_SPECIALREG(tcr_el1) & TCR_A1) == 0,
+	    ("pmap_bootstrap: TCR_EL1.A1 != 0"));
+
 	kern_delta = KERNBASE - kernstart;
 
 	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
@@ -794,6 +805,7 @@
 
 	/* Set this early so we can use the pagetable walking functions */
 	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
+	kernel_pmap->pm_asid = -1;
 	PMAP_LOCK_INIT(kernel_pmap);
 
 	/* Assume the address we were loaded to is a valid physical address */
@@ -908,6 +920,11 @@
 	int i, pv_npg;
 
 	/*
+	 * Determine whether an ASID is 8 or 16 bits in size.
+	 */
+	asid_bits = (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8;
+
+	/*
 	 * Are large page mappings enabled?
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
@@ -918,6 +935,13 @@
 	}
 
 	/*
+	 * Initialize the ASID allocator.
+	 */
+	mtx_init(&asid_mtx, "asid", NULL, MTX_DEF);
+	init_unrhdr(&asid_unr, ASID_FIRST_AVAILABLE, (1 << asid_bits) - 1,
+	    &asid_mtx);
+
+	/*
 	 * Initialize the pv chunk list mutex.
 	 */
 	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
@@ -971,30 +995,42 @@
 static __inline void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
+	uint64_t r;
 
 	sched_pin();
-	__asm __volatile(
-	    "dsb  ishst		\n"
-	    "tlbi vaae1is, %0	\n"
-	    "dsb  ish		\n"
-	    "isb		\n"
-	    : : "r"(va >> PAGE_SHIFT));
+	dsb(ishst);
+	if (pmap == kernel_pmap) {
+		r = atop(va);
+		__asm __volatile("tlbi vaae1is, %0" : : "r" (r));
+	} else {
+		r = ASID_TO_OPERAND(pmap->pm_asid) | atop(va);
+		__asm __volatile("tlbi vae1is, %0" : : "r" (r));
+	}
+	dsb(ish);
+	isb();
 	sched_unpin();
 }
 
 static __inline void
 pmap_invalidate_range_nopin(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	vm_offset_t addr;
+	uint64_t end, r, start;
 
 	dsb(ishst);
-	for (addr = sva; addr < eva; addr += PAGE_SIZE) {
-		__asm __volatile(
-		    "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
+	if (pmap == kernel_pmap) {
+		start = atop(sva);
+		end = atop(eva);
+		for (r = start; r < end; r++)
+			__asm __volatile("tlbi vaae1is, %0" : : "r" (r));
+	} else {
+		start = end = ASID_TO_OPERAND(pmap->pm_asid);
+		start |= atop(sva);
+		end |= atop(eva);
+		for (r = start; r < end; r++)
+			__asm __volatile("tlbi vae1is, %0" : : "r" (r));
 	}
-	__asm __volatile(
-	    "dsb  ish	\n"
-	    "isb	\n");
+	dsb(ish);
+	isb();
 }
 
 static __inline void
@@ -1009,13 +1045,18 @@
 static __inline void
 pmap_invalidate_all(pmap_t pmap)
 {
+	uint64_t r;
 
 	sched_pin();
-	__asm __volatile(
-	    "dsb  ishst		\n"
-	    "tlbi vmalle1is	\n"
-	    "dsb  ish		\n"
-	    "isb		\n");
+	dsb(ishst);
+	if (pmap == kernel_pmap) {
+		__asm __volatile("tlbi vmalle1is");
+	} else {
+		r = ASID_TO_OPERAND(pmap->pm_asid);
+		__asm __volatile("tlbi aside1is, %0" : : "r" (r));
+	}
+	dsb(ish);
+	isb();
 	sched_unpin();
 }
@@ -1442,9 +1483,14 @@
 pmap_pinit0(pmap_t pmap)
 {
 
+	printf("pmap_kextract(kernel_pmap->pm_l0) = %lx\n",
+	    pmap_kextract((vm_offset_t)kernel_pmap->pm_l0));
+	printf("ttbr0 = %lx\n", READ_SPECIALREG(ttbr0_el1));
+
 	PMAP_LOCK_INIT(pmap);
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
-	pmap->pm_l0 = kernel_pmap->pm_l0;
+	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(READ_SPECIALREG(ttbr0_el1));
+	pmap->pm_asid = ASID_RESERVED_FOR_PID_0;
 	pmap->pm_root.rt_root = 0;
 }
@@ -1467,6 +1513,9 @@
 	if ((l0pt->flags & PG_ZERO) == 0)
 		pagezero(pmap->pm_l0);
 
+	if ((pmap->pm_asid = alloc_unr(&asid_unr)) == -1)
+		panic("alloc_unr: ASID allocation failed");
+
 	pmap->pm_root.rt_root = 0;
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
@@ -1717,6 +1766,9 @@
 	KASSERT(vm_radix_is_empty(&pmap->pm_root),
 	    ("pmap_release: pmap has reserved page table page(s)"));
 
+	free_unr(&asid_unr, pmap->pm_asid);
+	pmap->pm_asid = -1;
+
 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));
 	vm_page_unwire_noq(m);
@@ -3194,6 +3246,8 @@
 		new_l3 |= ATTR_SW_WIRED;
 	if (va < VM_MAXUSER_ADDRESS)
 		new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
+	if (pmap != kernel_pmap)
+		new_l3 |= ATTR_nG;
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		new_l3 |= ATTR_SW_MANAGED;
 		if ((prot & VM_PROT_WRITE) != 0) {
@@ -3456,6 +3510,8 @@
 		new_l2 |= ATTR_XN;
 	if (va < VM_MAXUSER_ADDRESS)
 		new_l2 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
+	if (pmap != kernel_pmap)
+		new_l2 |= ATTR_nG;
 
 	return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
 	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
 	    KERN_SUCCESS);
@@ -3754,6 +3810,8 @@
 		l3_val |= ATTR_XN;
 	if (va < VM_MAXUSER_ADDRESS)
 		l3_val |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
+	if (pmap != kernel_pmap)
+		l3_val |= ATTR_nG;
 
 	/*
 	 * Now validate mapping with RO protection
@@ -5679,20 +5737,28 @@
 	return (val);
 }
 
+uint64_t
+pmap_to_ttbr0(pmap_t pmap)
+{
+
+	return (ASID_TO_OPERAND(pmap->pm_asid) |
+	    pmap_kextract((vm_offset_t)pmap->pm_l0));
+}
+
 void
 pmap_activate(struct thread *td)
 {
-	pmap_t pmap;
+	struct proc *p;
 
 	critical_enter();
-	pmap = vmspace_pmap(td->td_proc->p_vmspace);
-	td->td_proc->p_md.md_l0addr = vtophys(pmap->pm_l0);
-	__asm __volatile(
-	    "msr ttbr0_el1, %0	\n"
-	    "isb		\n"
-	    : : "r"(td->td_proc->p_md.md_l0addr));
-	pmap_invalidate_all(pmap);
+	p = td->td_proc;
+	p->p_md.md_ttbr0 = pmap_to_ttbr0(vmspace_pmap(p->p_vmspace));
+	set_ttbr0(p->p_md.md_ttbr0);
 	critical_exit();
+
+	CTR3(KTR_SPARE5, "%s: pid=%d, ttbr0=%lx", __func__,
+	    p->p_pid,
+	    p->p_md.md_ttbr0);
 }
 
 struct pcb *
@@ -5714,18 +5780,12 @@
 	 */
 	if (old == NULL ||
-	    old->td_proc->p_md.md_l0addr != new->td_proc->p_md.md_l0addr) {
-		__asm __volatile(
-		    /* Switch to the new pmap */
-		    "msr ttbr0_el1, %0	\n"
-		    "isb		\n"
+	    old->td_proc->p_md.md_ttbr0 != new->td_proc->p_md.md_ttbr0) {
+		set_ttbr0(new->td_proc->p_md.md_ttbr0);
 
-		    /* Invalidate the TLB */
-		    "dsb  ishst		\n"
-		    "tlbi vmalle1is	\n"
-		    "dsb  ish		\n"
-		    "isb		\n"
-		    : : "r"(new->td_proc->p_md.md_l0addr));
+		CTR3(KTR_SPARE5, "%s: pid=%d, ttbr0=%lx", __func__,
+		    new->td_proc->p_pid,
+		    new->td_proc->p_md.md_ttbr0);
 
 		/*
 		 * Stop userspace from training the branch predictor against
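On the TLBI encodings used above: the operand register for tlbi vae1is
carries the ASID in bits 63:48 and the virtual page number (VA >> 12) in
the low bits, so ASID_TO_OPERAND(pmap->pm_asid) | atop(va) builds it
directly.  tlbi vaae1is matches the VA under any ASID, which keeps it
correct for the kernel pmap, whose entries stay global (no ATTR_nG), and
tlbi aside1is drops every entry tagged with one ASID.  An illustrative
encoder, not part of the patch and with a hypothetical name:

    #include <stdint.h>

    #define	SKETCH_PAGE_SHIFT	12

    /*
     * Hypothetical helper: operand for TLBI VAE1IS -- ASID in bits
     * 63:48, virtual page number in the low bits.
     */
    static inline uint64_t
    tlbi_vae1_operand(uint16_t asid, uint64_t va)
    {
    	return ((uint64_t)asid << 48 | va >> SKETCH_PAGE_SHIFT);
    }
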
Index: arm64/arm64/vm_machdep.c
===================================================================
--- arm64/arm64/vm_machdep.c
+++ arm64/arm64/vm_machdep.c
@@ -91,8 +91,8 @@
 	td2->td_pcb = pcb2;
 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
 
-	td2->td_proc->p_md.md_l0addr =
-	    vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l0);
+	td2->td_proc->p_md.md_ttbr0 =
+	    pmap_to_ttbr0(vmspace_pmap(td2->td_proc->p_vmspace));
 
 	tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1);
 	bcopy(td1->td_frame, tf, sizeof(*tf));
Index: arm64/include/armreg.h
===================================================================
--- arm64/include/armreg.h
+++ arm64/include/armreg.h
@@ -597,7 +597,7 @@
 #define	PSR_FLAGS	0xf0000000
 
 /* TCR_EL1 - Translation Control Register */
-#define	TCR_ASID_16	(1 << 36)
+#define	TCR_ASID_16	(0x1UL << 36)
 
 #define	TCR_IPS_SHIFT	32
 #define	TCR_IPS_32BIT	(0 << TCR_IPS_SHIFT)
@@ -618,6 +618,8 @@
 #define	TCR_ORGN1_WBWA	(0x1UL << TCR_ORGN1_SHIFT)
 #define	TCR_IRGN1_SHIFT	24
 #define	TCR_IRGN1_WBWA	(0x1UL << TCR_IRGN1_SHIFT)
+#define	TCR_A1_SHIFT	22
+#define	TCR_A1		(0x1UL << TCR_A1_SHIFT)
 #define	TCR_SH0_SHIFT	12
 #define	TCR_SH0_IS	(0x3UL << TCR_SH0_SHIFT)
 #define	TCR_ORGN0_SHIFT	10
Index: arm64/include/cpufunc.h
===================================================================
--- arm64/include/cpufunc.h
+++ arm64/include/cpufunc.h
@@ -178,6 +178,17 @@
 	__asm __volatile("clrex" : : : "memory");
 }
 
+static __inline void
+set_ttbr0(uint64_t ttbr0)
+{
+
+	__asm __volatile(
+	    "msr ttbr0_el1, %0	\n"
+	    "isb		\n"
+	    :
+	    : "r" (ttbr0));
+}
+
 extern int64_t dcache_line_size;
 extern int64_t icache_line_size;
 extern int64_t idcache_line_size;
@@ -185,7 +196,6 @@
 
 #define	cpu_nullop()			arm64_nullop()
 #define	cpufunc_nullop()		arm64_nullop()
-#define	cpu_setttb(a)			arm64_setttb(a)
 
 #define	cpu_tlb_flushID()		arm64_tlb_flushID()
 
@@ -198,7 +208,6 @@
 #define	cpu_icache_sync_range_checked(a, s)	arm64_icache_sync_range_checked((a), (s))
 
 void arm64_nullop(void);
-void arm64_setttb(vm_offset_t);
 void arm64_tlb_flushID(void);
 void arm64_icache_sync_range(vm_offset_t, vm_size_t);
 int arm64_icache_sync_range_checked(vm_offset_t, vm_size_t);
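Because every user mapping is now installed with ATTR_nG and tagged with
its pmap's ASID, set_ttbr0() can be a bare msr ttbr0_el1/isb: entries
inserted under the old ASID cannot match lookups made under the new one,
so the tlbi vmalle1is that the old pmap_switch() issued on every table
switch is gone, and the now-unused arm64_setttb()/cpu_setttb() are removed
outright.  A simplified sketch of the switch path under that assumption,
with the types reduced to the one field that matters:

    #include <stddef.h>
    #include <stdint.h>

    void set_ttbr0(uint64_t);	/* msr ttbr0_el1, isb -- see cpufunc.h */

    struct mdproc_sketch {
    	uint64_t	md_ttbr0;	/* ASID | L0 table PA */
    };

    static void
    switch_user_map(const struct mdproc_sketch *oldp,
        const struct mdproc_sketch *newp)
    {
    	/* Write TTBR0 only when the (ASID, table) pair changes. */
    	if (oldp == NULL || oldp->md_ttbr0 != newp->md_ttbr0)
    		set_ttbr0(newp->md_ttbr0);
    }
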
Index: arm64/include/pmap.h
===================================================================
--- arm64/include/pmap.h
+++ arm64/include/pmap.h
@@ -83,6 +83,7 @@
 	pd_entry_t		*pm_l0;
 	TAILQ_HEAD(,pv_chunk)	pm_pvchunk;	/* list of mappings in pmap */
 	struct vm_radix		pm_root;	/* spare page table pages */
+	int			pm_asid;
 };
 typedef struct pmap *pmap_t;
@@ -132,6 +133,15 @@
 #define	PMAP_TRYLOCK(pmap)	mtx_trylock(&(pmap)->pm_mtx)
 #define	PMAP_UNLOCK(pmap)	mtx_unlock(&(pmap)->pm_mtx)
 
+#define	ASID_RESERVED_FOR_PID_0	0
+#define	ASID_RESERVED_FOR_EFI	1
+#define	ASID_FIRST_AVAILABLE	(ASID_RESERVED_FOR_EFI + 1)
+#define	ASID_TO_OPERAND_SHIFT	48
+#define	ASID_TO_OPERAND(asid)	({					\
+	KASSERT((asid) != -1, ("invalid ASID"));			\
+	(uint64_t)(asid) << ASID_TO_OPERAND_SHIFT;			\
+})
+
 extern vm_offset_t virtual_avail;
 extern vm_offset_t virtual_end;
@@ -144,6 +154,7 @@
 void	pmap_bootstrap(vm_offset_t, vm_offset_t, vm_paddr_t, vm_size_t);
 int	pmap_change_attr(vm_offset_t va, vm_size_t size, int mode);
+uint64_t pmap_to_ttbr0(pmap_t pmap);
 void	pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode);
 void	pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t);
 vm_paddr_t pmap_kextract(vm_offset_t va);
Index: arm64/include/proc.h
===================================================================
--- arm64/include/proc.h
+++ arm64/include/proc.h
@@ -40,7 +40,7 @@
 };
 
 struct mdproc {
-	vm_offset_t	md_l0addr;
+	uint64_t	md_ttbr0;
 };
 
 #define	KINFO_PROC_SIZE	1088
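One sizing note on the allocator set up in pmap_init():
init_unrhdr(&asid_unr, ASID_FIRST_AVAILABLE, (1 << asid_bits) - 1, ...)
hands out ASIDs 2 through 255 on 8-bit parts and 2 through 65535 on
16-bit parts, with 0 reserved for pid 0 and 1 for the EFI runtime map.
Past that, alloc_unr() returns -1 and pmap_pinit() panics, so an 8-bit
implementation caps out at 254 concurrent user pmaps.  Illustrative
arithmetic only:

    #include <stdio.h>

    int
    main(void)
    {
    	int asid_bits;

    	/* usable = [ASID_FIRST_AVAILABLE .. (1 << asid_bits) - 1] */
    	for (asid_bits = 8; asid_bits <= 16; asid_bits += 8)
    		printf("%2d-bit ASIDs: %d usable\n", asid_bits,
    		    ((1 << asid_bits) - 1) - 2 + 1);
    	return (0);
    }

This prints 254 and 65534, which is the practical pmap budget under the
patch's panic-on-exhaustion policy.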