Index: head/sys/arm64/arm64/cpu_errata.c =================================================================== --- head/sys/arm64/arm64/cpu_errata.c +++ head/sys/arm64/arm64/cpu_errata.c @@ -59,6 +59,7 @@ static cpu_quirk_install install_psci_bp_hardening; static cpu_quirk_install install_ssbd_workaround; +static cpu_quirk_install install_thunderx_bcast_tlbi_workaround; static struct cpu_quirks cpu_quirks[] = { { @@ -92,6 +93,18 @@ .midr_value = 0, .quirk_install = install_ssbd_workaround, }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = + CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX, 0, 0), + .quirk_install = install_thunderx_bcast_tlbi_workaround, + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = + CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX_81XX, 0, 0), + .quirk_install = install_thunderx_bcast_tlbi_workaround, + }, }; static void @@ -135,6 +148,30 @@ default: PCPU_SET(ssbd, smccc_arch_workaround_2); break; + } +} + +/* + * Work around Cavium erratum 27456. + * + * Invalidate the local icache when changing address spaces. 
+ */ +static void +install_thunderx_bcast_tlbi_workaround(void) +{ + u_int midr; + + midr = get_midr(); + if (CPU_PART(midr) == CPU_PART_THUNDERX_81XX) + PCPU_SET(bcast_tlbi_workaround, 1); + else if (CPU_PART(midr) == CPU_PART_THUNDERX) { + if (CPU_VAR(midr) == 0) { + /* ThunderX 1.x */ + PCPU_SET(bcast_tlbi_workaround, 1); + } else if (CPU_VAR(midr) == 1 && CPU_REV(midr) <= 1) { + /* ThunderX 2.0 - 2.1 */ + PCPU_SET(bcast_tlbi_workaround, 1); + } } } Index: head/sys/arm64/arm64/cpufunc_asm.S =================================================================== --- head/sys/arm64/arm64/cpufunc_asm.S +++ head/sys/arm64/arm64/cpufunc_asm.S @@ -93,14 +93,6 @@ * Generic functions to read/modify/write the internal coprocessor registers */ -ENTRY(arm64_setttb) - dsb ish - msr ttbr0_el1, x0 - dsb ish - isb - ret -END(arm64_setttb) - ENTRY(arm64_tlb_flushID) dsb ishst #ifdef SMP Index: head/sys/arm64/arm64/efirt_machdep.c =================================================================== --- head/sys/arm64/arm64/efirt_machdep.c +++ head/sys/arm64/arm64/efirt_machdep.c @@ -62,9 +62,9 @@ #include static vm_object_t obj_1t1_pt; -static vm_page_t efi_l0_page; -static pd_entry_t *efi_l0; static vm_pindex_t efi_1t1_idx; +static pd_entry_t *efi_l0; +static uint64_t efi_ttbr0; void efi_destroy_1t1_map(void) @@ -81,8 +81,9 @@ } obj_1t1_pt = NULL; + efi_1t1_idx = 0; efi_l0 = NULL; - efi_l0_page = NULL; + efi_ttbr0 = 0; } static vm_page_t @@ -164,6 +165,7 @@ struct efi_md *p; pt_entry_t *l3, l3_attr; vm_offset_t va; + vm_page_t efi_l0_page; uint64_t idx; int i, mode; @@ -172,10 +174,11 @@ L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES * Ln_ENTRIES, VM_PROT_ALL, 0, NULL); VM_OBJECT_WLOCK(obj_1t1_pt); - efi_1t1_idx = 0; efi_l0_page = efi_1t1_page(); VM_OBJECT_WUNLOCK(obj_1t1_pt); efi_l0 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_l0_page)); + efi_ttbr0 = ASID_TO_OPERAND(ASID_RESERVED_FOR_EFI) | + VM_PAGE_TO_PHYS(efi_l0_page); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, 
descsz)) { @@ -213,7 +216,7 @@ printf("MAP %lx mode %x pages %lu\n", p->md_phys, mode, p->md_pages); l3_attr = ATTR_DEFAULT | ATTR_IDX(mode) | ATTR_AP(ATTR_AP_RW) | - L3_PAGE; + ATTR_nG | L3_PAGE; if (mode == VM_MEMATTR_DEVICE || p->md_attr & EFI_MD_ATTR_XP) l3_attr |= ATTR_UXN | ATTR_PXN; @@ -236,42 +239,37 @@ efi_arch_enter(void) { - __asm __volatile( - "msr ttbr0_el1, %0 \n" - "isb \n" - "dsb ishst \n" - "tlbi vmalle1is \n" - "dsb ish \n" - "isb \n" - : : "r"(VM_PAGE_TO_PHYS(efi_l0_page))); + CRITICAL_ASSERT(curthread); + /* + * Temporarily switch to EFI's page table. However, we leave curpmap + * unchanged in order to prevent its ASID from being reclaimed before + * we switch back to its page table in efi_arch_leave(). + */ + set_ttbr0(efi_ttbr0); + if (PCPU_GET(bcast_tlbi_workaround) != 0) + invalidate_local_icache(); + return (0); } void efi_arch_leave(void) { - struct thread *td; /* * Restore the pcpu pointer. Some UEFI implementations trash it and * we don't store it before calling into them. To fix this we need * to restore it after returning to the kernel context. As reading - * curthread will access x18 we need to restore it before loading - * the thread pointer. + * curpmap will access x18 we need to restore it before loading + * the pmap pointer. 
*/ __asm __volatile( "mrs x18, tpidr_el1 \n" ); - td = curthread; - __asm __volatile( - "msr ttbr0_el1, %0 \n" - "isb \n" - "dsb ishst \n" - "tlbi vmalle1is \n" - "dsb ish \n" - "isb \n" - : : "r"(td->td_proc->p_md.md_l0addr)); + set_ttbr0(pmap_to_ttbr0(PCPU_GET(curpmap))); + if (PCPU_GET(bcast_tlbi_workaround) != 0) + invalidate_local_icache(); } int Index: head/sys/arm64/arm64/genassym.c =================================================================== --- head/sys/arm64/arm64/genassym.c +++ head/sys/arm64/arm64/genassym.c @@ -35,7 +35,6 @@ #include #include -#include ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); @@ -53,9 +52,6 @@ ASSYM(PCB_TPIDRRO, offsetof(struct pcb, pcb_tpidrro_el0)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); - -ASSYM(P_MD, offsetof(struct proc, p_md)); -ASSYM(MD_L0ADDR, offsetof(struct mdproc, md_l0addr)); ASSYM(SF_UC, offsetof(struct sigframe, sf_uc)); Index: head/sys/arm64/arm64/locore.S =================================================================== --- head/sys/arm64/arm64/locore.S +++ head/sys/arm64/arm64/locore.S @@ -392,14 +392,15 @@ bl link_l0_pagetable /* - * Build the TTBR0 maps. + * Build the TTBR0 maps. As TTBR0 maps, they must specify ATTR_nG. + * They are only needed early on, so the VA = PA map is uncached. 
*/ add x27, x24, #PAGE_SIZE mov x6, x27 /* The initial page table */ #if defined(SOCDEV_PA) && defined(SOCDEV_VA) /* Create a table for the UART */ - mov x7, #DEVICE_MEM + mov x7, #(ATTR_nG | ATTR_IDX(DEVICE_MEM)) mov x8, #(SOCDEV_VA) /* VA start */ mov x9, #(SOCDEV_PA) /* PA start */ mov x10, #1 @@ -407,7 +408,7 @@ #endif /* Create the VA = PA map */ - mov x7, #NORMAL_UNCACHED /* Uncached as it's only needed early on */ + mov x7, #(ATTR_nG | ATTR_IDX(NORMAL_UNCACHED)) mov x9, x27 mov x8, x9 /* VA start (== PA start) */ mov x10, #1 @@ -497,7 +498,7 @@ /* * Builds count 1 GiB page table entry * x6 = L1 table - * x7 = Type (0 = Device, 1 = Normal) + * x7 = Variable lower block attributes * x8 = VA start * x9 = PA start (trashed) * x10 = Entry count @@ -512,8 +513,7 @@ and x11, x11, #Ln_ADDR_MASK /* Build the L1 block entry */ - lsl x12, x7, #2 - orr x12, x12, #L1_BLOCK + orr x12, x7, #L1_BLOCK orr x12, x12, #(ATTR_AF) #ifdef SMP orr x12, x12, ATTR_SH(ATTR_SH_IS) @@ -599,11 +599,17 @@ msr mair_el1, x2 /* - * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1. + * Setup TCR according to the PARange and ASIDBits fields + * from ID_AA64MMFR0_EL1. More precisely, set TCR_EL1.AS + * to 1 only if the ASIDBits field equals 0b0010. 
*/ ldr x2, tcr mrs x3, id_aa64mmfr0_el1 bfi x2, x3, #32, #3 + and x3, x3, #0xF0 + cmp x3, #0x20 + cset x3, eq + bfi x2, x3, #36, #1 msr tcr_el1, x2 /* Setup SCTLR */ @@ -624,7 +630,7 @@ MAIR_ATTR(MAIR_NORMAL_WB, 2) | \ MAIR_ATTR(MAIR_NORMAL_WT, 3) tcr: - .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \ + .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG1_4K | \ TCR_CACHE_ATTRS | TCR_SMP_ATTRS) sctlr_set: /* Bits to set */ Index: head/sys/arm64/arm64/machdep.c =================================================================== --- head/sys/arm64/arm64/machdep.c +++ head/sys/arm64/arm64/machdep.c @@ -797,9 +797,6 @@ thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; - - /* Set the base address of translation table 0. */ - thread0.td_proc->p_md.md_l0addr = READ_SPECIALREG(ttbr0_el1); } typedef struct { Index: head/sys/arm64/arm64/mp_machdep.c =================================================================== --- head/sys/arm64/arm64/mp_machdep.c +++ head/sys/arm64/arm64/mp_machdep.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -192,6 +193,7 @@ init_secondary(uint64_t cpu) { struct pcpu *pcpup; + pmap_t pmap0; pcpup = &__pcpu[cpu]; /* @@ -210,6 +212,12 @@ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; + + /* Initialize curpmap to match TTBR0's current setting. */ + pmap0 = vmspace_pmap(&vmspace0); + KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1), + ("pmap0 doesn't match cpu %ld's ttbr0", cpu)); + pcpup->pc_curpmap = pmap0; /* * Identify current CPU. 
This is necessary to setup Index: head/sys/arm64/arm64/pmap.c =================================================================== --- head/sys/arm64/arm64/pmap.c +++ head/sys/arm64/arm64/pmap.c @@ -113,6 +113,7 @@ #include #include #include +#include #include #include #include @@ -276,6 +277,48 @@ static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); +/* + * This ASID allocator uses a bit vector ("asid_set") to remember which ASIDs + * it has currently allocated to a pmap, a cursor ("asid_next") to + * optimize its search for a free ASID in the bit vector, and an epoch number + * ("asid_epoch") to indicate when it has reclaimed all previously allocated + * ASIDs that are not currently active on a processor. + * + * The current epoch number is always in the range [0, INT_MAX). Negative + * numbers and INT_MAX are reserved for special cases that are described + * below. + */ +static SYSCTL_NODE(_vm_pmap, OID_AUTO, asid, CTLFLAG_RD, 0, "ASID allocator"); +static int asid_bits; +SYSCTL_INT(_vm_pmap_asid, OID_AUTO, bits, CTLFLAG_RD, &asid_bits, 0, + "The number of bits in an ASID"); +static bitstr_t *asid_set; +static int asid_set_size; +static int asid_next; +SYSCTL_INT(_vm_pmap_asid, OID_AUTO, next, CTLFLAG_RD, &asid_next, 0, + "The last allocated ASID plus one"); +static int asid_epoch; +SYSCTL_INT(_vm_pmap_asid, OID_AUTO, epoch, CTLFLAG_RD, &asid_epoch, 0, + "The current epoch number"); +static struct mtx asid_set_mutex; + +/* + * A pmap's cookie encodes an ASID and epoch number. Cookies for reserved + * ASIDs have a negative epoch number, specifically, INT_MIN. Cookies for + * dynamically allocated ASIDs have a non-negative epoch number. + * + * An invalid ASID is represented by -1. 
+ * + * There are two special-case cookie values: (1) COOKIE_FROM(-1, INT_MIN), + * which indicates that an ASID should never be allocated to the pmap, and + * (2) COOKIE_FROM(-1, INT_MAX), which indicates that an ASID should be + * allocated when the pmap is next activated. + */ +#define COOKIE_FROM(asid, epoch) ((long)((u_int)(asid) | \ + ((u_long)(epoch) << 32))) +#define COOKIE_TO_ASID(cookie) ((int)(cookie)) +#define COOKIE_TO_EPOCH(cookie) ((int)((u_long)(cookie) >> 32)) + static int superpages_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0, @@ -295,6 +338,8 @@ static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); +static bool pmap_activate_int(pmap_t pmap); +static void pmap_alloc_asid(pmap_t pmap); static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, @@ -308,6 +353,7 @@ pd_entry_t l1e, struct spglist *free, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp); +static void pmap_reset_asid_set(void); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); @@ -786,6 +832,10 @@ uint64_t kern_delta; int i; + /* Verify that the ASID is set through TTBR0. 
*/ + KASSERT((READ_SPECIALREG(tcr_el1) & TCR_A1) == 0, + ("pmap_bootstrap: TCR_EL1.A1 != 0")); + kern_delta = KERNBASE - kernstart; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); @@ -795,6 +845,8 @@ /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; PMAP_LOCK_INIT(kernel_pmap); + kernel_pmap->pm_l0_paddr = l0pt - kern_delta; + kernel_pmap->pm_cookie = COOKIE_FROM(-1, INT_MIN); /* Assume the address we were loaded to is a valid physical address */ min_pa = KERNBASE - kern_delta; @@ -908,6 +960,11 @@ int i, pv_npg; /* + * Determine whether an ASID is 8 or 16 bits in size. + */ + asid_bits = (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8; + + /* * Are large page mappings enabled? */ TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled); @@ -918,6 +975,18 @@ } /* + * Initialize the ASID allocator. At this point, we are still too + * early in the overall initialization process to use bit_alloc(). + */ + asid_set_size = 1 << asid_bits; + asid_set = (bitstr_t *)kmem_malloc(bitstr_size(asid_set_size), + M_WAITOK | M_ZERO); + for (i = 0; i < ASID_FIRST_AVAILABLE; i++) + bit_set(asid_set, i); + asid_next = ASID_FIRST_AVAILABLE; + mtx_init(&asid_set_mutex, "asid set", NULL, MTX_SPIN); + + /* * Initialize the pv chunk list mutex. 
*/ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); @@ -971,30 +1040,42 @@ static __inline void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { + uint64_t r; sched_pin(); - __asm __volatile( - "dsb ishst \n" - "tlbi vaae1is, %0 \n" - "dsb ish \n" - "isb \n" - : : "r"(va >> PAGE_SHIFT)); + dsb(ishst); + if (pmap == kernel_pmap) { + r = atop(va); + __asm __volatile("tlbi vaae1is, %0" : : "r" (r)); + } else { + r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) | atop(va); + __asm __volatile("tlbi vae1is, %0" : : "r" (r)); + } + dsb(ish); + isb(); sched_unpin(); } static __inline void pmap_invalidate_range_nopin(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - vm_offset_t addr; + uint64_t end, r, start; dsb(ishst); - for (addr = sva; addr < eva; addr += PAGE_SIZE) { - __asm __volatile( - "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT)); + if (pmap == kernel_pmap) { + start = atop(sva); + end = atop(eva); + for (r = start; r < end; r++) + __asm __volatile("tlbi vaae1is, %0" : : "r" (r)); + } else { + start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); + start |= atop(sva); + end |= atop(eva); + for (r = start; r < end; r++) + __asm __volatile("tlbi vae1is, %0" : : "r" (r)); } - __asm __volatile( - "dsb ish \n" - "isb \n"); + dsb(ish); + isb(); } static __inline void @@ -1009,13 +1090,18 @@ static __inline void pmap_invalidate_all(pmap_t pmap) { + uint64_t r; sched_pin(); - __asm __volatile( - "dsb ishst \n" - "tlbi vmalle1is \n" - "dsb ish \n" - "isb \n"); + dsb(ishst); + if (pmap == kernel_pmap) { + __asm __volatile("tlbi vmalle1is"); + } else { + r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); + __asm __volatile("tlbi aside1is, %0" : : "r" (r)); + } + dsb(ish); + isb(); sched_unpin(); } @@ -1446,14 +1532,17 @@ PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); - pmap->pm_l0 = kernel_pmap->pm_l0; + pmap->pm_l0_paddr = READ_SPECIALREG(ttbr0_el1); + pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr); 
pmap->pm_root.rt_root = 0; + pmap->pm_cookie = COOKIE_FROM(ASID_RESERVED_FOR_PID_0, INT_MIN); + + PCPU_SET(curpmap, pmap); } int pmap_pinit(pmap_t pmap) { - vm_paddr_t l0phys; vm_page_t l0pt; /* @@ -1463,14 +1552,15 @@ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) vm_wait(NULL); - l0phys = VM_PAGE_TO_PHYS(l0pt); - pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); + pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(l0pt); + pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr); if ((l0pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l0); pmap->pm_root.rt_root = 0; bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX); return (1); } @@ -1712,6 +1802,7 @@ pmap_release(pmap_t pmap) { vm_page_t m; + int asid; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", @@ -1719,8 +1810,16 @@ KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); + mtx_lock_spin(&asid_set_mutex); + if (COOKIE_TO_EPOCH(pmap->pm_cookie) == asid_epoch) { + asid = COOKIE_TO_ASID(pmap->pm_cookie); + KASSERT(asid >= ASID_FIRST_AVAILABLE && asid < asid_set_size, + ("pmap_release: pmap cookie has out-of-range asid")); + bit_clear(asid_set, asid); + } + mtx_unlock_spin(&asid_set_mutex); + m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr); vm_page_unwire_noq(m); vm_page_free_zero(m); } @@ -3198,6 +3297,8 @@ new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN; else new_l3 |= ATTR_UXN; + if (pmap != kernel_pmap) + new_l3 |= ATTR_nG; if ((m->oflags & VPO_UNMANAGED) == 0) { new_l3 |= ATTR_SW_MANAGED; if ((prot & VM_PROT_WRITE) != 0) { @@ -3462,6 +3563,8 @@ new_l2 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN; else new_l2 |= ATTR_UXN; + if (pmap != kernel_pmap) + new_l2 |= ATTR_nG; return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP | PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == KERN_SUCCESS); @@ -3762,6 +3865,8 @@ l3_val |= 
ATTR_AP(ATTR_AP_USER) | ATTR_PXN; else l3_val |= ATTR_UXN; + if (pmap != kernel_pmap) + l3_val |= ATTR_nG; /* * Now validate mapping with RO protection @@ -4299,6 +4404,8 @@ int allfree, field, freed, idx, lvl; vm_paddr_t pa; + KASSERT(pmap == PCPU_GET(curpmap), ("non-current pmap %p", pmap)); + lock = NULL; SLIST_INIT(&free); @@ -5671,24 +5778,134 @@ return (val); } +/* + * Garbage collect every ASID that is neither active on a processor nor + * reserved. + */ +static void +pmap_reset_asid_set(void) +{ + pmap_t pmap; + int asid, cpuid, epoch; + + mtx_assert(&asid_set_mutex, MA_OWNED); + + /* + * Ensure that the store to asid_epoch is globally visible before the + * loads from pc_curpmap are performed. + */ + epoch = asid_epoch + 1; + if (epoch == INT_MAX) + epoch = 0; + asid_epoch = epoch; + dsb(ishst); + __asm __volatile("tlbi vmalle1is"); + dsb(ish); + bit_nclear(asid_set, ASID_FIRST_AVAILABLE, asid_set_size - 1); + CPU_FOREACH(cpuid) { + if (cpuid == curcpu) + continue; + pmap = pcpu_find(cpuid)->pc_curpmap; + asid = COOKIE_TO_ASID(pmap->pm_cookie); + if (asid == -1) + continue; + bit_set(asid_set, asid); + pmap->pm_cookie = COOKIE_FROM(asid, epoch); + } +} + +/* + * Allocate a new ASID for the specified pmap. + */ +static void +pmap_alloc_asid(pmap_t pmap) +{ + int new_asid; + + mtx_lock_spin(&asid_set_mutex); + + /* + * While this processor was waiting to acquire the asid set mutex, + * pmap_reset_asid_set() running on another processor might have + * updated this pmap's cookie to the current epoch. In which case, we + * don't need to allocate a new ASID. 
+ */ + if (COOKIE_TO_EPOCH(pmap->pm_cookie) == asid_epoch) + goto out; + + bit_ffc_at(asid_set, asid_next, asid_set_size, &new_asid); + if (new_asid == -1) { + bit_ffc_at(asid_set, ASID_FIRST_AVAILABLE, asid_next, + &new_asid); + if (new_asid == -1) { + pmap_reset_asid_set(); + bit_ffc_at(asid_set, ASID_FIRST_AVAILABLE, + asid_set_size, &new_asid); + KASSERT(new_asid != -1, ("ASID allocation failure")); + } + } + bit_set(asid_set, new_asid); + asid_next = new_asid + 1; + pmap->pm_cookie = COOKIE_FROM(new_asid, asid_epoch); +out: + mtx_unlock_spin(&asid_set_mutex); +} + +/* + * Compute the value that should be stored in ttbr0 to activate the specified + * pmap. This value may change from time to time. + */ +uint64_t +pmap_to_ttbr0(pmap_t pmap) +{ + + return (ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) | + pmap->pm_l0_paddr); +} + +static bool +pmap_activate_int(pmap_t pmap) +{ + int epoch; + + KASSERT(PCPU_GET(curpmap) != NULL, ("no active pmap")); + KASSERT(pmap != kernel_pmap, ("kernel pmap activation")); + if (pmap == PCPU_GET(curpmap)) + return (false); + + /* + * Ensure that the store to curpmap is globally visible before the + * load from asid_epoch is performed. + */ + PCPU_SET(curpmap, pmap); + dsb(ish); + epoch = COOKIE_TO_EPOCH(pmap->pm_cookie); + if (epoch >= 0 && epoch != asid_epoch) + pmap_alloc_asid(pmap); + + set_ttbr0(pmap_to_ttbr0(pmap)); + if (PCPU_GET(bcast_tlbi_workaround) != 0) + invalidate_local_icache(); + return (true); +} + void pmap_activate(struct thread *td) { pmap_t pmap; - critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); - td->td_proc->p_md.md_l0addr = vtophys(pmap->pm_l0); - __asm __volatile( - "msr ttbr0_el1, %0 \n" - "isb \n" - : : "r"(td->td_proc->p_md.md_l0addr)); - pmap_invalidate_all(pmap); + critical_enter(); + (void)pmap_activate_int(pmap); critical_exit(); } +/* + * To eliminate the unused parameter "old", we would have to add an instruction + * to cpu_switch(). 
+ */ struct pcb * -pmap_switch(struct thread *old, struct thread *new) +pmap_switch(struct thread *old __unused, struct thread *new) { pcpu_bp_harden bp_harden; struct pcb *pcb; @@ -5705,20 +5922,7 @@ * to a user process. */ - if (old == NULL || - old->td_proc->p_md.md_l0addr != new->td_proc->p_md.md_l0addr) { - __asm __volatile( - /* Switch to the new pmap */ - "msr ttbr0_el1, %0 \n" - "isb \n" - - /* Invalidate the TLB */ - "dsb ishst \n" - "tlbi vmalle1is \n" - "dsb ish \n" - "isb \n" - : : "r"(new->td_proc->p_md.md_l0addr)); - + if (pmap_activate_int(vmspace_pmap(new->td_proc->p_vmspace))) { /* * Stop userspace from training the branch predictor against * other processes. This will call into a CPU specific Index: head/sys/arm64/arm64/vm_machdep.c =================================================================== --- head/sys/arm64/arm64/vm_machdep.c +++ head/sys/arm64/arm64/vm_machdep.c @@ -91,9 +91,6 @@ td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); - td2->td_proc->p_md.md_l0addr = - vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l0); - tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); tf->tf_x[0] = 0; Index: head/sys/arm64/include/armreg.h =================================================================== --- head/sys/arm64/include/armreg.h +++ head/sys/arm64/include/armreg.h @@ -619,7 +619,7 @@ #define PSR_FLAGS 0xf0000000 /* TCR_EL1 - Translation Control Register */ -#define TCR_ASID_16 (1 << 36) +#define TCR_ASID_16 (0x1UL << 36) #define TCR_IPS_SHIFT 32 #define TCR_IPS_32BIT (0 << TCR_IPS_SHIFT) @@ -640,6 +640,8 @@ #define TCR_ORGN1_WBWA (0x1UL << TCR_ORGN1_SHIFT) #define TCR_IRGN1_SHIFT 24 #define TCR_IRGN1_WBWA (0x1UL << TCR_IRGN1_SHIFT) +#define TCR_A1_SHIFT 22 +#define TCR_A1 (0x1UL << TCR_A1_SHIFT) #define TCR_SH0_SHIFT 12 #define TCR_SH0_IS (0x3UL << TCR_SH0_SHIFT) #define TCR_ORGN0_SHIFT 10 Index: head/sys/arm64/include/cpufunc.h 
=================================================================== --- head/sys/arm64/include/cpufunc.h +++ head/sys/arm64/include/cpufunc.h @@ -178,6 +178,27 @@ __asm __volatile("clrex" : : : "memory"); } +static __inline void +set_ttbr0(uint64_t ttbr0) +{ + + __asm __volatile( + "msr ttbr0_el1, %0 \n" + "isb \n" + : + : "r" (ttbr0)); +} + +static __inline void +invalidate_local_icache(void) +{ + + __asm __volatile( + "ic iallu \n" + "dsb nsh \n" + "isb \n"); +} + extern int64_t dcache_line_size; extern int64_t icache_line_size; extern int64_t idcache_line_size; @@ -185,7 +206,6 @@ #define cpu_nullop() arm64_nullop() #define cpufunc_nullop() arm64_nullop() -#define cpu_setttb(a) arm64_setttb(a) #define cpu_tlb_flushID() arm64_tlb_flushID() @@ -198,7 +218,6 @@ #define cpu_icache_sync_range_checked(a, s) arm64_icache_sync_range_checked((a), (s)) void arm64_nullop(void); -void arm64_setttb(vm_offset_t); void arm64_tlb_flushID(void); void arm64_icache_sync_range(vm_offset_t, vm_size_t); int arm64_icache_sync_range_checked(vm_offset_t, vm_size_t); Index: head/sys/arm64/include/pcpu.h =================================================================== --- head/sys/arm64/include/pcpu.h +++ head/sys/arm64/include/pcpu.h @@ -40,12 +40,14 @@ struct debug_monitor_state; #define PCPU_MD_FIELDS \ - u_int pc_acpi_id; /* ACPI CPU id */ \ - u_int pc_midr; /* stored MIDR value */ \ + u_int pc_acpi_id; /* ACPI CPU id */ \ + u_int pc_midr; /* stored MIDR value */ \ uint64_t pc_clock; \ pcpu_bp_harden pc_bp_harden; \ pcpu_ssbd pc_ssbd; \ - char __pad[225] + struct pmap *pc_curpmap; \ + u_int pc_bcast_tlbi_workaround; \ + char __pad[213] #ifdef _KERNEL Index: head/sys/arm64/include/pmap.h =================================================================== --- head/sys/arm64/include/pmap.h +++ head/sys/arm64/include/pmap.h @@ -79,10 +79,12 @@ struct pmap { struct mtx pm_mtx; - struct pmap_statistics pm_stats; /* pmap statictics */ + struct pmap_statistics pm_stats; /* pmap statistics 
*/ + vm_paddr_t pm_l0_paddr; pd_entry_t *pm_l0; TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ struct vm_radix pm_root; /* spare page table pages */ + long pm_cookie; /* encodes the pmap's ASID and epoch */ }; typedef struct pmap *pmap_t; @@ -132,6 +134,15 @@ #define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) #define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) +#define ASID_RESERVED_FOR_PID_0 0 +#define ASID_RESERVED_FOR_EFI 1 +#define ASID_FIRST_AVAILABLE (ASID_RESERVED_FOR_EFI + 1) +#define ASID_TO_OPERAND_SHIFT 48 +#define ASID_TO_OPERAND(asid) ({ \ + KASSERT((asid) != -1, ("invalid ASID")); \ + (uint64_t)(asid) << ASID_TO_OPERAND_SHIFT; \ +}) + extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; @@ -152,6 +163,7 @@ void *pmap_mapdev_attr(vm_offset_t pa, vm_size_t size, vm_memattr_t ma); bool pmap_page_is_mapped(vm_page_t m); bool pmap_ps_enabled(pmap_t pmap); +uint64_t pmap_to_ttbr0(pmap_t pmap); void *pmap_mapdev(vm_offset_t, vm_size_t); void *pmap_mapbios(vm_paddr_t, vm_size_t); Index: head/sys/arm64/include/proc.h =================================================================== --- head/sys/arm64/include/proc.h +++ head/sys/arm64/include/proc.h @@ -40,7 +40,7 @@ }; struct mdproc { - vm_offset_t md_l0addr; + long md_dummy; }; #define KINFO_PROC_SIZE 1088