diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 3cb5543101d2..89848e584e77 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1,2892 +1,2892 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008-2015 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is also stored by the * logical address mapping module, this module may throw away valid virtual * to physical mappings at almost any time. However, invalidations of * mappings must be done as requested. * * In order to cope with hardware architectures which make virtual to * physical map invalidates expensive, this module may delay invalidate * reduced protection operations until such time as they are actually * necessary. This module is given full information as to which processors * are currently using which maps, and to when physical maps must be made * correct. */ #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" void moea64_release_vsid(uint64_t vsid); uintptr_t moea64_get_unique_vsid(void); #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) #define VSID_HASH_MASK 0x0000007fffffffffULL /* * Locking semantics: * * There are two locks of interest: the page locks and the pmap locks, which * protect their individual PVO lists and are locked in that order. The contents * of all PVO entries are protected by the locks of their respective pmaps. * The pmap of any PVO is guaranteed not to change so long as the PVO is linked * into any list. * */ #define PV_LOCK_COUNT PA_LOCK_COUNT*3 static struct mtx_padalign pv_lock[PV_LOCK_COUNT]; #define PV_LOCKPTR(pa) ((struct mtx *)(&pv_lock[pa_index(pa) % PV_LOCK_COUNT])) #define PV_LOCK(pa) mtx_lock(PV_LOCKPTR(pa)) #define PV_UNLOCK(pa) mtx_unlock(PV_LOCKPTR(pa)) #define PV_LOCKASSERT(pa) mtx_assert(PV_LOCKPTR(pa), MA_OWNED) #define PV_PAGE_LOCK(m) PV_LOCK(VM_PAGE_TO_PHYS(m)) #define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m)) #define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m)) struct ofw_map { cell_t om_va; cell_t om_len; uint64_t om_pa; cell_t om_mode; }; extern unsigned char _etext[]; extern unsigned char _end[]; extern void *slbtrap, *slbtrapend; /* * Map of physical memory regions. */ static struct mem_region *regions; static struct mem_region *pregions; static u_int phys_avail_count; static int regions_sz, pregions_sz; extern void bs_remap_earlyboot(void); /* * Lock for the SLB tables. */ struct mtx moea64_slb_mutex; /* * PTEG data. */ -u_int moea64_pteg_count; -u_int moea64_pteg_mask; +u_long moea64_pteg_count; +u_long moea64_pteg_mask; /* * PVO data. */ uma_zone_t moea64_pvo_zone; /* zone for pvo entries */ static struct pvo_entry *moea64_bpvo_pool; static int moea64_bpvo_pool_index = 0; static int moea64_bpvo_pool_size = 327680; TUNABLE_INT("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size); SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD, &moea64_bpvo_pool_index, 0, ""); #define VSID_NBPW (sizeof(u_int32_t) * 8) #ifdef __powerpc64__ #define NVSIDS (NPMAPS * 16) #define VSID_HASHMASK 0xffffffffUL #else #define NVSIDS NPMAPS #define VSID_HASHMASK 0xfffffUL #endif static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW]; static boolean_t moea64_initialized = FALSE; /* * Statistics. */ u_int moea64_pte_valid = 0; u_int moea64_pte_overflow = 0; u_int moea64_pvo_entries = 0; u_int moea64_pvo_enter_calls = 0; u_int moea64_pvo_remove_calls = 0; SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD, &moea64_pte_valid, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD, &moea64_pte_overflow, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD, &moea64_pvo_entries, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD, &moea64_pvo_enter_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, &moea64_pvo_remove_calls, 0, ""); vm_offset_t moea64_scratchpage_va[2]; struct pvo_entry *moea64_scratchpage_pvo[2]; struct mtx moea64_scratchpage_mtx; uint64_t moea64_large_page_mask = 0; uint64_t moea64_large_page_size = 0; int moea64_large_page_shift = 0; /* * PVO calls. */ static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head); static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo); static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo); static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); /* * Utility routines. */ static boolean_t moea64_query_bit(mmu_t, vm_page_t, uint64_t); static u_int moea64_clear_bit(mmu_t, vm_page_t, uint64_t); static void moea64_kremove(mmu_t, vm_offset_t); static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_size_t sz); static void moea64_pmap_init_qpages(void); /* * Kernel MMU interface */ void moea64_clear_modify(mmu_t, vm_page_t); void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize); int moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int flags, int8_t psind); void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea64_init(mmu_t); boolean_t moea64_is_modified(mmu_t, vm_page_t); boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t); boolean_t moea64_is_referenced(mmu_t, vm_page_t); int moea64_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); void moea64_page_init(mmu_t, vm_page_t); int moea64_page_wired_mappings(mmu_t, vm_page_t); void moea64_pinit(mmu_t, pmap_t); void moea64_pinit0(mmu_t, pmap_t); void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int); void moea64_qremove(mmu_t, vm_offset_t, int); void moea64_release(mmu_t, pmap_t); void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_remove_pages(mmu_t, pmap_t); void moea64_remove_all(mmu_t, vm_page_t); void moea64_remove_write(mmu_t, vm_page_t); void moea64_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_zero_page(mmu_t, vm_page_t); void moea64_zero_page_area(mmu_t, vm_page_t, int, int); void moea64_activate(mmu_t, struct thread *); void moea64_deactivate(mmu_t, struct thread *); void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t); void *moea64_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t); vm_paddr_t moea64_kextract(mmu_t, vm_offset_t); void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma); void moea64_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t ma); void moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t); boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va); void moea64_scan_init(mmu_t mmu); vm_offset_t moea64_quick_enter_page(mmu_t mmu, vm_page_t m); void moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr); static int moea64_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen); static int moea64_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, vm_offset_t *decoded_addr); static mmu_method_t moea64_methods[] = { MMUMETHOD(mmu_clear_modify, moea64_clear_modify), MMUMETHOD(mmu_copy_page, moea64_copy_page), MMUMETHOD(mmu_copy_pages, moea64_copy_pages), MMUMETHOD(mmu_enter, moea64_enter), MMUMETHOD(mmu_enter_object, moea64_enter_object), MMUMETHOD(mmu_enter_quick, moea64_enter_quick), MMUMETHOD(mmu_extract, moea64_extract), MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), MMUMETHOD(mmu_init, moea64_init), MMUMETHOD(mmu_is_modified, moea64_is_modified), MMUMETHOD(mmu_is_prefaultable, moea64_is_prefaultable), MMUMETHOD(mmu_is_referenced, moea64_is_referenced), MMUMETHOD(mmu_ts_referenced, moea64_ts_referenced), MMUMETHOD(mmu_map, moea64_map), MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), MMUMETHOD(mmu_page_init, moea64_page_init), MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings), MMUMETHOD(mmu_pinit, moea64_pinit), MMUMETHOD(mmu_pinit0, moea64_pinit0), MMUMETHOD(mmu_protect, moea64_protect), MMUMETHOD(mmu_qenter, moea64_qenter), MMUMETHOD(mmu_qremove, moea64_qremove), MMUMETHOD(mmu_release, moea64_release), MMUMETHOD(mmu_remove, moea64_remove), MMUMETHOD(mmu_remove_pages, moea64_remove_pages), MMUMETHOD(mmu_remove_all, moea64_remove_all), MMUMETHOD(mmu_remove_write, moea64_remove_write), MMUMETHOD(mmu_sync_icache, moea64_sync_icache), MMUMETHOD(mmu_unwire, moea64_unwire), MMUMETHOD(mmu_zero_page, moea64_zero_page), MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), MMUMETHOD(mmu_activate, moea64_activate), MMUMETHOD(mmu_deactivate, moea64_deactivate), MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), MMUMETHOD(mmu_quick_enter_page, moea64_quick_enter_page), MMUMETHOD(mmu_quick_remove_page, moea64_quick_remove_page), /* Internal interfaces */ MMUMETHOD(mmu_mapdev, moea64_mapdev), MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), MMUMETHOD(mmu_unmapdev, moea64_unmapdev), MMUMETHOD(mmu_kextract, moea64_kextract), MMUMETHOD(mmu_kenter, moea64_kenter), MMUMETHOD(mmu_kenter_attr, moea64_kenter_attr), MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped), MMUMETHOD(mmu_scan_init, moea64_scan_init), MMUMETHOD(mmu_dumpsys_map, moea64_dumpsys_map), MMUMETHOD(mmu_map_user_ptr, moea64_map_user_ptr), MMUMETHOD(mmu_decode_kernel_ptr, moea64_decode_kernel_ptr), { 0, 0 } }; MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); static struct pvo_head * vm_page_to_pvoh(vm_page_t m) { mtx_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), MA_OWNED); return (&m->md.mdpg_pvoh); } static struct pvo_entry * alloc_pvo_entry(int bootstrap) { struct pvo_entry *pvo; if (!moea64_initialized || bootstrap) { if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) { panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd", moea64_bpvo_pool_index, moea64_bpvo_pool_size, moea64_bpvo_pool_size * sizeof(struct pvo_entry)); } pvo = &moea64_bpvo_pool[ atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)]; bzero(pvo, sizeof(*pvo)); pvo->pvo_vaddr = PVO_BOOTSTRAP; } else { pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT); bzero(pvo, sizeof(*pvo)); } return (pvo); } static void init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va) { uint64_t vsid; uint64_t hash; int shift; PMAP_LOCK_ASSERT(pmap, MA_OWNED); pvo->pvo_pmap = pmap; va &= ~ADDR_POFF; pvo->pvo_vaddr |= va; vsid = va_to_vsid(pmap, va); pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) | (vsid << 16); shift = (pvo->pvo_vaddr & PVO_LARGE) ? moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift); pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3; } static void free_pvo_entry(struct pvo_entry *pvo) { if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) uma_zfree(moea64_pvo_zone, pvo); } void moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte) { lpte->pte_hi = (pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & LPTE_AVPN_MASK; lpte->pte_hi |= LPTE_VALID; if (pvo->pvo_vaddr & PVO_LARGE) lpte->pte_hi |= LPTE_BIG; if (pvo->pvo_vaddr & PVO_WIRED) lpte->pte_hi |= LPTE_WIRED; if (pvo->pvo_vaddr & PVO_HID) lpte->pte_hi |= LPTE_HID; lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */ if (pvo->pvo_pte.prot & VM_PROT_WRITE) lpte->pte_lo |= LPTE_BW; else lpte->pte_lo |= LPTE_BR; if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE)) lpte->pte_lo |= LPTE_NOEXEC; } static __inline uint64_t moea64_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) { uint64_t pte_lo; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (LPTE_I | LPTE_G); case VM_MEMATTR_CACHEABLE: return (LPTE_M); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (LPTE_I); case VM_MEMATTR_WRITE_THROUGH: return (LPTE_W | LPTE_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. */ pte_lo = LPTE_I | LPTE_G; for (i = 0; i < pregions_sz; i++) { if ((pa >= pregions[i].mr_start) && (pa < (pregions[i].mr_start + pregions[i].mr_size))) { pte_lo &= ~(LPTE_I | LPTE_G); pte_lo |= LPTE_M; break; } } return pte_lo; } /* * Quick sort callout for comparing memory regions. */ static int om_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; const struct ofw_map *mapb; mapa = a; mapb = b; if (mapa->om_pa < mapb->om_pa) return (-1); else if (mapa->om_pa > mapb->om_pa) return (1); else return (0); } static void moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) { struct ofw_map translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */ pcell_t acells, trans_cells[sz/sizeof(cell_t)]; struct pvo_entry *pvo; register_t msr; vm_offset_t off; vm_paddr_t pa_base; int i, j; bzero(translations, sz); OF_getencprop(OF_finddevice("/"), "#address-cells", &acells, sizeof(acells)); if (OF_getencprop(mmu, "translations", trans_cells, sz) == -1) panic("moea64_bootstrap: can't get ofw translations"); CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations"); sz /= sizeof(cell_t); for (i = 0, j = 0; i < sz; j++) { translations[j].om_va = trans_cells[i++]; translations[j].om_len = trans_cells[i++]; translations[j].om_pa = trans_cells[i++]; if (acells == 2) { translations[j].om_pa <<= 32; translations[j].om_pa |= trans_cells[i++]; } translations[j].om_mode = trans_cells[i++]; } KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)", i, sz)); sz = j; qsort(translations, sz, sizeof (*translations), om_cmp); for (i = 0; i < sz; i++) { pa_base = translations[i].om_pa; #ifndef __powerpc64__ if ((translations[i].om_pa >> 32) != 0) panic("OFW translations above 32-bit boundary!"); #endif if (pa_base % PAGE_SIZE) panic("OFW translation not page-aligned (phys)!"); if (translations[i].om_va % PAGE_SIZE) panic("OFW translation not page-aligned (virt)!"); CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x", pa_base, translations[i].om_va, translations[i].om_len); /* Now enter the pages for this mapping */ DISABLE_TRANS(msr); for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { /* If this address is direct-mapped, skip remapping */ if (hw_direct_map && translations[i].om_va == PHYS_TO_DMAP(pa_base) && moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT) == LPTE_M) continue; PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, translations[i].om_va + off); PMAP_UNLOCK(kernel_pmap); if (pvo != NULL) continue; moea64_kenter(mmup, translations[i].om_va + off, pa_base + off); } ENABLE_TRANS(msr); } } #ifdef __powerpc64__ static void moea64_probe_large_page(void) { uint16_t pvr = mfpvr() >> 16; switch (pvr) { case IBM970: case IBM970FX: case IBM970MP: powerpc_sync(); isync(); mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG); powerpc_sync(); isync(); /* FALLTHROUGH */ default: if (moea64_large_page_size == 0) { moea64_large_page_size = 0x1000000; /* 16 MB */ moea64_large_page_shift = 24; } } moea64_large_page_mask = moea64_large_page_size - 1; } static void moea64_bootstrap_slb_prefault(vm_offset_t va, int large) { struct slb *cache; struct slb entry; uint64_t esid, slbe; uint64_t i; cache = PCPU_GET(aim.slb); esid = va >> ADDR_SR_SHFT; slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; for (i = 0; i < 64; i++) { if (cache[i].slbe == (slbe | i)) return; } entry.slbe = slbe; entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; if (large) entry.slbv |= SLBV_L; slb_insert_kernel(entry.slbe, entry.slbv); } #endif static void moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { struct pvo_entry *pvo; register_t msr; vm_paddr_t pa; vm_offset_t size, off; uint64_t pte_lo; int i; if (moea64_large_page_size == 0) hw_direct_map = 0; DISABLE_TRANS(msr); if (hw_direct_map) { PMAP_LOCK(kernel_pmap); for (i = 0; i < pregions_sz; i++) { for (pa = pregions[i].mr_start; pa < pregions[i].mr_start + pregions[i].mr_size; pa += moea64_large_page_size) { pte_lo = LPTE_M; pvo = alloc_pvo_entry(1 /* bootstrap */); pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE; init_pvo_entry(pvo, kernel_pmap, PHYS_TO_DMAP(pa)); /* * Set memory access as guarded if prefetch within * the page could exit the available physmem area. */ if (pa & moea64_large_page_mask) { pa &= moea64_large_page_mask; pte_lo |= LPTE_G; } if (pa + moea64_large_page_size > pregions[i].mr_start + pregions[i].mr_size) pte_lo |= LPTE_G; pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; pvo->pvo_pte.pa = pa | pte_lo; moea64_pvo_enter(mmup, pvo, NULL); } } PMAP_UNLOCK(kernel_pmap); } /* * Make sure the kernel and BPVO pool stay mapped on systems either * without a direct map or on which the kernel is not already executing * out of the direct-mapped region. */ if (!hw_direct_map || kernelstart < DMAP_BASE_ADDRESS) { for (pa = kernelstart & ~PAGE_MASK; pa < kernelend; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); } if (!hw_direct_map) { size = moea64_bpvo_pool_size*sizeof(struct pvo_entry); off = (vm_offset_t)(moea64_bpvo_pool); for (pa = off; pa < off + size; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); } ENABLE_TRANS(msr); /* * Allow user to override unmapped_buf_allowed for testing. * XXXKIB Only direct map implementation was tested. */ if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", &unmapped_buf_allowed)) unmapped_buf_allowed = hw_direct_map; } /* Quick sort callout for comparing physical addresses. */ static int pa_cmp(const void *a, const void *b) { const vm_paddr_t *pa = a, *pb = b; if (*pa < *pb) return (-1); else if (*pa > *pb) return (1); else return (0); } void moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i, j; vm_size_t physsz, hwphyssz; vm_paddr_t kernelphysstart, kernelphysend; int rm_pavail; #ifndef __powerpc64__ /* We don't have a direct map since there is no BAT */ hw_direct_map = 0; /* Make sure battable is zero, since we have no BAT */ for (i = 0; i < 16; i++) { battable[i].batu = 0; battable[i].batl = 0; } #else moea64_probe_large_page(); /* Use a direct map if we have large page support */ if (moea64_large_page_size > 0) hw_direct_map = 1; else hw_direct_map = 0; /* Install trap handlers for SLBs */ bcopy(&slbtrap, (void *)EXC_DSE,(size_t)&slbtrapend - (size_t)&slbtrap); bcopy(&slbtrap, (void *)EXC_ISE,(size_t)&slbtrapend - (size_t)&slbtrap); __syncicache((void *)EXC_DSE, 0x80); __syncicache((void *)EXC_ISE, 0x80); #endif kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS; kernelphysend = kernelend & ~DMAP_BASE_ADDRESS; /* Get physical memory regions from firmware */ mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea64_bootstrap: physical memory"); if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) panic("moea64_bootstrap: phys_avail too small"); phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); for (i = 0, j = 0; i < regions_sz; i++, j += 2) { CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)", regions[i].mr_start, regions[i].mr_start + regions[i].mr_size, regions[i].mr_size); if (hwphyssz != 0 && (physsz + regions[i].mr_size) >= hwphyssz) { if (physsz < hwphyssz) { phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; } break; } phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; phys_avail_count++; physsz += regions[i].mr_size; } /* Check for overlap with the kernel and exception vectors */ rm_pavail = 0; for (j = 0; j < 2*phys_avail_count; j+=2) { if (phys_avail[j] < EXC_LAST) phys_avail[j] += EXC_LAST; if (phys_avail[j] >= kernelphysstart && phys_avail[j+1] <= kernelphysend) { phys_avail[j] = phys_avail[j+1] = ~0; rm_pavail++; continue; } if (kernelphysstart >= phys_avail[j] && kernelphysstart < phys_avail[j+1]) { if (kernelphysend < phys_avail[j+1]) { phys_avail[2*phys_avail_count] = (kernelphysend & ~PAGE_MASK) + PAGE_SIZE; phys_avail[2*phys_avail_count + 1] = phys_avail[j+1]; phys_avail_count++; } phys_avail[j+1] = kernelphysstart & ~PAGE_MASK; } if (kernelphysend >= phys_avail[j] && kernelphysend < phys_avail[j+1]) { if (kernelphysstart > phys_avail[j]) { phys_avail[2*phys_avail_count] = phys_avail[j]; phys_avail[2*phys_avail_count + 1] = kernelphysstart & ~PAGE_MASK; phys_avail_count++; } phys_avail[j] = (kernelphysend & ~PAGE_MASK) + PAGE_SIZE; } } /* Remove physical available regions marked for removal (~0) */ if (rm_pavail) { qsort(phys_avail, 2*phys_avail_count, sizeof(phys_avail[0]), pa_cmp); phys_avail_count -= rm_pavail; for (i = 2*phys_avail_count; i < 2*(phys_avail_count + rm_pavail); i+=2) phys_avail[i] = phys_avail[i+1] = 0; } physmem = btoc(physsz); #ifdef PTEGCOUNT moea64_pteg_count = PTEGCOUNT; #else moea64_pteg_count = 0x1000; while (moea64_pteg_count < physmem) moea64_pteg_count <<= 1; moea64_pteg_count >>= 1; #endif /* PTEGCOUNT */ } void moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i; /* * Set PTEG mask */ moea64_pteg_mask = moea64_pteg_count - 1; /* * Initialize SLB table lock and page locks */ mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); for (i = 0; i < PV_LOCK_COUNT; i++) mtx_init(&pv_lock[i], "page pv", NULL, MTX_DEF); /* * Initialise the bootstrap pvo pool. */ moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( moea64_bpvo_pool_size*sizeof(struct pvo_entry), 0); moea64_bpvo_pool_index = 0; /* Place at address usable through the direct map */ if (hw_direct_map) moea64_bpvo_pool = (struct pvo_entry *) PHYS_TO_DMAP((uintptr_t)moea64_bpvo_pool); /* * Make sure kernel vsid is allocated as well as VSID 0. */ #ifndef __powerpc64__ moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW] |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); moea64_vsid_bitmap[0] |= 1; #endif /* * Initialize the kernel pmap (which is statically allocated). */ #ifdef __powerpc64__ for (i = 0; i < 64; i++) { pcpup->pc_aim.slb[i].slbv = 0; pcpup->pc_aim.slb[i].slbe = 0; } #else for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; #endif kernel_pmap->pmap_phys = kernel_pmap; CPU_FILL(&kernel_pmap->pm_active); RB_INIT(&kernel_pmap->pmap_pvo); PMAP_LOCK_INIT(kernel_pmap); /* * Now map in all the other buffers we allocated earlier */ moea64_setup_direct_map(mmup, kernelstart, kernelend); } void moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { ihandle_t mmui; phandle_t chosen; phandle_t mmu; ssize_t sz; int i; vm_offset_t pa, va; void *dpcpu; /* * Set up the Open Firmware pmap and add its mappings if not in real * mode. */ chosen = OF_finddevice("/chosen"); if (chosen != -1 && OF_getencprop(chosen, "mmu", &mmui, 4) != -1) { mmu = OF_instance_to_package(mmui); if (mmu == -1 || (sz = OF_getproplen(mmu, "translations")) == -1) sz = 0; if (sz > 6144 /* tmpstksz - 2 KB headroom */) panic("moea64_bootstrap: too many ofw translations"); if (sz > 0) moea64_add_ofw_mappings(mmup, mmu, sz); } /* * Calculate the last available physical address. */ Maxmem = 0; for (i = 0; phys_avail[i + 2] != 0; i += 2) Maxmem = max(Maxmem, powerpc_btop(phys_avail[i + 1])); /* * Initialize MMU. */ MMU_CPU_BOOTSTRAP(mmup,0); mtmsr(mfmsr() | PSL_DR | PSL_IR); pmap_bootstrapped++; /* * Set the start and end of kva. */ virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; /* * Map the entire KVA range into the SLB. We must not fault there. */ #ifdef __powerpc64__ for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH) moea64_bootstrap_slb_prefault(va, 0); #endif /* * Remap any early IO mappings (console framebuffer, etc.) */ bs_remap_earlyboot(); /* * Figure out how far we can extend virtual_end into segment 16 * without running into existing mappings. Segment 16 is guaranteed * to contain neither RAM nor devices (at least on Apple hardware), * but will generally contain some OFW mappings we should not * step on. */ #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */ PMAP_LOCK(kernel_pmap); while (virtual_end < VM_MAX_KERNEL_ADDRESS && moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL) virtual_end += PAGE_SIZE; PMAP_UNLOCK(kernel_pmap); #endif /* * Allocate a kernel stack with a guard page for thread0 and map it * into the kernel page map. */ pa = moea64_bootstrap_alloc(kstack_pages * PAGE_SIZE, PAGE_SIZE); va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; virtual_avail = va + kstack_pages * PAGE_SIZE; CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va); thread0.td_kstack = va; thread0.td_kstack_pages = kstack_pages; for (i = 0; i < kstack_pages; i++) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the message buffer. */ pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE); msgbufp = (struct msgbuf *)virtual_avail; va = virtual_avail; virtual_avail += round_page(msgbufsize); while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the dynamic percpu area. */ pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); dpcpu = (void *)virtual_avail; va = virtual_avail; virtual_avail += DPCPU_SIZE; while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } dpcpu_init(dpcpu, curcpu); /* * Allocate some things for page zeroing. We put this directly * in the page table and use MOEA64_PTE_REPLACE to avoid any * of the PVO book-keeping or other parts of the VM system * from even knowing that this hack exists. */ if (!hw_direct_map) { mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, MTX_DEF); for (i = 0; i < 2; i++) { moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; virtual_end -= PAGE_SIZE; moea64_kenter(mmup, moea64_scratchpage_va[i], 0); PMAP_LOCK(kernel_pmap); moea64_scratchpage_pvo[i] = moea64_pvo_find_va( kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); PMAP_UNLOCK(kernel_pmap); } } } static void moea64_pmap_init_qpages(void) { struct pcpu *pc; int i; if (hw_direct_map) return; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); if (pc->pc_qmap_addr == 0) panic("pmap_init_qpages: unable to allocate KVA"); PMAP_LOCK(kernel_pmap); pc->pc_aim.qmap_pvo = moea64_pvo_find_va(kernel_pmap, pc->pc_qmap_addr); PMAP_UNLOCK(kernel_pmap); mtx_init(&pc->pc_aim.qmap_lock, "qmap lock", NULL, MTX_DEF); } } SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, moea64_pmap_init_qpages, NULL); /* * Activate a user pmap. This mostly involves setting some non-CPU * state. */ void moea64_activate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_SET(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(aim.userslb, pm->pm_slb); __asm __volatile("slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #else PCPU_SET(curpmap, pm->pmap_phys); mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid); #endif } void moea64_deactivate(mmu_t mmu, struct thread *td) { pmap_t pm; __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR)); pm = &td->td_proc->p_vmspace->vm_pmap; CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(aim.userslb, NULL); #else PCPU_SET(curpmap, NULL); #endif } void moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry key, *pvo; vm_page_t m; int64_t refchg; key.pvo_vaddr = sva; PMAP_LOCK(pm); for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { if ((pvo->pvo_vaddr & PVO_WIRED) == 0) panic("moea64_unwire: pvo %p is missing PVO_WIRED", pvo); pvo->pvo_vaddr &= ~PVO_WIRED; refchg = MOEA64_PTE_REPLACE(mmu, pvo, 0 /* No invalidation */); if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { if (refchg < 0) refchg = LPTE_CHG; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(m); if (refchg & LPTE_REF) vm_page_aflag_set(m, PGA_REFERENCED); } pm->pm_stats.wired_count--; } PMAP_UNLOCK(pm); } /* * This goes through and sets the physical address of our * special scratch PTE to the PA we want to zero or copy. Because * of locking issues (this can get called in pvo_enter() by * the UMA allocator), we can't use most other utility functions here */ static __inline void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_paddr_t pa) { KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); moea64_scratchpage_pvo[which]->pvo_pte.pa = moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; MOEA64_PTE_REPLACE(mmup, moea64_scratchpage_pvo[which], MOEA64_PTE_INVALIDATE); isync(); } void moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) { vm_offset_t dst; vm_offset_t src; dst = VM_PAGE_TO_PHYS(mdst); src = VM_PAGE_TO_PHYS(msrc); if (hw_direct_map) { bcopy((void *)PHYS_TO_DMAP(src), (void *)PHYS_TO_DMAP(dst), PAGE_SIZE); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, src); moea64_set_scratchpage_pa(mmu, 1, dst); bcopy((void *)moea64_scratchpage_va[0], (void *)moea64_scratchpage_va[1], PAGE_SIZE); mtx_unlock(&moea64_scratchpage_mtx); } } static inline void moea64_copy_pages_dmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); a_cp = (char *)(uintptr_t)PHYS_TO_DMAP( VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])) + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); b_cp = (char *)(uintptr_t)PHYS_TO_DMAP( VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])) + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } static inline void moea64_copy_pages_nodmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; mtx_lock(&moea64_scratchpage_mtx); while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); moea64_set_scratchpage_pa(mmu, 0, VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); moea64_set_scratchpage_pa(mmu, 1, VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } mtx_unlock(&moea64_scratchpage_mtx); } void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { if (hw_direct_map) { moea64_copy_pages_dmap(mmu, ma, a_offset, mb, b_offset, xfersize); } else { moea64_copy_pages_nodmap(mmu, ma, a_offset, mb, b_offset, xfersize); } } void moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { vm_paddr_t pa = VM_PAGE_TO_PHYS(m); if (size + off > PAGE_SIZE) panic("moea64_zero_page: size + off > PAGE_SIZE"); if (hw_direct_map) { bzero((caddr_t)(uintptr_t)PHYS_TO_DMAP(pa) + off, size); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); bzero((caddr_t)moea64_scratchpage_va[0] + off, size); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Zero a page of physical memory by temporarily mapping it */ void moea64_zero_page(mmu_t mmu, vm_page_t m) { vm_paddr_t pa = VM_PAGE_TO_PHYS(m); vm_offset_t va, off; if (!hw_direct_map) { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); va = moea64_scratchpage_va[0]; } else { va = PHYS_TO_DMAP(pa); } for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 0,%0" :: "r"(va + off)); if (!hw_direct_map) mtx_unlock(&moea64_scratchpage_mtx); } vm_offset_t moea64_quick_enter_page(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; vm_paddr_t pa = VM_PAGE_TO_PHYS(m); if (hw_direct_map) return (PHYS_TO_DMAP(pa)); /* * MOEA64_PTE_REPLACE does some locking, so we can't just grab * a critical section and access the PCPU data like on i386. * Instead, pin the thread and grab the PCPU lock to prevent * a preempting thread from using the same PCPU data. */ sched_pin(); mtx_assert(PCPU_PTR(aim.qmap_lock), MA_NOTOWNED); pvo = PCPU_GET(aim.qmap_pvo); mtx_lock(PCPU_PTR(aim.qmap_lock)); pvo->pvo_pte.pa = moea64_calc_wimg(pa, pmap_page_get_memattr(m)) | (uint64_t)pa; MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE); isync(); return (PCPU_GET(qmap_addr)); } void moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr) { if (hw_direct_map) return; mtx_assert(PCPU_PTR(aim.qmap_lock), MA_OWNED); KASSERT(PCPU_GET(qmap_addr) == addr, ("moea64_quick_remove_page: invalid address")); mtx_unlock(PCPU_PTR(aim.qmap_lock)); sched_unpin(); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. */ int moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { struct pvo_entry *pvo, *oldpvo; struct pvo_head *pvo_head; uint64_t pte_lo; int error; if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pvo = alloc_pvo_entry(0); pvo->pvo_pmap = NULL; /* to be filled in later */ pvo->pvo_pte.prot = prot; pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo; if ((flags & PMAP_ENTER_WIRED) != 0) pvo->pvo_vaddr |= PVO_WIRED; if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) { pvo_head = NULL; } else { pvo_head = &m->md.mdpg_pvoh; pvo->pvo_vaddr |= PVO_MANAGED; } for (;;) { PV_PAGE_LOCK(m); PMAP_LOCK(pmap); if (pvo->pvo_pmap == NULL) init_pvo_entry(pvo, pmap, va); if (prot & VM_PROT_WRITE) if (pmap_bootstrapped && (m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); oldpvo = moea64_pvo_find_va(pmap, va); if (oldpvo != NULL) { if (oldpvo->pvo_vaddr == pvo->pvo_vaddr && oldpvo->pvo_pte.pa == pvo->pvo_pte.pa && oldpvo->pvo_pte.prot == prot) { /* Identical mapping already exists */ error = 0; /* If not in page table, reinsert it */ if (MOEA64_PTE_SYNCH(mmu, oldpvo) < 0) { moea64_pte_overflow--; MOEA64_PTE_INSERT(mmu, oldpvo); } /* Then just clean up and go home */ PV_PAGE_UNLOCK(m); PMAP_UNLOCK(pmap); free_pvo_entry(pvo); break; } /* Otherwise, need to kill it first */ KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old " "mapping does not match new mapping")); moea64_pvo_remove_from_pmap(mmu, oldpvo); } error = moea64_pvo_enter(mmu, pvo, pvo_head); PV_PAGE_UNLOCK(m); PMAP_UNLOCK(pmap); /* Free any dead pages */ if (oldpvo != NULL) { PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, oldpvo); PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); free_pvo_entry(oldpvo); } if (error != ENOMEM) break; if ((flags & PMAP_ENTER_NOSLEEP) != 0) return (KERN_RESOURCE_SHORTAGE); VM_OBJECT_ASSERT_UNLOCKED(m->object); vm_wait(NULL); } /* * Flush the page from the instruction cache if this page is * mapped executable and cacheable. */ if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) && (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { vm_page_aflag_set(m, PGA_EXECUTABLE); moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); } return (KERN_SUCCESS); } static void moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_size_t sz) { /* * This is much trickier than on older systems because * we can't sync the icache on physical addresses directly * without a direct map. Instead we check a couple of cases * where the memory is already mapped in and, failing that, * use the same trick we use for page zeroing to create * a temporary mapping for this physical address. */ if (!pmap_bootstrapped) { /* * If PMAP is not bootstrapped, we are likely to be * in real mode. */ __syncicache((void *)(uintptr_t)pa, sz); } else if (pmap == kernel_pmap) { __syncicache((void *)va, sz); } else if (hw_direct_map) { __syncicache((void *)(uintptr_t)PHYS_TO_DMAP(pa), sz); } else { /* Use the scratch page to set up a temp mapping */ mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); __syncicache((void *)(moea64_scratchpage_va[1] + (va & ADDR_POFF)), sz); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); m = m_start; while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea64_enter(mmu, pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 0); m = TAILQ_NEXT(m, listq); } } void moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { moea64_enter(mmu, pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 0); } vm_paddr_t moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; PMAP_LOCK(pm); pvo = moea64_pvo_find_va(pm, va); if (pvo == NULL) pa = 0; else pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(pm); return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. */ vm_page_t moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; vm_paddr_t pa; m = NULL; pa = 0; PMAP_LOCK(pmap); retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pa & LPTE_RPGN, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); vm_page_hold(m); } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } static mmu_t installed_mmu; static void * moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, int wait) { struct pvo_entry *pvo; vm_offset_t va; vm_page_t m; int needed_lock; /* * This entire routine is a horrible hack to avoid bothering kmem * for new KVA addresses. Because this can get called from inside * kmem allocation routines, calling kmem for a new address here * can lead to multiply locking non-recursive mutexes. */ *flags = UMA_SLAB_PRIV; needed_lock = !PMAP_LOCKED(kernel_pmap); m = vm_page_alloc_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); if (m == NULL) return (NULL); va = VM_PAGE_TO_PHYS(m); pvo = alloc_pvo_entry(1 /* bootstrap */); pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE; pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M; if (needed_lock) PMAP_LOCK(kernel_pmap); init_pvo_entry(pvo, kernel_pmap, va); pvo->pvo_vaddr |= PVO_WIRED; moea64_pvo_enter(installed_mmu, pvo, NULL); if (needed_lock) PMAP_UNLOCK(kernel_pmap); if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) bzero((void *)va, PAGE_SIZE); return (void *)va; } extern int elf32_nxstack; void moea64_init(mmu_t mmu) { CTR0(KTR_PMAP, "moea64_init"); moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); if (!hw_direct_map) { installed_mmu = mmu; uma_zone_set_allocf(moea64_pvo_zone, moea64_uma_page_alloc); } #ifdef COMPAT_FREEBSD32 elf32_nxstack = 1; #endif moea64_initialized = TRUE; } boolean_t moea64_is_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_referenced: page %p is not managed", m)); return (moea64_query_bit(mmu, m, LPTE_REF)); } boolean_t moea64_is_modified(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have LPTE_CHG set. */ VM_OBJECT_ASSERT_LOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (moea64_query_bit(mmu, m, LPTE_CHG)); } boolean_t moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { struct pvo_entry *pvo; boolean_t rv = TRUE; PMAP_LOCK(pmap); pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL) rv = FALSE; PMAP_UNLOCK(pmap); return (rv); } void moea64_clear_modify(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("moea64_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG * set. If the object containing the page is locked and the page is * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; moea64_clear_bit(mmu, m, LPTE_CHG); } /* * Clear the write and modified bits in each of the given page's mappings. */ void moea64_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int64_t refchg, ret; pmap_t pmap; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; powerpc_sync(); PV_PAGE_LOCK(m); refchg = 0; LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { pvo->pvo_pte.prot &= ~VM_PROT_WRITE; ret = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE); if (ret < 0) ret = LPTE_CHG; refchg |= ret; if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG) vm_page_dirty(m); vm_page_aflag_clear(m, PGA_WRITEABLE); PV_PAGE_UNLOCK(m); } /* * moea64_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int moea64_ts_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_ts_referenced: page %p is not managed", m)); return (moea64_clear_bit(mmu, m, LPTE_REF)); } /* * Modify the WIMG settings of all mappings for a page. */ void moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; int64_t refchg; pmap_t pmap; uint64_t lo; if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; } lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) { pvo->pvo_pte.pa &= ~LPTE_WIMG; pvo->pvo_pte.pa |= lo; refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE); if (refchg < 0) refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ? LPTE_CHG : 0; if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(m); if (refchg & LPTE_REF) vm_page_aflag_set(m, PGA_REFERENCED); } if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } m->md.mdpg_cache_attrs = ma; PV_PAGE_UNLOCK(m); } /* * Map a wired page into kernel virtual address space. */ void moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) { int error; struct pvo_entry *pvo, *oldpvo; pvo = alloc_pvo_entry(0); pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma); pvo->pvo_vaddr |= PVO_WIRED; PMAP_LOCK(kernel_pmap); oldpvo = moea64_pvo_find_va(kernel_pmap, va); if (oldpvo != NULL) moea64_pvo_remove_from_pmap(mmu, oldpvo); init_pvo_entry(pvo, kernel_pmap, va); error = moea64_pvo_enter(mmu, pvo, NULL); PMAP_UNLOCK(kernel_pmap); /* Free any dead pages */ if (oldpvo != NULL) { PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, oldpvo); PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); free_pvo_entry(oldpvo); } if (error != 0 && error != ENOENT) panic("moea64_kenter: failed to enter va %#zx pa %#jx: %d", va, (uintmax_t)pa, error); } void moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) { moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); } /* * Extract the physical page address associated with the given kernel virtual * address. */ vm_paddr_t moea64_kextract(mmu_t mmu, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; /* * Shortcut the direct-mapped case when applicable. We never put * anything but 1:1 (or 62-bit aliased) mappings below * VM_MIN_KERNEL_ADDRESS. */ if (va < VM_MIN_KERNEL_ADDRESS) return (va & ~DMAP_BASE_ADDRESS); PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, va); KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, va)); pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(kernel_pmap); return (pa); } /* * Remove a wired page from kernel virtual address space. */ void moea64_kremove(mmu_t mmu, vm_offset_t va) { moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); } /* * Provide a kernel pointer corresponding to a given userland pointer. * The returned pointer is valid until the next time this function is * called in this thread. This is used internally in copyin/copyout. */ static int moea64_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen) { size_t l; #ifdef __powerpc64__ struct slb *slb; #endif register_t slbv; *kaddr = (char *)USER_ADDR + ((uintptr_t)uaddr & ~SEGMENT_MASK); l = ((char *)USER_ADDR + SEGMENT_LENGTH) - (char *)(*kaddr); if (l > ulen) l = ulen; if (klen) *klen = l; else if (l != ulen) return (EFAULT); #ifdef __powerpc64__ /* Try lockless look-up first */ slb = user_va_to_slb_entry(pm, (vm_offset_t)uaddr); if (slb == NULL) { /* If it isn't there, we need to pre-fault the VSID */ PMAP_LOCK(pm); slbv = va_to_vsid(pm, (vm_offset_t)uaddr) << SLBV_VSID_SHIFT; PMAP_UNLOCK(pm); } else { slbv = slb->slbv; } /* Mark segment no-execute */ slbv |= SLBV_N; #else slbv = va_to_vsid(pm, (vm_offset_t)uaddr); /* Mark segment no-execute */ slbv |= SR_N; #endif /* If we have already set this VSID, we can just return */ if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == slbv) return (0); __asm __volatile("isync"); curthread->td_pcb->pcb_cpu.aim.usr_segm = (uintptr_t)uaddr >> ADDR_SR_SHFT; curthread->td_pcb->pcb_cpu.aim.usr_vsid = slbv; #ifdef __powerpc64__ __asm __volatile ("slbie %0; slbmte %1, %2; isync" :: "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE)); #else __asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), "r"(slbv)); #endif return (0); } /* * Figure out where a given kernel pointer (usually in a fault) points * to from the VM's perspective, potentially remapping into userland's * address space. */ static int moea64_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, vm_offset_t *decoded_addr) { vm_offset_t user_sr; if ((addr >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) { user_sr = curthread->td_pcb->pcb_cpu.aim.usr_segm; addr &= ADDR_PIDX | ADDR_POFF; addr |= user_sr << ADDR_SR_SHFT; *decoded_addr = addr; *is_user = 1; } else { *decoded_addr = addr; *is_user = 0; } return (0); } /* * Map a range of physical addresses into kernel virtual address space. * * The value passed in *virt is a suggested virtual address for the mapping. * Architectures which can support a direct-mapped physical to virtual region * can return the appropriate address within that region, leaving '*virt' * unchanged. Other architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped region. */ vm_offset_t moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva, va; if (hw_direct_map) { /* * Check if every page in the region is covered by the direct * map. The direct map covers all of physical memory. Use * moea64_calc_wimg() as a shortcut to see if the page is in * physical memory as a way to see if the direct map covers it. */ for (va = pa_start; va < pa_end; va += PAGE_SIZE) if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M) break; if (va == pa_end) return (PHYS_TO_DMAP(pa_start)); } sva = *virt; va = sva; /* XXX respect prot argument */ for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) moea64_kenter(mmu, va, pa_start); *virt = va; return (sva); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) { int loops; struct pvo_entry *pvo; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } PV_PAGE_UNLOCK(m); return (rv); } void moea64_page_init(mmu_t mmu __unused, vm_page_t m) { m->md.mdpg_attrs = 0; m->md.mdpg_cache_attrs = VM_MEMATTR_DEFAULT; LIST_INIT(&m->md.mdpg_pvoh); } /* * Return the number of managed mappings to the given physical page * that are wired. */ int moea64_page_wired_mappings(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED) count++; PV_PAGE_UNLOCK(m); return (count); } static uintptr_t moea64_vsidcontext; uintptr_t moea64_get_unique_vsid(void) { u_int entropy; register_t hash; uint32_t mask; int i; entropy = 0; __asm __volatile("mftb %0" : "=r"(entropy)); mtx_lock(&moea64_slb_mutex); for (i = 0; i < NVSIDS; i += VSID_NBPW) { u_int n; /* * Create a new value by mutiplying by a prime and adding in * entropy from the timebase register. This is to make the * VSID more random so that the PT hash function collides * less often. (Note that the prime casues gcc to do shifts * instead of a multiply.) */ moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy; hash = moea64_vsidcontext & (NVSIDS - 1); if (hash == 0) /* 0 is special, avoid it */ continue; n = hash >> 5; mask = 1 << (hash & (VSID_NBPW - 1)); hash = (moea64_vsidcontext & VSID_HASHMASK); if (moea64_vsid_bitmap[n] & mask) { /* collision? */ /* anything free in this bucket? */ if (moea64_vsid_bitmap[n] == 0xffffffff) { entropy = (moea64_vsidcontext >> 20); continue; } i = ffs(~moea64_vsid_bitmap[n]) - 1; mask = 1 << i; hash &= rounddown2(VSID_HASHMASK, VSID_NBPW); hash |= i; } if (hash == VSID_VRMA) /* also special, avoid this too */ continue; KASSERT(!(moea64_vsid_bitmap[n] & mask), ("Allocating in-use VSID %#zx\n", hash)); moea64_vsid_bitmap[n] |= mask; mtx_unlock(&moea64_slb_mutex); return (hash); } mtx_unlock(&moea64_slb_mutex); panic("%s: out of segments",__func__); } #ifdef __powerpc64__ void moea64_pinit(mmu_t mmu, pmap_t pmap) { RB_INIT(&pmap->pmap_pvo); pmap->pm_slb_tree_root = slb_alloc_tree(); pmap->pm_slb = slb_alloc_user_cache(); pmap->pm_slb_len = 0; } #else void moea64_pinit(mmu_t mmu, pmap_t pmap) { int i; uint32_t hash; RB_INIT(&pmap->pmap_pvo); if (pmap_bootstrapped) pmap->pmap_phys = (pmap_t)moea64_kextract(mmu, (vm_offset_t)pmap); else pmap->pmap_phys = pmap; /* * Allocate some segment registers for this pmap. */ hash = moea64_get_unique_vsid(); for (i = 0; i < 16; i++) pmap->pm_sr[i] = VSID_MAKE(i, hash); KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0")); } #endif /* * Initialize the pmap associated with process 0. */ void moea64_pinit0(mmu_t mmu, pmap_t pm) { PMAP_LOCK_INIT(pm); moea64_pinit(mmu, pm); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); } /* * Set the physical protection on the specified range of this map as requested. */ static void moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot) { struct vm_page *pg; vm_prot_t oldprot; int32_t refchg; PMAP_LOCK_ASSERT(pm, MA_OWNED); /* * Change the protection of the page. */ oldprot = pvo->pvo_pte.prot; pvo->pvo_pte.prot = prot; pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); /* * If the PVO is in the page table, update mapping */ refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE); if (refchg < 0) refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0; if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { if ((pg->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(pg, PGA_EXECUTABLE); moea64_syncicache(mmu, pm, PVO_VADDR(pvo), pvo->pvo_pte.pa & LPTE_RPGN, PAGE_SIZE); } /* * Update vm about the REF/CHG bits if the page is managed and we have * removed write access. */ if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) && (oldprot & VM_PROT_WRITE)) { refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(pg); if (refchg & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); } } void moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { struct pvo_entry *pvo, *tpvo, key; CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm, sva, eva, prot); KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, ("moea64_protect: non current pmap")); if ((prot & VM_PROT_READ) == VM_PROT_NONE) { moea64_remove(mmu, pm, sva, eva); return; } PMAP_LOCK(pm); key.pvo_vaddr = sva; for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); moea64_pvo_protect(mmu, pm, pvo, prot); } PMAP_UNLOCK(pm); } /* * Map a list of wired pages into kernel virtual address space. This is * intended for temporary mappings which do not need page modification or * references recorded. Existing mappings in the region are overwritten. */ void moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count) { while (count-- > 0) { moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } } /* * Remove page mappings from kernel virtual address space. Intended for * temporary mappings entered by moea64_qenter. */ void moea64_qremove(mmu_t mmu, vm_offset_t va, int count) { while (count-- > 0) { moea64_kremove(mmu, va); va += PAGE_SIZE; } } void moea64_release_vsid(uint64_t vsid) { int idx, mask; mtx_lock(&moea64_slb_mutex); idx = vsid & (NVSIDS-1); mask = 1 << (idx % VSID_NBPW); idx /= VSID_NBPW; KASSERT(moea64_vsid_bitmap[idx] & mask, ("Freeing unallocated VSID %#jx", vsid)); moea64_vsid_bitmap[idx] &= ~mask; mtx_unlock(&moea64_slb_mutex); } void moea64_release(mmu_t mmu, pmap_t pmap) { /* * Free segment registers' VSIDs */ #ifdef __powerpc64__ slb_free_tree(pmap); slb_free_user_cache(pmap->pm_slb); #else KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0")); moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0])); #endif } /* * Remove all pages mapped by the specified pmap */ void moea64_remove_pages(mmu_t mmu, pmap_t pm) { struct pvo_entry *pvo, *tpvo; struct pvo_tree tofree; RB_INIT(&tofree); PMAP_LOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) { if (pvo->pvo_vaddr & PVO_WIRED) continue; /* * For locking reasons, remove this from the page table and * pmap, but save delinking from the vm_page for a second * pass */ moea64_pvo_remove_from_pmap(mmu, pvo); RB_INSERT(pvo_tree, &tofree, pvo); } PMAP_UNLOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) { PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, pvo); PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN); RB_REMOVE(pvo_tree, &tofree, pvo); free_pvo_entry(pvo); } } /* * Remove the given range of addresses from the specified map. */ void moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry *pvo, *tpvo, key; struct pvo_tree tofree; /* * Perform an unsynchronized read. This is, however, safe. */ if (pm->pm_stats.resident_count == 0) return; key.pvo_vaddr = sva; RB_INIT(&tofree); PMAP_LOCK(pm); for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); /* * For locking reasons, remove this from the page table and * pmap, but save delinking from the vm_page for a second * pass */ moea64_pvo_remove_from_pmap(mmu, pvo); RB_INSERT(pvo_tree, &tofree, pvo); } PMAP_UNLOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) { PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, pvo); PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN); RB_REMOVE(pvo_tree, &tofree, pvo); free_pvo_entry(pvo); } } /* * Remove physical page from all pmaps in which it resides. moea64_pvo_remove() * will reflect changes in pte's back to the vm_page. */ void moea64_remove_all(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo, *next_pvo; struct pvo_head freequeue; int wasdead; pmap_t pmap; LIST_INIT(&freequeue); PV_PAGE_LOCK(m); LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); wasdead = (pvo->pvo_vaddr & PVO_DEAD); if (!wasdead) moea64_pvo_remove_from_pmap(mmu, pvo); moea64_pvo_remove_from_page(mmu, pvo); if (!wasdead) LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink); PMAP_UNLOCK(pmap); } KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings")); KASSERT(!(m->aflags & PGA_WRITEABLE), ("Page still writable")); PV_PAGE_UNLOCK(m); /* Clean up UMA allocations */ LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo) free_pvo_entry(pvo); } /* * Allocate a physical page of memory directly from the phys_avail map. * Can only be called from moea64_bootstrap before avail start and end are * calculated. */ vm_offset_t moea64_bootstrap_alloc(vm_size_t size, u_int align) { vm_offset_t s, e; int i, j; size = round_page(size); for (i = 0; phys_avail[i + 1] != 0; i += 2) { if (align != 0) s = roundup2(phys_avail[i], align); else s = phys_avail[i]; e = s + size; if (s < phys_avail[i] || e > phys_avail[i + 1]) continue; if (s + size > platform_real_maxaddr()) continue; if (s == phys_avail[i]) { phys_avail[i] += size; } else if (e == phys_avail[i + 1]) { phys_avail[i + 1] -= size; } else { for (j = phys_avail_count * 2; j > i; j -= 2) { phys_avail[j] = phys_avail[j - 2]; phys_avail[j + 1] = phys_avail[j - 1]; } phys_avail[i + 3] = phys_avail[i + 1]; phys_avail[i + 1] = s; phys_avail[i + 2] = e; phys_avail_count++; } return (s); } panic("moea64_bootstrap_alloc: could not allocate memory"); } static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head) { int first, err; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); KASSERT(moea64_pvo_find_va(pvo->pvo_pmap, PVO_VADDR(pvo)) == NULL, ("Existing mapping for VA %#jx", (uintmax_t)PVO_VADDR(pvo))); moea64_pvo_enter_calls++; /* * Add to pmap list */ RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Remember if the list was empty and therefore will be the first * item. */ if (pvo_head != NULL) { if (LIST_FIRST(pvo_head) == NULL) first = 1; LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); } if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count++; pvo->pvo_pmap->pm_stats.resident_count++; /* * Insert it into the hardware page table */ err = MOEA64_PTE_INSERT(mmu, pvo); if (err != 0) { panic("moea64_pvo_enter: overflow"); } moea64_pvo_entries++; if (pvo->pvo_pmap == kernel_pmap) isync(); #ifdef __powerpc64__ /* * Make sure all our bootstrap mappings are in the SLB as soon * as virtual memory is switched on. */ if (!pmap_bootstrapped) moea64_bootstrap_slb_prefault(PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); #endif return (first ? ENOENT : 0); } static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo) { struct vm_page *pg; int32_t refchg; KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap")); PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO")); /* * If there is an active pte entry, we need to deactivate it */ refchg = MOEA64_PTE_UNSET(mmu, pvo); if (refchg < 0) { /* * If it was evicted from the page table, be pessimistic and * dirty the page. */ if (pvo->pvo_pte.prot & VM_PROT_WRITE) refchg = LPTE_CHG; else refchg = 0; } /* * Update our statistics. */ pvo->pvo_pmap->pm_stats.resident_count--; if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count--; /* * Remove this PVO from the pmap list. */ RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Mark this for the next sweep */ pvo->pvo_vaddr |= PVO_DEAD; /* Send RC bits to VM */ if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); if (pg != NULL) { refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(pg); if (refchg & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); } } } static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo) { struct vm_page *pg; KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page")); /* Use NULL pmaps as a sentinel for races in page deletion */ if (pvo->pvo_pmap == NULL) return; pvo->pvo_pmap = NULL; /* * Update vm about page writeability/executability if managed */ PV_LOCKASSERT(pvo->pvo_pte.pa & LPTE_RPGN); if (pvo->pvo_vaddr & PVO_MANAGED) { pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); if (pg != NULL) { LIST_REMOVE(pvo, pvo_vlink); if (LIST_EMPTY(vm_page_to_pvoh(pg))) vm_page_aflag_clear(pg, PGA_WRITEABLE | PGA_EXECUTABLE); } } moea64_pvo_entries--; moea64_pvo_remove_calls++; } static struct pvo_entry * moea64_pvo_find_va(pmap_t pm, vm_offset_t va) { struct pvo_entry key; PMAP_LOCK_ASSERT(pm, MA_OWNED); key.pvo_vaddr = va & ~ADDR_POFF; return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key)); } static boolean_t moea64_query_bit(mmu_t mmu, vm_page_t m, uint64_t ptebit) { struct pvo_entry *pvo; int64_t ret; boolean_t rv; /* * See if this bit is stored in the page already. */ if (m->md.mdpg_attrs & ptebit) return (TRUE); /* * Examine each PTE. Sync so that any pending REF/CHG bits are * flushed to the PTEs. */ rv = FALSE; powerpc_sync(); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { ret = 0; /* * See if this pvo has a valid PTE. if so, fetch the * REF/CHG bits from the valid PTE. If the appropriate * ptebit is set, return success. */ PMAP_LOCK(pvo->pvo_pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) ret = MOEA64_PTE_SYNCH(mmu, pvo); PMAP_UNLOCK(pvo->pvo_pmap); if (ret > 0) { atomic_set_32(&m->md.mdpg_attrs, ret & (LPTE_CHG | LPTE_REF)); if (ret & ptebit) { rv = TRUE; break; } } } PV_PAGE_UNLOCK(m); return (rv); } static u_int moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { u_int count; struct pvo_entry *pvo; int64_t ret; /* * Sync so that any pending REF/CHG bits are flushed to the PTEs (so * we can reset the right ones). */ powerpc_sync(); /* * For each pvo entry, clear the pte's ptebit. */ count = 0; PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { ret = 0; PMAP_LOCK(pvo->pvo_pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) ret = MOEA64_PTE_CLEAR(mmu, pvo, ptebit); PMAP_UNLOCK(pvo->pvo_pmap); if (ret > 0 && (ret & ptebit)) count++; } atomic_clear_32(&m->md.mdpg_attrs, ptebit); PV_PAGE_UNLOCK(m); return (count); } boolean_t moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { struct pvo_entry *pvo, key; vm_offset_t ppa; int error = 0; if (hw_direct_map && mem_valid(pa, size) == 0) return (0); PMAP_LOCK(kernel_pmap); ppa = pa & ~ADDR_POFF; key.pvo_vaddr = DMAP_BASE_ADDRESS + ppa; for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key); ppa < pa + size; ppa += PAGE_SIZE, pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) { if (pvo == NULL || (pvo->pvo_pte.pa & LPTE_RPGN) != ppa) { error = EFAULT; break; } } PMAP_UNLOCK(kernel_pmap); return (error); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * moea64_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) { vm_offset_t va, tmpva, ppa, offset; ppa = trunc_page(pa); offset = pa & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); va = kva_alloc(size); if (!va) panic("moea64_mapdev: Couldn't alloc kernel virtual memory"); for (tmpva = va; size > 0;) { moea64_kenter_attr(mmu, tmpva, ppa, ma); size -= PAGE_SIZE; tmpva += PAGE_SIZE; ppa += PAGE_SIZE; } return ((void *)(va + offset)); } void * moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT); } void moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) { vm_offset_t base, offset; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); kva_free(base, size); } void moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { struct pvo_entry *pvo; vm_offset_t lim; vm_paddr_t pa; vm_size_t len; PMAP_LOCK(pm); while (sz > 0) { lim = round_page(va); len = MIN(lim - va, sz); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) { pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va & ADDR_POFF); moea64_syncicache(mmu, pm, va, pa, len); } va += len; sz -= len; } PMAP_UNLOCK(pm); } void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) { *va = (void *)(uintptr_t)pa; } extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; void moea64_scan_init(mmu_t mmu) { struct pvo_entry *pvo; vm_offset_t va; int i; if (!do_minidump) { /* Initialize phys. segments for dumpsys(). */ memset(&dump_map, 0, sizeof(dump_map)); mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); for (i = 0; i < pregions_sz; i++) { dump_map[i].pa_start = pregions[i].mr_start; dump_map[i].pa_size = pregions[i].mr_size; } return; } /* Virtual segments for minidumps: */ memset(&dump_map, 0, sizeof(dump_map)); /* 1st: kernel .data and .bss. */ dump_map[0].pa_start = trunc_page((uintptr_t)_etext); dump_map[0].pa_size = round_page((uintptr_t)_end) - dump_map[0].pa_start; /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ dump_map[1].pa_start = (vm_paddr_t)(uintptr_t)msgbufp->msg_ptr; dump_map[1].pa_size = round_page(msgbufp->msg_size); /* 3rd: kernel VM. */ va = dump_map[1].pa_start + dump_map[1].pa_size; /* Find start of next chunk (from va). */ while (va < virtual_end) { /* Don't dump the buffer cache. */ if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { va = kmi.buffer_eva; continue; } pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD)) break; va += PAGE_SIZE; } if (va < virtual_end) { dump_map[2].pa_start = va; va += PAGE_SIZE; /* Find last page in chunk. */ while (va < virtual_end) { /* Don't run into the buffer cache. */ if (va == kmi.buffer_sva) break; pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD)) break; va += PAGE_SIZE; } dump_map[2].pa_size = va - dump_map[2].pa_start; } } diff --git a/sys/powerpc/aim/mmu_oea64.h b/sys/powerpc/aim/mmu_oea64.h index 43dc646cf28b..ba300049ed4b 100644 --- a/sys/powerpc/aim/mmu_oea64.h +++ b/sys/powerpc/aim/mmu_oea64.h @@ -1,89 +1,89 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2010 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _POWERPC_AIM_MMU_OEA64_H #define _POWERPC_AIM_MMU_OEA64_H #include extern mmu_def_t oea64_mmu; /* * Helper routines */ /* Allocate physical memory for use in moea64_bootstrap. */ vm_offset_t moea64_bootstrap_alloc(vm_size_t, u_int); /* Set an LPTE structure to match the contents of a PVO */ void moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte); /* * Flags */ #define MOEA64_PTE_PROT_UPDATE 1 #define MOEA64_PTE_INVALIDATE 2 /* * Bootstrap subroutines * * An MMU_BOOTSTRAP() implementation looks like this: * moea64_early_bootstrap(); * Allocate Page Table * moea64_mid_bootstrap(); * Add mappings for MMU resources * moea64_late_bootstrap(); */ void moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); void moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); void moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); /* * Statistics */ extern u_int moea64_pte_valid; extern u_int moea64_pte_overflow; /* * State variables */ extern int moea64_large_page_shift; extern uint64_t moea64_large_page_size; -extern u_int moea64_pteg_count; -extern u_int moea64_pteg_mask; +extern u_long moea64_pteg_count; +extern u_long moea64_pteg_mask; extern int n_slbs; #endif /* _POWERPC_AIM_MMU_OEA64_H */ diff --git a/sys/powerpc/aim/moea64_native.c b/sys/powerpc/aim/moea64_native.c index cb27cfca34ce..e29fe89b7771 100644 --- a/sys/powerpc/aim/moea64_native.c +++ b/sys/powerpc/aim/moea64_native.c @@ -1,681 +1,709 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND 4-Clause-BSD * * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas of Allegro Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ */ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Native 64-bit page table operations for running without a hypervisor. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" #define PTESYNC() __asm __volatile("ptesync"); #define TLBSYNC() __asm __volatile("tlbsync; ptesync"); #define SYNC() __asm __volatile("sync"); #define EIEIO() __asm __volatile("eieio"); #define VSID_HASH_MASK 0x0000007fffffffffULL +/* POWER9 only permits a 64k partition table size. */ +#define PART_SIZE 0x10000 + static __inline void TLBIE(uint64_t vpn) { #ifndef __powerpc64__ register_t vpn_hi, vpn_lo; register_t msr; register_t scratch, intr; #endif static volatile u_int tlbie_lock = 0; vpn <<= ADDR_PIDX_SHFT; vpn &= ~(0xffffULL << 48); /* Hobo spinlock: we need stronger guarantees than mutexes provide */ while (!atomic_cmpset_int(&tlbie_lock, 0, 1)); isync(); /* Flush instruction queue once lock acquired */ #ifdef __powerpc64__ __asm __volatile("tlbie %0" :: "r"(vpn) : "memory"); __asm __volatile("eieio; tlbsync; ptesync" ::: "memory"); #else vpn_hi = (uint32_t)(vpn >> 32); vpn_lo = (uint32_t)vpn; intr = intr_disable(); __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%5,1,0; \ mtmsrd %1; isync; \ \ sld %1,%2,%4; \ or %1,%1,%3; \ tlbie %1; \ \ mtmsrd %0; isync; \ eieio; \ tlbsync; \ ptesync;" : "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) : "memory"); intr_restore(intr); #endif /* No barriers or special ops -- taken care of by ptesync above */ tlbie_lock = 0; } #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) /* * PTEG data. */ +static volatile struct pate *moea64_part_table; static volatile struct lpte *moea64_pteg_table; static struct rwlock moea64_eviction_lock; /* * PTE calls. */ static int moea64_pte_insert_native(mmu_t, struct pvo_entry *); static int64_t moea64_pte_synch_native(mmu_t, struct pvo_entry *); static int64_t moea64_pte_clear_native(mmu_t, struct pvo_entry *, uint64_t); static int64_t moea64_pte_replace_native(mmu_t, struct pvo_entry *, int); static int64_t moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *); /* * Utility routines. */ static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void moea64_cpu_bootstrap_native(mmu_t, int ap); static void tlbia(void); static mmu_method_t moea64_native_methods[] = { /* Internal interfaces */ MMUMETHOD(mmu_bootstrap, moea64_bootstrap_native), MMUMETHOD(mmu_cpu_bootstrap, moea64_cpu_bootstrap_native), MMUMETHOD(moea64_pte_synch, moea64_pte_synch_native), MMUMETHOD(moea64_pte_clear, moea64_pte_clear_native), MMUMETHOD(moea64_pte_unset, moea64_pte_unset_native), MMUMETHOD(moea64_pte_replace, moea64_pte_replace_native), MMUMETHOD(moea64_pte_insert, moea64_pte_insert_native), { 0, 0 } }; MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, 0, oea64_mmu); static int64_t moea64_pte_synch_native(mmu_t mmu, struct pvo_entry *pvo) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; uint64_t ptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { /* Evicted */ rw_runlock(&moea64_eviction_lock); return (-1); } PTESYNC(); ptelo = be64toh(pt->pte_lo); rw_runlock(&moea64_eviction_lock); return (ptelo & (LPTE_REF | LPTE_CHG)); } static int64_t moea64_pte_clear_native(mmu_t mmu, struct pvo_entry *pvo, uint64_t ptebit) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; uint64_t ptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { /* Evicted */ rw_runlock(&moea64_eviction_lock); return (-1); } if (ptebit == LPTE_REF) { /* See "Resetting the Reference Bit" in arch manual */ PTESYNC(); /* 2-step here safe: precision is not guaranteed */ ptelo = be64toh(pt->pte_lo); /* One-byte store to avoid touching the C bit */ ((volatile uint8_t *)(&pt->pte_lo))[6] = #if BYTE_ORDER == BIG_ENDIAN ((uint8_t *)(&properpt.pte_lo))[6]; #else ((uint8_t *)(&properpt.pte_lo))[1]; #endif rw_runlock(&moea64_eviction_lock); critical_enter(); TLBIE(pvo->pvo_vpn); critical_exit(); } else { rw_runlock(&moea64_eviction_lock); ptelo = moea64_pte_unset_native(mmu, pvo); moea64_pte_insert_native(mmu, pvo); } return (ptelo & (LPTE_REF | LPTE_CHG)); } static int64_t moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *pvo) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; uint64_t ptelo; moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); if ((be64toh(pt->pte_hi & LPTE_AVPN_MASK)) != (properpt.pte_hi & LPTE_AVPN_MASK)) { /* Evicted */ moea64_pte_overflow--; rw_runlock(&moea64_eviction_lock); return (-1); } /* * Invalidate the pte, briefly locking it to collect RC bits. No * atomics needed since this is protected against eviction by the lock. */ isync(); critical_enter(); pt->pte_hi = be64toh((pt->pte_hi & ~LPTE_VALID) | LPTE_LOCKED); PTESYNC(); TLBIE(pvo->pvo_vpn); ptelo = be64toh(pt->pte_lo); *((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */ critical_exit(); rw_runlock(&moea64_eviction_lock); /* Keep statistics */ moea64_pte_valid--; return (ptelo & (LPTE_CHG | LPTE_REF)); } static int64_t moea64_pte_replace_native(mmu_t mmu, struct pvo_entry *pvo, int flags) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; int64_t ptelo; if (flags == 0) { /* Just some software bits changing. */ moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { rw_runlock(&moea64_eviction_lock); return (-1); } pt->pte_hi = htobe64(properpt.pte_hi); ptelo = be64toh(pt->pte_lo); rw_runlock(&moea64_eviction_lock); } else { /* Otherwise, need reinsertion and deletion */ ptelo = moea64_pte_unset_native(mmu, pvo); moea64_pte_insert_native(mmu, pvo); } return (ptelo); } static void moea64_cpu_bootstrap_native(mmu_t mmup, int ap) { int i = 0; #ifdef __powerpc64__ struct slb *slb = PCPU_GET(aim.slb); register_t seg0; #endif /* * Initialize segment registers and MMU */ mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); /* * Install kernel SLB entries */ #ifdef __powerpc64__ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < n_slbs; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } #else for (i = 0; i < 16; i++) mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); #endif /* * Install page table */ - __asm __volatile ("ptesync; mtsdr1 %0; isync" - :: "r"(((uintptr_t)moea64_pteg_table & ~DMAP_BASE_ADDRESS) - | (uintptr_t)(flsl(moea64_pteg_mask >> 11)))); + if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) { + mtspr(SPR_PTCR, + ((uintptr_t)moea64_part_table & ~DMAP_BASE_ADDRESS) | + flsl((PART_SIZE >> 12) - 1)); + } else { + __asm __volatile ("ptesync; mtsdr1 %0; isync" + :: "r"(((uintptr_t)moea64_pteg_table & ~DMAP_BASE_ADDRESS) + | (uintptr_t)(flsl(moea64_pteg_mask >> 11)))); + } tlbia(); } static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { vm_size_t size; vm_offset_t off; vm_paddr_t pa; register_t msr; moea64_early_bootstrap(mmup, kernelstart, kernelend); /* * Allocate PTEG table. */ size = moea64_pteg_count * sizeof(struct lpteg); CTR2(KTR_PMAP, "moea64_bootstrap: %d PTEGs, %d bytes", moea64_pteg_count, size); rw_init(&moea64_eviction_lock, "pte eviction"); /* * We now need to allocate memory. This memory, to be allocated, * has to reside in a page table. The page table we are about to * allocate. We don't have BAT. So drop to data real mode for a minute * as a measure of last resort. We do this a couple times. */ - moea64_pteg_table = (struct lpte *)moea64_bootstrap_alloc(size, size); + if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) { + moea64_part_table = + (struct pate *)moea64_bootstrap_alloc(PART_SIZE, PART_SIZE); + if (hw_direct_map) + moea64_part_table = + (struct pate *)PHYS_TO_DMAP((vm_offset_t)moea64_part_table); + } + /* + * PTEG table must be aligned on a 256k boundary, but can be placed + * anywhere with that alignment. + */ + moea64_pteg_table = (struct lpte *)moea64_bootstrap_alloc(size, 256*1024); if (hw_direct_map) moea64_pteg_table = (struct lpte *)PHYS_TO_DMAP((vm_offset_t)moea64_pteg_table); DISABLE_TRANS(msr); + if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) { + bzero(__DEVOLATILE(void *, moea64_part_table), PART_SIZE); + moea64_part_table[0].pagetab = + ((uintptr_t)moea64_pteg_table & ~DMAP_BASE_ADDRESS) | + (uintptr_t)(flsl((moea64_pteg_count - 1) >> 11)); + } bzero(__DEVOLATILE(void *, moea64_pteg_table), moea64_pteg_count * sizeof(struct lpteg)); ENABLE_TRANS(msr); CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table); moea64_mid_bootstrap(mmup, kernelstart, kernelend); /* * Add a mapping for the page table itself if there is no direct map. */ if (!hw_direct_map) { size = moea64_pteg_count * sizeof(struct lpteg); off = (vm_offset_t)(moea64_pteg_table); DISABLE_TRANS(msr); for (pa = off; pa < off + size; pa += PAGE_SIZE) pmap_kenter(pa, pa); ENABLE_TRANS(msr); } /* Bring up virtual memory */ moea64_late_bootstrap(mmup, kernelstart, kernelend); } static void tlbia(void) { vm_offset_t i; #ifndef __powerpc64__ register_t msr, scratch; #endif i = 0xc00; /* IS = 11 */ switch (mfpvr() >> 16) { case IBM970: case IBM970FX: case IBM970MP: case IBM970GX: case IBMPOWER4: case IBMPOWER4PLUS: case IBMPOWER5: case IBMPOWER5PLUS: i = 0; /* IS not supported */ break; } TLBSYNC(); for (; i < 0x200000; i += 0x00001000) { #ifdef __powerpc64__ __asm __volatile("tlbiel %0" :: "r"(i)); #else __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%3,1,0; \ mtmsrd %1; \ isync; \ \ tlbiel %2; \ \ mtmsrd %0; \ isync;" : "=r"(msr), "=r"(scratch) : "r"(i), "r"(1)); #endif } EIEIO(); TLBSYNC(); } static int atomic_pte_lock(volatile struct lpte *pte, uint64_t bitmask, uint64_t *oldhi) { int ret; uint32_t oldhihalf; /* * Note: in principle, if just the locked bit were set here, we * could avoid needing the eviction lock. However, eviction occurs * so rarely that it isn't worth bothering about in practice. */ __asm __volatile ( "1:\tlwarx %1, 0, %3\n\t" /* load old value */ "and. %0,%1,%4\n\t" /* check if any bits set */ "bne 2f\n\t" /* exit if any set */ "stwcx. %5, 0, %3\n\t" /* attempt to store */ "bne- 1b\n\t" /* spin if failed */ "li %0, 1\n\t" /* success - retval = 1 */ "b 3f\n\t" /* we've succeeded */ "2:\n\t" "stwcx. %1, 0, %3\n\t" /* clear reservation (74xx) */ "li %0, 0\n\t" /* failure - retval = 0 */ "3:\n\t" : "=&r" (ret), "=&r"(oldhihalf), "=m" (pte->pte_hi) : "r" ((volatile char *)&pte->pte_hi + 4), "r" ((uint32_t)bitmask), "r" ((uint32_t)LPTE_LOCKED), "m" (pte->pte_hi) : "cr0", "cr1", "cr2", "memory"); *oldhi = (pte->pte_hi & 0xffffffff00000000ULL) | oldhihalf; return (ret); } static uintptr_t moea64_insert_to_pteg_native(struct lpte *pvo_pt, uintptr_t slotbase, uint64_t mask) { volatile struct lpte *pt; uint64_t oldptehi, va; uintptr_t k; int i, j; /* Start at a random slot */ i = mftb() % 8; for (j = 0; j < 8; j++) { k = slotbase + (i + j) % 8; pt = &moea64_pteg_table[k]; /* Invalidate and seize lock only if no bits in mask set */ if (atomic_pte_lock(pt, mask, &oldptehi)) /* Lock obtained */ break; } if (j == 8) return (-1); if (oldptehi & LPTE_VALID) { KASSERT(!(oldptehi & LPTE_WIRED), ("Unmapped wired entry")); /* * Need to invalidate old entry completely: see * "Modifying a Page Table Entry". Need to reconstruct * the virtual address for the outgoing entry to do that. */ if (oldptehi & LPTE_BIG) va = oldptehi >> moea64_large_page_shift; else va = oldptehi >> ADDR_PIDX_SHFT; if (oldptehi & LPTE_HID) va = (((k >> 3) ^ moea64_pteg_mask) ^ va) & VSID_HASH_MASK; else va = ((k >> 3) ^ va) & VSID_HASH_MASK; va |= (oldptehi & LPTE_AVPN_MASK) << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT); PTESYNC(); TLBIE(va); moea64_pte_valid--; moea64_pte_overflow++; } /* * Update the PTE as per "Adding a Page Table Entry". Lock is released * by setting the high doubleworld. */ pt->pte_lo = htobe64(pvo_pt->pte_lo); EIEIO(); pt->pte_hi = htobe64(pvo_pt->pte_hi); PTESYNC(); /* Keep statistics */ moea64_pte_valid++; return (k); } static int moea64_pte_insert_native(mmu_t mmu, struct pvo_entry *pvo) { struct lpte insertpt; uintptr_t slot; /* Initialize PTE */ moea64_pte_from_pvo(pvo, &insertpt); /* Make sure further insertion is locked out during evictions */ rw_rlock(&moea64_eviction_lock); /* * First try primary hash. */ pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */ slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_runlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* * Now try secondary hash. */ pvo->pvo_vaddr ^= PVO_HID; insertpt.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_runlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* * Out of luck. Find a PTE to sacrifice. */ /* Lock out all insertions for a bit */ if (!rw_try_upgrade(&moea64_eviction_lock)) { rw_runlock(&moea64_eviction_lock); rw_wlock(&moea64_eviction_lock); } slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_wunlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* Try other hash table. Now we're getting desperate... */ pvo->pvo_vaddr ^= PVO_HID; insertpt.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_wunlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* No freeable slots in either PTEG? We're hosed. */ rw_wunlock(&moea64_eviction_lock); panic("moea64_pte_insert: overflow"); return (-1); } diff --git a/sys/powerpc/include/pte.h b/sys/powerpc/include/pte.h index fb986ce4b749..32eecaedb922 100644 --- a/sys/powerpc/include/pte.h +++ b/sys/powerpc/include/pte.h @@ -1,418 +1,424 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pte.h,v 1.2 1998/08/31 14:43:40 tsubai Exp $ * $FreeBSD$ */ #ifndef _MACHINE_PTE_H_ #define _MACHINE_PTE_H_ #if defined(AIM) /* * Page Table Entries */ #ifndef LOCORE /* 32-bit PTE */ struct pte { u_int32_t pte_hi; u_int32_t pte_lo; }; struct pteg { struct pte pt[8]; }; /* 64-bit (long) PTE */ struct lpte { u_int64_t pte_hi; u_int64_t pte_lo; }; struct lpteg { struct lpte pt[8]; }; +/* Partition table entry */ +struct pate { + u_int64_t pagetab; + u_int64_t proctab; +}; + #endif /* LOCORE */ /* 32-bit PTE definitions */ /* High word: */ #define PTE_VALID 0x80000000 #define PTE_VSID_SHFT 7 #define PTE_HID 0x00000040 #define PTE_API 0x0000003f /* Low word: */ #define PTE_RPGN 0xfffff000 #define PTE_REF 0x00000100 #define PTE_CHG 0x00000080 #define PTE_WIMG 0x00000078 #define PTE_W 0x00000040 #define PTE_I 0x00000020 #define PTE_M 0x00000010 #define PTE_G 0x00000008 #define PTE_PP 0x00000003 #define PTE_SO 0x00000000 /* Super. Only (U: XX, S: RW) */ #define PTE_SW 0x00000001 /* Super. Write-Only (U: RO, S: RW) */ #define PTE_BW 0x00000002 /* Supervisor (U: RW, S: RW) */ #define PTE_BR 0x00000003 /* Both Read Only (U: RO, S: RO) */ #define PTE_RW PTE_BW #define PTE_RO PTE_BR #define PTE_EXEC 0x00000200 /* pseudo bit in attrs; page is exec */ /* 64-bit PTE definitions */ /* High quadword: */ #define LPTE_VSID_SHIFT 12 #define LPTE_AVPN_MASK 0xFFFFFFFFFFFFFF80ULL #define LPTE_API 0x0000000000000F80ULL #define LPTE_SWBITS 0x0000000000000078ULL #define LPTE_WIRED 0x0000000000000010ULL #define LPTE_LOCKED 0x0000000000000008ULL #define LPTE_BIG 0x0000000000000004ULL /* 4kb/16Mb page */ #define LPTE_HID 0x0000000000000002ULL #define LPTE_VALID 0x0000000000000001ULL /* Low quadword: */ #define EXTEND_PTE(x) UINT64_C(x) /* make constants 64-bit */ #define LPTE_RPGN 0xfffffffffffff000ULL #define LPTE_REF EXTEND_PTE( PTE_REF ) #define LPTE_CHG EXTEND_PTE( PTE_CHG ) #define LPTE_WIMG EXTEND_PTE( PTE_WIMG ) #define LPTE_W EXTEND_PTE( PTE_W ) #define LPTE_I EXTEND_PTE( PTE_I ) #define LPTE_M EXTEND_PTE( PTE_M ) #define LPTE_G EXTEND_PTE( PTE_G ) #define LPTE_NOEXEC 0x0000000000000004ULL #define LPTE_PP EXTEND_PTE( PTE_PP ) #define LPTE_SO EXTEND_PTE( PTE_SO ) /* Super. Only */ #define LPTE_SW EXTEND_PTE( PTE_SW ) /* Super. Write-Only */ #define LPTE_BW EXTEND_PTE( PTE_BW ) /* Supervisor */ #define LPTE_BR EXTEND_PTE( PTE_BR ) /* Both Read Only */ #define LPTE_RW LPTE_BW #define LPTE_RO LPTE_BR /* POWER ISA 3.0 Radix Table Definitions */ #define RPTE_VALID 0x8000000000000000ULL #define RPTE_LEAF 0x4000000000000000ULL /* is a PTE: always 1 */ #define RPTE_SW0 0x2000000000000000ULL #define RPTE_RPN_MASK 0x00FFFFFFFFFFF000ULL #define RPTE_RPN_SHIFT 12 #define RPTE_SW1 0x0000000000000800ULL #define RPTE_SW2 0x0000000000000400ULL #define RPTE_SW3 0x0000000000000200ULL #define RPTE_R 0x0000000000000100ULL #define RPTE_C 0x0000000000000080ULL #define RPTE_ATTR_MASK 0x0000000000000030ULL #define RPTE_ATTR_MEM 0x0000000000000000ULL /* PTE M */ #define RPTE_ATTR_SAO 0x0000000000000010ULL /* PTE WIM */ #define RPTE_ATTR_GUARDEDIO 0x0000000000000020ULL /* PTE IMG */ #define RPTE_ATTR_UNGUARDEDIO 0x0000000000000030ULL /* PTE IM */ #define RPTE_EAA_MASK 0x000000000000000FULL #define RPTE_EAA_P 0x0000000000000008ULL /* Supervisor only */ #define RPTE_EAA_R 0x0000000000000004ULL /* Read allowed */ #define RPTE_EAA_W 0x0000000000000002ULL /* Write (+read) */ #define RPTE_EAA_X 0x0000000000000001ULL /* Execute allowed */ #define RPDE_VALID RPTE_VALID #define RPDE_LEAF RPTE_LEAF /* is a PTE: always 0 */ #define RPDE_NLB_MASK 0x0FFFFFFFFFFFFF00ULL #define RPDE_NLB_SHIFT 8 #define RPDE_NLS_MASK 0x000000000000001FULL #ifndef LOCORE typedef struct pte pte_t; typedef struct lpte lpte_t; #endif /* LOCORE */ /* * Extract bits from address */ #define ADDR_SR_SHFT 28 #define ADDR_PIDX 0x0ffff000UL #define ADDR_PIDX_SHFT 12 #define ADDR_API_SHFT 22 #define ADDR_API_SHFT64 16 #define ADDR_POFF 0x00000fffUL /* * Bits in DSISR: */ #define DSISR_DIRECT 0x80000000 #define DSISR_NOTFOUND 0x40000000 #define DSISR_PROTECT 0x08000000 #define DSISR_INVRX 0x04000000 #define DSISR_STORE 0x02000000 #define DSISR_DABR 0x00400000 #define DSISR_SEGMENT 0x00200000 #define DSISR_EAR 0x00100000 /* * Bits in SRR1 on ISI: */ #define ISSRR1_NOTFOUND 0x40000000 #define ISSRR1_DIRECT 0x10000000 #define ISSRR1_PROTECT 0x08000000 #define ISSRR1_SEGMENT 0x00200000 #else /* BOOKE */ #include #ifdef __powerpc64__ #include /* * The virtual address is: * * 4K page size * +-----+-----+-----+-------+-------------+-------------+----------------+ * | - |p2d#h| - | p2d#l | dir# | pte# | off in 4K page | * +-----+-----+-----+-------+-------------+-------------+----------------+ * 63 62 61 60 59 40 39 30 29 ^ 21 20 ^ 12 11 0 * | | * index in 1 page of pointers * * 1st level - pointers to page table directory (pp2d) * * pp2d consists of PP2D_NENTRIES entries, each being a pointer to * second level entity, i.e. the page table directory (pdir). */ #define HARDWARE_WALKER #define PP2D_H_H 61 #define PP2D_H_L 60 #define PP2D_L_H 39 #define PP2D_L_L 30 /* >30 would work with no page table pool */ #ifndef LOCORE #define PP2D_SIZE (1UL << PP2D_L_L) /* va range mapped by pp2d */ #else #define PP2D_SIZE (1 << PP2D_L_L) /* va range mapped by pp2d */ #endif #define PP2D_L_SHIFT PP2D_L_L #define PP2D_L_NUM (PP2D_L_H-PP2D_L_L+1) #define PP2D_L_MASK ((1<> PP2D_H_SHIFT) & PP2D_H_MASK) | ((va >> PP2D_L_SHIFT) & PP2D_L_MASK)) #define PP2D_NENTRIES (1<<(PP2D_L_NUM+PP2D_H_NUM)) #define PP2D_ENTRY_SHIFT 3 /* log2 (sizeof(struct pte_entry **)) */ /* * 2nd level - page table directory (pdir) * * pdir consists of PDIR_NENTRIES entries, each being a pointer to * second level entity, i.e. the actual page table (ptbl). */ #define PDIR_H (PP2D_L_L-1) #define PDIR_L 21 #define PDIR_NUM (PDIR_H-PDIR_L+1) #define PDIR_SIZE (1 << PDIR_L) /* va range mapped by pdir */ #define PDIR_MASK ((1<> PDIR_SHIFT) & PDIR_MASK) #define PDIR_ENTRY_SHIFT 3 /* log2 (sizeof(struct pte_entry *)) */ #define PDIR_PAGES ((PDIR_NENTRIES * (1<> PTBL_SHIFT) & PTBL_MASK) #define PTBL_ENTRY_SHIFT 3 /* log2 (sizeof (struct pte_entry)) */ #define PTBL_PAGES ((PTBL_NENTRIES * (1<> PDIR_SHIFT) #define PDIR_ENTRY_SHIFT 2 /* entry size is 2^2 = 4 bytes */ /* * 2nd level - page table (ptbl) * * Page table covers 1024 page table entries. Page * table entry (pte) is 32 bit wide and defines mapping * for a single page. */ #define PTBL_SHIFT PAGE_SHIFT #define PTBL_SIZE PAGE_SIZE /* va range mapped by ptbl entry */ #define PTBL_MASK ((PDIR_SIZE - 1) & ~((1 << PAGE_SHIFT) - 1)) #define PTBL_NENTRIES 1024 /* number of pages mapped by ptbl */ /* Returns ptbl entry number for given va */ #define PTBL_IDX(va) (((va) & PTBL_MASK) >> PTBL_SHIFT) /* Size of ptbl in pages, 1024 entries, each sizeof(struct pte_entry). */ #define PTBL_PAGES 2 #define PTBL_ENTRY_SHIFT 3 /* entry size is 2^3 = 8 bytes */ #endif /* * Flags for pte_remove() routine. */ #define PTBL_HOLD 0x00000001 /* do not unhold ptbl pages */ #define PTBL_UNHOLD 0x00000002 /* unhold and attempt to free ptbl pages */ #define PTBL_HOLD_FLAG(pmap) (((pmap) == kernel_pmap) ? PTBL_HOLD : PTBL_UNHOLD) /* * Page Table Entry definitions and macros. * * RPN need only be 32-bit because Book-E has 36-bit addresses, and the smallest * page size is 4k (12-bit mask), so RPN can really fit into 24 bits. */ #ifndef LOCORE typedef uint64_t pte_t; #endif /* RPN mask, TLB0 4K pages */ #define PTE_PA_MASK PAGE_MASK #if defined(BOOKE_E500) /* PTE bits assigned to MAS2, MAS3 flags */ #define PTE_MAS2_SHIFT 19 #define PTE_W (MAS2_W << PTE_MAS2_SHIFT) #define PTE_I (MAS2_I << PTE_MAS2_SHIFT) #define PTE_M (MAS2_M << PTE_MAS2_SHIFT) #define PTE_G (MAS2_G << PTE_MAS2_SHIFT) #define PTE_MAS2_MASK (MAS2_G | MAS2_M | MAS2_I | MAS2_W) #define PTE_MAS3_SHIFT 2 #define PTE_UX (MAS3_UX << PTE_MAS3_SHIFT) #define PTE_SX (MAS3_SX << PTE_MAS3_SHIFT) #define PTE_UW (MAS3_UW << PTE_MAS3_SHIFT) #define PTE_SW (MAS3_SW << PTE_MAS3_SHIFT) #define PTE_UR (MAS3_UR << PTE_MAS3_SHIFT) #define PTE_SR (MAS3_SR << PTE_MAS3_SHIFT) #define PTE_MAS3_MASK ((MAS3_UX | MAS3_SX | MAS3_UW \ | MAS3_SW | MAS3_UR | MAS3_SR) << PTE_MAS3_SHIFT) #define PTE_PS_SHIFT 8 #define PTE_PS_4KB (2 << PTE_PS_SHIFT) #elif defined(BOOKE_PPC4XX) #define PTE_WL1 TLB_WL1 #define PTE_IL2I TLB_IL2I #define PTE_IL2D TLB_IL2D #define PTE_W TLB_W #define PTE_I TLB_I #define PTE_M TLB_M #define PTE_G TLB_G #define PTE_UX TLB_UX #define PTE_SX TLB_SX #define PTE_UW TLB_UW #define PTE_SW TLB_SW #define PTE_UR TLB_UR #define PTE_SR TLB_SR #endif /* Other PTE flags */ #define PTE_VALID 0x00000001 /* Valid */ #define PTE_MODIFIED 0x00001000 /* Modified */ #define PTE_WIRED 0x00002000 /* Wired */ #define PTE_MANAGED 0x00000002 /* Managed */ #define PTE_REFERENCED 0x00040000 /* Referenced */ /* * Page Table Entry definitions and macros. * * We use the hardware page table entry format: * * 63 24 23 19 18 17 14 13 12 11 8 7 6 5 4 3 2 1 0 * --------------------------------------------------------------- * ARPN(12:51) WIMGE R U0:U3 SW0 C PSIZE UX SX UW SW UR SR SW1 V * --------------------------------------------------------------- */ /* PTE fields. */ #define PTE_TSIZE_SHIFT (63-54) #define PTE_TSIZE_MASK 0x7 #define PTE_TSIZE_SHIFT_DIRECT (63-55) #define PTE_TSIZE_MASK_DIRECT 0xf #define PTE_PS_DIRECT(ps) (ps<> PTE_TSIZE_SHIFT) & PTE_TSIZE_MASK) #define PTE_TSIZE_DIRECT(pte) (int)((*pte >> PTE_TSIZE_SHIFT_DIRECT) & PTE_TSIZE_MASK_DIRECT) /* Macro argument must of pte_t type. */ #define PTE_ARPN_SHIFT 12 #define PTE_FLAGS_MASK 0x00ffffff #define PTE_RPN_FROM_PA(pa) (((pa) & ~PAGE_MASK) << PTE_ARPN_SHIFT) #define PTE_PA(pte) ((vm_paddr_t)(*pte >> PTE_ARPN_SHIFT) & ~PAGE_MASK) #define PTE_ISVALID(pte) ((*pte) & PTE_VALID) #define PTE_ISWIRED(pte) ((*pte) & PTE_WIRED) #define PTE_ISMANAGED(pte) ((*pte) & PTE_MANAGED) #define PTE_ISMODIFIED(pte) ((*pte) & PTE_MODIFIED) #define PTE_ISREFERENCED(pte) ((*pte) & PTE_REFERENCED) #endif /* BOOKE */ #endif /* _MACHINE_PTE_H_ */ diff --git a/sys/powerpc/include/spr.h b/sys/powerpc/include/spr.h index a80a9006f939..8a868d5dd054 100644 --- a/sys/powerpc/include/spr.h +++ b/sys/powerpc/include/spr.h @@ -1,846 +1,847 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: spr.h,v 1.25 2002/08/14 15:38:40 matt Exp $ * $FreeBSD$ */ #ifndef _POWERPC_SPR_H_ #define _POWERPC_SPR_H_ #ifndef _LOCORE #define mtspr(reg, val) \ __asm __volatile("mtspr %0,%1" : : "K"(reg), "r"(val)) #define mfspr(reg) \ ( { register_t val; \ __asm __volatile("mfspr %0,%1" : "=r"(val) : "K"(reg)); \ val; } ) #ifndef __powerpc64__ /* The following routines allow manipulation of the full 64-bit width * of SPRs on 64 bit CPUs in bridge mode */ #define mtspr64(reg,valhi,vallo,scratch) \ __asm __volatile(" \ mfmsr %0; \ insrdi %0,%5,1,0; \ mtmsrd %0; \ isync; \ \ sld %1,%1,%4; \ or %1,%1,%2; \ mtspr %3,%1; \ srd %1,%1,%4; \ \ clrldi %0,%0,1; \ mtmsrd %0; \ isync;" \ : "=r"(scratch), "=r"(valhi) : "r"(vallo), "K"(reg), "r"(32), "r"(1)) #define mfspr64upper(reg,scratch) \ ( { register_t val; \ __asm __volatile(" \ mfmsr %0; \ insrdi %0,%4,1,0; \ mtmsrd %0; \ isync; \ \ mfspr %1,%2; \ srd %1,%1,%3; \ \ clrldi %0,%0,1; \ mtmsrd %0; \ isync;" \ : "=r"(scratch), "=r"(val) : "K"(reg), "r"(32), "r"(1)); \ val; } ) #endif #endif /* _LOCORE */ /* * Special Purpose Register declarations. * * The first column in the comments indicates which PowerPC * architectures the SPR is valid on - 4 for 4xx series, * 6 for 6xx/7xx series and 8 for 8xx and 8xxx series. */ #define SPR_MQ 0x000 /* .6. 601 MQ register */ #define SPR_XER 0x001 /* 468 Fixed Point Exception Register */ #define SPR_RTCU_R 0x004 /* .6. 601 RTC Upper - Read */ #define SPR_RTCL_R 0x005 /* .6. 601 RTC Lower - Read */ #define SPR_LR 0x008 /* 468 Link Register */ #define SPR_CTR 0x009 /* 468 Count Register */ #define SPR_DSCR 0x011 /* Data Stream Control Register */ #define SPR_DSISR 0x012 /* .68 DSI exception source */ #define DSISR_DIRECT 0x80000000 /* Direct-store error exception */ #define DSISR_NOTFOUND 0x40000000 /* Translation not found */ #define DSISR_PROTECT 0x08000000 /* Memory access not permitted */ #define DSISR_INVRX 0x04000000 /* Reserve-indexed insn direct-store access */ #define DSISR_STORE 0x02000000 /* Store operation */ #define DSISR_DABR 0x00400000 /* DABR match */ #define DSISR_SEGMENT 0x00200000 /* XXX; not in 6xx PEM */ #define DSISR_EAR 0x00100000 /* eciwx/ecowx && EAR[E] == 0 */ #define SPR_DAR 0x013 /* .68 Data Address Register */ #define SPR_RTCU_W 0x014 /* .6. 601 RTC Upper - Write */ #define SPR_RTCL_W 0x015 /* .6. 601 RTC Lower - Write */ #define SPR_DEC 0x016 /* .68 DECrementer register */ #define SPR_SDR1 0x019 /* .68 Page table base address register */ #define SPR_SRR0 0x01a /* 468 Save/Restore Register 0 */ #define SPR_SRR1 0x01b /* 468 Save/Restore Register 1 */ #define SRR1_ISI_PFAULT 0x40000000 /* ISI page not found */ #define SRR1_ISI_NOEXECUTE 0x10000000 /* Memory marked no-execute */ #define SRR1_ISI_PP 0x08000000 /* PP bits forbid access */ #define SPR_DECAR 0x036 /* ..8 Decrementer auto reload */ #define SPR_EIE 0x050 /* ..8 Exception Interrupt ??? */ #define SPR_EID 0x051 /* ..8 Exception Interrupt ??? */ #define SPR_NRI 0x052 /* ..8 Exception Interrupt ??? */ #define SPR_FSCR 0x099 /* Facility Status and Control Register */ #define FSCR_IC_MASK 0xFF00000000000000ULL /* FSCR[0:7] is Interrupt Cause */ #define FSCR_IC_FP 0x0000000000000000ULL /* FP unavailable */ #define FSCR_IC_VSX 0x0100000000000000ULL /* VSX unavailable */ #define FSCR_IC_DSCR 0x0200000000000000ULL /* Access to the DSCR at SPRs 3 or 17 */ #define FSCR_IC_PM 0x0300000000000000ULL /* Read or write access of a Performance Monitor SPR in group A */ #define FSCR_IC_BHRB 0x0400000000000000ULL /* Execution of a BHRB Instruction */ #define FSCR_IC_HTM 0x0500000000000000ULL /* Access to a Transactional Memory */ /* Reserved 0x0600000000000000ULL */ #define FSCR_IC_EBB 0x0700000000000000ULL /* Access to Event-Based Branch */ #define FSCR_IC_TAR 0x0800000000000000ULL /* Access to Target Address Register */ #define FSCR_IC_STOP 0x0900000000000000ULL /* Access to the 'stop' instruction in privileged non-hypervisor state */ #define FSCR_IC_MSG 0x0A00000000000000ULL /* Access to 'msgsndp' or 'msgclrp' instructions */ #define FSCR_IC_SCV 0x0C00000000000000ULL /* Execution of a 'scv' instruction */ #define SPR_USPRG0 0x100 /* 4.. User SPR General 0 */ #define SPR_VRSAVE 0x100 /* .6. AltiVec VRSAVE */ #define SPR_SPRG0 0x110 /* 468 SPR General 0 */ #define SPR_SPRG1 0x111 /* 468 SPR General 1 */ #define SPR_SPRG2 0x112 /* 468 SPR General 2 */ #define SPR_SPRG3 0x113 /* 468 SPR General 3 */ #define SPR_SPRG4 0x114 /* 4.. SPR General 4 */ #define SPR_SPRG5 0x115 /* 4.. SPR General 5 */ #define SPR_SPRG6 0x116 /* 4.. SPR General 6 */ #define SPR_SPRG7 0x117 /* 4.. SPR General 7 */ #define SPR_SCOMC 0x114 /* ... SCOM Address Register (970) */ #define SPR_SCOMD 0x115 /* ... SCOM Data Register (970) */ #define SPR_ASR 0x118 /* ... Address Space Register (PPC64) */ #define SPR_EAR 0x11a /* .68 External Access Register */ #define SPR_PVR 0x11f /* 468 Processor Version Register */ #define MPC601 0x0001 #define MPC603 0x0003 #define MPC604 0x0004 #define MPC602 0x0005 #define MPC603e 0x0006 #define MPC603ev 0x0007 #define MPC750 0x0008 #define MPC750CL 0x7000 /* Nintendo Wii's Broadway */ #define MPC604ev 0x0009 #define MPC7400 0x000c #define MPC620 0x0014 #define IBM403 0x0020 #define IBM401A1 0x0021 #define IBM401B2 0x0022 #define IBM401C2 0x0023 #define IBM401D2 0x0024 #define IBM401E2 0x0025 #define IBM401F2 0x0026 #define IBM401G2 0x0027 #define IBMRS64II 0x0033 #define IBMRS64III 0x0034 #define IBMPOWER4 0x0035 #define IBMRS64III_2 0x0036 #define IBMRS64IV 0x0037 #define IBMPOWER4PLUS 0x0038 #define IBM970 0x0039 #define IBMPOWER5 0x003a #define IBMPOWER5PLUS 0x003b #define IBM970FX 0x003c #define IBMPOWER6 0x003e #define IBMPOWER7 0x003f #define IBMPOWER3 0x0040 #define IBMPOWER3PLUS 0x0041 #define IBM970MP 0x0044 #define IBM970GX 0x0045 #define IBMPOWERPCA2 0x0049 #define IBMPOWER7PLUS 0x004a #define IBMPOWER8E 0x004b #define IBMPOWER8 0x004d #define IBMPOWER9 0x004e #define MPC860 0x0050 #define IBMCELLBE 0x0070 #define MPC8240 0x0081 #define PA6T 0x0090 #define IBM405GP 0x4011 #define IBM405L 0x4161 #define IBM750FX 0x7000 #define MPC745X_P(v) ((v & 0xFFF8) == 0x8000) #define MPC7450 0x8000 #define MPC7455 0x8001 #define MPC7457 0x8002 #define MPC7447A 0x8003 #define MPC7448 0x8004 #define MPC7410 0x800c #define MPC8245 0x8081 #define FSL_E500v1 0x8020 #define FSL_E500v2 0x8021 #define FSL_E500mc 0x8023 #define FSL_E5500 0x8024 #define FSL_E6500 0x8040 #define FSL_E300C1 0x8083 #define FSL_E300C2 0x8084 #define FSL_E300C3 0x8085 #define FSL_E300C4 0x8086 #define SPR_LPCR 0x13e /* Logical Partitioning Control */ #define LPCR_LPES 0x008 /* Bit 60 */ #define LPCR_PECE_DRBL (1ULL << 16) /* Directed Privileged Doorbell */ #define LPCR_PECE_HDRBL (1ULL << 15) /* Directed Hypervisor Doorbell */ #define LPCR_PECE_EXT (1ULL << 14) /* External exceptions */ #define LPCR_PECE_DECR (1ULL << 13) /* Decrementer exceptions */ #define LPCR_PECE_ME (1ULL << 12) /* Machine Check and Hypervisor */ /* Maintenance exceptions */ #define LPCR_PECE_WAKESET (LPCR_PECE_EXT | LPCR_PECE_DECR | LPCR_PECE_ME) #define SPR_EPCR 0x133 #define EPCR_EXTGS 0x80000000 #define EPCR_DTLBGS 0x40000000 #define EPCR_ITLBGS 0x20000000 #define EPCR_DSIGS 0x10000000 #define EPCR_ISIGS 0x08000000 #define EPCR_DUVGS 0x04000000 #define EPCR_ICM 0x02000000 #define EPCR_GICMGS 0x01000000 #define EPCR_DGTMI 0x00800000 #define EPCR_DMIUH 0x00400000 #define EPCR_PMGS 0x00200000 #define SPR_SPEFSCR 0x200 /* ..8 Signal Processing Engine FSCR. */ #define SPR_LPCR 0x13e /* Logical Partitioning Control */ #define LPCR_LPES 0x008 /* Bit 60 */ #define SPR_LPID 0x13f /* Logical Partitioning Control */ +#define SPR_PTCR 0x1d0 /* Partition Table Control Register */ #define SPR_IBAT0U 0x210 /* .68 Instruction BAT Reg 0 Upper */ #define SPR_IBAT0U 0x210 /* .6. Instruction BAT Reg 0 Upper */ #define SPR_IBAT0L 0x211 /* .6. Instruction BAT Reg 0 Lower */ #define SPR_IBAT1U 0x212 /* .6. Instruction BAT Reg 1 Upper */ #define SPR_IBAT1L 0x213 /* .6. Instruction BAT Reg 1 Lower */ #define SPR_IBAT2U 0x214 /* .6. Instruction BAT Reg 2 Upper */ #define SPR_IBAT2L 0x215 /* .6. Instruction BAT Reg 2 Lower */ #define SPR_IBAT3U 0x216 /* .6. Instruction BAT Reg 3 Upper */ #define SPR_IBAT3L 0x217 /* .6. Instruction BAT Reg 3 Lower */ #define SPR_DBAT0U 0x218 /* .6. Data BAT Reg 0 Upper */ #define SPR_DBAT0L 0x219 /* .6. Data BAT Reg 0 Lower */ #define SPR_DBAT1U 0x21a /* .6. Data BAT Reg 1 Upper */ #define SPR_DBAT1L 0x21b /* .6. Data BAT Reg 1 Lower */ #define SPR_DBAT2U 0x21c /* .6. Data BAT Reg 2 Upper */ #define SPR_DBAT2L 0x21d /* .6. Data BAT Reg 2 Lower */ #define SPR_DBAT3U 0x21e /* .6. Data BAT Reg 3 Upper */ #define SPR_DBAT3L 0x21f /* .6. Data BAT Reg 3 Lower */ #define SPR_IC_CST 0x230 /* ..8 Instruction Cache CSR */ #define IC_CST_IEN 0x80000000 /* I cache is ENabled (RO) */ #define IC_CST_CMD_INVALL 0x0c000000 /* I cache invalidate all */ #define IC_CST_CMD_UNLOCKALL 0x0a000000 /* I cache unlock all */ #define IC_CST_CMD_UNLOCK 0x08000000 /* I cache unlock block */ #define IC_CST_CMD_LOADLOCK 0x06000000 /* I cache load & lock block */ #define IC_CST_CMD_DISABLE 0x04000000 /* I cache disable */ #define IC_CST_CMD_ENABLE 0x02000000 /* I cache enable */ #define IC_CST_CCER1 0x00200000 /* I cache error type 1 (RO) */ #define IC_CST_CCER2 0x00100000 /* I cache error type 2 (RO) */ #define IC_CST_CCER3 0x00080000 /* I cache error type 3 (RO) */ #define SPR_IBAT4U 0x230 /* .6. Instruction BAT Reg 4 Upper */ #define SPR_IC_ADR 0x231 /* ..8 Instruction Cache Address */ #define SPR_IBAT4L 0x231 /* .6. Instruction BAT Reg 4 Lower */ #define SPR_IC_DAT 0x232 /* ..8 Instruction Cache Data */ #define SPR_IBAT5U 0x232 /* .6. Instruction BAT Reg 5 Upper */ #define SPR_IBAT5L 0x233 /* .6. Instruction BAT Reg 5 Lower */ #define SPR_IBAT6U 0x234 /* .6. Instruction BAT Reg 6 Upper */ #define SPR_IBAT6L 0x235 /* .6. Instruction BAT Reg 6 Lower */ #define SPR_IBAT7U 0x236 /* .6. Instruction BAT Reg 7 Upper */ #define SPR_IBAT7L 0x237 /* .6. Instruction BAT Reg 7 Lower */ #define SPR_DC_CST 0x230 /* ..8 Data Cache CSR */ #define DC_CST_DEN 0x80000000 /* D cache ENabled (RO) */ #define DC_CST_DFWT 0x40000000 /* D cache Force Write-Thru (RO) */ #define DC_CST_LES 0x20000000 /* D cache Little Endian Swap (RO) */ #define DC_CST_CMD_FLUSH 0x0e000000 /* D cache invalidate all */ #define DC_CST_CMD_INVALL 0x0c000000 /* D cache invalidate all */ #define DC_CST_CMD_UNLOCKALL 0x0a000000 /* D cache unlock all */ #define DC_CST_CMD_UNLOCK 0x08000000 /* D cache unlock block */ #define DC_CST_CMD_CLRLESWAP 0x07000000 /* D cache clr little-endian swap */ #define DC_CST_CMD_LOADLOCK 0x06000000 /* D cache load & lock block */ #define DC_CST_CMD_SETLESWAP 0x05000000 /* D cache set little-endian swap */ #define DC_CST_CMD_DISABLE 0x04000000 /* D cache disable */ #define DC_CST_CMD_CLRFWT 0x03000000 /* D cache clear forced write-thru */ #define DC_CST_CMD_ENABLE 0x02000000 /* D cache enable */ #define DC_CST_CMD_SETFWT 0x01000000 /* D cache set forced write-thru */ #define DC_CST_CCER1 0x00200000 /* D cache error type 1 (RO) */ #define DC_CST_CCER2 0x00100000 /* D cache error type 2 (RO) */ #define DC_CST_CCER3 0x00080000 /* D cache error type 3 (RO) */ #define SPR_DBAT4U 0x238 /* .6. Data BAT Reg 4 Upper */ #define SPR_DC_ADR 0x231 /* ..8 Data Cache Address */ #define SPR_DBAT4L 0x239 /* .6. Data BAT Reg 4 Lower */ #define SPR_DC_DAT 0x232 /* ..8 Data Cache Data */ #define SPR_DBAT5U 0x23a /* .6. Data BAT Reg 5 Upper */ #define SPR_DBAT5L 0x23b /* .6. Data BAT Reg 5 Lower */ #define SPR_DBAT6U 0x23c /* .6. Data BAT Reg 6 Upper */ #define SPR_DBAT6L 0x23d /* .6. Data BAT Reg 6 Lower */ #define SPR_DBAT7U 0x23e /* .6. Data BAT Reg 7 Upper */ #define SPR_DBAT7L 0x23f /* .6. Data BAT Reg 7 Lower */ #define SPR_SPRG8 0x25c /* ..8 SPR General 8 */ #define SPR_MI_CTR 0x310 /* ..8 IMMU control */ #define Mx_CTR_GPM 0x80000000 /* Group Protection Mode */ #define Mx_CTR_PPM 0x40000000 /* Page Protection Mode */ #define Mx_CTR_CIDEF 0x20000000 /* Cache-Inhibit DEFault */ #define MD_CTR_WTDEF 0x20000000 /* Write-Through DEFault */ #define Mx_CTR_RSV4 0x08000000 /* Reserve 4 TLB entries */ #define MD_CTR_TWAM 0x04000000 /* TableWalk Assist Mode */ #define Mx_CTR_PPCS 0x02000000 /* Priv/user state compare mode */ #define Mx_CTR_TLB_INDX 0x000001f0 /* TLB index mask */ #define Mx_CTR_TLB_INDX_BITPOS 8 /* TLB index shift */ #define SPR_MI_AP 0x312 /* ..8 IMMU access protection */ #define Mx_GP_SUPER(n) (0 << (2*(15-(n)))) /* access is supervisor */ #define Mx_GP_PAGE (1 << (2*(15-(n)))) /* access is page protect */ #define Mx_GP_SWAPPED (2 << (2*(15-(n)))) /* access is swapped */ #define Mx_GP_USER (3 << (2*(15-(n)))) /* access is user */ #define SPR_MI_EPN 0x313 /* ..8 IMMU effective number */ #define Mx_EPN_EPN 0xfffff000 /* Effective Page Number mask */ #define Mx_EPN_EV 0x00000020 /* Entry Valid */ #define Mx_EPN_ASID 0x0000000f /* Address Space ID */ #define SPR_MI_TWC 0x315 /* ..8 IMMU tablewalk control */ #define MD_TWC_L2TB 0xfffff000 /* Level-2 Tablewalk Base */ #define Mx_TWC_APG 0x000001e0 /* Access Protection Group */ #define Mx_TWC_G 0x00000010 /* Guarded memory */ #define Mx_TWC_PS 0x0000000c /* Page Size (L1) */ #define MD_TWC_WT 0x00000002 /* Write-Through */ #define Mx_TWC_V 0x00000001 /* Entry Valid */ #define SPR_MI_RPN 0x316 /* ..8 IMMU real (phys) page number */ #define Mx_RPN_RPN 0xfffff000 /* Real Page Number */ #define Mx_RPN_PP 0x00000ff0 /* Page Protection */ #define Mx_RPN_SPS 0x00000008 /* Small Page Size */ #define Mx_RPN_SH 0x00000004 /* SHared page */ #define Mx_RPN_CI 0x00000002 /* Cache Inhibit */ #define Mx_RPN_V 0x00000001 /* Valid */ #define SPR_MD_CTR 0x318 /* ..8 DMMU control */ #define SPR_M_CASID 0x319 /* ..8 CASID */ #define M_CASID 0x0000000f /* Current AS Id */ #define SPR_MD_AP 0x31a /* ..8 DMMU access protection */ #define SPR_MD_EPN 0x31b /* ..8 DMMU effective number */ #define SPR_970MMCR0 0x31b /* ... Monitor Mode Control Register 0 (PPC 970) */ #define SPR_970MMCR0_PMC1SEL(x) ((x) << 8) /* PMC1 selector (970) */ #define SPR_970MMCR0_PMC2SEL(x) ((x) << 1) /* PMC2 selector (970) */ #define SPR_970MMCR1 0x31e /* ... Monitor Mode Control Register 1 (PPC 970) */ #define SPR_970MMCR1_PMC3SEL(x) (((x) & 0x1f) << 27) /* PMC 3 selector */ #define SPR_970MMCR1_PMC4SEL(x) (((x) & 0x1f) << 22) /* PMC 4 selector */ #define SPR_970MMCR1_PMC5SEL(x) (((x) & 0x1f) << 17) /* PMC 5 selector */ #define SPR_970MMCR1_PMC6SEL(x) (((x) & 0x1f) << 12) /* PMC 6 selector */ #define SPR_970MMCR1_PMC7SEL(x) (((x) & 0x1f) << 7) /* PMC 7 selector */ #define SPR_970MMCR1_PMC8SEL(x) (((x) & 0x1f) << 2) /* PMC 8 selector */ #define SPR_970MMCRA 0x312 /* ... Monitor Mode Control Register 2 (PPC 970) */ #define SPR_970PMC1 0x313 /* ... PMC 1 */ #define SPR_970PMC2 0x314 /* ... PMC 2 */ #define SPR_970PMC3 0x315 /* ... PMC 3 */ #define SPR_970PMC4 0x316 /* ... PMC 4 */ #define SPR_970PMC5 0x317 /* ... PMC 5 */ #define SPR_970PMC6 0x318 /* ... PMC 6 */ #define SPR_970PMC7 0x319 /* ... PMC 7 */ #define SPR_970PMC8 0x31a /* ... PMC 8 */ #define SPR_M_TWB 0x31c /* ..8 MMU tablewalk base */ #define M_TWB_L1TB 0xfffff000 /* level-1 translation base */ #define M_TWB_L1INDX 0x00000ffc /* level-1 index */ #define SPR_MD_TWC 0x31d /* ..8 DMMU tablewalk control */ #define SPR_MD_RPN 0x31e /* ..8 DMMU real (phys) page number */ #define SPR_MD_TW 0x31f /* ..8 MMU tablewalk scratch */ #define SPR_MI_CAM 0x330 /* ..8 IMMU CAM entry read */ #define SPR_MI_RAM0 0x331 /* ..8 IMMU RAM entry read reg 0 */ #define SPR_MI_RAM1 0x332 /* ..8 IMMU RAM entry read reg 1 */ #define SPR_MD_CAM 0x338 /* ..8 IMMU CAM entry read */ #define SPR_MD_RAM0 0x339 /* ..8 IMMU RAM entry read reg 0 */ #define SPR_MD_RAM1 0x33a /* ..8 IMMU RAM entry read reg 1 */ #define SPR_UMMCR2 0x3a0 /* .6. User Monitor Mode Control Register 2 */ #define SPR_UMMCR0 0x3a8 /* .6. User Monitor Mode Control Register 0 */ #define SPR_USIA 0x3ab /* .6. User Sampled Instruction Address */ #define SPR_UMMCR1 0x3ac /* .6. User Monitor Mode Control Register 1 */ #define SPR_ZPR 0x3b0 /* 4.. Zone Protection Register */ #define SPR_MMCR2 0x3b0 /* .6. Monitor Mode Control Register 2 */ #define SPR_MMCR2_THRESHMULT_32 0x80000000 /* Multiply MMCR0 threshold by 32 */ #define SPR_MMCR2_THRESHMULT_2 0x00000000 /* Multiply MMCR0 threshold by 2 */ #define SPR_PID 0x3b1 /* 4.. Process ID */ #define SPR_PMC5 0x3b1 /* .6. Performance Counter Register 5 */ #define SPR_PMC6 0x3b2 /* .6. Performance Counter Register 6 */ #define SPR_CCR0 0x3b3 /* 4.. Core Configuration Register 0 */ #define SPR_IAC3 0x3b4 /* 4.. Instruction Address Compare 3 */ #define SPR_IAC4 0x3b5 /* 4.. Instruction Address Compare 4 */ #define SPR_DVC1 0x3b6 /* 4.. Data Value Compare 1 */ #define SPR_DVC2 0x3b7 /* 4.. Data Value Compare 2 */ #define SPR_MMCR0 0x3b8 /* .6. Monitor Mode Control Register 0 */ #define SPR_MMCR0_FC 0x80000000 /* Freeze counters */ #define SPR_MMCR0_FCS 0x40000000 /* Freeze counters in supervisor mode */ #define SPR_MMCR0_FCP 0x20000000 /* Freeze counters in user mode */ #define SPR_MMCR0_FCM1 0x10000000 /* Freeze counters when mark=1 */ #define SPR_MMCR0_FCM0 0x08000000 /* Freeze counters when mark=0 */ #define SPR_MMCR0_PMXE 0x04000000 /* Enable PM interrupt */ #define SPR_MMCR0_FCECE 0x02000000 /* Freeze counters after event */ #define SPR_MMCR0_TBSEL_15 0x01800000 /* Count bit 15 of TBL */ #define SPR_MMCR0_TBSEL_19 0x01000000 /* Count bit 19 of TBL */ #define SPR_MMCR0_TBSEL_23 0x00800000 /* Count bit 23 of TBL */ #define SPR_MMCR0_TBSEL_31 0x00000000 /* Count bit 31 of TBL */ #define SPR_MMCR0_TBEE 0x00400000 /* Time-base event enable */ #define SPR_MMCRO_THRESHOLD(x) ((x) << 16) /* Threshold value */ #define SPR_MMCR0_PMC1CE 0x00008000 /* PMC1 condition enable */ #define SPR_MMCR0_PMCNCE 0x00004000 /* PMCn condition enable */ #define SPR_MMCR0_TRIGGER 0x00002000 /* Trigger */ #define SPR_MMCR0_PMC1SEL(x) (((x) & 0x3f) << 6) /* PMC1 selector */ #define SPR_MMCR0_PMC2SEL(x) (((x) & 0x3f) << 0) /* PMC2 selector */ #define SPR_SGR 0x3b9 /* 4.. Storage Guarded Register */ #define SPR_PMC1 0x3b9 /* .6. Performance Counter Register 1 */ #define SPR_DCWR 0x3ba /* 4.. Data Cache Write-through Register */ #define SPR_PMC2 0x3ba /* .6. Performance Counter Register 2 */ #define SPR_SLER 0x3bb /* 4.. Storage Little Endian Register */ #define SPR_SIA 0x3bb /* .6. Sampled Instruction Address */ #define SPR_MMCR1 0x3bc /* .6. Monitor Mode Control Register 2 */ #define SPR_MMCR1_PMC3SEL(x) (((x) & 0x1f) << 27) /* PMC 3 selector */ #define SPR_MMCR1_PMC4SEL(x) (((x) & 0x1f) << 22) /* PMC 4 selector */ #define SPR_MMCR1_PMC5SEL(x) (((x) & 0x1f) << 17) /* PMC 5 selector */ #define SPR_MMCR1_PMC6SEL(x) (((x) & 0x3f) << 11) /* PMC 6 selector */ #define SPR_SU0R 0x3bc /* 4.. Storage User-defined 0 Register */ #define SPR_PMC3 0x3bd /* .6. Performance Counter Register 3 */ #define SPR_PMC4 0x3be /* .6. Performance Counter Register 4 */ #define SPR_DMISS 0x3d0 /* .68 Data TLB Miss Address Register */ #define SPR_DCMP 0x3d1 /* .68 Data TLB Compare Register */ #define SPR_HASH1 0x3d2 /* .68 Primary Hash Address Register */ #define SPR_ICDBDR 0x3d3 /* 4.. Instruction Cache Debug Data Register */ #define SPR_HASH2 0x3d3 /* .68 Secondary Hash Address Register */ #define SPR_IMISS 0x3d4 /* .68 Instruction TLB Miss Address Register */ #define SPR_TLBMISS 0x3d4 /* .6. TLB Miss Address Register */ #define SPR_DEAR 0x3d5 /* 4.. Data Error Address Register */ #define SPR_ICMP 0x3d5 /* .68 Instruction TLB Compare Register */ #define SPR_PTEHI 0x3d5 /* .6. Instruction TLB Compare Register */ #define SPR_EVPR 0x3d6 /* 4.. Exception Vector Prefix Register */ #define SPR_RPA 0x3d6 /* .68 Required Physical Address Register */ #define SPR_PTELO 0x3d6 /* .6. Required Physical Address Register */ #define SPR_TSR 0x150 /* ..8 Timer Status Register */ #define SPR_TCR 0x154 /* ..8 Timer Control Register */ #define TSR_ENW 0x80000000 /* Enable Next Watchdog */ #define TSR_WIS 0x40000000 /* Watchdog Interrupt Status */ #define TSR_WRS_MASK 0x30000000 /* Watchdog Reset Status */ #define TSR_WRS_NONE 0x00000000 /* No watchdog reset has occurred */ #define TSR_WRS_CORE 0x10000000 /* Core reset was forced by the watchdog */ #define TSR_WRS_CHIP 0x20000000 /* Chip reset was forced by the watchdog */ #define TSR_WRS_SYSTEM 0x30000000 /* System reset was forced by the watchdog */ #define TSR_PIS 0x08000000 /* PIT Interrupt Status */ #define TSR_DIS 0x08000000 /* Decrementer Interrupt Status */ #define TSR_FIS 0x04000000 /* FIT Interrupt Status */ #define TCR_WP_MASK 0xc0000000 /* Watchdog Period mask */ #define TCR_WP_2_17 0x00000000 /* 2**17 clocks */ #define TCR_WP_2_21 0x40000000 /* 2**21 clocks */ #define TCR_WP_2_25 0x80000000 /* 2**25 clocks */ #define TCR_WP_2_29 0xc0000000 /* 2**29 clocks */ #define TCR_WRC_MASK 0x30000000 /* Watchdog Reset Control mask */ #define TCR_WRC_NONE 0x00000000 /* No watchdog reset */ #define TCR_WRC_CORE 0x10000000 /* Core reset */ #define TCR_WRC_CHIP 0x20000000 /* Chip reset */ #define TCR_WRC_SYSTEM 0x30000000 /* System reset */ #define TCR_WIE 0x08000000 /* Watchdog Interrupt Enable */ #define TCR_PIE 0x04000000 /* PIT Interrupt Enable */ #define TCR_DIE 0x04000000 /* Pecrementer Interrupt Enable */ #define TCR_FP_MASK 0x03000000 /* FIT Period */ #define TCR_FP_2_9 0x00000000 /* 2**9 clocks */ #define TCR_FP_2_13 0x01000000 /* 2**13 clocks */ #define TCR_FP_2_17 0x02000000 /* 2**17 clocks */ #define TCR_FP_2_21 0x03000000 /* 2**21 clocks */ #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */ #define TCR_ARE 0x00400000 /* Auto Reload Enable */ #define SPR_PIT 0x3db /* 4.. Programmable Interval Timer */ #define SPR_SRR2 0x3de /* 4.. Save/Restore Register 2 */ #define SPR_SRR3 0x3df /* 4.. Save/Restore Register 3 */ #define SPR_HID0 0x3f0 /* ..8 Hardware Implementation Register 0 */ #define SPR_HID1 0x3f1 /* ..8 Hardware Implementation Register 1 */ #define SPR_HID2 0x3f3 /* ..8 Hardware Implementation Register 2 */ #define SPR_HID4 0x3f4 /* ..8 Hardware Implementation Register 4 */ #define SPR_HID5 0x3f6 /* ..8 Hardware Implementation Register 5 */ #define SPR_HID6 0x3f9 /* ..8 Hardware Implementation Register 6 */ #define SPR_CELL_TSRL 0x380 /* ... Cell BE Thread Status Register */ #define SPR_CELL_TSCR 0x399 /* ... Cell BE Thread Switch Register */ #if defined(AIM) #define SPR_DBSR 0x3f0 /* 4.. Debug Status Register */ #define DBSR_IC 0x80000000 /* Instruction completion debug event */ #define DBSR_BT 0x40000000 /* Branch Taken debug event */ #define DBSR_EDE 0x20000000 /* Exception debug event */ #define DBSR_TIE 0x10000000 /* Trap Instruction debug event */ #define DBSR_UDE 0x08000000 /* Unconditional debug event */ #define DBSR_IA1 0x04000000 /* IAC1 debug event */ #define DBSR_IA2 0x02000000 /* IAC2 debug event */ #define DBSR_DR1 0x01000000 /* DAC1 Read debug event */ #define DBSR_DW1 0x00800000 /* DAC1 Write debug event */ #define DBSR_DR2 0x00400000 /* DAC2 Read debug event */ #define DBSR_DW2 0x00200000 /* DAC2 Write debug event */ #define DBSR_IDE 0x00100000 /* Imprecise debug event */ #define DBSR_IA3 0x00080000 /* IAC3 debug event */ #define DBSR_IA4 0x00040000 /* IAC4 debug event */ #define DBSR_MRR 0x00000300 /* Most recent reset */ #define SPR_DBCR0 0x3f2 /* 4.. Debug Control Register 0 */ #define SPR_DBCR1 0x3bd /* 4.. Debug Control Register 1 */ #define SPR_IAC1 0x3f4 /* 4.. Instruction Address Compare 1 */ #define SPR_IAC2 0x3f5 /* 4.. Instruction Address Compare 2 */ #define SPR_DAC1 0x3f6 /* 4.. Data Address Compare 1 */ #define SPR_DAC2 0x3f7 /* 4.. Data Address Compare 2 */ #define SPR_PIR 0x3ff /* .6. Processor Identification Register */ #elif defined(BOOKE) #define SPR_PIR 0x11e /* ..8 Processor Identification Register */ #define SPR_DBSR 0x130 /* ..8 Debug Status Register */ #define DBSR_IDE 0x80000000 /* Imprecise debug event. */ #define DBSR_UDE 0x40000000 /* Unconditional debug event. */ #define DBSR_MRR 0x30000000 /* Most recent Reset (mask). */ #define DBSR_ICMP 0x08000000 /* Instr. complete debug event. */ #define DBSR_BRT 0x04000000 /* Branch taken debug event. */ #define DBSR_IRPT 0x02000000 /* Interrupt taken debug event. */ #define DBSR_TRAP 0x01000000 /* Trap instr. debug event. */ #define DBSR_IAC1 0x00800000 /* Instr. address compare #1. */ #define DBSR_IAC2 0x00400000 /* Instr. address compare #2. */ #define DBSR_IAC3 0x00200000 /* Instr. address compare #3. */ #define DBSR_IAC4 0x00100000 /* Instr. address compare #4. */ #define DBSR_DAC1R 0x00080000 /* Data addr. read compare #1. */ #define DBSR_DAC1W 0x00040000 /* Data addr. write compare #1. */ #define DBSR_DAC2R 0x00020000 /* Data addr. read compare #2. */ #define DBSR_DAC2W 0x00010000 /* Data addr. write compare #2. */ #define DBSR_RET 0x00008000 /* Return debug event. */ #define SPR_DBCR0 0x134 /* ..8 Debug Control Register 0 */ #define SPR_DBCR1 0x135 /* ..8 Debug Control Register 1 */ #define SPR_IAC1 0x138 /* ..8 Instruction Address Compare 1 */ #define SPR_IAC2 0x139 /* ..8 Instruction Address Compare 2 */ #define SPR_DAC1 0x13c /* ..8 Data Address Compare 1 */ #define SPR_DAC2 0x13d /* ..8 Data Address Compare 2 */ #endif #define DBCR0_EDM 0x80000000 /* External Debug Mode */ #define DBCR0_IDM 0x40000000 /* Internal Debug Mode */ #define DBCR0_RST_MASK 0x30000000 /* ReSeT */ #define DBCR0_RST_NONE 0x00000000 /* No action */ #define DBCR0_RST_CORE 0x10000000 /* Core reset */ #define DBCR0_RST_CHIP 0x20000000 /* Chip reset */ #define DBCR0_RST_SYSTEM 0x30000000 /* System reset */ #define DBCR0_IC 0x08000000 /* Instruction Completion debug event */ #define DBCR0_BT 0x04000000 /* Branch Taken debug event */ #define DBCR0_EDE 0x02000000 /* Exception Debug Event */ #define DBCR0_TDE 0x01000000 /* Trap Debug Event */ #define DBCR0_IA1 0x00800000 /* IAC (Instruction Address Compare) 1 debug event */ #define DBCR0_IA2 0x00400000 /* IAC 2 debug event */ #define DBCR0_IA12 0x00200000 /* Instruction Address Range Compare 1-2 */ #define DBCR0_IA12X 0x00100000 /* IA12 eXclusive */ #define DBCR0_IA3 0x00080000 /* IAC 3 debug event */ #define DBCR0_IA4 0x00040000 /* IAC 4 debug event */ #define DBCR0_IA34 0x00020000 /* Instruction Address Range Compare 3-4 */ #define DBCR0_IA34X 0x00010000 /* IA34 eXclusive */ #define DBCR0_IA12T 0x00008000 /* Instruction Address Range Compare 1-2 range Toggle */ #define DBCR0_IA34T 0x00004000 /* Instruction Address Range Compare 3-4 range Toggle */ #define DBCR0_FT 0x00000001 /* Freeze Timers on debug event */ #define SPR_IABR 0x3f2 /* ..8 Instruction Address Breakpoint Register 0 */ #define SPR_DABR 0x3f5 /* .6. Data Address Breakpoint Register */ #define SPR_MSSCR0 0x3f6 /* .6. Memory SubSystem Control Register */ #define MSSCR0_SHDEN 0x80000000 /* 0: Shared-state enable */ #define MSSCR0_SHDPEN3 0x40000000 /* 1: ~SHD[01] signal enable in MEI mode */ #define MSSCR0_L1INTVEN 0x38000000 /* 2-4: L1 data cache ~HIT intervention enable */ #define MSSCR0_L2INTVEN 0x07000000 /* 5-7: L2 data cache ~HIT intervention enable*/ #define MSSCR0_DL1HWF 0x00800000 /* 8: L1 data cache hardware flush */ #define MSSCR0_MBO 0x00400000 /* 9: must be one */ #define MSSCR0_EMODE 0x00200000 /* 10: MPX bus mode (read-only) */ #define MSSCR0_ABD 0x00100000 /* 11: address bus driven (read-only) */ #define MSSCR0_MBZ 0x000fffff /* 12-31: must be zero */ #define MSSCR0_L2PFE 0x00000003 /* 30-31: L2 prefetch enable */ #define SPR_MSSSR0 0x3f7 /* .6. Memory Subsystem Status Register (MPC745x) */ #define MSSSR0_L2TAG 0x00040000 /* 13: L2 tag parity error */ #define MSSSR0_L2DAT 0x00020000 /* 14: L2 data parity error */ #define MSSSR0_L3TAG 0x00010000 /* 15: L3 tag parity error */ #define MSSSR0_L3DAT 0x00008000 /* 16: L3 data parity error */ #define MSSSR0_APE 0x00004000 /* 17: Address parity error */ #define MSSSR0_DPE 0x00002000 /* 18: Data parity error */ #define MSSSR0_TEA 0x00001000 /* 19: Bus transfer error acknowledge */ #define SPR_LDSTCR 0x3f8 /* .6. Load/Store Control Register */ #define SPR_L2PM 0x3f8 /* .6. L2 Private Memory Control Register */ #define SPR_L2CR 0x3f9 /* .6. L2 Control Register */ #define L2CR_L2E 0x80000000 /* 0: L2 enable */ #define L2CR_L2PE 0x40000000 /* 1: L2 data parity enable */ #define L2CR_L2SIZ 0x30000000 /* 2-3: L2 size */ #define L2SIZ_2M 0x00000000 #define L2SIZ_256K 0x10000000 #define L2SIZ_512K 0x20000000 #define L2SIZ_1M 0x30000000 #define L2CR_L2CLK 0x0e000000 /* 4-6: L2 clock ratio */ #define L2CLK_DIS 0x00000000 /* disable L2 clock */ #define L2CLK_10 0x02000000 /* core clock / 1 */ #define L2CLK_15 0x04000000 /* / 1.5 */ #define L2CLK_20 0x08000000 /* / 2 */ #define L2CLK_25 0x0a000000 /* / 2.5 */ #define L2CLK_30 0x0c000000 /* / 3 */ #define L2CR_L2RAM 0x01800000 /* 7-8: L2 RAM type */ #define L2RAM_FLOWTHRU_BURST 0x00000000 #define L2RAM_PIPELINE_BURST 0x01000000 #define L2RAM_PIPELINE_LATE 0x01800000 #define L2CR_L2DO 0x00400000 /* 9: L2 data-only. Setting this bit disables instruction caching. */ #define L2CR_L2I 0x00200000 /* 10: L2 global invalidate. */ #define L2CR_L2IO_7450 0x00010000 /* 11: L2 instruction-only (MPC745x). */ #define L2CR_L2CTL 0x00100000 /* 11: L2 RAM control (ZZ enable). Enables automatic operation of the L2ZZ (low-power mode) signal. */ #define L2CR_L2WT 0x00080000 /* 12: L2 write-through. */ #define L2CR_L2TS 0x00040000 /* 13: L2 test support. */ #define L2CR_L2OH 0x00030000 /* 14-15: L2 output hold. */ #define L2CR_L2DO_7450 0x00010000 /* 15: L2 data-only (MPC745x). */ #define L2CR_L2SL 0x00008000 /* 16: L2 DLL slow. */ #define L2CR_L2DF 0x00004000 /* 17: L2 differential clock. */ #define L2CR_L2BYP 0x00002000 /* 18: L2 DLL bypass. */ #define L2CR_L2FA 0x00001000 /* 19: L2 flush assist (for software flush). */ #define L2CR_L2HWF 0x00000800 /* 20: L2 hardware flush. */ #define L2CR_L2IO 0x00000400 /* 21: L2 instruction-only. */ #define L2CR_L2CLKSTP 0x00000200 /* 22: L2 clock stop. */ #define L2CR_L2DRO 0x00000100 /* 23: L2DLL rollover checkstop enable. */ #define L2CR_L2IP 0x00000001 /* 31: L2 global invalidate in */ /* progress (read only). */ #define SPR_L3CR 0x3fa /* .6. L3 Control Register */ #define L3CR_L3E 0x80000000 /* 0: L3 enable */ #define L3CR_L3PE 0x40000000 /* 1: L3 data parity enable */ #define L3CR_L3APE 0x20000000 #define L3CR_L3SIZ 0x10000000 /* 3: L3 size (0=1MB, 1=2MB) */ #define L3CR_L3CLKEN 0x08000000 /* 4: Enables L3_CLK[0:1] */ #define L3CR_L3CLK 0x03800000 #define L3CR_L3IO 0x00400000 #define L3CR_L3CLKEXT 0x00200000 #define L3CR_L3CKSPEXT 0x00100000 #define L3CR_L3OH1 0x00080000 #define L3CR_L3SPO 0x00040000 #define L3CR_L3CKSP 0x00030000 #define L3CR_L3PSP 0x0000e000 #define L3CR_L3REP 0x00001000 #define L3CR_L3HWF 0x00000800 #define L3CR_L3I 0x00000400 /* 21: L3 global invalidate */ #define L3CR_L3RT 0x00000300 #define L3CR_L3NIRCA 0x00000080 #define L3CR_L3DO 0x00000040 #define L3CR_PMEN 0x00000004 #define L3CR_PMSIZ 0x00000003 #define SPR_DCCR 0x3fa /* 4.. Data Cache Cachability Register */ #define SPR_ICCR 0x3fb /* 4.. Instruction Cache Cachability Register */ #define SPR_THRM1 0x3fc /* .6. Thermal Management Register */ #define SPR_THRM2 0x3fd /* .6. Thermal Management Register */ #define SPR_THRM_TIN 0x80000000 /* Thermal interrupt bit (RO) */ #define SPR_THRM_TIV 0x40000000 /* Thermal interrupt valid (RO) */ #define SPR_THRM_THRESHOLD(x) ((x) << 23) /* Thermal sensor threshold */ #define SPR_THRM_TID 0x00000004 /* Thermal interrupt direction */ #define SPR_THRM_TIE 0x00000002 /* Thermal interrupt enable */ #define SPR_THRM_VALID 0x00000001 /* Valid bit */ #define SPR_THRM3 0x3fe /* .6. Thermal Management Register */ #define SPR_THRM_TIMER(x) ((x) << 1) /* Sampling interval timer */ #define SPR_THRM_ENABLE 0x00000001 /* TAU Enable */ #define SPR_FPECR 0x3fe /* .6. Floating-Point Exception Cause Register */ /* Time Base Register declarations */ #define TBR_TBL 0x10c /* 468 Time Base Lower - read */ #define TBR_TBU 0x10d /* 468 Time Base Upper - read */ #define TBR_TBWL 0x11c /* 468 Time Base Lower - supervisor, write */ #define TBR_TBWU 0x11d /* 468 Time Base Upper - supervisor, write */ /* Performance counter declarations */ #define PMC_OVERFLOW 0x80000000 /* Counter has overflowed */ /* The first five countable [non-]events are common to many PMC's */ #define PMCN_NONE 0 /* Count nothing */ #define PMCN_CYCLES 1 /* Processor cycles */ #define PMCN_ICOMP 2 /* Instructions completed */ #define PMCN_TBLTRANS 3 /* TBL bit transitions */ #define PCMN_IDISPATCH 4 /* Instructions dispatched */ /* Similar things for the 970 PMC direct counters */ #define PMC970N_NONE 0x8 /* Count nothing */ #define PMC970N_CYCLES 0xf /* Processor cycles */ #define PMC970N_ICOMP 0x9 /* Instructions completed */ #if defined(BOOKE) #define SPR_MCARU 0x239 /* ..8 Machine Check Address register upper bits */ #define SPR_MCSR 0x23c /* ..8 Machine Check Syndrome register */ #define SPR_MCAR 0x23d /* ..8 Machine Check Address register */ #define SPR_ESR 0x003e /* ..8 Exception Syndrome Register */ #define ESR_PIL 0x08000000 /* Program interrupt - illegal */ #define ESR_PPR 0x04000000 /* Program interrupt - privileged */ #define ESR_PTR 0x02000000 /* Program interrupt - trap */ #define ESR_ST 0x00800000 /* Store operation */ #define ESR_DLK 0x00200000 /* Data storage, D cache locking */ #define ESR_ILK 0x00100000 /* Data storage, I cache locking */ #define ESR_BO 0x00020000 /* Data/instruction storage, byte ordering */ #define ESR_SPE 0x00000080 /* SPE exception bit */ #define SPR_CSRR0 0x03a /* ..8 58 Critical SRR0 */ #define SPR_CSRR1 0x03b /* ..8 59 Critical SRR1 */ #define SPR_MCSRR0 0x23a /* ..8 570 Machine check SRR0 */ #define SPR_MCSRR1 0x23b /* ..8 571 Machine check SRR1 */ #define SPR_DSRR0 0x23e /* ..8 574 Debug SRR0 */ #define SPR_DSRR1 0x23f /* ..8 575 Debug SRR1 */ #define SPR_MMUCR 0x3b2 /* 4.. MMU Control Register */ #define MMUCR_SWOA (0x80000000 >> 7) #define MMUCR_U1TE (0x80000000 >> 9) #define MMUCR_U2SWOAE (0x80000000 >> 10) #define MMUCR_DULXE (0x80000000 >> 12) #define MMUCR_IULXE (0x80000000 >> 13) #define MMUCR_STS (0x80000000 >> 15) #define MMUCR_STID_MASK (0xFF000000 >> 24) #define SPR_MMUCSR0 0x3f4 /* ..8 1012 MMU Control and Status Register 0 */ #define MMUCSR0_L2TLB0_FI 0x04 /* TLB0 flash invalidate */ #define MMUCSR0_L2TLB1_FI 0x02 /* TLB1 flash invalidate */ #define SPR_SVR 0x3ff /* ..8 1023 System Version Register */ #define SVR_MPC8533 0x8034 #define SVR_MPC8533E 0x803c #define SVR_MPC8541 0x8072 #define SVR_MPC8541E 0x807a #define SVR_MPC8548 0x8031 #define SVR_MPC8548E 0x8039 #define SVR_MPC8555 0x8071 #define SVR_MPC8555E 0x8079 #define SVR_MPC8572 0x80e0 #define SVR_MPC8572E 0x80e8 #define SVR_P1011 0x80e5 #define SVR_P1011E 0x80ed #define SVR_P1013 0x80e7 #define SVR_P1013E 0x80ef #define SVR_P1020 0x80e4 #define SVR_P1020E 0x80ec #define SVR_P1022 0x80e6 #define SVR_P1022E 0x80ee #define SVR_P2010 0x80e3 #define SVR_P2010E 0x80eb #define SVR_P2020 0x80e2 #define SVR_P2020E 0x80ea #define SVR_P2041 0x8210 #define SVR_P2041E 0x8218 #define SVR_P3041 0x8211 #define SVR_P3041E 0x8219 #define SVR_P4040 0x8200 #define SVR_P4040E 0x8208 #define SVR_P4080 0x8201 #define SVR_P4080E 0x8209 #define SVR_P5010 0x8221 #define SVR_P5010E 0x8229 #define SVR_P5020 0x8220 #define SVR_P5020E 0x8228 #define SVR_P5021 0x8205 #define SVR_P5021E 0x820d #define SVR_P5040 0x8204 #define SVR_P5040E 0x820c #define SVR_VER(svr) (((svr) >> 16) & 0xffff) #define SPR_PID0 0x030 /* ..8 Process ID Register 0 */ #define SPR_PID1 0x279 /* ..8 Process ID Register 1 */ #define SPR_PID2 0x27a /* ..8 Process ID Register 2 */ #define SPR_TLB0CFG 0x2B0 /* ..8 TLB 0 Config Register */ #define SPR_TLB1CFG 0x2B1 /* ..8 TLB 1 Config Register */ #define TLBCFG_ASSOC_MASK 0xff000000 /* Associativity of TLB */ #define TLBCFG_ASSOC_SHIFT 24 #define TLBCFG_NENTRY_MASK 0x00000fff /* Number of entries in TLB */ #define SPR_IVPR 0x03f /* ..8 Interrupt Vector Prefix Register */ #define SPR_IVOR0 0x190 /* ..8 Critical input */ #define SPR_IVOR1 0x191 /* ..8 Machine check */ #define SPR_IVOR2 0x192 #define SPR_IVOR3 0x193 #define SPR_IVOR4 0x194 #define SPR_IVOR5 0x195 #define SPR_IVOR6 0x196 #define SPR_IVOR7 0x197 #define SPR_IVOR8 0x198 #define SPR_IVOR9 0x199 #define SPR_IVOR10 0x19a #define SPR_IVOR11 0x19b #define SPR_IVOR12 0x19c #define SPR_IVOR13 0x19d #define SPR_IVOR14 0x19e #define SPR_IVOR15 0x19f #define SPR_IVOR32 0x210 #define SPR_IVOR33 0x211 #define SPR_IVOR34 0x212 #define SPR_IVOR35 0x213 #define SPR_MAS0 0x270 /* ..8 MMU Assist Register 0 Book-E/e500 */ #define SPR_MAS1 0x271 /* ..8 MMU Assist Register 1 Book-E/e500 */ #define SPR_MAS2 0x272 /* ..8 MMU Assist Register 2 Book-E/e500 */ #define SPR_MAS3 0x273 /* ..8 MMU Assist Register 3 Book-E/e500 */ #define SPR_MAS4 0x274 /* ..8 MMU Assist Register 4 Book-E/e500 */ #define SPR_MAS5 0x275 /* ..8 MMU Assist Register 5 Book-E */ #define SPR_MAS6 0x276 /* ..8 MMU Assist Register 6 Book-E/e500 */ #define SPR_MAS7 0x3B0 /* ..8 MMU Assist Register 7 Book-E/e500 */ #define SPR_MAS8 0x155 /* ..8 MMU Assist Register 8 Book-E/e500 */ #define SPR_L1CFG0 0x203 /* ..8 L1 cache configuration register 0 */ #define SPR_L1CFG1 0x204 /* ..8 L1 cache configuration register 1 */ #define SPR_CCR1 0x378 #define CCR1_L2COBE 0x00000040 #define DCR_L2DCDCRAI 0x0000 /* L2 D-Cache DCR Address Pointer */ #define DCR_L2DCDCRDI 0x0001 /* L2 D-Cache DCR Data Indirect */ #define DCR_L2CR0 0x00 /* L2 Cache Configuration Register 0 */ #define L2CR0_AS 0x30000000 #define SPR_L1CSR0 0x3F2 /* ..8 L1 Cache Control and Status Register 0 */ #define L1CSR0_DCPE 0x00010000 /* Data Cache Parity Enable */ #define L1CSR0_DCLFR 0x00000100 /* Data Cache Lock Bits Flash Reset */ #define L1CSR0_DCFI 0x00000002 /* Data Cache Flash Invalidate */ #define L1CSR0_DCE 0x00000001 /* Data Cache Enable */ #define SPR_L1CSR1 0x3F3 /* ..8 L1 Cache Control and Status Register 1 */ #define L1CSR1_ICPE 0x00010000 /* Instruction Cache Parity Enable */ #define L1CSR1_ICUL 0x00000400 /* Instr Cache Unable to Lock */ #define L1CSR1_ICLFR 0x00000100 /* Instruction Cache Lock Bits Flash Reset */ #define L1CSR1_ICFI 0x00000002 /* Instruction Cache Flash Invalidate */ #define L1CSR1_ICE 0x00000001 /* Instruction Cache Enable */ #define SPR_L2CSR0 0x3F9 /* ..8 L2 Cache Control and Status Register 0 */ #define L2CSR0_L2E 0x80000000 /* L2 Cache Enable */ #define L2CSR0_L2PE 0x40000000 /* L2 Cache Parity Enable */ #define L2CSR0_L2FI 0x00200000 /* L2 Cache Flash Invalidate */ #define L2CSR0_L2LFC 0x00000400 /* L2 Cache Lock Flags Clear */ #define SPR_BUCSR 0x3F5 /* ..8 Branch Unit Control and Status Register */ #define BUCSR_BPEN 0x00000001 /* Branch Prediction Enable */ #define BUCSR_BBFI 0x00000200 /* Branch Buffer Flash Invalidate */ #endif /* BOOKE */ #endif /* !_POWERPC_SPR_H_ */ diff --git a/sys/powerpc/pseries/mmu_phyp.c b/sys/powerpc/pseries/mmu_phyp.c index 6705ad22341d..e171d399db7e 100644 --- a/sys/powerpc/pseries/mmu_phyp.c +++ b/sys/powerpc/pseries/mmu_phyp.c @@ -1,474 +1,474 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2010 Andreas Tobler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_if.h" #include "moea64_if.h" #include "phyp-hvcall.h" static struct rmlock mphyp_eviction_lock; /* * Kernel MMU interface */ static void mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void mphyp_cpu_bootstrap(mmu_t mmup, int ap); static int64_t mphyp_pte_synch(mmu_t, struct pvo_entry *pvo); static int64_t mphyp_pte_clear(mmu_t, struct pvo_entry *pvo, uint64_t ptebit); static int64_t mphyp_pte_unset(mmu_t, struct pvo_entry *pvo); static int mphyp_pte_insert(mmu_t, struct pvo_entry *pvo); static mmu_method_t mphyp_methods[] = { MMUMETHOD(mmu_bootstrap, mphyp_bootstrap), MMUMETHOD(mmu_cpu_bootstrap, mphyp_cpu_bootstrap), MMUMETHOD(moea64_pte_synch, mphyp_pte_synch), MMUMETHOD(moea64_pte_clear, mphyp_pte_clear), MMUMETHOD(moea64_pte_unset, mphyp_pte_unset), MMUMETHOD(moea64_pte_insert, mphyp_pte_insert), /* XXX: pmap_copy_page, pmap_init_page with H_PAGE_INIT */ { 0, 0 } }; MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, 0, oea64_mmu); static int brokenkvm = 0; static void print_kvm_bug_warning(void *data) { if (brokenkvm) printf("WARNING: Running on a broken hypervisor that does " "not support mandatory H_CLEAR_MOD and H_CLEAR_REF " "hypercalls. Performance will be suboptimal.\n"); } SYSINIT(kvmbugwarn1, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, print_kvm_bug_warning, NULL); SYSINIT(kvmbugwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_kvm_bug_warning, NULL); static void mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { uint64_t final_pteg_count = 0; char buf[8]; uint32_t prop[2]; uint32_t nptlp, shift = 0, slb_encoding = 0; uint32_t lp_size, lp_encoding; struct lpte old; uint64_t vsid; phandle_t dev, node, root; int idx, len, res; rm_init(&mphyp_eviction_lock, "pte eviction"); moea64_early_bootstrap(mmup, kernelstart, kernelend); root = OF_peer(0); dev = OF_child(root); while (dev != 0) { res = OF_getprop(dev, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; dev = OF_peer(dev); } node = OF_child(dev); while (node != 0) { res = OF_getprop(node, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; node = OF_peer(node); } res = OF_getencprop(node, "ibm,pft-size", prop, sizeof(prop)); if (res <= 0) panic("mmu_phyp: unknown PFT size"); final_pteg_count = 1 << prop[1]; res = OF_getencprop(node, "ibm,slb-size", prop, sizeof(prop[0])); if (res > 0) n_slbs = prop[0]; moea64_pteg_count = final_pteg_count / sizeof(struct lpteg); /* Clear any old page table entries */ for (idx = 0; idx < moea64_pteg_count*8; idx++) { phyp_pft_hcall(H_READ, 0, idx, 0, 0, &old.pte_hi, &old.pte_lo, &old.pte_lo); vsid = (old.pte_hi << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) >> 28; if (vsid == VSID_VRMA || vsid == 0 /* Older VRMA */) continue; if (old.pte_hi & LPTE_VALID) phyp_hcall(H_REMOVE, 0, idx, 0); } /* * Scan the large page size property for PAPR compatible machines. * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties' * for the encoding of the property. */ len = OF_getproplen(node, "ibm,segment-page-sizes"); if (len > 0) { /* * We have to use a variable length array on the stack * since we have very limited stack space. */ pcell_t arr[len/sizeof(cell_t)]; res = OF_getencprop(node, "ibm,segment-page-sizes", arr, sizeof(arr)); len /= 4; idx = 0; while (len > 0) { shift = arr[idx]; slb_encoding = arr[idx + 1]; nptlp = arr[idx + 2]; idx += 3; len -= 3; while (len > 0 && nptlp) { lp_size = arr[idx]; lp_encoding = arr[idx+1]; if (slb_encoding == SLBV_L && lp_encoding == 0) break; idx += 2; len -= 2; nptlp--; } if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0) break; } if (len == 0) panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) " "not supported by this system. Please enable huge " "page backing if running under PowerKVM."); moea64_large_page_shift = shift; moea64_large_page_size = 1ULL << lp_size; } moea64_mid_bootstrap(mmup, kernelstart, kernelend); moea64_late_bootstrap(mmup, kernelstart, kernelend); /* Test for broken versions of KVM that don't conform to the spec */ if (phyp_hcall(H_CLEAR_MOD, 0, 0) == H_FUNCTION) brokenkvm = 1; } static void mphyp_cpu_bootstrap(mmu_t mmup, int ap) { struct slb *slb = PCPU_GET(aim.slb); register_t seg0; int i; /* * Install kernel SLB entries */ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < 64; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } } static int64_t mphyp_pte_synch(mmu_t mmu, struct pvo_entry *pvo) { struct lpte pte; uint64_t junk; __asm __volatile("ptesync"); phyp_pft_hcall(H_READ, 0, pvo->pvo_pte.slot, 0, 0, &pte.pte_hi, &pte.pte_lo, &junk); if ((pte.pte_hi & LPTE_AVPN_MASK) != ((pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & LPTE_AVPN_MASK)) return (-1); if (!(pte.pte_hi & LPTE_VALID)) return (-1); return (pte.pte_lo & (LPTE_CHG | LPTE_REF)); } static int64_t mphyp_pte_clear(mmu_t mmu, struct pvo_entry *pvo, uint64_t ptebit) { struct rm_priotracker track; int64_t refchg; uint64_t ptelo, junk; int err; /* * This involves two steps (synch and clear) so we need the entry * not to change in the middle. We are protected against deliberate * unset by virtue of holding the pmap lock. Protection against * incidental unset (page table eviction) comes from holding the * shared eviction lock. */ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); rm_rlock(&mphyp_eviction_lock, &track); refchg = mphyp_pte_synch(mmu, pvo); if (refchg < 0) { rm_runlock(&mphyp_eviction_lock, &track); return (refchg); } if (brokenkvm) { /* * No way to clear either bit, which is total madness. * Pessimistically claim that, once modified, it stays so * forever and that it is never referenced. */ rm_runlock(&mphyp_eviction_lock, &track); return (refchg & ~LPTE_REF); } if (ptebit & LPTE_CHG) { err = phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0, &ptelo, &junk, &junk); KASSERT(err == H_SUCCESS, ("Error clearing page change bit: %d", err)); refchg |= (ptelo & LPTE_CHG); } if (ptebit & LPTE_REF) { err = phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0, &ptelo, &junk, &junk); KASSERT(err == H_SUCCESS, ("Error clearing page reference bit: %d", err)); refchg |= (ptelo & LPTE_REF); } rm_runlock(&mphyp_eviction_lock, &track); return (refchg); } static int64_t mphyp_pte_unset(mmu_t mmu, struct pvo_entry *pvo) { struct lpte pte; uint64_t junk; int err; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); moea64_pte_from_pvo(pvo, &pte); err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot, pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo, &junk); KASSERT(err == H_SUCCESS || err == H_NOT_FOUND, ("Error removing page: %d", err)); if (err == H_NOT_FOUND) { moea64_pte_overflow--; return (-1); } return (pte.pte_lo & (LPTE_REF | LPTE_CHG)); } static uintptr_t mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict) { uint64_t slot, junk, k; struct lpte pt; int i, j; /* Start at a random slot */ i = mftb() % 8; k = -1; for (j = 0; j < 8; j++) { slot = ptegbase + (i + j) % 8; phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi, &pt.pte_lo, &junk); if (pt.pte_hi & LPTE_WIRED) continue; /* This is a candidate, so remember it */ k = slot; /* Try to get a page that has not been used lately */ if (!(pt.pte_hi & LPTE_VALID) || !(pt.pte_lo & LPTE_REF)) { memcpy(to_evict, &pt, sizeof(struct lpte)); return (k); } } if (k == -1) return (k); phyp_pft_hcall(H_READ, 0, k, 0, 0, &to_evict->pte_hi, &to_evict->pte_lo, &junk); return (k); } static int mphyp_pte_insert(mmu_t mmu, struct pvo_entry *pvo) { struct rm_priotracker track; int64_t result; struct lpte evicted, pte; uint64_t index, junk, lastptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); /* Initialize PTE */ moea64_pte_from_pvo(pvo, &pte); evicted.pte_hi = 0; /* Make sure further insertion is locked out during evictions */ rm_rlock(&mphyp_eviction_lock, &track); /* * First try primary hash. */ pvo->pvo_pte.slot &= ~7UL; /* Base slot address */ result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); if (result == H_SUCCESS) { rm_runlock(&mphyp_eviction_lock, &track); pvo->pvo_pte.slot = index; return (0); } KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld " - "(ptegidx: %#zx/%#x, PTE %#lx/%#lx", result, pvo->pvo_pte.slot, + "(ptegidx: %#zx/%#lx, PTE %#lx/%#lx", result, pvo->pvo_pte.slot, moea64_pteg_count, pte.pte_hi, pte.pte_lo)); /* * Next try secondary hash. */ pvo->pvo_vaddr ^= PVO_HID; pte.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); if (result == H_SUCCESS) { rm_runlock(&mphyp_eviction_lock, &track); pvo->pvo_pte.slot = index; return (0); } KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld", result)); /* * Out of luck. Find a PTE to sacrifice. */ /* Lock out all insertions for a bit */ rm_runlock(&mphyp_eviction_lock, &track); rm_wlock(&mphyp_eviction_lock); index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); if (index == -1L) { /* Try other hash table? */ pvo->pvo_vaddr ^= PVO_HID; pte.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); } if (index == -1L) { /* No freeable slots in either PTEG? We're hosed. */ rm_wunlock(&mphyp_eviction_lock); panic("mphyp_pte_insert: overflow"); return (-1); } /* Victim acquired: update page before waving goodbye */ if (evicted.pte_hi & LPTE_VALID) { result = phyp_pft_hcall(H_REMOVE, H_AVPN, index, evicted.pte_hi & LPTE_AVPN_MASK, 0, &junk, &lastptelo, &junk); moea64_pte_overflow++; KASSERT(result == H_SUCCESS, ("Error evicting page: %d", (int)result)); } /* * Set the new PTE. */ result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); rm_wunlock(&mphyp_eviction_lock); /* All clear */ pvo->pvo_pte.slot = index; if (result == H_SUCCESS) return (0); panic("Page replacement error: %ld", result); return (result); }