Index: sys/powerpc/booke/pmap.c =================================================================== --- sys/powerpc/booke/pmap.c +++ sys/powerpc/booke/pmap.c @@ -221,25 +221,7 @@ #define TLB1_ENTRIES (tlb1_entries) -/* - * Base of the pmap_mapdev() region. On 32-bit it immediately follows the - * userspace address range. On On 64-bit it's far above, at (1 << 63), and - * ranges up to the DMAP, giving 62 bits of PA allowed. This is far larger than - * the widest Book-E address bus, the e6500 has a 40-bit PA space. This allows - * us to map akin to the DMAP, with addresses identical to the PA, offset by the - * base. - */ -#ifdef __powerpc64__ -#define VM_MAPDEV_BASE 0x8000000000000000 -#define VM_MAPDEV_PA_MAX 0x4000000000000000 /* Don't encroach on DMAP */ -#else -#define VM_MAPDEV_BASE ((vm_offset_t)VM_MAXUSER_ADDRESS + PAGE_SIZE) -#endif - -static vm_offset_t tlb1_map_base = VM_MAPDEV_BASE; - static tlbtid_t tid_alloc(struct pmap *); -static void tid_flush(tlbtid_t tid); #ifdef DDB #ifdef __powerpc64__ @@ -254,6 +236,8 @@ static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t, int); +static __inline uint32_t tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma); + static vm_size_t tsize2size(unsigned int); static unsigned int size2tsize(vm_size_t); static unsigned long ilog2(unsigned long); @@ -279,26 +263,6 @@ #define PMAP_SHPGPERPROC 200 #endif -#ifdef __powerpc64__ -#define PMAP_ROOT_SIZE (sizeof(pte_t***) * PP2D_NENTRIES) -static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **, - unsigned int, boolean_t); -static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int, vm_page_t); -static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int); -static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t); -#else -#define PMAP_ROOT_SIZE (sizeof(pte_t**) * PDIR_NENTRIES) -static void ptbl_init(void); -static struct ptbl_buf *ptbl_buf_alloc(void); -static void ptbl_buf_free(struct ptbl_buf *); -static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); - -static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t); -static void ptbl_free(mmu_t, pmap_t, unsigned int); -static void ptbl_hold(mmu_t, pmap_t, unsigned int); -static int ptbl_unhold(mmu_t, pmap_t, unsigned int); -#endif - static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); @@ -312,25 +276,8 @@ static void booke_pmap_init_qpages(void); -struct ptbl_buf { - TAILQ_ENTRY(ptbl_buf) link; /* list link */ - vm_offset_t kva; /* va of mapping */ -}; - -#ifndef __powerpc64__ -/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ -#define PTBL_BUFS (128 * 16) - -/* ptbl free list and a lock used for access synchronization. */ -static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; -static struct mtx ptbl_buf_freelist_lock; - -/* Base address of kva space allocated fot ptbl bufs. */ -static vm_offset_t ptbl_buf_pool_vabase; - -/* Pointer to ptbl_buf structures. 
*/ -static struct ptbl_buf *ptbl_bufs; -#endif +static inline void tlb_miss_lock(void); +static inline void tlb_miss_unlock(void); #ifdef SMP extern tlb_entry_t __boot_tlb1[]; @@ -476,6 +423,14 @@ MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0); +#ifdef __powerpc64__ +#include "pmap_64.c" +#else +#include "pmap_32.c" +#endif + +static vm_offset_t tlb1_map_base = VM_MAPDEV_BASE; + static __inline uint32_t tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) { @@ -580,972 +535,92 @@ { uint32_t tlb1_cfg; - tlb1_cfg = mfspr(SPR_TLB1CFG); - tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; -} - -/**************************************************************************/ -/* Page table related */ -/**************************************************************************/ - -#ifdef __powerpc64__ -/* Initialize pool of kva ptbl buffers. */ -static void -ptbl_init(void) -{ -} - -/* Get a pointer to a PTE in a page table. */ -static __inline pte_t * -pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) -{ - pte_t **pdir; - pte_t *ptbl; - - KASSERT((pmap != NULL), ("pte_find: invalid pmap")); - - pdir = pmap->pm_pp2d[PP2D_IDX(va)]; - if (!pdir) - return NULL; - ptbl = pdir[PDIR_IDX(va)]; - return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL); -} - -/* - * allocate a page of pointers to page directories, do not preallocate the - * page tables - */ -static pte_t ** -pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep) -{ - vm_page_t m; - pte_t **pdir; - int req; - - req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; - while ((m = vm_page_alloc(NULL, pp2d_idx, req)) == NULL) { - PMAP_UNLOCK(pmap); - if (nosleep) { - return (NULL); - } - vm_wait(NULL); - PMAP_LOCK(pmap); - } - - /* Zero whole ptbl. */ - pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); - mmu_booke_zero_page(mmu, m); - - return (pdir); -} - -/* Free pdir pages and invalidate pdir entry. */ -static void -pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, vm_page_t m) -{ - pte_t **pdir; - - pdir = pmap->pm_pp2d[pp2d_idx]; - - KASSERT((pdir != NULL), ("pdir_free: null pdir")); - - pmap->pm_pp2d[pp2d_idx] = NULL; - - vm_wire_sub(1); - vm_page_free_zero(m); -} - -/* - * Decrement pdir pages hold count and attempt to free pdir pages. Called - * when removing directory entry from pdir. - * - * Return 1 if pdir pages were freed. - */ -static int -pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx) -{ - pte_t **pdir; - vm_paddr_t pa; - vm_page_t m; - - KASSERT((pmap != kernel_pmap), - ("pdir_unhold: unholding kernel pdir!")); - - pdir = pmap->pm_pp2d[pp2d_idx]; - - /* decrement hold count */ - pa = DMAP_TO_PHYS((vm_offset_t) pdir); - m = PHYS_TO_VM_PAGE(pa); - - /* - * Free pdir page if there are no dir entries in this pdir. - */ - m->ref_count--; - if (m->ref_count == 0) { - pdir_free(mmu, pmap, pp2d_idx, m); - return (1); - } - return (0); -} - -/* - * Increment hold count for pdir pages. This routine is used when new ptlb - * entry is being inserted into pdir. - */ -static void -pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir) -{ - vm_page_t m; - - KASSERT((pmap != kernel_pmap), - ("pdir_hold: holding kernel pdir!")); - - KASSERT((pdir != NULL), ("pdir_hold: null pdir")); - - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pdir)); - m->ref_count++; -} - -/* Allocate page table. 
*/ -static pte_t * -ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, - boolean_t nosleep) -{ - vm_page_t m; - pte_t *ptbl; - int req; - - KASSERT((pdir[pdir_idx] == NULL), - ("%s: valid ptbl entry exists!", __func__)); - - req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; - while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) { - if (nosleep) - return (NULL); - PMAP_UNLOCK(pmap); - rw_wunlock(&pvh_global_lock); - vm_wait(NULL); - rw_wlock(&pvh_global_lock); - PMAP_LOCK(pmap); - } - - /* Zero whole ptbl. */ - ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); - mmu_booke_zero_page(mmu, m); - - return (ptbl); -} - -/* Free ptbl pages and invalidate pdir entry. */ -static void -ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, vm_page_t m) -{ - pte_t *ptbl; - - ptbl = pdir[pdir_idx]; - - KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); - - pdir[pdir_idx] = NULL; - - vm_wire_sub(1); - vm_page_free_zero(m); -} - -/* - * Decrement ptbl pages hold count and attempt to free ptbl pages. Called - * when removing pte entry from ptbl. - * - * Return 1 if ptbl pages were freed. - */ -static int -ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va) -{ - pte_t *ptbl; - vm_page_t m; - u_int pp2d_idx; - pte_t **pdir; - u_int pdir_idx; - - pp2d_idx = PP2D_IDX(va); - pdir_idx = PDIR_IDX(va); - - KASSERT((pmap != kernel_pmap), - ("ptbl_unhold: unholding kernel ptbl!")); - - pdir = pmap->pm_pp2d[pp2d_idx]; - ptbl = pdir[pdir_idx]; - - /* decrement hold count */ - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); - - /* - * Free ptbl pages if there are no pte entries in this ptbl. - * ref_count has the same value for all ptbl pages, so check the - * last page. - */ - m->ref_count--; - if (m->ref_count == 0) { - ptbl_free(mmu, pmap, pdir, pdir_idx, m); - pdir_unhold(mmu, pmap, pp2d_idx); - return (1); - } - return (0); -} - -/* - * Increment hold count for ptbl pages. This routine is used when new pte - * entry is being inserted into ptbl. - */ -static void -ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) -{ - pte_t *ptbl; - vm_page_t m; - - KASSERT((pmap != kernel_pmap), - ("ptbl_hold: holding kernel ptbl!")); - - ptbl = pdir[pdir_idx]; - - KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); - - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); - m->ref_count++; -} -#else - -/* Initialize pool of kva ptbl buffers. */ -static void -ptbl_init(void) -{ - int i; - - CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, - (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); - CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", - __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); - - mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); - TAILQ_INIT(&ptbl_buf_freelist); - - for (i = 0; i < PTBL_BUFS; i++) { - ptbl_bufs[i].kva = - ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; - TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); - } -} - -/* Get a ptbl_buf from the freelist. */ -static struct ptbl_buf * -ptbl_buf_alloc(void) -{ - struct ptbl_buf *buf; - - mtx_lock(&ptbl_buf_freelist_lock); - buf = TAILQ_FIRST(&ptbl_buf_freelist); - if (buf != NULL) - TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); - mtx_unlock(&ptbl_buf_freelist_lock); - - CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); - - return (buf); -} - -/* Return ptbl buff to free pool. 
*/ -static void -ptbl_buf_free(struct ptbl_buf *buf) -{ - - CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); - - mtx_lock(&ptbl_buf_freelist_lock); - TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); - mtx_unlock(&ptbl_buf_freelist_lock); -} - -/* - * Search the list of allocated ptbl bufs and find on list of allocated ptbls - */ -static void -ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) -{ - struct ptbl_buf *pbuf; - - CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); - - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - - TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) - if (pbuf->kva == (vm_offset_t)ptbl) { - /* Remove from pmap ptbl buf list. */ - TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); - - /* Free corresponding ptbl buf. */ - ptbl_buf_free(pbuf); - break; - } -} - -/* Allocate page table. */ -static pte_t * -ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) -{ - vm_page_t mtbl[PTBL_PAGES]; - vm_page_t m; - struct ptbl_buf *pbuf; - unsigned int pidx; - pte_t *ptbl; - int i, j; - - CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, - (pmap == kernel_pmap), pdir_idx); - - KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), - ("ptbl_alloc: invalid pdir_idx")); - KASSERT((pmap->pm_pdir[pdir_idx] == NULL), - ("pte_alloc: valid ptbl entry exists!")); - - pbuf = ptbl_buf_alloc(); - if (pbuf == NULL) - panic("pte_alloc: couldn't alloc kernel virtual memory"); - - ptbl = (pte_t *)pbuf->kva; - - CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); - - for (i = 0; i < PTBL_PAGES; i++) { - pidx = (PTBL_PAGES * pdir_idx) + i; - while ((m = vm_page_alloc(NULL, pidx, - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { - if (nosleep) { - ptbl_free_pmap_ptbl(pmap, ptbl); - for (j = 0; j < i; j++) - vm_page_free(mtbl[j]); - vm_wire_sub(i); - return (NULL); - } - PMAP_UNLOCK(pmap); - rw_wunlock(&pvh_global_lock); - vm_wait(NULL); - rw_wlock(&pvh_global_lock); - PMAP_LOCK(pmap); - } - mtbl[i] = m; - } - - /* Map allocated pages into kernel_pmap. */ - mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES); - - /* Zero whole ptbl. */ - bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); - - /* Add pbuf to the pmap ptbl bufs list. */ - TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); - - return (ptbl); -} - -/* Free ptbl pages and invalidate pdir entry. */ -static void -ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) -{ - pte_t *ptbl; - vm_paddr_t pa; - vm_offset_t va; - vm_page_t m; - int i; - - CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, - (pmap == kernel_pmap), pdir_idx); - - KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), - ("ptbl_free: invalid pdir_idx")); - - ptbl = pmap->pm_pdir[pdir_idx]; - - CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); - - KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); - - /* - * Invalidate the pdir entry as soon as possible, so that other CPUs - * don't attempt to look up the page tables we are releasing. - */ - mtx_lock_spin(&tlbivax_mutex); - tlb_miss_lock(); - - pmap->pm_pdir[pdir_idx] = NULL; - - tlb_miss_unlock(); - mtx_unlock_spin(&tlbivax_mutex); - - for (i = 0; i < PTBL_PAGES; i++) { - va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); - pa = pte_vatopa(mmu, kernel_pmap, va); - m = PHYS_TO_VM_PAGE(pa); - vm_page_free_zero(m); - vm_wire_sub(1); - mmu_booke_kremove(mmu, va); - } - - ptbl_free_pmap_ptbl(pmap, ptbl); -} - -/* - * Decrement ptbl pages hold count and attempt to free ptbl pages. - * Called when removing pte entry from ptbl. - * - * Return 1 if ptbl pages were freed. 
- */ -static int -ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) -{ - pte_t *ptbl; - vm_paddr_t pa; - vm_page_t m; - int i; - - CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, - (pmap == kernel_pmap), pdir_idx); - - KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), - ("ptbl_unhold: invalid pdir_idx")); - KASSERT((pmap != kernel_pmap), - ("ptbl_unhold: unholding kernel ptbl!")); - - ptbl = pmap->pm_pdir[pdir_idx]; - - //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); - KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), - ("ptbl_unhold: non kva ptbl")); - - /* decrement hold count */ - for (i = 0; i < PTBL_PAGES; i++) { - pa = pte_vatopa(mmu, kernel_pmap, - (vm_offset_t)ptbl + (i * PAGE_SIZE)); - m = PHYS_TO_VM_PAGE(pa); - m->ref_count--; - } - - /* - * Free ptbl pages if there are no pte etries in this ptbl. - * ref_count has the same value for all ptbl pages, so check the last - * page. - */ - if (m->ref_count == 0) { - ptbl_free(mmu, pmap, pdir_idx); - - //debugf("ptbl_unhold: e (freed ptbl)\n"); - return (1); - } - - return (0); -} - -/* - * Increment hold count for ptbl pages. This routine is used when a new pte - * entry is being inserted into the ptbl. - */ -static void -ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) -{ - vm_paddr_t pa; - pte_t *ptbl; - vm_page_t m; - int i; - - CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, - pdir_idx); - - KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), - ("ptbl_hold: invalid pdir_idx")); - KASSERT((pmap != kernel_pmap), - ("ptbl_hold: holding kernel ptbl!")); - - ptbl = pmap->pm_pdir[pdir_idx]; - - KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); - - for (i = 0; i < PTBL_PAGES; i++) { - pa = pte_vatopa(mmu, kernel_pmap, - (vm_offset_t)ptbl + (i * PAGE_SIZE)); - m = PHYS_TO_VM_PAGE(pa); - m->ref_count++; - } -} -#endif - -/* Allocate pv_entry structure. */ -pv_entry_t -pv_alloc(void) -{ - pv_entry_t pv; - - pv_entry_count++; - if (pv_entry_count > pv_entry_high_water) - pagedaemon_wakeup(0); /* XXX powerpc NUMA */ - pv = uma_zalloc(pvzone, M_NOWAIT); - - return (pv); -} - -/* Free pv_entry structure. */ -static __inline void -pv_free(pv_entry_t pve) -{ - - pv_entry_count--; - uma_zfree(pvzone, pve); -} - - -/* Allocate and initialize pv_entry structure. */ -static void -pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) -{ - pv_entry_t pve; - - //int su = (pmap == kernel_pmap); - //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, - // (u_int32_t)pmap, va, (u_int32_t)m); - - pve = pv_alloc(); - if (pve == NULL) - panic("pv_insert: no pv entries!"); - - pve->pv_pmap = pmap; - pve->pv_va = va; - - /* add to pv_list */ - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - rw_assert(&pvh_global_lock, RA_WLOCKED); - - TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); - - //debugf("pv_insert: e\n"); -} - -/* Destroy pv entry. 
*/ -static void -pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) -{ - pv_entry_t pve; - - //int su = (pmap == kernel_pmap); - //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); - - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - rw_assert(&pvh_global_lock, RA_WLOCKED); - - /* find pv entry */ - TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { - if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { - /* remove from pv_list */ - TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_aflag_clear(m, PGA_WRITEABLE); - - /* free pv entry struct */ - pv_free(pve); - break; - } - } - - //debugf("pv_remove: e\n"); -} - -#ifdef __powerpc64__ -/* - * Clean pte entry, try to free page table page if requested. - * - * Return 1 if ptbl pages were freed, otherwise return 0. - */ -static int -pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) -{ - vm_page_t m; - pte_t *pte; - - pte = pte_find(mmu, pmap, va); - KASSERT(pte != NULL, ("%s: NULL pte", __func__)); - - if (!PTE_ISVALID(pte)) - return (0); - - /* Get vm_page_t for mapped pte. */ - m = PHYS_TO_VM_PAGE(PTE_PA(pte)); - - if (PTE_ISWIRED(pte)) - pmap->pm_stats.wired_count--; - - /* Handle managed entry. */ - if (PTE_ISMANAGED(pte)) { - - /* Handle modified pages. */ - if (PTE_ISMODIFIED(pte)) - vm_page_dirty(m); - - /* Referenced pages. */ - if (PTE_ISREFERENCED(pte)) - vm_page_aflag_set(m, PGA_REFERENCED); - - /* Remove pv_entry from pv_list. */ - pv_remove(pmap, va, m); - } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { - pv_remove(pmap, va, m); - if (TAILQ_EMPTY(&m->md.pv_list)) - m->md.pv_tracked = false; - } - mtx_lock_spin(&tlbivax_mutex); - tlb_miss_lock(); - - tlb0_flush_entry(va); - *pte = 0; - - tlb_miss_unlock(); - mtx_unlock_spin(&tlbivax_mutex); - - pmap->pm_stats.resident_count--; - - if (flags & PTBL_UNHOLD) { - return (ptbl_unhold(mmu, pmap, va)); - } - return (0); -} - -/* - * Insert PTE for a given page and virtual address. - */ -static int -pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, - boolean_t nosleep) -{ - unsigned int pp2d_idx = PP2D_IDX(va); - unsigned int pdir_idx = PDIR_IDX(va); - unsigned int ptbl_idx = PTBL_IDX(va); - pte_t *ptbl, *pte, pte_tmp; - pte_t **pdir; - - /* Get the page directory pointer. */ - pdir = pmap->pm_pp2d[pp2d_idx]; - if (pdir == NULL) - pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep); - - /* Get the page table pointer. */ - ptbl = pdir[pdir_idx]; - - if (ptbl == NULL) { - /* Allocate page table pages. */ - ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep); - if (ptbl == NULL) { - KASSERT(nosleep, ("nosleep and NULL ptbl")); - return (ENOMEM); - } - pte = &ptbl[ptbl_idx]; - } else { - /* - * Check if there is valid mapping for requested va, if there - * is, remove it. - */ - pte = &ptbl[ptbl_idx]; - if (PTE_ISVALID(pte)) { - pte_remove(mmu, pmap, va, PTBL_HOLD); - } else { - /* - * pte is not used, increment hold count for ptbl - * pages. - */ - if (pmap != kernel_pmap) - ptbl_hold(mmu, pmap, pdir, pdir_idx); - } - } - - if (pdir[pdir_idx] == NULL) { - if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL) - pdir_hold(mmu, pmap, pdir); - pdir[pdir_idx] = ptbl; - } - if (pmap->pm_pp2d[pp2d_idx] == NULL) - pmap->pm_pp2d[pp2d_idx] = pdir; - - /* - * Insert pv_entry into pv_list for mapped page if part of managed - * memory. - */ - if ((m->oflags & VPO_UNMANAGED) == 0) { - flags |= PTE_MANAGED; - - /* Create and insert pv entry. 
*/ - pv_insert(pmap, va, m); - } - - pmap->pm_stats.resident_count++; - - pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); - pte_tmp |= (PTE_VALID | flags); - - mtx_lock_spin(&tlbivax_mutex); - tlb_miss_lock(); - - tlb0_flush_entry(va); - *pte = pte_tmp; - - tlb_miss_unlock(); - mtx_unlock_spin(&tlbivax_mutex); - - return (0); -} - -/* Return the pa for the given pmap/va. */ -static vm_paddr_t -pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) -{ - vm_paddr_t pa = 0; - pte_t *pte; - - pte = pte_find(mmu, pmap, va); - if ((pte != NULL) && PTE_ISVALID(pte)) - pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); - return (pa); -} - - -/* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ -static void -kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) -{ - int i, j; - vm_offset_t va; - pte_t *pte; - - va = addr; - /* Initialize kernel pdir */ - for (i = 0; i < kernel_pdirs; i++) { - kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] = - (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES)); - for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES)); - j < PDIR_NENTRIES; j++) { - kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] = - (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE) + - (((i * PDIR_NENTRIES) + j) * PAGE_SIZE)); - } - } - - /* - * Fill in PTEs covering kernel code and data. They are not required - * for address translation, as this area is covered by static TLB1 - * entries, but for pte_vatopa() to work correctly with kernel area - * addresses. - */ - for (va = addr; va < data_end; va += PAGE_SIZE) { - pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); - *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); - *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | - PTE_VALID | PTE_PS_4KB; - } -} -#else -/* - * Clean pte entry, try to free page table page if requested. - * - * Return 1 if ptbl pages were freed, otherwise return 0. - */ -static int -pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags) -{ - unsigned int pdir_idx = PDIR_IDX(va); - unsigned int ptbl_idx = PTBL_IDX(va); - vm_page_t m; - pte_t *ptbl; - pte_t *pte; - - //int su = (pmap == kernel_pmap); - //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", - // su, (u_int32_t)pmap, va, flags); - - ptbl = pmap->pm_pdir[pdir_idx]; - KASSERT(ptbl, ("pte_remove: null ptbl")); - - pte = &ptbl[ptbl_idx]; - - if (pte == NULL || !PTE_ISVALID(pte)) - return (0); - - if (PTE_ISWIRED(pte)) - pmap->pm_stats.wired_count--; - - /* Get vm_page_t for mapped pte. */ - m = PHYS_TO_VM_PAGE(PTE_PA(pte)); - - /* Handle managed entry. */ - if (PTE_ISMANAGED(pte)) { - - if (PTE_ISMODIFIED(pte)) - vm_page_dirty(m); - - if (PTE_ISREFERENCED(pte)) - vm_page_aflag_set(m, PGA_REFERENCED); - - pv_remove(pmap, va, m); - } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { - /* - * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is - * used. This is needed by the NCSW support code for fast - * VA<->PA translation. - */ - pv_remove(pmap, va, m); - if (TAILQ_EMPTY(&m->md.pv_list)) - m->md.pv_tracked = false; - } - - mtx_lock_spin(&tlbivax_mutex); - tlb_miss_lock(); - - tlb0_flush_entry(va); - *pte = 0; - - tlb_miss_unlock(); - mtx_unlock_spin(&tlbivax_mutex); - - pmap->pm_stats.resident_count--; - - if (flags & PTBL_UNHOLD) { - //debugf("pte_remove: e (unhold)\n"); - return (ptbl_unhold(mmu, pmap, pdir_idx)); - } - - //debugf("pte_remove: e\n"); - return (0); -} - -/* - * Insert PTE for a given page and virtual address. 
- */ -static int -pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, - boolean_t nosleep) -{ - unsigned int pdir_idx = PDIR_IDX(va); - unsigned int ptbl_idx = PTBL_IDX(va); - pte_t *ptbl, *pte, pte_tmp; - - CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, - pmap == kernel_pmap, pmap, va); - - /* Get the page table pointer. */ - ptbl = pmap->pm_pdir[pdir_idx]; - - if (ptbl == NULL) { - /* Allocate page table pages. */ - ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep); - if (ptbl == NULL) { - KASSERT(nosleep, ("nosleep and NULL ptbl")); - return (ENOMEM); - } - pmap->pm_pdir[pdir_idx] = ptbl; - pte = &ptbl[ptbl_idx]; - } else { - /* - * Check if there is valid mapping for requested - * va, if there is, remove it. - */ - pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; - if (PTE_ISVALID(pte)) { - pte_remove(mmu, pmap, va, PTBL_HOLD); - } else { - /* - * pte is not used, increment hold count - * for ptbl pages. - */ - if (pmap != kernel_pmap) - ptbl_hold(mmu, pmap, pdir_idx); - } - } - - /* - * Insert pv_entry into pv_list for mapped page if part of managed - * memory. - */ - if ((m->oflags & VPO_UNMANAGED) == 0) { - flags |= PTE_MANAGED; - - /* Create and insert pv entry. */ - pv_insert(pmap, va, m); - } + tlb1_cfg = mfspr(SPR_TLB1CFG); + tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; +} - pmap->pm_stats.resident_count++; - - pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); - pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ +/**************************************************************************/ +/* Page table related */ +/**************************************************************************/ - mtx_lock_spin(&tlbivax_mutex); - tlb_miss_lock(); +/* Allocate pv_entry structure. */ +pv_entry_t +pv_alloc(void) +{ + pv_entry_t pv; - tlb0_flush_entry(va); - *pte = pte_tmp; + pv_entry_count++; + if (pv_entry_count > pv_entry_high_water) + pagedaemon_wakeup(0); /* XXX powerpc NUMA */ + pv = uma_zalloc(pvzone, M_NOWAIT); - tlb_miss_unlock(); - mtx_unlock_spin(&tlbivax_mutex); - return (0); + return (pv); } -/* Return the pa for the given pmap/va. */ -static vm_paddr_t -pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) +/* Free pv_entry structure. */ +static __inline void +pv_free(pv_entry_t pve) { - vm_paddr_t pa = 0; - pte_t *pte; - pte = pte_find(mmu, pmap, va); - if ((pte != NULL) && PTE_ISVALID(pte)) - pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); - return (pa); + pv_entry_count--; + uma_zfree(pvzone, pve); } -/* Get a pointer to a PTE in a page table. */ -static pte_t * -pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) + +/* Allocate and initialize pv_entry structure. */ +static void +pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) { - unsigned int pdir_idx = PDIR_IDX(va); - unsigned int ptbl_idx = PTBL_IDX(va); + pv_entry_t pve; + + //int su = (pmap == kernel_pmap); + //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, + // (u_int32_t)pmap, va, (u_int32_t)m); + + pve = pv_alloc(); + if (pve == NULL) + panic("pv_insert: no pv entries!"); + + pve->pv_pmap = pmap; + pve->pv_va = va; - KASSERT((pmap != NULL), ("pte_find: invalid pmap")); + /* add to pv_list */ + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + rw_assert(&pvh_global_lock, RA_WLOCKED); - if (pmap->pm_pdir[pdir_idx]) - return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); + TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); - return (NULL); + //debugf("pv_insert: e\n"); } -/* Set up kernel page tables. */ +/* Destroy pv entry. 
*/ static void -kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) +pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) { - int i; - vm_offset_t va; - pte_t *pte; + pv_entry_t pve; - /* Initialize kernel pdir */ - for (i = 0; i < kernel_ptbls; i++) - kernel_pmap->pm_pdir[kptbl_min + i] = - (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES)); + //int su = (pmap == kernel_pmap); + //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); - /* - * Fill in PTEs covering kernel code and data. They are not required - * for address translation, as this area is covered by static TLB1 - * entries, but for pte_vatopa() to work correctly with kernel area - * addresses. - */ - for (va = addr; va < data_end; va += PAGE_SIZE) { - pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); - *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); - *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | - PTE_VALID | PTE_PS_4KB; + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + rw_assert(&pvh_global_lock, RA_WLOCKED); + + /* find pv entry */ + TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { + if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { + /* remove from pv_list */ + TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + + /* free pv entry struct */ + pv_free(pve); + break; + } } + + //debugf("pv_remove: e\n"); } -#endif /**************************************************************************/ /* PMAP related */ @@ -2227,53 +1302,6 @@ PCPU_SET(curpmap, pmap); } -/* - * Initialize a preallocated and zeroed pmap structure, - * such as one in a vmspace structure. - */ -static void -mmu_booke_pinit(mmu_t mmu, pmap_t pmap) -{ - int i; - - CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, - curthread->td_proc->p_pid, curthread->td_proc->p_comm); - - KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); - - for (i = 0; i < MAXCPU; i++) - pmap->pm_tid[i] = TID_NONE; - CPU_ZERO(&kernel_pmap->pm_active); - bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); -#ifdef __powerpc64__ - pmap->pm_pp2d = uma_zalloc(ptbl_root_zone, M_WAITOK); - bzero(pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES); -#else - pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK); - bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); - TAILQ_INIT(&pmap->pm_ptbl_list); -#endif -} - -/* - * Release any resources held by the given physical map. - * Called when a pmap initialized by mmu_booke_pinit is being released. - * Should only be called if the map contains no valid mappings. - */ -static void -mmu_booke_release(mmu_t mmu, pmap_t pmap) -{ - - KASSERT(pmap->pm_stats.resident_count == 0, - ("pmap_release: pmap resident count %ld != 0", - pmap->pm_stats.resident_count)); -#ifdef __powerpc64__ - uma_zfree(ptbl_root_zone, pmap->pm_pp2d); -#else - uma_zfree(ptbl_root_zone, pmap->pm_pdir); -#endif -} - /* * Insert the given physical page at the specified virtual address in the * target physical map with the protection requested. If specified the page @@ -2763,61 +1791,6 @@ rw_wunlock(&pvh_global_lock); } -static void -mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) -{ - pte_t *pte; - vm_paddr_t pa = 0; - int sync_sz, valid; -#ifndef __powerpc64__ - pmap_t pmap; - vm_page_t m; - vm_offset_t addr; - int active; -#endif - -#ifndef __powerpc64__ - rw_wlock(&pvh_global_lock); - pmap = PCPU_GET(curpmap); - active = (pm == kernel_pmap || pm == pmap) ? 
1 : 0; -#endif - while (sz > 0) { - PMAP_LOCK(pm); - pte = pte_find(mmu, pm, va); - valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; - if (valid) - pa = PTE_PA(pte); - PMAP_UNLOCK(pm); - sync_sz = PAGE_SIZE - (va & PAGE_MASK); - sync_sz = min(sync_sz, sz); - if (valid) { -#ifdef __powerpc64__ - pa += (va & PAGE_MASK); - __syncicache((void *)PHYS_TO_DMAP(pa), sync_sz); -#else - if (!active) { - /* Create a mapping in the active pmap. */ - addr = 0; - m = PHYS_TO_VM_PAGE(pa); - PMAP_LOCK(pmap); - pte_enter(mmu, pmap, m, addr, - PTE_SR | PTE_VALID, FALSE); - addr += (va & PAGE_MASK); - __syncicache((void *)addr, sync_sz); - pte_remove(mmu, pmap, addr, PTBL_UNHOLD); - PMAP_UNLOCK(pmap); - } else - __syncicache((void *)va, sync_sz); -#endif - } - va += sync_sz; - sz -= sync_sz; - } -#ifndef __powerpc64__ - rw_wunlock(&pvh_global_lock); -#endif -} - /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given @@ -2861,204 +1834,6 @@ TAILQ_INIT(&m->md.pv_list); } -/* - * mmu_booke_zero_page_area zeros the specified hardware page by - * mapping it into virtual memory and using bzero to clear - * its contents. - * - * off and size must reside within a single page. - */ -static void -mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) -{ - vm_offset_t va; - - /* XXX KASSERT off and size are within a single page? */ - -#ifdef __powerpc64__ - va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); - bzero((caddr_t)va + off, size); -#else - mtx_lock(&zero_page_mutex); - va = zero_page_va; - - mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); - bzero((caddr_t)va + off, size); - mmu_booke_kremove(mmu, va); - - mtx_unlock(&zero_page_mutex); -#endif -} - -/* - * mmu_booke_zero_page zeros the specified hardware page. - */ -static void -mmu_booke_zero_page(mmu_t mmu, vm_page_t m) -{ - vm_offset_t off, va; - -#ifdef __powerpc64__ - va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); - - for (off = 0; off < PAGE_SIZE; off += cacheline_size) - __asm __volatile("dcbz 0,%0" :: "r"(va + off)); -#else - va = zero_page_va; - mtx_lock(&zero_page_mutex); - - mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); - - for (off = 0; off < PAGE_SIZE; off += cacheline_size) - __asm __volatile("dcbz 0,%0" :: "r"(va + off)); - - mmu_booke_kremove(mmu, va); - - mtx_unlock(&zero_page_mutex); -#endif -} - -/* - * mmu_booke_copy_page copies the specified (machine independent) page by - * mapping the page into virtual memory and using memcopy to copy the page, - * one machine dependent page at a time. 
- */ -static void -mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) -{ - vm_offset_t sva, dva; - -#ifdef __powerpc64__ - sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm)); - dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm)); - memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); -#else - sva = copy_page_src_va; - dva = copy_page_dst_va; - - mtx_lock(©_page_mutex); - mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); - mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); - - memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); - - mmu_booke_kremove(mmu, dva); - mmu_booke_kremove(mmu, sva); - mtx_unlock(©_page_mutex); -#endif -} - -static inline void -mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, - vm_page_t *mb, vm_offset_t b_offset, int xfersize) -{ - void *a_cp, *b_cp; - vm_offset_t a_pg_offset, b_pg_offset; - int cnt; - -#ifdef __powerpc64__ - vm_page_t pa, pb; - - while (xfersize > 0) { - a_pg_offset = a_offset & PAGE_MASK; - pa = ma[a_offset >> PAGE_SHIFT]; - b_pg_offset = b_offset & PAGE_MASK; - pb = mb[b_offset >> PAGE_SHIFT]; - cnt = min(xfersize, PAGE_SIZE - a_pg_offset); - cnt = min(cnt, PAGE_SIZE - b_pg_offset); - a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) + - a_pg_offset); - b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) + - b_pg_offset); - bcopy(a_cp, b_cp, cnt); - a_offset += cnt; - b_offset += cnt; - xfersize -= cnt; - } -#else - mtx_lock(©_page_mutex); - while (xfersize > 0) { - a_pg_offset = a_offset & PAGE_MASK; - cnt = min(xfersize, PAGE_SIZE - a_pg_offset); - mmu_booke_kenter(mmu, copy_page_src_va, - VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); - a_cp = (char *)copy_page_src_va + a_pg_offset; - b_pg_offset = b_offset & PAGE_MASK; - cnt = min(cnt, PAGE_SIZE - b_pg_offset); - mmu_booke_kenter(mmu, copy_page_dst_va, - VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); - b_cp = (char *)copy_page_dst_va + b_pg_offset; - bcopy(a_cp, b_cp, cnt); - mmu_booke_kremove(mmu, copy_page_dst_va); - mmu_booke_kremove(mmu, copy_page_src_va); - a_offset += cnt; - b_offset += cnt; - xfersize -= cnt; - } - mtx_unlock(©_page_mutex); -#endif -} - -static vm_offset_t -mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m) -{ -#ifdef __powerpc64__ - return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); -#else - vm_paddr_t paddr; - vm_offset_t qaddr; - uint32_t flags; - pte_t *pte; - - paddr = VM_PAGE_TO_PHYS(m); - - flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; - flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT; - flags |= PTE_PS_4KB; - - critical_enter(); - qaddr = PCPU_GET(qmap_addr); - - pte = pte_find(mmu, kernel_pmap, qaddr); - - KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy")); - - /* - * XXX: tlbivax is broadcast to other cores, but qaddr should - * not be present in other TLBs. Is there a better instruction - * sequence to use? Or just forget it & use mmu_booke_kenter()... - */ - __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK)); - __asm __volatile("isync; msync"); - - *pte = PTE_RPN_FROM_PA(paddr) | flags; - - /* Flush the real memory from the instruction cache. 
*/ - if ((flags & (PTE_I | PTE_G)) == 0) - __syncicache((void *)qaddr, PAGE_SIZE); - - return (qaddr); -#endif -} - -static void -mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) -{ -#ifndef __powerpc64__ - pte_t *pte; - - pte = pte_find(mmu, kernel_pmap, addr); - - KASSERT(PCPU_GET(qmap_addr) == addr, - ("mmu_booke_quick_remove_page: invalid address")); - KASSERT(*pte != 0, - ("mmu_booke_quick_remove_page: PTE not in use")); - - *pte = 0; - critical_exit(); -#endif -} - /* * Return whether or not the specified physical page was modified * in any of physical maps. @@ -3923,23 +2698,6 @@ } } -/* - * Return the largest uint value log such that 2^log <= num. - */ -static unsigned long -ilog2(unsigned long num) -{ - long lz; - -#ifdef __powerpc64__ - __asm ("cntlzd %0, %1" : "=r" (lz) : "r" (num)); - return (63 - lz); -#else - __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); - return (31 - lz); -#endif -} - /* * Convert TLB TSIZE value to mapped region size. */ @@ -4274,65 +3032,6 @@ return (0); } -/* - * Invalidate all TLB0 entries which match the given TID. Note this is - * dedicated for cases when invalidations should NOT be propagated to other - * CPUs. - */ -static void -tid_flush(tlbtid_t tid) -{ - register_t msr; - uint32_t mas0, mas1, mas2; - int entry, way; - - - /* Don't evict kernel translations */ - if (tid == TID_KERNEL) - return; - - msr = mfmsr(); - __asm __volatile("wrteei 0"); - - /* - * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use - * it for PID invalidation. - */ - switch ((mfpvr() >> 16) & 0xffff) { - case FSL_E500mc: - case FSL_E5500: - case FSL_E6500: - mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); - /* tlbilxpid */ - __asm __volatile("isync; .long 0x7c200024; isync; msync"); - __asm __volatile("wrtee %0" :: "r"(msr)); - return; - } - - for (way = 0; way < TLB0_WAYS; way++) - for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) { - - mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); - mtspr(SPR_MAS0, mas0); - - mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT; - mtspr(SPR_MAS2, mas2); - - __asm __volatile("isync; tlbre"); - - mas1 = mfspr(SPR_MAS1); - - if (!(mas1 & MAS1_VALID)) - continue; - if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid) - continue; - mas1 &= ~MAS1_VALID; - mtspr(SPR_MAS1, mas1); - __asm __volatile("isync; tlbwe; isync; msync"); - } - __asm __volatile("wrtee %0" :: "r"(msr)); -} - #ifdef DDB /* Print out contents of the MAS registers for each TLB0 entry */ static void Index: sys/powerpc/booke/pmap_32.c =================================================================== --- /dev/null +++ sys/powerpc/booke/pmap_32.c @@ -0,0 +1,938 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski + * Copyright (C) 2006 Semihalf, Marian Balakowicz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Some hw specific parts of this pmap were derived or influenced + * by NetBSD's ibm4xx pmap module. More generic code is shared with + * a few other pmap modules from the FreeBSD tree. + */ + + /* + * VM layout notes: + * + * Kernel and user threads run within one common virtual address space + * defined by AS=0. + * + * 32-bit pmap: + * Virtual address space layout: + * ----------------------------- + * 0x0000_0000 - 0x7fff_ffff : user process + * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) + * 0xc000_0000 - 0xc0ff_ffff : kernel reserved + * 0xc000_0000 - data_end : kernel code+data, env, metadata etc. + * 0xc100_0000 - 0xffff_ffff : KVA + * 0xc100_0000 - 0xc100_3fff : reserved for page zero/copy + * 0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs + * 0xc200_4000 - 0xc200_8fff : guard page + kstack0 + * 0xc200_9000 - 0xfeef_ffff : actual free KVA space + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" +#include "opt_kstack_pages.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "mmu_if.h" + +#define PRI0ptrX "08x" + +/* Reserved KVA space and mutex for mmu_booke_zero_page. */ +static vm_offset_t zero_page_va; +static struct mtx zero_page_mutex; + +/* Reserved KVA space and mutex for mmu_booke_copy_page. 
*/ +static vm_offset_t copy_page_src_va; +static vm_offset_t copy_page_dst_va; +static struct mtx copy_page_mutex; + +/**************************************************************************/ +/* PMAP */ +/**************************************************************************/ + +#define VM_MAPDEV_BASE ((vm_offset_t)VM_MAXUSER_ADDRESS + PAGE_SIZE) + +static void tid_flush(tlbtid_t tid); + +/**************************************************************************/ +/* Page table management */ +/**************************************************************************/ + +#define PMAP_ROOT_SIZE (sizeof(pte_t**) * PDIR_NENTRIES) +static void ptbl_init(void); +static struct ptbl_buf *ptbl_buf_alloc(void); +static void ptbl_buf_free(struct ptbl_buf *); +static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); + +static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t); +static void ptbl_free(mmu_t, pmap_t, unsigned int); +static void ptbl_hold(mmu_t, pmap_t, unsigned int); +static int ptbl_unhold(mmu_t, pmap_t, unsigned int); + +static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); +static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); +static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); +static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); +static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t); + +struct ptbl_buf { + TAILQ_ENTRY(ptbl_buf) link; /* list link */ + vm_offset_t kva; /* va of mapping */ +}; + +/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ +#define PTBL_BUFS (128 * 16) + +/* ptbl free list and a lock used for access synchronization. */ +static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; +static struct mtx ptbl_buf_freelist_lock; + +/* Base address of kva space allocated fot ptbl bufs. */ +static vm_offset_t ptbl_buf_pool_vabase; + +/* Pointer to ptbl_buf structures. */ +static struct ptbl_buf *ptbl_bufs; + +/**************************************************************************/ +/* Page table related */ +/**************************************************************************/ + + +/* Initialize pool of kva ptbl buffers. */ +static void +ptbl_init(void) +{ + int i; + + CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, + (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); + CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", + __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); + + mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); + TAILQ_INIT(&ptbl_buf_freelist); + + for (i = 0; i < PTBL_BUFS; i++) { + ptbl_bufs[i].kva = + ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; + TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); + } +} + +/* Get a ptbl_buf from the freelist. */ +static struct ptbl_buf * +ptbl_buf_alloc(void) +{ + struct ptbl_buf *buf; + + mtx_lock(&ptbl_buf_freelist_lock); + buf = TAILQ_FIRST(&ptbl_buf_freelist); + if (buf != NULL) + TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); + mtx_unlock(&ptbl_buf_freelist_lock); + + CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); + + return (buf); +} + +/* Return ptbl buff to free pool. 
*/ +static void +ptbl_buf_free(struct ptbl_buf *buf) +{ + + CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); + + mtx_lock(&ptbl_buf_freelist_lock); + TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); + mtx_unlock(&ptbl_buf_freelist_lock); +} + +/* + * Search the list of allocated ptbl bufs and find on list of allocated ptbls + */ +static void +ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) +{ + struct ptbl_buf *pbuf; + + CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) + if (pbuf->kva == (vm_offset_t)ptbl) { + /* Remove from pmap ptbl buf list. */ + TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); + + /* Free corresponding ptbl buf. */ + ptbl_buf_free(pbuf); + break; + } +} + +/* Allocate page table. */ +static pte_t * +ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) +{ + vm_page_t mtbl[PTBL_PAGES]; + vm_page_t m; + struct ptbl_buf *pbuf; + unsigned int pidx; + pte_t *ptbl; + int i, j; + + CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, + (pmap == kernel_pmap), pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_alloc: invalid pdir_idx")); + KASSERT((pmap->pm_pdir[pdir_idx] == NULL), + ("pte_alloc: valid ptbl entry exists!")); + + pbuf = ptbl_buf_alloc(); + if (pbuf == NULL) + panic("pte_alloc: couldn't alloc kernel virtual memory"); + + ptbl = (pte_t *)pbuf->kva; + + CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); + + for (i = 0; i < PTBL_PAGES; i++) { + pidx = (PTBL_PAGES * pdir_idx) + i; + while ((m = vm_page_alloc(NULL, pidx, + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { + if (nosleep) { + ptbl_free_pmap_ptbl(pmap, ptbl); + for (j = 0; j < i; j++) + vm_page_free(mtbl[j]); + vm_wire_sub(i); + return (NULL); + } + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); + vm_wait(NULL); + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + } + mtbl[i] = m; + } + + /* Map allocated pages into kernel_pmap. */ + mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES); + + /* Zero whole ptbl. */ + bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); + + /* Add pbuf to the pmap ptbl bufs list. */ + TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); + + return (ptbl); +} + +/* Free ptbl pages and invalidate pdir entry. */ +static void +ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) +{ + pte_t *ptbl; + vm_paddr_t pa; + vm_offset_t va; + vm_page_t m; + int i; + + CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, + (pmap == kernel_pmap), pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_free: invalid pdir_idx")); + + ptbl = pmap->pm_pdir[pdir_idx]; + + CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); + + KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); + + /* + * Invalidate the pdir entry as soon as possible, so that other CPUs + * don't attempt to look up the page tables we are releasing. + */ + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + pmap->pm_pdir[pdir_idx] = NULL; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + for (i = 0; i < PTBL_PAGES; i++) { + va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); + pa = pte_vatopa(mmu, kernel_pmap, va); + m = PHYS_TO_VM_PAGE(pa); + vm_page_free_zero(m); + vm_wire_sub(1); + mmu_booke_kremove(mmu, va); + } + + ptbl_free_pmap_ptbl(pmap, ptbl); +} + +/* + * Decrement ptbl pages hold count and attempt to free ptbl pages. + * Called when removing pte entry from ptbl. + * + * Return 1 if ptbl pages were freed. 
+ */ +static int +ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) +{ + pte_t *ptbl; + vm_paddr_t pa; + vm_page_t m; + int i; + + CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, + (pmap == kernel_pmap), pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_unhold: invalid pdir_idx")); + KASSERT((pmap != kernel_pmap), + ("ptbl_unhold: unholding kernel ptbl!")); + + ptbl = pmap->pm_pdir[pdir_idx]; + + //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); + KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), + ("ptbl_unhold: non kva ptbl")); + + /* decrement hold count */ + for (i = 0; i < PTBL_PAGES; i++) { + pa = pte_vatopa(mmu, kernel_pmap, + (vm_offset_t)ptbl + (i * PAGE_SIZE)); + m = PHYS_TO_VM_PAGE(pa); + m->ref_count--; + } + + /* + * Free ptbl pages if there are no pte etries in this ptbl. + * ref_count has the same value for all ptbl pages, so check the last + * page. + */ + if (m->ref_count == 0) { + ptbl_free(mmu, pmap, pdir_idx); + + //debugf("ptbl_unhold: e (freed ptbl)\n"); + return (1); + } + + return (0); +} + +/* + * Increment hold count for ptbl pages. This routine is used when a new pte + * entry is being inserted into the ptbl. + */ +static void +ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) +{ + vm_paddr_t pa; + pte_t *ptbl; + vm_page_t m; + int i; + + CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, + pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_hold: invalid pdir_idx")); + KASSERT((pmap != kernel_pmap), + ("ptbl_hold: holding kernel ptbl!")); + + ptbl = pmap->pm_pdir[pdir_idx]; + + KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); + + for (i = 0; i < PTBL_PAGES; i++) { + pa = pte_vatopa(mmu, kernel_pmap, + (vm_offset_t)ptbl + (i * PAGE_SIZE)); + m = PHYS_TO_VM_PAGE(pa); + m->ref_count++; + } +} + +/* + * Clean pte entry, try to free page table page if requested. + * + * Return 1 if ptbl pages were freed, otherwise return 0. + */ +static int +pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + vm_page_t m; + pte_t *ptbl; + pte_t *pte; + + //int su = (pmap == kernel_pmap); + //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", + // su, (u_int32_t)pmap, va, flags); + + ptbl = pmap->pm_pdir[pdir_idx]; + KASSERT(ptbl, ("pte_remove: null ptbl")); + + pte = &ptbl[ptbl_idx]; + + if (pte == NULL || !PTE_ISVALID(pte)) + return (0); + + if (PTE_ISWIRED(pte)) + pmap->pm_stats.wired_count--; + + /* Get vm_page_t for mapped pte. */ + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + + /* Handle managed entry. */ + if (PTE_ISMANAGED(pte)) { + + if (PTE_ISMODIFIED(pte)) + vm_page_dirty(m); + + if (PTE_ISREFERENCED(pte)) + vm_page_aflag_set(m, PGA_REFERENCED); + + pv_remove(pmap, va, m); + } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { + /* + * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is + * used. This is needed by the NCSW support code for fast + * VA<->PA translation. 
+ */ + pv_remove(pmap, va, m); + if (TAILQ_EMPTY(&m->md.pv_list)) + m->md.pv_tracked = false; + } + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(va); + *pte = 0; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + pmap->pm_stats.resident_count--; + + if (flags & PTBL_UNHOLD) { + //debugf("pte_remove: e (unhold)\n"); + return (ptbl_unhold(mmu, pmap, pdir_idx)); + } + + //debugf("pte_remove: e\n"); + return (0); +} + +/* + * Insert PTE for a given page and virtual address. + */ +static int +pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, + boolean_t nosleep) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + pte_t *ptbl, *pte, pte_tmp; + + CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, + pmap == kernel_pmap, pmap, va); + + /* Get the page table pointer. */ + ptbl = pmap->pm_pdir[pdir_idx]; + + if (ptbl == NULL) { + /* Allocate page table pages. */ + ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep); + if (ptbl == NULL) { + KASSERT(nosleep, ("nosleep and NULL ptbl")); + return (ENOMEM); + } + pmap->pm_pdir[pdir_idx] = ptbl; + pte = &ptbl[ptbl_idx]; + } else { + /* + * Check if there is valid mapping for requested + * va, if there is, remove it. + */ + pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; + if (PTE_ISVALID(pte)) { + pte_remove(mmu, pmap, va, PTBL_HOLD); + } else { + /* + * pte is not used, increment hold count + * for ptbl pages. + */ + if (pmap != kernel_pmap) + ptbl_hold(mmu, pmap, pdir_idx); + } + } + + /* + * Insert pv_entry into pv_list for mapped page if part of managed + * memory. + */ + if ((m->oflags & VPO_UNMANAGED) == 0) { + flags |= PTE_MANAGED; + + /* Create and insert pv entry. */ + pv_insert(pmap, va, m); + } + + pmap->pm_stats.resident_count++; + + pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); + pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(va); + *pte = pte_tmp; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + return (0); +} + +/* Return the pa for the given pmap/va. */ +static vm_paddr_t +pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t pa = 0; + pte_t *pte; + + pte = pte_find(mmu, pmap, va); + if ((pte != NULL) && PTE_ISVALID(pte)) + pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); + return (pa); +} + +/* Get a pointer to a PTE in a page table. */ +static pte_t * +pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + + KASSERT((pmap != NULL), ("pte_find: invalid pmap")); + + if (pmap->pm_pdir[pdir_idx]) + return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); + + return (NULL); +} + +/* Set up kernel page tables. */ +static void +kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) +{ + int i; + vm_offset_t va; + pte_t *pte; + + /* Initialize kernel pdir */ + for (i = 0; i < kernel_ptbls; i++) + kernel_pmap->pm_pdir[kptbl_min + i] = + (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES)); + + /* + * Fill in PTEs covering kernel code and data. They are not required + * for address translation, as this area is covered by static TLB1 + * entries, but for pte_vatopa() to work correctly with kernel area + * addresses. 
+ */ + for (va = addr; va < data_end; va += PAGE_SIZE) { + pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); + *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); + *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | + PTE_VALID | PTE_PS_4KB; + } +} + +/* + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. + */ +static void +mmu_booke_pinit(mmu_t mmu, pmap_t pmap) +{ + int i; + + CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, + curthread->td_proc->p_pid, curthread->td_proc->p_comm); + + KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); + + for (i = 0; i < MAXCPU; i++) + pmap->pm_tid[i] = TID_NONE; + CPU_ZERO(&kernel_pmap->pm_active); + bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK); + bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); + TAILQ_INIT(&pmap->pm_ptbl_list); +} + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by mmu_booke_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +static void +mmu_booke_release(mmu_t mmu, pmap_t pmap) +{ + + KASSERT(pmap->pm_stats.resident_count == 0, + ("pmap_release: pmap resident count %ld != 0", + pmap->pm_stats.resident_count)); + uma_zfree(ptbl_root_zone, pmap->pm_pdir); +} + +static void +mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) +{ + pte_t *pte; + vm_paddr_t pa = 0; + int sync_sz, valid; + pmap_t pmap; + vm_page_t m; + vm_offset_t addr; + int active; + + rw_wlock(&pvh_global_lock); + pmap = PCPU_GET(curpmap); + active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; + while (sz > 0) { + PMAP_LOCK(pm); + pte = pte_find(mmu, pm, va); + valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; + if (valid) + pa = PTE_PA(pte); + PMAP_UNLOCK(pm); + sync_sz = PAGE_SIZE - (va & PAGE_MASK); + sync_sz = min(sync_sz, sz); + if (valid) { + if (!active) { + /* Create a mapping in the active pmap. */ + addr = 0; + m = PHYS_TO_VM_PAGE(pa); + PMAP_LOCK(pmap); + pte_enter(mmu, pmap, m, addr, + PTE_SR | PTE_VALID, FALSE); + addr += (va & PAGE_MASK); + __syncicache((void *)addr, sync_sz); + pte_remove(mmu, pmap, addr, PTBL_UNHOLD); + PMAP_UNLOCK(pmap); + } else + __syncicache((void *)va, sync_sz); + } + va += sync_sz; + sz -= sync_sz; + } + rw_wunlock(&pvh_global_lock); +} + +/* + * mmu_booke_zero_page_area zeros the specified hardware page by + * mapping it into virtual memory and using bzero to clear + * its contents. + * + * off and size must reside within a single page. + */ +static void +mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) +{ + vm_offset_t va; + + /* XXX KASSERT off and size are within a single page? */ + + mtx_lock(&zero_page_mutex); + va = zero_page_va; + + mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); + bzero((caddr_t)va + off, size); + mmu_booke_kremove(mmu, va); + + mtx_unlock(&zero_page_mutex); +} + +/* + * mmu_booke_zero_page zeros the specified hardware page. 
+ */ +static void +mmu_booke_zero_page(mmu_t mmu, vm_page_t m) +{ + vm_offset_t off, va; + + va = zero_page_va; + mtx_lock(&zero_page_mutex); + + mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); + + for (off = 0; off < PAGE_SIZE; off += cacheline_size) + __asm __volatile("dcbz 0,%0" :: "r"(va + off)); + + mmu_booke_kremove(mmu, va); + + mtx_unlock(&zero_page_mutex); +} + +/* + * mmu_booke_copy_page copies the specified (machine independent) page by + * mapping the page into virtual memory and using memcopy to copy the page, + * one machine dependent page at a time. + */ +static void +mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) +{ + vm_offset_t sva, dva; + + sva = copy_page_src_va; + dva = copy_page_dst_va; + + mtx_lock(©_page_mutex); + mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); + mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); + + memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); + + mmu_booke_kremove(mmu, dva); + mmu_booke_kremove(mmu, sva); + mtx_unlock(©_page_mutex); +} + +static inline void +mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + mtx_lock(©_page_mutex); + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + mmu_booke_kenter(mmu, copy_page_src_va, + VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); + a_cp = (char *)copy_page_src_va + a_pg_offset; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + mmu_booke_kenter(mmu, copy_page_dst_va, + VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); + b_cp = (char *)copy_page_dst_va + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + mmu_booke_kremove(mmu, copy_page_dst_va); + mmu_booke_kremove(mmu, copy_page_src_va); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } + mtx_unlock(©_page_mutex); +} + +static vm_offset_t +mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m) +{ + vm_paddr_t paddr; + vm_offset_t qaddr; + uint32_t flags; + pte_t *pte; + + paddr = VM_PAGE_TO_PHYS(m); + + flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; + flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT; + flags |= PTE_PS_4KB; + + critical_enter(); + qaddr = PCPU_GET(qmap_addr); + + pte = pte_find(mmu, kernel_pmap, qaddr); + + KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy")); + + /* + * XXX: tlbivax is broadcast to other cores, but qaddr should + * not be present in other TLBs. Is there a better instruction + * sequence to use? Or just forget it & use mmu_booke_kenter()... + */ + __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK)); + __asm __volatile("isync; msync"); + + *pte = PTE_RPN_FROM_PA(paddr) | flags; + + /* Flush the real memory from the instruction cache. */ + if ((flags & (PTE_I | PTE_G)) == 0) + __syncicache((void *)qaddr, PAGE_SIZE); + + return (qaddr); +} + +static void +mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) +{ + pte_t *pte; + + pte = pte_find(mmu, kernel_pmap, addr); + + KASSERT(PCPU_GET(qmap_addr) == addr, + ("mmu_booke_quick_remove_page: invalid address")); + KASSERT(*pte != 0, + ("mmu_booke_quick_remove_page: PTE not in use")); + + *pte = 0; + critical_exit(); +} + +/**************************************************************************/ +/* TID handling */ +/**************************************************************************/ + +/* + * Return the largest uint value log such that 2^log <= num. 
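+ * For example, ilog2(4096) == 12.  The cntlzw-based implementation
+ * below is roughly equivalent to (31 - __builtin_clz(num)) and assumes
+ * num != 0.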
+ */ +static unsigned long +ilog2(unsigned long num) +{ + long lz; + + __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); + return (31 - lz); +} + +/* + * Invalidate all TLB0 entries which match the given TID. Note this is + * dedicated for cases when invalidations should NOT be propagated to other + * CPUs. + */ +static void +tid_flush(tlbtid_t tid) +{ + register_t msr; + uint32_t mas0, mas1, mas2; + int entry, way; + + + /* Don't evict kernel translations */ + if (tid == TID_KERNEL) + return; + + msr = mfmsr(); + __asm __volatile("wrteei 0"); + + /* + * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use + * it for PID invalidation. + */ + switch ((mfpvr() >> 16) & 0xffff) { + case FSL_E500mc: + case FSL_E5500: + case FSL_E6500: + mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); + /* tlbilxpid */ + __asm __volatile("isync; .long 0x7c200024; isync; msync"); + __asm __volatile("wrtee %0" :: "r"(msr)); + return; + } + + for (way = 0; way < TLB0_WAYS; way++) + for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) { + + mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); + mtspr(SPR_MAS0, mas0); + + mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT; + mtspr(SPR_MAS2, mas2); + + __asm __volatile("isync; tlbre"); + + mas1 = mfspr(SPR_MAS1); + + if (!(mas1 & MAS1_VALID)) + continue; + if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid) + continue; + mas1 &= ~MAS1_VALID; + mtspr(SPR_MAS1, mas1); + __asm __volatile("isync; tlbwe; isync; msync"); + } + __asm __volatile("wrtee %0" :: "r"(msr)); +} Index: sys/powerpc/booke/pmap_64.c =================================================================== --- /dev/null +++ sys/powerpc/booke/pmap_64.c @@ -0,0 +1,807 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski + * Copyright (C) 2006 Semihalf, Marian Balakowicz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Some hw specific parts of this pmap were derived or influenced + * by NetBSD's ibm4xx pmap module. More generic code is shared with + * a few other pmap modules from the FreeBSD tree. + */ + + /* + * VM layout notes: + * + * Kernel and user threads run within one common virtual address space + * defined by AS=0. 
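+ * Translations are distinguished by TID: the kernel uses TID_KERNEL,
+ * while user pmaps are assigned per-CPU TIDs by tid_alloc().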
+ * + * 32-bit pmap: + * Virtual address space layout: + * ----------------------------- + * 0x0000_0000 - 0x7fff_ffff : user process + * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) + * 0xc000_0000 - 0xc0ff_ffff : kernel reserved + * 0xc000_0000 - data_end : kernel code+data, env, metadata etc. + * 0xc100_0000 - 0xffff_ffff : KVA + * 0xc100_0000 - 0xc100_3fff : reserved for page zero/copy + * 0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs + * 0xc200_4000 - 0xc200_8fff : guard page + kstack0 + * 0xc200_9000 - 0xfeef_ffff : actual free KVA space + * + * 64-bit pmap: + * Virtual address space layout: + * ----------------------------- + * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : user process + * 0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries + * 0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region + * 0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack + * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved + * 0xc000_0000_0000_0000 - endkernel-1 : kernel code & data + * endkernel - msgbufp-1 : flat device tree + * msgbufp - kernel_pdir-1 : message buffer + * kernel_pdir - kernel_pp2d-1 : kernel page directory + * kernel_pp2d - . : kernel pointers to page directory + * pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy + * crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs + * ptbl_buf_pool_vabase - virtual_avail-1 : user page directories and page tables + * virtual_avail - 0xcfff_ffff_ffff_ffff : actual free KVA space + * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : coprocessor region + * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff : mmio region + * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : direct map + * 0xf000_0000_0000_0000 - +Maxmem : physmem map + * - 0xffff_ffff_ffff_ffff : device direct map + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" +#include "opt_kstack_pages.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "mmu_if.h" + +#ifdef DEBUG +#define debugf(fmt, args...) printf(fmt, ##args) +#else +#define debugf(fmt, args...) +#endif + +#define PRI0ptrX "016lx" + +/**************************************************************************/ +/* PMAP */ +/**************************************************************************/ + +unsigned int kernel_pdirs; +static uma_zone_t ptbl_root_zone; + +/* + * Base of the pmap_mapdev() region. On 32-bit it immediately follows the + * userspace address range. On On 64-bit it's far above, at (1 << 63), and + * ranges up to the DMAP, giving 62 bits of PA allowed. This is far larger than + * the widest Book-E address bus, the e6500 has a 40-bit PA space. This allows + * us to map akin to the DMAP, with addresses identical to the PA, offset by the + * base. 
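+ * For example, device registers at physical address 0x8000_0000 would
+ * be mapped at VM_MAPDEV_BASE + 0x8000_0000, i.e. 0x8000_0000_8000_0000.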
+ */ +#define VM_MAPDEV_BASE 0x8000000000000000 +#define VM_MAPDEV_PA_MAX 0x4000000000000000 /* Don't encroach on DMAP */ + +static tlbtid_t tid_alloc(struct pmap *); +static void tid_flush(tlbtid_t tid); + +#ifdef DDB +#ifdef __powerpc64__ +static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); +#else +static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); +#endif +#endif + +static void tlb1_read_entry(tlb_entry_t *, unsigned int); +static void tlb1_write_entry(tlb_entry_t *, unsigned int); +static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); +static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t, int); + +static vm_size_t tsize2size(unsigned int); +static unsigned int size2tsize(vm_size_t); +static unsigned long ilog2(unsigned long); + +static void set_mas4_defaults(void); + +static inline void tlb0_flush_entry(vm_offset_t); + +/**************************************************************************/ +/* Page table management */ +/**************************************************************************/ + +static struct rwlock_padalign pvh_global_lock; + +#define PMAP_ROOT_SIZE (sizeof(pte_t***) * PP2D_NENTRIES) +static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **, + unsigned int, boolean_t); +static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int, vm_page_t); +static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int); +static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t); + +static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); +static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); +static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); +static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); +static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t); + +/**************************************************************************/ +/* Page table related */ +/**************************************************************************/ + +/* Initialize pool of kva ptbl buffers. */ +static void +ptbl_init(void) +{ +} + +/* Get a pointer to a PTE in a page table. */ +static __inline pte_t * +pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) +{ + pte_t **pdir; + pte_t *ptbl; + + KASSERT((pmap != NULL), ("pte_find: invalid pmap")); + + pdir = pmap->pm_pp2d[PP2D_IDX(va)]; + if (!pdir) + return NULL; + ptbl = pdir[PDIR_IDX(va)]; + return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL); +} + +/* + * allocate a page of pointers to page directories, do not preallocate the + * page tables + */ +static pte_t ** +pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep) +{ + vm_page_t m; + pte_t **pdir; + int req; + + req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; + while ((m = vm_page_alloc(NULL, pp2d_idx, req)) == NULL) { + PMAP_UNLOCK(pmap); + if (nosleep) { + return (NULL); + } + vm_wait(NULL); + PMAP_LOCK(pmap); + } + + /* Zero whole ptbl. */ + pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + mmu_booke_zero_page(mmu, m); + + return (pdir); +} + +/* Free pdir pages and invalidate pdir entry. */ +static void +pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, vm_page_t m) +{ + pte_t **pdir; + + pdir = pmap->pm_pp2d[pp2d_idx]; + + KASSERT((pdir != NULL), ("pdir_free: null pdir")); + + pmap->pm_pp2d[pp2d_idx] = NULL; + + vm_wire_sub(1); + vm_page_free_zero(m); +} + +/* + * Decrement pdir pages hold count and attempt to free pdir pages. Called + * when removing directory entry from pdir. + * + * Return 1 if pdir pages were freed. 
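+ * When the page is freed, pdir_free() also clears the corresponding
+ * pm_pp2d slot.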
+ */ +static int +pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx) +{ + pte_t **pdir; + vm_paddr_t pa; + vm_page_t m; + + KASSERT((pmap != kernel_pmap), + ("pdir_unhold: unholding kernel pdir!")); + + pdir = pmap->pm_pp2d[pp2d_idx]; + + /* decrement hold count */ + pa = DMAP_TO_PHYS((vm_offset_t) pdir); + m = PHYS_TO_VM_PAGE(pa); + + /* + * Free pdir page if there are no dir entries in this pdir. + */ + m->ref_count--; + if (m->ref_count == 0) { + pdir_free(mmu, pmap, pp2d_idx, m); + return (1); + } + return (0); +} + +/* + * Increment hold count for pdir pages. This routine is used when new ptlb + * entry is being inserted into pdir. + */ +static void +pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir) +{ + vm_page_t m; + + KASSERT((pmap != kernel_pmap), + ("pdir_hold: holding kernel pdir!")); + + KASSERT((pdir != NULL), ("pdir_hold: null pdir")); + + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pdir)); + m->ref_count++; +} + +/* Allocate page table. */ +static pte_t * +ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, + boolean_t nosleep) +{ + vm_page_t m; + pte_t *ptbl; + int req; + + KASSERT((pdir[pdir_idx] == NULL), + ("%s: valid ptbl entry exists!", __func__)); + + req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; + while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) { + if (nosleep) + return (NULL); + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); + vm_wait(NULL); + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + } + + /* Zero whole ptbl. */ + ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + mmu_booke_zero_page(mmu, m); + + return (ptbl); +} + +/* Free ptbl pages and invalidate pdir entry. */ +static void +ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, vm_page_t m) +{ + pte_t *ptbl; + + ptbl = pdir[pdir_idx]; + + KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); + + pdir[pdir_idx] = NULL; + + vm_wire_sub(1); + vm_page_free_zero(m); +} + +/* + * Decrement ptbl pages hold count and attempt to free ptbl pages. Called + * when removing pte entry from ptbl. + * + * Return 1 if ptbl pages were freed. + */ +static int +ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va) +{ + pte_t *ptbl; + vm_page_t m; + u_int pp2d_idx; + pte_t **pdir; + u_int pdir_idx; + + pp2d_idx = PP2D_IDX(va); + pdir_idx = PDIR_IDX(va); + + KASSERT((pmap != kernel_pmap), + ("ptbl_unhold: unholding kernel ptbl!")); + + pdir = pmap->pm_pp2d[pp2d_idx]; + ptbl = pdir[pdir_idx]; + + /* decrement hold count */ + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); + + /* + * Free ptbl pages if there are no pte entries in this ptbl. + * ref_count has the same value for all ptbl pages, so check the + * last page. + */ + m->ref_count--; + if (m->ref_count == 0) { + ptbl_free(mmu, pmap, pdir, pdir_idx, m); + pdir_unhold(mmu, pmap, pp2d_idx); + return (1); + } + return (0); +} + +/* + * Increment hold count for ptbl pages. This routine is used when new pte + * entry is being inserted into ptbl. + */ +static void +ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) +{ + pte_t *ptbl; + vm_page_t m; + + KASSERT((pmap != kernel_pmap), + ("ptbl_hold: holding kernel ptbl!")); + + ptbl = pdir[pdir_idx]; + + KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); + + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); + m->ref_count++; +} + +/* + * Clean pte entry, try to free page table page if requested. + * + * Return 1 if ptbl pages were freed, otherwise return 0. 
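+ * Freeing can cascade: when the last PTE in a page table is removed,
+ * ptbl_unhold() frees the page table and calls pdir_unhold(), which in
+ * turn may free the parent page directory.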
+ */ +static int +pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) +{ + vm_page_t m; + pte_t *pte; + + pte = pte_find(mmu, pmap, va); + KASSERT(pte != NULL, ("%s: NULL pte", __func__)); + + if (!PTE_ISVALID(pte)) + return (0); + + /* Get vm_page_t for mapped pte. */ + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + + if (PTE_ISWIRED(pte)) + pmap->pm_stats.wired_count--; + + /* Handle managed entry. */ + if (PTE_ISMANAGED(pte)) { + + /* Handle modified pages. */ + if (PTE_ISMODIFIED(pte)) + vm_page_dirty(m); + + /* Referenced pages. */ + if (PTE_ISREFERENCED(pte)) + vm_page_aflag_set(m, PGA_REFERENCED); + + /* Remove pv_entry from pv_list. */ + pv_remove(pmap, va, m); + } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { + pv_remove(pmap, va, m); + if (TAILQ_EMPTY(&m->md.pv_list)) + m->md.pv_tracked = false; + } + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(va); + *pte = 0; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + pmap->pm_stats.resident_count--; + + if (flags & PTBL_UNHOLD) { + return (ptbl_unhold(mmu, pmap, va)); + } + return (0); +} + +/* + * Insert PTE for a given page and virtual address. + */ +static int +pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, + boolean_t nosleep) +{ + unsigned int pp2d_idx = PP2D_IDX(va); + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + pte_t *ptbl, *pte, pte_tmp; + pte_t **pdir; + + /* Get the page directory pointer. */ + pdir = pmap->pm_pp2d[pp2d_idx]; + if (pdir == NULL) + pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep); + + /* Get the page table pointer. */ + ptbl = pdir[pdir_idx]; + + if (ptbl == NULL) { + /* Allocate page table pages. */ + ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep); + if (ptbl == NULL) { + KASSERT(nosleep, ("nosleep and NULL ptbl")); + return (ENOMEM); + } + pte = &ptbl[ptbl_idx]; + } else { + /* + * Check if there is valid mapping for requested va, if there + * is, remove it. + */ + pte = &ptbl[ptbl_idx]; + if (PTE_ISVALID(pte)) { + pte_remove(mmu, pmap, va, PTBL_HOLD); + } else { + /* + * pte is not used, increment hold count for ptbl + * pages. + */ + if (pmap != kernel_pmap) + ptbl_hold(mmu, pmap, pdir, pdir_idx); + } + } + + if (pdir[pdir_idx] == NULL) { + if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL) + pdir_hold(mmu, pmap, pdir); + pdir[pdir_idx] = ptbl; + } + if (pmap->pm_pp2d[pp2d_idx] == NULL) + pmap->pm_pp2d[pp2d_idx] = pdir; + + /* + * Insert pv_entry into pv_list for mapped page if part of managed + * memory. + */ + if ((m->oflags & VPO_UNMANAGED) == 0) { + flags |= PTE_MANAGED; + + /* Create and insert pv entry. */ + pv_insert(pmap, va, m); + } + + pmap->pm_stats.resident_count++; + + pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); + pte_tmp |= (PTE_VALID | flags); + + mtx_lock_spin(&tlbivax_mutex); + tlb_miss_lock(); + + tlb0_flush_entry(va); + *pte = pte_tmp; + + tlb_miss_unlock(); + mtx_unlock_spin(&tlbivax_mutex); + + return (0); +} + +/* Return the pa for the given pmap/va. 
*/ +static vm_paddr_t +pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t pa = 0; + pte_t *pte; + + pte = pte_find(mmu, pmap, va); + if ((pte != NULL) && PTE_ISVALID(pte)) + pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); + return (pa); +} + + +/* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ +static void +kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) +{ + int i, j; + vm_offset_t va; + pte_t *pte; + + va = addr; + /* Initialize kernel pdir */ + for (i = 0; i < kernel_pdirs; i++) { + kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] = + (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES)); + for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES)); + j < PDIR_NENTRIES; j++) { + kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] = + (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE) + + (((i * PDIR_NENTRIES) + j) * PAGE_SIZE)); + } + } + + /* + * Fill in PTEs covering kernel code and data. They are not required + * for address translation, as this area is covered by static TLB1 + * entries, but for pte_vatopa() to work correctly with kernel area + * addresses. + */ + for (va = addr; va < data_end; va += PAGE_SIZE) { + pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); + *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); + *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | + PTE_VALID | PTE_PS_4KB; + } +} + +/* + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. + */ +static void +mmu_booke_pinit(mmu_t mmu, pmap_t pmap) +{ + int i; + + CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, + curthread->td_proc->p_pid, curthread->td_proc->p_comm); + + KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); + + for (i = 0; i < MAXCPU; i++) + pmap->pm_tid[i] = TID_NONE; + CPU_ZERO(&kernel_pmap->pm_active); + bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + pmap->pm_pp2d = uma_zalloc(ptbl_root_zone, M_WAITOK); + bzero(pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES); +} + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by mmu_booke_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +static void +mmu_booke_release(mmu_t mmu, pmap_t pmap) +{ + + KASSERT(pmap->pm_stats.resident_count == 0, + ("pmap_release: pmap resident count %ld != 0", + pmap->pm_stats.resident_count)); + uma_zfree(ptbl_root_zone, pmap->pm_pp2d); +} + +static void +mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) +{ + pte_t *pte; + vm_paddr_t pa = 0; + int sync_sz, valid; + + while (sz > 0) { + PMAP_LOCK(pm); + pte = pte_find(mmu, pm, va); + valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; + if (valid) + pa = PTE_PA(pte); + PMAP_UNLOCK(pm); + sync_sz = PAGE_SIZE - (va & PAGE_MASK); + sync_sz = min(sync_sz, sz); + if (valid) { + pa += (va & PAGE_MASK); + __syncicache((void *)PHYS_TO_DMAP(pa), sync_sz); + } + va += sync_sz; + sz -= sync_sz; + } +} + +/* + * mmu_booke_zero_page_area zeros the specified hardware page by + * mapping it into virtual memory and using bzero to clear + * its contents. + * + * off and size must reside within a single page. + */ +static void +mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) +{ + vm_offset_t va; + + /* XXX KASSERT off and size are within a single page? */ + + va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + bzero((caddr_t)va + off, size); +} + +/* + * mmu_booke_zero_page zeros the specified hardware page. 
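+ *
+ * On 64-bit the page is reached through the direct map, so unlike the
+ * 32-bit pmap no temporary KVA mapping or lock is needed.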
+ */ +static void +mmu_booke_zero_page(mmu_t mmu, vm_page_t m) +{ + vm_offset_t off, va; + + va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + + for (off = 0; off < PAGE_SIZE; off += cacheline_size) + __asm __volatile("dcbz 0,%0" :: "r"(va + off)); +} + +/* + * mmu_booke_copy_page copies the specified (machine independent) page by + * mapping the page into virtual memory and using memcopy to copy the page, + * one machine dependent page at a time. + */ +static void +mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) +{ + vm_offset_t sva, dva; + + sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm)); + dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm)); + memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); +} + +static inline void +mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, + vm_page_t *mb, vm_offset_t b_offset, int xfersize) +{ + void *a_cp, *b_cp; + vm_offset_t a_pg_offset, b_pg_offset; + int cnt; + + vm_page_t pa, pb; + + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + pa = ma[a_offset >> PAGE_SHIFT]; + b_pg_offset = b_offset & PAGE_MASK; + pb = mb[b_offset >> PAGE_SHIFT]; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) + + a_pg_offset); + b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) + + b_pg_offset); + bcopy(a_cp, b_cp, cnt); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } +} + +static vm_offset_t +mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m) +{ + return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); +} + +static void +mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) +{ +} + +/**************************************************************************/ +/* TID handling */ +/**************************************************************************/ + +/* + * Return the largest uint value log such that 2^log <= num. + */ +static unsigned long +ilog2(unsigned long num) +{ + long lz; + + __asm ("cntlzd %0, %1" : "=r" (lz) : "r" (num)); + return (63 - lz); +} + +/* + * Invalidate all TLB0 entries which match the given TID. Note this is + * dedicated for cases when invalidations should NOT be propagated to other + * CPUs. + */ +static void +tid_flush(tlbtid_t tid) +{ + register_t msr; + + /* Don't evict kernel translations */ + if (tid == TID_KERNEL) + return; + + msr = mfmsr(); + __asm __volatile("wrteei 0"); + + /* + * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use + * it for PID invalidation. + */ + mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); + __asm __volatile("isync; .long 0x7c200024; isync; msync"); + + __asm __volatile("wrtee %0" :: "r"(msr)); +}
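Both pmap variants rely on the same ilog2() contract (the largest log such that 2^log <= num, with num assumed non-zero). As a minimal portable sketch of that contract, using a hypothetical ilog2_ref() helper that is not part of this patch:

#include <assert.h>
#include <stdio.h>

/* Hypothetical reference: largest log such that (1UL << log) <= num. */
static unsigned long
ilog2_ref(unsigned long num)
{
        unsigned long log = 0;

        assert(num != 0);
        while ((num >>= 1) != 0)
                log++;
        return (log);
}

int
main(void)
{
        /* 4096 == 2^12; 6000 lies between 2^12 and 2^13. */
        printf("%lu %lu\n", ilog2_ref(4096), ilog2_ref(6000)); /* "12 12" */
        return (0);
}

The cntlzw form (31 - clz) and the cntlzd form (63 - clz) in the patch compute the same value in a single instruction for 32-bit and 64-bit longs respectively.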