Index: head/sys/powerpc/aim/mmu_oea64.c
===================================================================
--- head/sys/powerpc/aim/mmu_oea64.c
+++ head/sys/powerpc/aim/mmu_oea64.c
@@ -309,6 +309,7 @@
 		    int *is_user, vm_offset_t *decoded_addr);
 static size_t moea64_scan_pmap(mmu_t mmu);
 static void *moea64_dump_pmap_init(mmu_t mmu, unsigned blkpgs);
+static void moea64_page_array_startup(mmu_t, long);
 
 
 static mmu_method_t moea64_methods[] = {
@@ -348,6 +349,7 @@
 	MMUMETHOD(mmu_page_set_memattr,	moea64_page_set_memattr),
 	MMUMETHOD(mmu_quick_enter_page, moea64_quick_enter_page),
 	MMUMETHOD(mmu_quick_remove_page, moea64_quick_remove_page),
+	MMUMETHOD(mmu_page_array_startup,	moea64_page_array_startup),
 
 	/* Internal interfaces */
 	MMUMETHOD(mmu_mapdev,		moea64_mapdev),
@@ -639,11 +641,33 @@
 }
 #endif
 
+static int
+moea64_kenter_large(mmu_t mmup, vm_offset_t va, vm_paddr_t pa, uint64_t attr, int bootstrap)
+{
+	struct pvo_entry *pvo;
+	uint64_t pte_lo;
+	int error;
+
+	pte_lo = LPTE_M;
+	pte_lo |= attr;
+
+	pvo = alloc_pvo_entry(bootstrap);
+	pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE;
+	init_pvo_entry(pvo, kernel_pmap, va);
+
+	pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE |
+	    VM_PROT_EXECUTE;
+	pvo->pvo_pte.pa = pa | pte_lo;
+	error = moea64_pvo_enter(mmup, pvo, NULL, NULL);
+	if (error != 0)
+		panic("Error %d inserting large page\n", error);
+	return (0);
+}
+
 static void
 moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart,
     vm_offset_t kernelend)
 {
-	struct pvo_entry *pvo;
 	register_t msr;
 	vm_paddr_t pa, pkernelstart, pkernelend;
 	vm_offset_t size, off;
@@ -660,15 +684,6 @@
 	  for (pa = pregions[i].mr_start; pa < pregions[i].mr_start +
 	     pregions[i].mr_size; pa += moea64_large_page_size) {
 		pte_lo = LPTE_M;
-
-		pvo = alloc_pvo_entry(1 /* bootstrap */);
-		pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE;
-		init_pvo_entry(pvo, kernel_pmap, PHYS_TO_DMAP(pa));
-
-		/*
-		 * Set memory access as guarded if prefetch within
-		 * the page could exit the available physmem area.
-		 */
 		if (pa & moea64_large_page_mask) {
 			pa &= moea64_large_page_mask;
 			pte_lo |= LPTE_G;
@@ -677,10 +692,7 @@
 		    pregions[i].mr_start + pregions[i].mr_size)
 			pte_lo |= LPTE_G;
 
-		pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE |
-		    VM_PROT_EXECUTE;
-		pvo->pvo_pte.pa = pa | pte_lo;
-		moea64_pvo_enter(mmup, pvo, NULL, NULL);
+		moea64_kenter_large(mmup, PHYS_TO_DMAP(pa), pa, pte_lo, 1);
 	  }
 	}
 	PMAP_UNLOCK(kernel_pmap);
@@ -3013,3 +3025,97 @@
 }
 #endif
 
+
+static void
+moea64_map_range(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_size_t npages)
+{
+
+	for (; npages > 0; --npages) {
+		if (moea64_large_page_size != 0 &&
+		    (pa & moea64_large_page_mask) == 0 &&
+		    (va & moea64_large_page_mask) == 0 &&
+		    npages >= (moea64_large_page_size >> PAGE_SHIFT)) {
+			PMAP_LOCK(kernel_pmap);
+			moea64_kenter_large(mmu, va, pa, 0, 0);
+			PMAP_UNLOCK(kernel_pmap);
+			pa += moea64_large_page_size;
+			va += moea64_large_page_size;
+			npages -= (moea64_large_page_size >> PAGE_SHIFT) - 1;
+		} else {
+			moea64_kenter(mmu, va, pa);
+			pa += PAGE_SIZE;
+			va += PAGE_SIZE;
+		}
+	}
+}
+
+static void
+moea64_page_array_startup(mmu_t mmu, long pages)
+{
+	long dom_pages[MAXMEMDOM];
+	vm_paddr_t pa;
+	vm_offset_t va, vm_page_base;
+	vm_size_t needed, size;
+	long page;
+	int domain;
+	int i;
+
+	vm_page_base = 0xd000000000000000ULL;
+
+	/* Short-circuit single-domain systems. */
+	if (vm_ndomains == 1) {
+		size = round_page(pages * sizeof(struct vm_page));
+		pa = vm_phys_early_alloc(0, size);
+		vm_page_base = moea64_map(mmu, &vm_page_base,
+		    pa, pa + size, VM_PROT_READ | VM_PROT_WRITE);
+		vm_page_array_size = pages;
+		vm_page_array = (vm_page_t)vm_page_base;
+		return;
+	}
+
+	page = 0;
+	for (i = 0; i < MAXMEMDOM; i++)
+		dom_pages[i] = 0;
+
+	/* Now get the number of pages required per domain. */
+	for (i = 0; i < vm_phys_nsegs; i++) {
+		domain = vm_phys_segs[i].domain;
+		KASSERT(domain < MAXMEMDOM,
+		    ("Invalid vm_phys_segs NUMA domain %d!\n", domain));
+		/* Get size of vm_page_array needed for this segment. */
+		size = btoc(vm_phys_segs[i].end - vm_phys_segs[i].start);
+		dom_pages[domain] += size;
+	}
+
+	for (i = 0; phys_avail[i + 1] != 0; i+= 2) {
+		domain = _vm_phys_domain(phys_avail[i]);
+		KASSERT(domain < MAXMEMDOM,
+		    ("Invalid phys_avail NUMA domain %d!\n", domain));
+		size = btoc(phys_avail[i + 1] - phys_avail[i]);
+		dom_pages[domain] += size;
+	}
+
+	/*
+	 * Map in chunks that can get us all 16MB pages. There will be some
+	 * overlap between domains, but that's acceptable for now.
+	 */
+	vm_page_array_size = 0;
+	va = vm_page_base;
+	for (i = 0; i < MAXMEMDOM && vm_page_array_size < pages; i++) {
+		if (dom_pages[i] == 0)
+			continue;
+		size = ulmin(pages - vm_page_array_size, dom_pages[i]);
+		size = round_page(size * sizeof(struct vm_page));
+		needed = size;
+		size = roundup2(size, moea64_large_page_size);
+		pa = vm_phys_early_alloc(i, size);
+		vm_page_array_size += size / sizeof(struct vm_page);
+		moea64_map_range(mmu, va, pa, size >> PAGE_SHIFT);
+		/* Scoot up domain 0, to reduce the domain page overlap. */
+		if (i == 0)
+			vm_page_base += size - needed;
+		va += size;
+	}
+	vm_page_array = (vm_page_t)vm_page_base;
+	vm_page_array_size = pages;
+}
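
The moea64_map_range() loop above picks between one 16 MB ("large") mapping and individual base-page mappings depending on alignment and on how many pages remain. The standalone sketch below mirrors that decision outside the kernel; the BASE_PAGE_* and LARGE_PAGE_* constants and the page count are illustrative assumptions (4 KB base pages and 16 MB large pages, the usual values behind PAGE_SHIFT and moea64_large_page_size), not values taken from the patch.

/*
 * Standalone illustration (not part of the patch) of the granularity
 * decision in moea64_map_range().  The constants are assumptions for a
 * typical powerpc64 configuration: 4 KB base pages, 16 MB large pages.
 */
#include <stdint.h>
#include <stdio.h>

#define BASE_PAGE_SHIFT	12
#define BASE_PAGE_SIZE	(1UL << BASE_PAGE_SHIFT)
#define LARGE_PAGE_SIZE	(16UL * 1024 * 1024)
#define LARGE_PAGE_MASK	(LARGE_PAGE_SIZE - 1)

int
main(void)
{
	uint64_t va = 0xd000000000000000ULL;	/* same base the patch picks */
	uint64_t pa = 0;			/* 16 MB-aligned physical start */
	uint64_t npages = 5000;			/* 5000 x 4 KB pages to map */
	unsigned long large = 0, small = 0;

	for (; npages > 0; --npages) {
		if ((pa & LARGE_PAGE_MASK) == 0 &&
		    (va & LARGE_PAGE_MASK) == 0 &&
		    npages >= (LARGE_PAGE_SIZE >> BASE_PAGE_SHIFT)) {
			/* One 16 MB entry stands in for 4096 base pages. */
			large++;
			pa += LARGE_PAGE_SIZE;
			va += LARGE_PAGE_SIZE;
			npages -= (LARGE_PAGE_SIZE >> BASE_PAGE_SHIFT) - 1;
		} else {
			small++;
			pa += BASE_PAGE_SIZE;
			va += BASE_PAGE_SIZE;
		}
	}
	printf("large=%lu small=%lu\n", large, small);	/* large=1 small=904 */
	return (0);
}

With 5000 pages starting at an aligned address, one large entry covers the first 4096 pages and the remaining 904 fall back to base-page mappings. The large-page branch subtracts 4095 rather than 4096 because the for loop's own --npages accounts for the final page, exactly as in the kernel loop above.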
Index: head/sys/powerpc/aim/slb.c
===================================================================
--- head/sys/powerpc/aim/slb.c
+++ head/sys/powerpc/aim/slb.c
@@ -221,7 +221,10 @@
 
 		if (mem_valid(DMAP_TO_PHYS(va), 0) == 0)
 			slbv |= SLBV_L;
-	}
+	} else if (moea64_large_page_size != 0 &&
+	    va >= (vm_offset_t)vm_page_array &&
+	    va <= (uintptr_t)(&vm_page_array[vm_page_array_size]))
+		slbv |= SLBV_L;
 
 	return (slbv);
 }
Index: head/sys/powerpc/booke/pmap.c
===================================================================
--- head/sys/powerpc/booke/pmap.c
+++ head/sys/powerpc/booke/pmap.c
@@ -393,6 +393,7 @@
     volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen);
 static int mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr,
     int *is_user, vm_offset_t *decoded_addr);
+static void mmu_booke_page_array_startup(mmu_t , long);
 
 
 static mmu_method_t mmu_booke_methods[] = {
@@ -434,6 +435,7 @@
 	MMUMETHOD(mmu_deactivate,	mmu_booke_deactivate),
 	MMUMETHOD(mmu_quick_enter_page, mmu_booke_quick_enter_page),
 	MMUMETHOD(mmu_quick_remove_page, mmu_booke_quick_remove_page),
+	MMUMETHOD(mmu_page_array_startup,	mmu_booke_page_array_startup),
 
 	/* Internal interfaces */
 	MMUMETHOD(mmu_bootstrap,	mmu_booke_bootstrap),
@@ -1619,8 +1621,30 @@
 	debugf(" kernel pdir at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
 	    kernel_pdir, data_end);
 
+	/* Retrieve phys/avail mem regions */
+	mem_regions(&physmem_regions, &physmem_regions_sz,
+	    &availmem_regions, &availmem_regions_sz);
+
+	if (PHYS_AVAIL_ENTRIES < availmem_regions_sz)
+		panic("mmu_booke_bootstrap: phys_avail too small");
+
+	data_end = round_page(data_end);
+	vm_page_array = (vm_page_t)data_end;
+	/*
+	 * Get a rough idea (upper bound) on the size of the page array.  The
+	 * vm_page_array will not handle any more pages than we have in the
+	 * avail_regions array, and most likely much less.
+	 */
+	sz = 0;
+	for (mp = availmem_regions; mp->mr_size; mp++) {
+		sz += mp->mr_size;
+	}
+	sz = (round_page(sz) / (PAGE_SIZE + sizeof(struct vm_page)));
+	data_end += round_page(sz * sizeof(struct vm_page));
+
 	/* Pre-round up to 1MB. This wastes some space, but saves TLB entries */
 	data_end = roundup2(data_end, 1 << 20);
+
 	debugf(" data_end: 0x%"PRI0ptrX"\n", data_end);
 	debugf(" kernstart: %#zx\n", kernstart);
 	debugf(" kernsize: %#zx\n", kernsize);
@@ -1690,13 +1714,6 @@
 	 * to us. Also, sort the entries for ascending addresses.
 	 */
 
-	/* Retrieve phys/avail mem regions */
-	mem_regions(&physmem_regions, &physmem_regions_sz,
-	    &availmem_regions, &availmem_regions_sz);
-
-	if (PHYS_AVAIL_ENTRIES < availmem_regions_sz)
-		panic("mmu_booke_bootstrap: phys_avail too small");
-
 	sz = 0;
 	cnt = availmem_regions_sz;
 	debugf("processing avail regions:\n");
@@ -3662,6 +3679,12 @@
 	mtx_unlock_spin(&tlbivax_mutex);
 
 	return (0);
+}
+
+static void
+mmu_booke_page_array_startup(mmu_t mmu, long pages)
+{
+	vm_page_array_size = pages;
 }
 
 /**************************************************************************/
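
The Book-E bootstrap above reserves room for the page array right after the kernel image, before phys_avail is trimmed, so it can only estimate the final page count. Each manageable page costs PAGE_SIZE bytes of memory plus one struct vm_page of array space, which is where the division by (PAGE_SIZE + sizeof(struct vm_page)) comes from. A small userland illustration of that ceiling follows; the 104-byte entry size is a made-up example value, not the real sizeof(struct vm_page).

/*
 * Userland illustration of the Book-E upper bound: at most
 * avail_bytes / (BASE_PAGE_SIZE + sizeof(struct vm_page)) pages can
 * ever need array entries.  Entry size is hypothetical.
 */
#include <stdio.h>

#define BASE_PAGE_SIZE		4096UL
#define VM_PAGE_ENTRY_SIZE	104UL		/* hypothetical */

int
main(void)
{
	unsigned long avail = 1UL << 30;	/* 1 GB in availmem_regions */
	unsigned long max_pages = avail / (BASE_PAGE_SIZE + VM_PAGE_ENTRY_SIZE);

	/* Roughly 255k entries, i.e. about 25 MB reserved after data_end. */
	printf("max pages %lu, array bytes %lu\n",
	    max_pages, max_pages * VM_PAGE_ENTRY_SIZE);
	return (0);
}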
Index: head/sys/powerpc/include/pmap.h
===================================================================
--- head/sys/powerpc/include/pmap.h
+++ head/sys/powerpc/include/pmap.h
@@ -270,6 +270,8 @@
 boolean_t	pmap_mmu_install(char *name, int prio);
 const char	*pmap_mmu_name(void);
 
+void		pmap_page_array_startup(long count);
+
 #define	vtophys(va)	pmap_kextract((vm_offset_t)(va))
 
 extern	vm_offset_t virtual_avail;
Index: head/sys/powerpc/include/vmparam.h
===================================================================
--- head/sys/powerpc/include/vmparam.h
+++ head/sys/powerpc/include/vmparam.h
@@ -242,6 +242,21 @@
 #endif
 #endif
 
+#if defined(__powerpc64__) || defined(BOOKE)
+/*
+ * powerpc64 and Book-E will provide their own page array allocators.
+ *
+ * On AIM, this will allocate a single virtual array, with pages from the
+ * correct memory domains.
+ * On Book-E this will let us put the array in TLB1, removing the need for TLB
+ * thrashing.
+ *
+ * VM_MIN_KERNEL_ADDRESS is just a dummy.  It will get set by the MMU driver.
+ */
+#define	PA_MIN_ADDRESS		VM_MIN_KERNEL_ADDRESS
+#define	PMAP_HAS_PAGE_ARRAY	1
+#endif
+
 #define	PMAP_HAS_DMAP	(hw_direct_map)
 #define	PHYS_TO_DMAP(x) ({						\
 	KASSERT(hw_direct_map, ("Direct map not provided by PMAP"));	\
Index: head/sys/powerpc/powerpc/mmu_if.m
===================================================================
--- head/sys/powerpc/powerpc/mmu_if.m
+++ head/sys/powerpc/powerpc/mmu_if.m
@@ -1075,3 +1075,12 @@
 	vm_memattr_t	_mode;
 } DEFAULT mmu_null_change_attr;
 
+/**
+ * @brief Initialize the page array.
+ *
+ * @param _pages	The number of pages to be accounted by the array.
+ */
+METHOD void page_array_startup {
+	mmu_t		_mmu;
+	long		_pages;
+};
Index: head/sys/powerpc/powerpc/pmap_dispatch.c
===================================================================
--- head/sys/powerpc/powerpc/pmap_dispatch.c
+++ head/sys/powerpc/powerpc/pmap_dispatch.c
@@ -610,6 +610,13 @@
 	return (MMU_CHANGE_ATTR(mmu_obj, addr, size, mode));
 }
 
+void
+pmap_page_array_startup(long pages)
+{
+	CTR2(KTR_PMAP, "%s(%ld)", __func__, pages);
+	MMU_PAGE_ARRAY_STARTUP(mmu_obj, pages);
+}
+
 /*
  * MMU install routines. Highest priority wins, equal priority also
 * overrides allowing last-set to win.
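
The machine-independent consumer of PMAP_HAS_PAGE_ARRAY is not visible in this diff; only the __unused annotation in vm_page.c below hints at it. For orientation, a consumer is expected to behave roughly like the hypothetical helper sketched here (the function name and body are illustrative, not the actual sys/vm/vm_page.c code): instead of carving vm_page_array out of physical memory itself, startup hands the page count to the pmap and lets the MMU driver publish vm_page_array and vm_page_array_size.

/*
 * Hypothetical sketch of the MI side of PMAP_HAS_PAGE_ARRAY; the real
 * logic lives in sys/vm/vm_page.c and is not part of this change.
 */
#ifdef PMAP_HAS_PAGE_ARRAY
static void
vm_page_array_alloc_sketch(vm_offset_t *vaddr, long pages)
{

	/* The MMU driver (moea64 or Book-E above) maps the array and sets the globals. */
	pmap_page_array_startup(pages);
	*vaddr = (vm_offset_t)vm_page_array;
}
#endif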
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c
+++ head/sys/vm/vm_page.c
@@ -559,7 +559,8 @@
 	vm_page_t m;
 	char *list, *listend;
 	vm_offset_t mapped;
-	vm_paddr_t end, high_avail, low_avail, new_end, page_range, size;
+	vm_paddr_t end, high_avail, low_avail, new_end, size;
+	vm_paddr_t page_range __unused;
 	vm_paddr_t last_pa, pa;
 	u_long pagecount;
 	int biggestone, i, segind;
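
To put the AIM numbers in perspective, the sketch below walks the per-domain sizing done in moea64_page_array_startup(): pages in the domain times sizeof(struct vm_page), rounded to a page and then to the 16 MB mapping granularity. The 64 GB domain and the 104-byte struct vm_page are illustrative assumptions only; the real entry size depends on the kernel configuration.

/*
 * Worked example of the per-domain sizing in moea64_page_array_startup().
 * Domain size and struct vm_page size are hypothetical.
 */
#include <stdio.h>

#define BASE_PAGE_SIZE		4096UL
#define LARGE_PAGE_SIZE		(16UL * 1024 * 1024)
#define VM_PAGE_ENTRY_SIZE	104UL		/* hypothetical */

static unsigned long
round_up(unsigned long x, unsigned long align)
{
	return ((x + align - 1) / align * align);
}

int
main(void)
{
	unsigned long dom_bytes = 64UL << 30;	/* 64 GB in this domain */
	unsigned long dom_pages = dom_bytes / BASE_PAGE_SIZE;
	unsigned long size;

	/* pages * sizeof(struct vm_page), page-rounded, then 16 MB-rounded. */
	size = round_up(dom_pages * VM_PAGE_ENTRY_SIZE, BASE_PAGE_SIZE);
	size = round_up(size, LARGE_PAGE_SIZE);
	printf("array: %lu bytes, %lu large mappings\n",
	    size, size / LARGE_PAGE_SIZE);	/* 1744830464 bytes, 104 mappings */
	return (0);
}

Because the rounded size is a whole number of 16 MB chunks and the mapping VA stays 16 MB aligned, moea64_map_range() can cover such an array entirely with large mappings, provided the physical chunk handed back by vm_phys_early_alloc() is itself 16 MB aligned; otherwise it falls back to base-page entries as shown earlier.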