Index: head/sys/amd64/amd64/pmap.c
===================================================================
--- head/sys/amd64/amd64/pmap.c
+++ head/sys/amd64/amd64/pmap.c
@@ -383,6 +383,9 @@
 static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */
 static int		ndmpdpphys;	/* number of DMPDPphys pages */
 
+static uint64_t		PAPDPphys;	/* phys addr of page array level 3 */
+static int		npapdpphys;	/* number of PAPDPphys pages */
+
 static vm_paddr_t	KERNend;	/* phys addr of end of bootstrap data */
 
 /*
@@ -1427,6 +1430,16 @@
 	pml4_entry_t *p4_p;
 	uint64_t DMPDkernphys;
 
+	npapdpphys = howmany(ptoa(Maxmem) / sizeof(struct vm_page), NBPML4);
+	if (npapdpphys > NPAPML4E) {
+		printf("NDMPML4E limits system to %lu GB\n",
+		    (NDMPML4E * 512) * (PAGE_SIZE / sizeof(struct vm_page)));
+		npapdpphys = NPAPML4E;
+		Maxmem = atop(NPAPML4E * NBPML4 *
+		    (PAGE_SIZE / sizeof(struct vm_page)));
+	}
+	PAPDPphys = allocpages(firstaddr, npapdpphys);
+
 	/* Allocate page table pages for the direct map */
 	ndmpdp = howmany(ptoa(Maxmem), NBPDP);
 	if (ndmpdp < 4)		/* Minimum 4GB of dirmap */
@@ -1573,6 +1586,12 @@
 		p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
 		p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V;
 	}
+
+	/* Connect the page array slots up to the pml4. */
+	for (i = 0; i < npapdpphys; i++) {
+		p4_p[PAPML4I + i] = PAPDPphys + ptoa(i);
+		p4_p[PAPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+	}
 }
 
 /*
@@ -3387,6 +3406,11 @@
 		    X86_PG_V;
 	}
 
+	for (i = 0; i < npapdpphys; i++) {
+		pm_pml4[PAPML4I + i] = (PAPDPphys + ptoa(i)) | X86_PG_RW |
+		    X86_PG_V;
+	}
+
 	/* install self-referential address mapping entry(s) */
 	pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V |
 	    X86_PG_RW | X86_PG_A | X86_PG_M;
@@ -3743,6 +3767,8 @@
 		pmap->pm_pml4[KPML4BASE + i] = 0;
 	for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
 		pmap->pm_pml4[DMPML4I + i] = 0;
+	for (i = 0; i < npapdpphys; i++)
+		pmap->pm_pml4[PAPML4I + i] = 0;
 	pmap->pm_pml4[PML4PML4I] = 0;	/* Recursive Mapping */
 	for (i = 0; i < lm_ents; i++)	/* Large Map */
 		pmap->pm_pml4[LMSPML4I + i] = 0;
@@ -3779,6 +3805,44 @@
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0,
     kvm_free, "LU", "Amount of KVM free");
+
+void
+pmap_page_array_startup(long pages)
+{
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde, newpdir;
+	vm_offset_t va, start, end;
+	vm_paddr_t pa;
+	long pfn;
+	int domain, i;
+
+	vm_page_array_size = pages;
+
+	start = va = PA_MIN_ADDRESS;
+	end = va + (pages * sizeof(struct vm_page));
+	while (va < end) {
+		pfn = first_page + ((va - start) / sizeof(struct vm_page));
+		domain = _vm_phys_domain(ctob(pfn));
+		pdpe = pmap_pdpe(kernel_pmap, va);
+		if ((*pdpe & X86_PG_V) == 0) {
+			pa = vm_phys_early_alloc(domain, PAGE_SIZE);
+			bzero((void *)PHYS_TO_DMAP(pa), PAGE_SIZE);
+			*pdpe = (pdp_entry_t)(pa | X86_PG_V | X86_PG_RW |
+			    X86_PG_A | X86_PG_M);
+			continue; /* try again */
+		}
+		pde = pmap_pdpe_to_pde(pdpe, va);
+		if ((*pde & X86_PG_V) != 0)
+			panic("Unexpected pde");
+		pa = vm_phys_early_alloc(domain, NBPDR);
+		for (i = 0; i < NPDEPG; i++)
+			dump_add_page(pa + (i * PAGE_SIZE));
+		newpdir = (pd_entry_t)(pa | X86_PG_V | X86_PG_RW | X86_PG_A |
+		    X86_PG_M | PG_PS | pg_g | pg_nx);
+		pde_store(pde, newpdir);
+		va += NBPDR;
+	}
+}
 
 /*
  * grow the number of kernel page table entries, if needed
Index: head/sys/amd64/include/pmap.h
===================================================================
--- head/sys/amd64/include/pmap.h
+++ head/sys/amd64/include/pmap.h
@@ -201,6 +201,13 @@
 #define	NDMPML4E	8
 
 /*
+ * NPAPML4E is the maximum number of PML4 entries that will be
+ * used to implement the page array.  This should be roughly 3% of
+ * NPDPML4E owing to 3% overhead for struct vm_page.
+ */
+#define	NPAPML4E	1
+
+/*
  * These values control the layout of virtual memory.  The starting address
  * of the direct map, which is controlled by DMPML4I, must be a multiple of
  * its size.  (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.)
@@ -219,7 +226,8 @@
 #define	PML4PML4I	(NPML4EPG/2)	/* Index of recursive pml4 mapping */
 
 #define	KPML4BASE	(NPML4EPG-NKPML4E) /* KVM at highest addresses */
-#define	DMPML4I		rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
+#define	PAPML4I		(KPML4BASE-1-NPAPML4E)	/* Below KVM */
+#define	DMPML4I		rounddown(PAPML4I-NDMPML4E, NDMPML4E)	/* Below pages */
 
 #define	KPML4I		(NPML4EPG-1)
 #define	KPDPI		(NPDPEPG-2)	/* kernbase at -2GB */
@@ -467,6 +475,7 @@
 	    u_int keyidx, int flags);
 void	pmap_thread_init_invl_gen(struct thread *td);
 int	pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
+void	pmap_page_array_startup(long count);
 #endif /* _KERNEL */
 
 /* Return various clipped indexes for a given VA */
Index: head/sys/amd64/include/vmparam.h
===================================================================
--- head/sys/amd64/include/vmparam.h
+++ head/sys/amd64/include/vmparam.h
@@ -160,7 +160,9 @@
  * 0xffff808000000000 - 0xffff847fffffffff   large map (can be tuned up)
  * 0xffff848000000000 - 0xfffff7ffffffffff   unused (large map extends there)
  * 0xfffff80000000000 - 0xfffffbffffffffff   4TB direct map
- * 0xfffffc0000000000 - 0xfffffdffffffffff   unused
+ * 0xfffffc0000000000 - 0xfffffcffffffffff   unused
+ * 0xfffffd0000000000 - 0xfffffd7fffffffff   page array 512GB
+ * 0xfffffd8000000000 - 0xfffffdffffffffff   unused
  * 0xfffffe0000000000 - 0xffffffffffffffff   2TB kernel map
  *
  * Within the kernel map:
@@ -175,6 +177,8 @@
 #define	DMAP_MIN_ADDRESS	KVADDR(DMPML4I, 0, 0, 0)
 #define	DMAP_MAX_ADDRESS	KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
 
+#define	PA_MIN_ADDRESS		KVADDR(PAPML4I, 0, 0, 0)
+
 #define	LARGEMAP_MIN_ADDRESS	KVADDR(LMSPML4I, 0, 0, 0)
 #define	LARGEMAP_MAX_ADDRESS	KVADDR(LMEPML4I + 1, 0, 0, 0)
 
@@ -210,6 +214,12 @@
 	    ("virtual address %#jx not covered by the DMAP",	\
 	    (uintmax_t)x));					\
 	(x) & ~DMAP_MIN_ADDRESS; })
+
+/*
+ * amd64 statically allocates the page array address so that it can
+ * be more easily allocated on the correct memory domains.
+ */
+#define	PMAP_HAS_PAGE_ARRAY	1
 
 /*
  * How many physical pages per kmem arena virtual page.
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c
+++ head/sys/vm/vm_page.c
@@ -135,7 +135,11 @@
  */
 vm_page_t bogus_page;
 
+#ifdef PMAP_HAS_PAGE_ARRAY
+vm_page_t vm_page_array = (vm_page_t)PA_MIN_ADDRESS;
+#else
 vm_page_t vm_page_array;
+#endif
 long vm_page_array_size;
 long first_page;
 
@@ -522,6 +526,31 @@
 	pmap_page_init(m);
 }
 
+#ifndef PMAP_HAS_PAGE_ARRAY
+static vm_paddr_t
+vm_page_array_alloc(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t page_range)
+{
+	vm_paddr_t new_end;
+
+	/*
+	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
+	 * However, because this page is allocated from KVM, out-of-bounds
+	 * accesses using the direct map will not be trapped.
+	 */
+	*vaddr += PAGE_SIZE;
+
+	/*
+	 * Allocate physical memory for the page structures, and map it.
+	 */
+	new_end = trunc_page(end - page_range * sizeof(struct vm_page));
+	vm_page_array = (vm_page_t)pmap_map(vaddr, new_end, end,
+	    VM_PROT_READ | VM_PROT_WRITE);
+	vm_page_array_size = page_range;
+
+	return (new_end);
+}
+#endif
+
 /*
  *	vm_page_startup:
  *
@@ -693,6 +722,11 @@
 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
 #endif
 
+#ifdef PMAP_HAS_PAGE_ARRAY
+	pmap_page_array_startup(size / PAGE_SIZE);
+	biggestone = vm_phys_avail_largest();
+	end = new_end = phys_avail[biggestone + 1];
+#else
 #ifdef VM_PHYSSEG_DENSE
 	/*
 	 * In the VM_PHYSSEG_DENSE case, the number of pages can account for
@@ -723,31 +757,15 @@
 		}
 	}
 	end = new_end;
+	new_end = vm_page_array_alloc(&vaddr, end, page_range);
+#endif
 
-	/*
-	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
-	 * However, because this page is allocated from KVM, out-of-bounds
-	 * accesses using the direct map will not be trapped.
-	 */
-	vaddr += PAGE_SIZE;
-
-	/*
-	 * Allocate physical memory for the page structures, and map it.
-	 */
-	new_end = trunc_page(end - page_range * sizeof(struct vm_page));
-	mapped = pmap_map(&vaddr, new_end, end,
-	    VM_PROT_READ | VM_PROT_WRITE);
-	vm_page_array = (vm_page_t)mapped;
-	vm_page_array_size = page_range;
-
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * Allocate physical memory for the reservation management system's
 	 * data structures, and map it.
 	 */
-	if (high_avail == end)
-		high_avail = new_end;
-	new_end = vm_reserv_startup(&vaddr, new_end, high_avail);
+	new_end = vm_reserv_startup(&vaddr, new_end);
 #endif
 #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \
     defined(__riscv)
Index: head/sys/vm/vm_reserv.h
===================================================================
--- head/sys/vm/vm_reserv.h
+++ head/sys/vm/vm_reserv.h
@@ -66,8 +66,7 @@
 void		vm_reserv_rename(vm_page_t m, vm_object_t new_object,
 		    vm_object_t old_object, vm_pindex_t old_object_offset);
 int		vm_reserv_size(int level);
-vm_paddr_t	vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
-		    vm_paddr_t high_water);
+vm_paddr_t	vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end);
 vm_page_t	vm_reserv_to_superpage(vm_page_t m);
 
 #endif	/* VM_NRESERVLEVEL > 0 */
Index: head/sys/vm/vm_reserv.c
===================================================================
--- head/sys/vm/vm_reserv.c
+++ head/sys/vm/vm_reserv.c
@@ -1360,10 +1360,23 @@
  * management system's data structures, in particular, the reservation array.
  */
 vm_paddr_t
-vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
+vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end)
 {
-	vm_paddr_t new_end;
+	vm_paddr_t new_end, high_water;
 	size_t size;
+	int i;
+
+	high_water = phys_avail[1];
+	for (i = 0; i < vm_phys_nsegs; i++) {
+		if (vm_phys_segs[i].end > high_water)
+			high_water = vm_phys_segs[i].end;
+	}
+
+	/* Skip the first chunk.  It is already accounted for. */
+	for (i = 2; phys_avail[i + 1] != 0; i += 2) {
+		if (phys_avail[i + 1] > high_water)
+			high_water = phys_avail[i + 1];
+	}
 
 	/*
 	 * Calculate the size (in bytes) of the reservation array.  Round up