diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c
--- a/sys/kern/subr_vmem.c
+++ b/sys/kern/subr_vmem.c
@@ -238,11 +238,14 @@
 static struct vmem kernel_arena_storage;
 static struct vmem buffer_arena_storage;
 static struct vmem transient_arena_storage;
+static struct vmem kstack_arena_storage;
+
 /* kernel and kmem arenas are aliased for backwards KPI compat. */
 vmem_t *kernel_arena = &kernel_arena_storage;
 vmem_t *kmem_arena = &kernel_arena_storage;
 vmem_t *buffer_arena = &buffer_arena_storage;
 vmem_t *transient_arena = &transient_arena_storage;
+vmem_t *kstack_arena = &kstack_arena_storage;
 
 #ifdef DEBUG_MEMGUARD
 static struct vmem memguard_arena_storage;
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -129,6 +129,7 @@
 void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
+vm_pindex_t vm_kstack_pindex(vm_offset_t ks, int npages);
 void vm_thread_stack_back(struct domainset *ds, vm_offset_t kaddr,
     vm_page_t ma[], int npages, int req_class);
 u_int vm_active_count(void);
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -285,6 +285,45 @@
     CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &kstack_cache_size, 0,
     sysctl_kstack_cache_size, "IU", "Maximum number of cached kernel stacks");
 
+/*
+ * kva_alloc_kstack:
+ *
+ *	Allocate a virtual address range from the kstack arena.
+ */
+static vm_offset_t
+kva_alloc_kstack(vm_size_t size)
+{
+	vm_offset_t addr;
+
+	size = round_page(size);
+	/* Fall back to the kernel arena for non-standard kstack sizes. */
+	if (size != ((kstack_pages + KSTACK_GUARD_PAGES) * PAGE_SIZE)) {
+		return (kva_alloc(size));
+	}
+
+	if (vmem_alloc(kstack_arena, size, M_BESTFIT | M_NOWAIT, &addr))
+		return (0);
+
+	return (addr);
+}
+
+/*
+ * kva_free_kstack:
+ *
+ *	Release a region of kernel virtual memory
+ *	allocated from the kstack arena.
+ */
+static void
+kva_free_kstack(vm_offset_t addr, vm_size_t size)
+{
+	size = round_page(size);
+	if (size != ((kstack_pages + KSTACK_GUARD_PAGES) * PAGE_SIZE)) {
+		kva_free(addr, size);
+	} else {
+		vmem_free(kstack_arena, addr, size);
+	}
+}
+
 /*
  * Create the kernel stack (including pcb for i386) for a new thread.
  */
@@ -298,7 +337,7 @@
 	/*
 	 * Get a kernel virtual address for this thread's kstack.
 	 */
-	ks = kva_alloc((pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
+	ks = kva_alloc_kstack((pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
 	if (ks == 0) {
 		printf("%s: kstack allocation failed\n", __func__);
 		return (0);
 	}
@@ -327,7 +366,7 @@
 	vm_pindex_t pindex;
 	int i;
 
-	pindex = atop(ks - VM_MIN_KERNEL_ADDRESS);
+	pindex = vm_kstack_pindex(ks, pages);
 
 	pmap_qremove(ks, pages);
 	VM_OBJECT_WLOCK(kstack_object);
@@ -341,7 +380,7 @@
 	}
 	VM_OBJECT_WUNLOCK(kstack_object);
 	kasan_mark((void *)ks, ptoa(pages), ptoa(pages), 0);
-	kva_free(ks - (KSTACK_GUARD_PAGES * PAGE_SIZE),
+	kva_free_kstack(ks - (KSTACK_GUARD_PAGES * PAGE_SIZE),
 	    (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
 }
 
@@ -400,6 +439,29 @@
 		vm_thread_stack_dispose(ks, pages);
 }
 
+/*
+ * Calculate kstack pindex.
+ *
+ * Uses a non-linear mapping if guard pages are
+ * active to avoid pindex holes in the kstack object.
+ */
+vm_pindex_t
+vm_kstack_pindex(vm_offset_t ks, int npages)
+{
+	KASSERT(npages == kstack_pages,
+	    ("Calculating kstack pindex with npages != kstack_pages\n"));
+
+	vm_pindex_t pindex = atop(ks - VM_MIN_KERNEL_ADDRESS);
+
+	if (KSTACK_GUARD_PAGES == 0) {
+		return (pindex);
+	}
+	KASSERT((pindex % (npages + KSTACK_GUARD_PAGES)) != 0,
+	    ("Attempting to calculate kstack guard page pindex\n"));
+
+	return (pindex - ((pindex / (npages + KSTACK_GUARD_PAGES)) + 1));
+}
+
 /*
  * Allocate physical pages, following the specified NUMA policy, to back a
  * kernel stack.
@@ -411,7 +473,7 @@
 	vm_pindex_t pindex;
 	int n;
 
-	pindex = atop(ks - VM_MIN_KERNEL_ADDRESS);
+	pindex = vm_kstack_pindex(ks, npages);
 
 	VM_OBJECT_WLOCK(kstack_object);
 	for (n = 0; n < npages;) {
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
--- a/sys/vm/vm_kern.h
+++ b/sys/vm/vm_kern.h
@@ -77,6 +77,7 @@
 extern struct vmem *buffer_arena;
 extern struct vmem *transient_arena;
 extern struct vmem *memguard_arena;
+extern struct vmem *kstack_arena;
 extern u_long vm_kmem_size;
 extern u_int exec_map_entries;
 extern u_int exec_map_entry_size;
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -760,6 +760,80 @@
 	    VMEM_ADDR_MAX, flags, addrp));
 }
 
+/*
+ * Import KVA from a parent arena into the kstack arena. Imports must be
+ * a multiple of kernel stack pages + guard pages in size.
+ *
+ * Kstack VA allocations need to be aligned so that the linear KVA pindex
+ * is divisible by the total number of kstack VA pages. This is necessary to
+ * make vm_kstack_pindex work properly.
+ *
+ * We allocate a KVA_QUANTUM-aligned VA region that is slightly
+ * larger than the requested size and adjust it until it is both
+ * properly aligned and of the requested size.
+ */
+static int
+kva_import_kstack(void *arena, vmem_size_t size, int flags, vmem_addr_t *addrp)
+{
+	int error, rem;
+	size_t npages = kstack_pages + KSTACK_GUARD_PAGES;
+	vmem_size_t padding = npages * PAGE_SIZE;
+	vm_pindex_t lin_pidx;
+
+	KASSERT((size % npages) == 0,
+	    ("kva_import_kstack: Size %jd is not a multiple of kstack pages (%d)",
+	    (intmax_t)size, (int)npages));
+
+	error = vmem_xalloc(arena, size + padding, KVA_QUANTUM, 0, 0,
+	    VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, addrp);
+	if (error) {
+		return (error);
+	}
+
+	lin_pidx = atop(*addrp - VM_MIN_KERNEL_ADDRESS);
+	rem = lin_pidx % npages;
+	if (rem != 0) {
+		/* Bump addr to the next aligned address. */
+		*addrp = *addrp + ((npages - rem) * PAGE_SIZE);
+	}
+
+	return (0);
+}
+
+/*
+ * Release KVA from a parent arena into the kstack arena. Released imports must
+ * be a multiple of kernel stack pages + guard pages in size.
+ */
+static void
+kva_release_kstack(void *arena, vmem_addr_t addr, vmem_size_t size)
+{
+	int rem;
+	size_t npages = kstack_pages + KSTACK_GUARD_PAGES;
+	vmem_size_t padding = npages * PAGE_SIZE;
+
+	KASSERT((size % npages) == 0,
+	    ("kva_release_kstack: Size %jd is not a multiple of kstack pages (%d)",
+	    (intmax_t)size, (int)npages));
+	KASSERT((addr % npages) == 0,
+	    ("kva_release_kstack: Address %p is not a multiple of kstack pages (%d)",
+	    (void *)addr, (int)npages));
+
+	/*
+	 * If the address is not KVA_QUANTUM-aligned we have to decrement
+	 * it to account for the shift in kva_import_kstack.
+	 */
+	rem = addr % KVA_QUANTUM;
+	if (rem) {
+		KASSERT(rem <= (npages * PAGE_SIZE),
+		    ("kva_release_kstack: rem > npages (%d), (%d)", rem,
+		    (int)npages));
+		addr -= rem;
+	}
+	vmem_xfree(arena, addr, size + padding);
+
+	return;
+}
+
 /*
  * kmem_init:
  *
@@ -772,7 +846,7 @@
 void
 kmem_init(vm_offset_t start, vm_offset_t end)
 {
-	vm_size_t quantum;
+	vm_size_t quantum, kstack_quantum;
 	int domain;
 
 	vm_map_init(kernel_map, kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
@@ -811,12 +885,30 @@
 	else
 		quantum = KVA_QUANTUM;
 
+	kstack_quantum = KVA_QUANTUM;
+
+#ifdef __ILP32__
+	/* Adjust kstack quantum size. */
+	kstack_quantum -= (kstack_quantum %
+	    ((kstack_pages + KSTACK_GUARD_PAGES) * PAGE_SIZE));
+#else
+	/* The kstack_quantum is larger than KVA_QUANTUM to account
+	 * for holes induced by guard pages. */
+	kstack_quantum *= (kstack_pages + KSTACK_GUARD_PAGES);
+#endif
 	/*
 	 * Initialize the kernel_arena. This can grow on demand.
 	 */
 	vmem_init(kernel_arena, "kernel arena", 0, 0, PAGE_SIZE, 0, 0);
 	vmem_set_import(kernel_arena, kva_import, NULL, NULL, quantum);
 
+	/*
+	 * Initialize the kstack_arena and set kernel_arena as parent.
+	 */
+	vmem_init(kstack_arena, "kstack arena", 0, 0, PAGE_SIZE, 0, 0);
+	vmem_set_import(kstack_arena, kva_import_kstack, kva_release_kstack,
+	    kernel_arena, kstack_quantum);
+
 	for (domain = 0; domain < vm_ndomains; domain++) {
 		/*
 		 * Initialize the per-domain arenas. These are used to color
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -538,7 +538,7 @@
 	cpu_thread_swapout(td);
 	kaddr = td->td_kstack;
 	pages = td->td_kstack_pages;
-	pindex = atop(kaddr - VM_MIN_KERNEL_ADDRESS);
+	pindex = vm_kstack_pindex(kaddr, pages);
 	pmap_qremove(kaddr, pages);
 	VM_OBJECT_WLOCK(kstack_object);
 	for (i = 0; i < pages; i++) {
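The pindex transformation in vm_kstack_pindex() is easiest to see with concrete numbers. The sketch below is a minimal userspace model of that arithmetic, assuming kstack_pages = 4 and KSTACK_GUARD_PAGES = 1; PAGE_SIZE, KVA_BASE and the helper names here are illustrative stand-ins rather than the kernel's definitions, and only the final formula mirrors the patch.

/*
 * Userspace sketch of the kstack pindex math. KVA_BASE is an arbitrary
 * page-aligned address standing in for VM_MIN_KERNEL_ADDRESS; the constants
 * are assumed values, not the kernel's.
 */
#include <stdint.h>
#include <stdio.h>

#define	PAGE_SIZE		4096ULL
#define	KSTACK_PAGES		4
#define	KSTACK_GUARD_PAGES	1
#define	KVA_BASE		0xfffffe0000000000ULL

static uint64_t
atop(uint64_t bytes)
{
	return (bytes / PAGE_SIZE);
}

/* Mirrors the guard-page branch of vm_kstack_pindex() from the patch. */
static uint64_t
kstack_pindex(uint64_t ks)
{
	uint64_t pindex = atop(ks - KVA_BASE);
	uint64_t chunk = KSTACK_PAGES + KSTACK_GUARD_PAGES;

	if (KSTACK_GUARD_PAGES == 0)
		return (pindex);
	/* Strip one guard page per preceding chunk, plus this chunk's own. */
	return (pindex - (pindex / chunk + 1));
}

int
main(void)
{
	uint64_t chunk_bytes = (KSTACK_PAGES + KSTACK_GUARD_PAGES) * PAGE_SIZE;

	/*
	 * Walk a few consecutive kstack chunks. Each chunk begins with its
	 * guard page, so the usable stack base sits one guard's worth above
	 * the chunk base, matching ks - KSTACK_GUARD_PAGES * PAGE_SIZE in
	 * the vm_glue.c hunks above.
	 */
	for (int i = 0; i < 4; i++) {
		uint64_t ks = KVA_BASE + i * chunk_bytes +
		    KSTACK_GUARD_PAGES * PAGE_SIZE;
		printf("stack %d: linear pindex %4ju -> object pindex %4ju\n",
		    i, (uintmax_t)atop(ks - KVA_BASE),
		    (uintmax_t)kstack_pindex(ks));
	}
	return (0);
}

With these assumed constants the program prints linear pindexes 1, 6, 11, 16 mapping to object pindexes 0, 4, 8, 12: consecutive stacks occupy consecutive kstack_pages-sized runs in kstack_object with no holes left for the guard pages. That dense layout is exactly what the npages-aligned imports in kva_import_kstack() are there to guarantee.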