Index: sys/kern/kern_exec.c
===================================================================
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -50,22 +50,24 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
-#include
 #include
-#include
 #include
 #include
 #include
 #include
 #include
-#include
-#include
 #include
+#include
+#include
+#include
 #include
+#include
 #include
-#include
 #ifdef KTRACE
 #include
 #endif
@@ -101,9 +103,6 @@
 
 MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
 
-DPCPU_DEFINE(vm_offset_t, execargs);
-CTASSERT(sizeof(vm_offset_t) == sizeof(long));
-
 int coredump_pack_fileinfo = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN,
     &coredump_pack_fileinfo, 0,
@@ -1318,37 +1317,97 @@
 	return (error);
 }
 
-static vm_offset_t
-exec_alloc_args_kva(int cpuid)
+struct exec_args_kva {
+	vm_map_entry_t entry;
+	SLIST_ENTRY(exec_args_kva) next;
+};
+
+DPCPU_DEFINE(struct exec_args_kva *, exec_args_kva);
+
+static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist;
+static struct mtx exec_args_kva_mtx;
+
+static void
+exec_prealloc_args_kva(void *arg __unused)
 {
-	vm_offset_t addr;
+	struct exec_args_kva *argkva;
+	vm_map_t map;
+	vm_object_t object;
+	vm_offset_t off;
+	vm_pindex_t pi;
+	vm_prot_t prot;
+	u_int i;
+	boolean_t wired;
+
+	SLIST_INIT(&exec_args_kva_freelist);
+	mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF);
+	for (i = 0; i < exec_map_entries; i++) {
+		argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK);
+		object = vm_object_allocate(OBJT_DEFAULT, atop(exec_map_entry_size));
+		off = kmap_alloc_wait(exec_map, object, exec_map_entry_size);
+
+		map = exec_map;
+		if (vm_map_lookup(&map, off, VM_PROT_RW, &argkva->entry,
+		    &object, &pi, &prot, &wired) != KERN_SUCCESS)
+			panic("failed to allocate %d exec map entries", i + 1);
+		vm_map_lookup_done(map, argkva->entry);
+		MPASS(off == argkva->entry->start);
+
+		SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
+	}
+}
+SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL);
 
-	addr = atomic_readandclear_long((long *)DPCPU_ID_PTR(cpuid, execargs));
-	if (addr == 0)
-		addr = kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
-	return (addr);
+static vm_offset_t
+exec_alloc_args_kva(void **cookie)
+{
+	struct exec_args_kva *argkva;
+
+	argkva = (void *)atomic_readandclear_ptr(
+	    (uintptr_t *)DPCPU_PTR(exec_args_kva));
+	if (argkva == NULL) {
+		mtx_lock(&exec_args_kva_mtx);
+		while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL)
+			(void)mtx_sleep(&exec_args_kva_freelist,
+			    &exec_args_kva_mtx, 0, "execkva", 0);
+		SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next);
+		mtx_unlock(&exec_args_kva_mtx);
+	}
+	*(struct exec_args_kva **)cookie = argkva;
+	return (argkva->entry->start);
 }
 
 static void
-exec_free_args_kva(vm_offset_t addr, int cpuid)
+exec_free_args_kva(void *cookie)
 {
-
-	if (!atomic_cmpset_long((long *)DPCPU_ID_PTR(cpuid, execargs), 0,
-	    (long)addr))
-		kmap_free_wakeup(exec_map, addr, PATH_MAX + ARG_MAX);
+	struct exec_args_kva *argkva;
+	vm_offset_t off;
+
+	argkva = cookie;
+	off = argkva->entry->start;
+
+	pmap_advise(exec_map->pmap, off, off + exec_map_entry_size, MADV_FREE);
+	vm_object_madvise(argkva->entry->object.vm_object, 0,
+	    OFF_TO_IDX(exec_map_entry_size), MADV_FREE);
+	if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva),
+	    (uintptr_t)NULL, (uintptr_t)argkva)) {
+		mtx_lock(&exec_args_kva_mtx);
+		SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
+		wakeup_one(&exec_args_kva_freelist);
+		mtx_unlock(&exec_args_kva_mtx);
+	}
 }
 
 /*
  * Allocate temporary demand-paged, zero-filled memory for the file name,
- * argument, and environment strings.  Returns zero if the allocation succeeds
- * and ENOMEM otherwise.
+ * argument, and environment strings.
  */
 int
 exec_alloc_args(struct image_args *args)
 {
 
-	args->buf = (void *)exec_alloc_args_kva(PCPU_GET(cpuid));
-	return (args->buf != NULL ? 0 : ENOMEM);
+	args->buf = (char *)exec_alloc_args_kva(&args->bufkva);
+	return (0);
 }
 
 void
@@ -1356,7 +1415,7 @@
 {
 
 	if (args->buf != NULL) {
-		exec_free_args_kva((vm_offset_t)args->buf, PCPU_GET(cpuid));
+		exec_free_args_kva(args->bufkva);
		args->buf = NULL;
 	}
 	if (args->fname_buf != NULL) {
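The heart of the kern_exec.c change is the allocation scheme: each CPU caches at most one free argument buffer in a DPCPU pointer that is claimed with a single atomic read-and-clear, and a mutex-protected SLIST of preallocated entries backs it, with waiters sleeping on the list head. The standalone C program below is a minimal userspace sketch of that scheme, using C11 atomics and pthreads as stand-ins for the kernel's DPCPU, mtx, and sleep/wakeup primitives; all names in it (slot_cache, buf_alloc, and so on) are illustrative, not taken from the patch.

/*
 * Minimal userspace sketch of the per-CPU cache + free-list scheme above.
 * A per-slot atomic pointer stands in for the DPCPU cache, a pthread
 * mutex/condvar pair for the mtx and sleep/wakeup, and plain heap buffers
 * for the exec map entries.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define	SLOT_COUNT	4		/* stand-in for mp_ncpus */

struct arg_buffer {
	struct arg_buffer *next;	/* free-list linkage (SLIST analogue) */
	char data[1024];		/* stand-in for PATH_MAX + ARG_MAX of KVA */
};

static _Atomic(struct arg_buffer *) slot_cache[SLOT_COUNT];
static struct arg_buffer *freelist;
static pthread_mutex_t freelist_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t freelist_cv = PTHREAD_COND_INITIALIZER;

static struct arg_buffer *
buf_alloc(int slot)
{
	struct arg_buffer *b;

	/*
	 * Fast path: atomically claim the cached buffer, mirroring
	 * atomic_readandclear_ptr() on the DPCPU pointer.
	 */
	b = atomic_exchange(&slot_cache[slot], NULL);
	if (b != NULL)
		return (b);

	/*
	 * Slow path: sleep until the free list is non-empty, mirroring
	 * mtx_sleep() on &exec_args_kva_freelist.
	 */
	pthread_mutex_lock(&freelist_mtx);
	while (freelist == NULL)
		pthread_cond_wait(&freelist_cv, &freelist_mtx);
	b = freelist;
	freelist = b->next;
	pthread_mutex_unlock(&freelist_mtx);
	return (b);
}

static void
buf_free(struct arg_buffer *b, int slot)
{
	struct arg_buffer *expected = NULL;

	/*
	 * Try to reinstall the buffer in the per-slot cache; if the cache
	 * is already occupied, push it onto the free list and wake one
	 * waiter, mirroring the atomic_cmpset_ptr() fallback above.
	 */
	if (atomic_compare_exchange_strong(&slot_cache[slot], &expected, b))
		return;
	pthread_mutex_lock(&freelist_mtx);
	b->next = freelist;
	freelist = b;
	pthread_cond_signal(&freelist_cv);
	pthread_mutex_unlock(&freelist_mtx);
}

int
main(void)
{
	struct arg_buffer *b;
	int i;

	/*
	 * Preallocate SLOT_COUNT + 1 buffers so that even with every
	 * per-slot cache occupied, one buffer remains to hand out.
	 */
	for (i = 0; i < SLOT_COUNT + 1; i++)
		buf_free(calloc(1, sizeof(struct arg_buffer)), i % SLOT_COUNT);

	b = buf_alloc(0);
	printf("allocated %p from slot 0\n", (void *)b);
	buf_free(b, 0);
	return (0);
}

Preallocating the entries up front (at least ncpu+1 of them, per the vm_init.c comment below) is what lets exec_alloc_args() shed its ENOMEM path: an allocation may now block, but it cannot fail.
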
Index: sys/sys/imgact.h
===================================================================
--- sys/sys/imgact.h
+++ sys/sys/imgact.h
@@ -42,6 +42,7 @@
 
 struct image_args {
	char *buf;		/* pointer to string buffer */
+	void *bufkva;		/* cookie for string buffer KVA */
	char *begin_argv;	/* beginning of argv in buf */
	char *begin_envv;	/* beginning of envv in buf */
	char *endp;		/* current `end' pointer of arg & env strings */
Index: sys/vm/vm_extern.h
===================================================================
--- sys/vm/vm_extern.h
+++ sys/vm/vm_extern.h
@@ -48,7 +48,7 @@
 void kva_free(vm_offset_t, vm_size_t);
 
 /* These operate on pageable virtual addresses. */
-vm_offset_t kmap_alloc_wait(vm_map_t, vm_size_t);
+vm_offset_t kmap_alloc_wait(vm_map_t, vm_object_t, vm_size_t);
 void kmap_free_wakeup(vm_map_t, vm_offset_t, vm_size_t);
 
 /* These operate on virtual addresses backed by memory. */
Index: sys/vm/vm_init.c
===================================================================
--- sys/vm/vm_init.c
+++ sys/vm/vm_init.c
@@ -67,6 +67,7 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
@@ -267,12 +268,19 @@
		panic("Clean map calculation incorrect");
 
	/*
-	 * Allocate the pageable submaps.  We may cache a buffer of size
-	 * PATH_MAX+ARG_MAX per CPU, so we therefore need to reserve space for
-	 * at least ncpu+1 buffers to avoid deadlock.
+	 * Allocate the pageable submaps.  We may cache an exec map entry per
+	 * CPU, so we need to reserve space for at least ncpu+1 entries to
+	 * avoid deadlock.  The exec map is also used by some image
+	 * activators, so we leave a fixed number of pages for their use.
	 */
+#ifdef __LP64__
+	exec_map_entries = 8 * mp_ncpus;
+#else
+	exec_map_entries = min(8 * mp_ncpus, 2 * mp_ncpus + 4);
+#endif
+	exec_map_entry_size = round_page(PATH_MAX + ARG_MAX);
	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
-	    mp_ncpus * 8 * round_page(PATH_MAX + ARG_MAX), FALSE);
+	    exec_map_entries * exec_map_entry_size + 64 * PAGE_SIZE, FALSE);
	pipe_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, maxpipekva,
	    FALSE);
 }
Index: sys/vm/vm_kern.h
===================================================================
--- sys/vm/vm_kern.h
+++ sys/vm/vm_kern.h
@@ -61,7 +61,7 @@
  */
 
 #ifndef _VM_VM_KERN_H_
-#define _VM_VM_KERN_H_	1
+#define _VM_VM_KERN_H_
 
 /* Kernel memory management definitions. */
 extern vm_map_t kernel_map;
@@ -74,5 +74,7 @@
 extern struct vmem *memguard_arena;
 extern vm_offset_t swapbkva;
 extern u_long vm_kmem_size;
+extern u_int exec_map_entries;
+extern u_int exec_map_entry_size;
 
-#endif				/* _VM_VM_KERN_H_ */
+#endif /* _VM_VM_KERN_H_ */
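The sizing arithmetic above is easy to sanity-check. Note in passing that min(8 * mp_ncpus, 2 * mp_ncpus + 4) always evaluates to 2 * mp_ncpus + 4 for mp_ncpus >= 1, so the 32-bit branch reduces to the smaller formula. A quick check follows, assuming FreeBSD's usual limits (PATH_MAX = 1024, ARG_MAX = 256 KiB) and 4 KiB pages; the CPU counts are sample inputs, not values from the patch.

/*
 * Sanity-check the exec map sizing from the vm_init.c hunk.  PATH_MAX,
 * ARG_MAX, and PAGE_SIZE reflect FreeBSD's usual values; the CPU counts
 * are arbitrary sample inputs.
 */
#include <stdio.h>

#define	PAGE_SIZE_	4096UL
#define	PATH_MAX_	1024UL
#define	ARG_MAX_	262144UL	/* 256 KiB */

static unsigned long
round_page(unsigned long x)
{
	return ((x + PAGE_SIZE_ - 1) & ~(PAGE_SIZE_ - 1));
}

static unsigned long
min_ul(unsigned long a, unsigned long b)
{
	return (a < b ? a : b);
}

int
main(void)
{
	unsigned long entry_size, ncpus, lp64, ilp32;

	entry_size = round_page(PATH_MAX_ + ARG_MAX_);
	printf("exec_map_entry_size: %lu bytes (%lu pages)\n",
	    entry_size, entry_size / PAGE_SIZE_);

	for (ncpus = 1; ncpus <= 64; ncpus *= 4) {
		lp64 = 8 * ncpus;
		ilp32 = min_ul(8 * ncpus, 2 * ncpus + 4);
		printf("%2lu CPUs: LP64 %3lu entries (%3lu MiB), "
		    "ILP32 %3lu entries (%3lu MiB)\n", ncpus,
		    lp64, (lp64 * entry_size + 64 * PAGE_SIZE_) >> 20,
		    ilp32, (ilp32 * entry_size + 64 * PAGE_SIZE_) >> 20);
	}
	return (0);
}

Each entry rounds up to 65 pages (260 KiB), so a 64-CPU LP64 machine reserves 512 entries plus the 64 activator pages, about 130 MiB of pageable KVA, while the 32-bit formula caps the same machine at 132 entries (roughly 34 MiB).
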
Index: sys/vm/vm_kern.c
===================================================================
--- sys/vm/vm_kern.c
+++ sys/vm/vm_kern.c
@@ -97,6 +97,9 @@
 /* NB: Used by kernel debuggers. */
 const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS;
 
+u_int exec_map_entry_size;
+u_int exec_map_entries;
+
 SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD,
     SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address");
 
@@ -427,9 +430,7 @@
  *	This routine may block.
  */
 vm_offset_t
-kmap_alloc_wait(map, size)
-	vm_map_t map;
-	vm_size_t size;
+kmap_alloc_wait(vm_map_t map, vm_object_t object, vm_size_t size)
 {
	vm_offset_t addr;
 
@@ -454,7 +455,7 @@
		map->needs_wakeup = TRUE;
		vm_map_unlock_and_wait(map, 0);
	}
-	vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL,
+	vm_map_insert(map, object, 0, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, MAP_ACC_CHARGED);
	vm_map_unlock(map);
	return (addr);
@@ -467,10 +468,7 @@
  *	waiting for memory in that map.
  */
 void
-kmap_free_wakeup(map, addr, size)
-	vm_map_t map;
-	vm_offset_t addr;
-	vm_size_t size;
+kmap_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size)
 {
 
	vm_map_lock(map);
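A note on the free path: exec_free_args_kva() never unmaps anything. Each entry keeps the VM object installed at boot through the new object argument to kmap_alloc_wait(), and freeing only applies MADV_FREE via pmap_advise() and vm_object_madvise(), so the backing pages become reclaimable at the page daemon's leisure while the mapping itself is reused verbatim, avoiding the unmap/remap cycle of kmap_free_wakeup(). The userspace analogue is madvise(2) with MADV_FREE, which FreeBSD has long supported (Linux gained it in 4.5); the sketch below only illustrates those semantics and is not part of the patch.

/*
 * Userspace illustration of the MADV_FREE semantics used on the free
 * path: the pages become eligible for lazy reclamation, but the mapping
 * stays valid and reusable without being rebuilt.
 */
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	size_t len = 1UL << 20;		/* stand-in for one exec map entry */
	char *buf;

	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return (1);
	}

	memset(buf, 0xa5, len);		/* dirty the pages (argument strings) */

	/*
	 * Done with the contents: let the VM reclaim the pages at its
	 * leisure.  The address range remains mapped.
	 */
	if (madvise(buf, len, MADV_FREE) != 0)
		perror("madvise");

	/*
	 * Reuse is safe, but the old contents are undefined: a page is
	 * zero-filled on the next fault if it was reclaimed, and retains
	 * stale data otherwise.
	 */
	buf[0] = 1;

	munmap(buf, len);
	return (0);
}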