Index: sys/kern/subr_vmem.c =================================================================== --- sys/kern/subr_vmem.c +++ sys/kern/subr_vmem.c @@ -77,8 +77,6 @@ #include #include -int vmem_startup_count(void); - #define VMEM_OPTORDER 5 #define VMEM_OPTVALUE (1 << VMEM_OPTORDER) #define VMEM_MAXORDER \ @@ -662,17 +660,6 @@ return (NULL); } - -/* - * How many pages do we need to startup_alloc. - */ -int -vmem_startup_count(void) -{ - - return (howmany(BT_MAXALLOC, slab_ipers(sizeof(struct vmem_btag), - UMA_ALIGN_PTR))); -} #endif void Index: sys/vm/uma_core.c =================================================================== --- sys/vm/uma_core.c +++ sys/vm/uma_core.c @@ -101,6 +101,8 @@ #include #endif +#include + /* * This is the zone and keg from which all zones are spawned. */ @@ -138,11 +140,10 @@ static struct rwlock_padalign __exclusive_cache_line uma_rwlock; /* - * Pointer and counter to pool of pages, that is preallocated at - * startup to bootstrap UMA. + * First available virual address for boot time allocations. */ -static char *bootmem; -static int boot_pages; +static vm_offset_t bootstart; +static vm_offset_t bootmem; static struct sx uma_reclaim_lock; @@ -158,8 +159,7 @@ "UMA kernel memory usage"); /* Is the VM done starting up? */ -static enum { BOOT_COLD = 0, BOOT_STRAPPED, BOOT_PAGEALLOC, BOOT_BUCKETS, - BOOT_RUNNING } booted = BOOT_COLD; +static enum { BOOT_COLD = 0, BOOT_KVA, BOOT_RUNNING } booted = BOOT_COLD; /* * This is the handle used to schedule events that need to happen @@ -238,9 +238,7 @@ /* Prototypes.. */ -int uma_startup_count(int); -void uma_startup(void *, int); -void uma_startup1(void); +void uma_startup1(vm_offset_t); void uma_startup2(void); static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); @@ -259,6 +257,7 @@ static void zone_dtor(void *, int, void *); static int zero_init(void *, int, int); static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *); +static void zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *), void *); static void zone_timeout(uma_zone_t zone, void *); static int hash_alloc(struct uma_hash *, u_int); static int hash_expand(struct uma_hash *, struct uma_hash *); @@ -340,7 +339,7 @@ bucket_enable(void) { - KASSERT(booted >= BOOT_BUCKETS, ("Bucket enable before init")); + KASSERT(booted >= BOOT_KVA, ("Bucket enable before init")); bucketdisable = vm_page_count_min(); } @@ -426,13 +425,11 @@ uma_bucket_t bucket; /* - * This is to stop us from allocating per cpu buckets while we're - * running out of vm.boot_pages. Otherwise, we would exhaust the - * boot pages. This also prevents us from allocating buckets in - * low memory situations. + * Don't allocate buckets in low memory situations. */ if (bucketdisable) return (NULL); + /* * To limit bucket recursion we store the original zone flags * in a cookie passed via zalloc_arg/zfree_arg. This allows the @@ -1195,9 +1192,6 @@ dom = &keg->uk_domain[i]; KEG_LOCK(keg, i); LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) { - /* We have nowhere to free these to. */ - if (slab->us_flags & UMA_SLAB_BOOT) - continue; if (keg->uk_flags & UMA_ZFLAG_HASH) UMA_HASH_REMOVE(&keg->uk_hash, slab); n++; @@ -1392,50 +1386,56 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, int wait) { - uma_keg_t keg; + vm_paddr_t pa; + vm_page_t m; void *mem; int pages; + int i; - keg = zone->uz_keg; - /* - * If we are in BOOT_BUCKETS or higher, than switch to real - * allocator. Zones with page sized slabs switch at BOOT_PAGEALLOC. 
- */ - switch (booted) { - case BOOT_COLD: - case BOOT_STRAPPED: - break; - case BOOT_PAGEALLOC: - if (keg->uk_ppera > 1) - break; - case BOOT_BUCKETS: - case BOOT_RUNNING: -#ifdef UMA_MD_SMALL_ALLOC - keg->uk_allocf = (keg->uk_ppera > 1) ? - page_alloc : uma_small_alloc; -#else - keg->uk_allocf = page_alloc; -#endif - return keg->uk_allocf(zone, bytes, domain, pflag, wait); - } - - /* - * Check our small startup cache to see if it has pages remaining. - */ pages = howmany(bytes, PAGE_SIZE); KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__)); - if (pages > boot_pages) - panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name); -#ifdef DIAGNOSTIC - printf("%s from \"%s\", %d boot pages left\n", __func__, zone->uz_name, - boot_pages); -#endif - mem = bootmem; - boot_pages -= pages; - bootmem += pages * PAGE_SIZE; + *pflag = UMA_SLAB_BOOT; + m = vm_page_alloc_contig_domain(NULL, 0, domain, + malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED, pages, + (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT); + if (m == NULL) + return (NULL); + + pa = VM_PAGE_TO_PHYS(m); + for (i = 0; i < pages; i++, pa += PAGE_SIZE) { +#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ + defined(__riscv) || defined(__powerpc64__) + if ((wait & M_NODUMP) == 0) + dump_add_page(pa); +#endif + } + /* Allocate KVA and indirectly advance bootmem. */ + mem = (void *)pmap_map(&bootmem, m->phys_addr, + m->phys_addr + (pages * PAGE_SIZE), VM_PROT_READ | VM_PROT_WRITE); + if ((wait & M_ZERO) != 0) + bzero(mem, pages * PAGE_SIZE); + + return (mem); +} - return (mem); +static void +startup_free(void *mem, vm_size_t bytes) +{ + vm_offset_t va; + vm_page_t m; + + va = (vm_offset_t)mem; + m = PHYS_TO_VM_PAGE(pmap_kextract(va)); + pmap_remove(kernel_pmap, va, va + bytes); + for (; bytes != 0; bytes -= PAGE_SIZE, m++) { +#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ + defined(__riscv) || defined(__powerpc64__) + dump_drop_page(VM_PAGE_TO_PHYS(m)); +#endif + vm_page_unwire_noq(m); + vm_page_free(m); + } } /* @@ -1588,6 +1588,11 @@ page_free(void *mem, vm_size_t size, uint8_t flags) { + if ((flags & UMA_SLAB_BOOT) != 0) { + startup_free(mem, size); + return; + } + if ((flags & UMA_SLAB_KERNEL) == 0) panic("UMA: page_free used with invalid flags %x", flags); @@ -1942,12 +1947,13 @@ * If we haven't booted yet we need allocations to go through the * startup cache until the vm is ready. */ - if (booted < BOOT_PAGEALLOC) - keg->uk_allocf = startup_alloc; #ifdef UMA_MD_SMALL_ALLOC - else if (keg->uk_ppera == 1) + if (keg->uk_ppera == 1) keg->uk_allocf = uma_small_alloc; + else #endif + if (booted < BOOT_KVA) + keg->uk_allocf = startup_alloc; else if (keg->uk_flags & UMA_ZONE_PCPU) keg->uk_allocf = pcpu_page_alloc; else @@ -2003,6 +2009,18 @@ return (0); } +static void +zone_kva_available(uma_zone_t zone, void *unused) +{ + uma_keg_t keg; + + if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0) + return; + KEG_GET(zone, keg); + if (keg->uk_allocf == startup_alloc) + keg->uk_allocf = page_alloc; +} + static void zone_alloc_counters(uma_zone_t zone, void *unused) { @@ -2444,135 +2462,77 @@ ZONE_CROSS_LOCK_FINI(zone); } -/* - * Traverses every zone in the system and calls a callback - * - * Arguments: - * zfunc A pointer to a function which accepts a zone - * as an argument. 
- * - * Returns: - * Nothing - */ static void -zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg) +zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *arg), void *arg) { uma_keg_t keg; uma_zone_t zone; - /* - * Before BOOT_RUNNING we are guaranteed to be single - * threaded, so locking isn't needed. Startup functions - * are allowed to use M_WAITOK. - */ - if (__predict_true(booted == BOOT_RUNNING)) - rw_rlock(&uma_rwlock); LIST_FOREACH(keg, &uma_kegs, uk_link) { LIST_FOREACH(zone, &keg->uk_zones, uz_link) zfunc(zone, arg); } LIST_FOREACH(zone, &uma_cachezones, uz_link) zfunc(zone, arg); - if (__predict_true(booted == BOOT_RUNNING)) - rw_runlock(&uma_rwlock); } /* - * Count how many pages do we need to bootstrap. VM supplies - * its need in early zones in the argument, we add up our zones, - * which consist of the UMA Slabs, UMA Hash and 9 Bucket zones. The - * zone of zones and zone of kegs are accounted separately. + * Traverses every zone in the system and calls a callback + * + * Arguments: + * zfunc A pointer to a function which accepts a zone + * as an argument. + * + * Returns: + * Nothing */ -#define UMA_BOOT_ZONES 11 -static int zsize, ksize; -int -uma_startup_count(int vm_zones) +static void +zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg) { - int zones, pages; - u_int zppera, zipers; - u_int kppera, kipers; - size_t space, size; - - ksize = sizeof(struct uma_keg) + - (sizeof(struct uma_domain) * vm_ndomains); - ksize = roundup(ksize, UMA_SUPER_ALIGN); - zsize = sizeof(struct uma_zone) + - (sizeof(struct uma_cache) * (mp_maxid + 1)) + - (sizeof(struct uma_zone_domain) * vm_ndomains); - zsize = roundup(zsize, UMA_SUPER_ALIGN); - - /* - * Memory for the zone of kegs and its keg, and for zone - * of zones. Allocated directly in uma_startup(). - */ - pages = howmany(zsize * 2 + ksize, PAGE_SIZE); - -#ifdef UMA_MD_SMALL_ALLOC - zones = UMA_BOOT_ZONES; -#else - zones = UMA_BOOT_ZONES + vm_zones; - vm_zones = 0; -#endif - size = slab_sizeof(SLAB_MAX_SETSIZE); - space = slab_space(SLAB_MAX_SETSIZE); - - /* Memory for the rest of startup zones, UMA and VM, ... */ - if (zsize > space) { - /* See keg_large_init(). */ - zppera = howmany(zsize + slab_sizeof(1), PAGE_SIZE); - zipers = 1; - zones += vm_zones; - } else { - zppera = 1; - zipers = space / zsize; - } - pages += howmany(zones, zipers) * zppera; - - /* ... and their kegs. Note that zone of zones allocates a keg! */ - if (ksize > space) { - /* See keg_large_init(). */ - kppera = howmany(ksize + slab_sizeof(1), PAGE_SIZE); - kipers = 1; - } else { - kppera = 1; - kipers = space / ksize; - } - pages += howmany(zones + 1, kipers) * kppera; - /* - * Allocate an additional slab for zones and kegs on NUMA - * systems. The round-robin allocation policy will populate at - * least one slab per-domain. - */ - pages += (vm_ndomains - 1) * (zppera + kppera); - - return (pages); + rw_rlock(&uma_rwlock); + zone_foreach_unlocked(zfunc, arg); + rw_runlock(&uma_rwlock); } +/* + * Initialize the kernel memory allocator. This is done after pages can be + * allocated but before general KVA is available. 
+ */ void -uma_startup(void *mem, int npages) +uma_startup1(vm_offset_t virtual_avail) { struct uma_zctor_args args; + size_t ksize, zsize, size; uma_keg_t masterkeg; uintptr_t m; + uint8_t pflag; + + bootstart = bootmem = virtual_avail; #ifdef DIAGNOSTIC printf("Entering %s with %d boot pages configured\n", __func__, npages); #endif rw_init(&uma_rwlock, "UMA lock"); + sx_init(&uma_reclaim_lock, "umareclaim"); + + ksize = sizeof(struct uma_keg) + + (sizeof(struct uma_domain) * vm_ndomains); + ksize = roundup(ksize, UMA_SUPER_ALIGN); + zsize = sizeof(struct uma_zone) + + (sizeof(struct uma_cache) * (mp_maxid + 1)) + + (sizeof(struct uma_zone_domain) * vm_ndomains); + zsize = roundup(zsize, UMA_SUPER_ALIGN); - /* Use bootpages memory for the zone of zones and zone of kegs. */ - m = (uintptr_t)mem; + /* Allocate the zone of zones, zone of kegs, and zone of zones keg. */ + size = (zsize * 2) + ksize; + m = (uintptr_t)startup_alloc(NULL, size, 0, &pflag, M_NOWAIT | M_ZERO); zones = (uma_zone_t)m; m += zsize; kegs = (uma_zone_t)m; m += zsize; masterkeg = (uma_keg_t)m; - m += ksize; - m = roundup(m, PAGE_SIZE); - npages -= (m - (uintptr_t)mem) / PAGE_SIZE; - mem = (void *)m; /* "manually" create the initial zone */ memset(&args, 0, sizeof(args)); @@ -2587,9 +2547,6 @@ args.flags = UMA_ZFLAG_INTERNAL; zone_ctor(kegs, zsize, &args, M_WAITOK); - bootmem = mem; - boot_pages = npages; - args.name = "UMA Zones"; args.size = zsize; args.ctor = zone_ctor; @@ -2608,36 +2565,30 @@ hashzone = uma_zcreate("UMA Hash", sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); - - booted = BOOT_STRAPPED; -} - -void -uma_startup1(void) -{ - -#ifdef DIAGNOSTIC - printf("Entering %s with %d boot pages left\n", __func__, boot_pages); -#endif - booted = BOOT_PAGEALLOC; } +/* + * Advertise the availability of normal kva allocations and switch to + * the default back-end allocator. Marks the KVA we consumed on startup + * as used in the map. + */ void uma_startup2(void) { -#ifdef DIAGNOSTIC - printf("Entering %s with %d boot pages left\n", __func__, boot_pages); +#ifndef PMAP_HAS_DMAP + (void)vm_map_insert(kernel_map, NULL, 0, bootstart, + bootmem - bootstart, VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); #endif - sx_init(&uma_reclaim_lock, "umareclaim"); + + booted = BOOT_KVA; + zone_foreach_unlocked(zone_kva_available, NULL); bucket_init(); - booted = BOOT_BUCKETS; bucket_enable(); } /* - * Initialize our callout handle - * + * Finish our initialization steps. 
*/ static void uma_startup3(void) @@ -2648,8 +2599,8 @@ uma_dbg_cnt = counter_u64_alloc(M_WAITOK); uma_skip_cnt = counter_u64_alloc(M_WAITOK); #endif - zone_foreach(zone_alloc_counters, NULL); - zone_foreach(zone_alloc_sysctl, NULL); + zone_foreach_unlocked(zone_alloc_counters, NULL); + zone_foreach_unlocked(zone_alloc_sysctl, NULL); callout_init(&uma_callout, 1); callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); booted = BOOT_RUNNING; @@ -2688,7 +2639,6 @@ { struct uma_zctor_args args; uma_zone_t res; - bool locked; KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"", align, name)); @@ -2720,15 +2670,10 @@ args.flags = flags; args.keg = NULL; - if (booted < BOOT_BUCKETS) { - locked = false; - } else { - sx_slock(&uma_reclaim_lock); - locked = true; - } + sx_slock(&uma_reclaim_lock); res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK); - if (locked) - sx_sunlock(&uma_reclaim_lock); + sx_sunlock(&uma_reclaim_lock); + return (res); } @@ -2740,7 +2685,6 @@ struct uma_zctor_args args; uma_keg_t keg; uma_zone_t res; - bool locked; keg = master->uz_keg; memset(&args, 0, sizeof(args)); @@ -2754,16 +2698,11 @@ args.flags = keg->uk_flags | UMA_ZONE_SECONDARY; args.keg = keg; - if (booted < BOOT_BUCKETS) { - locked = false; - } else { - sx_slock(&uma_reclaim_lock); - locked = true; - } + sx_slock(&uma_reclaim_lock); /* XXX Attaches only one keg of potentially many. */ res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK); - if (locked) - sx_sunlock(&uma_reclaim_lock); + sx_sunlock(&uma_reclaim_lock); + return (res); } Index: sys/vm/vm_init.c =================================================================== --- sys/vm/vm_init.c +++ sys/vm/vm_init.c @@ -95,8 +95,7 @@ #include #include -extern void uma_startup1(void); -extern void uma_startup2(void); +extern void uma_startup1(vm_offset_t); extern void vm_radix_reserve_kva(void); long physmem; @@ -110,8 +109,6 @@ /* * vm_init initializes the virtual memory system. * This is done only by the first cpu up. - * - * The start and end address of physical memory is passed in. */ static void vm_mem_init(void *dummy) @@ -135,10 +132,9 @@ */ domainset_zero(); -#ifdef UMA_MD_SMALL_ALLOC - /* Announce page availability to UMA. */ - uma_startup1(); -#endif + /* Bootstrap the kernel memory allocator. */ + uma_startup1(virtual_avail); + /* * Initialize other VM packages */ @@ -151,8 +147,6 @@ /* Set up radix zone to use noobj_alloc. */ vm_radix_reserve_kva(); #endif - /* Announce full page availability to UMA. */ - uma_startup2(); kmem_init_zero_region(); pmap_init(); vm_pager_init(); Index: sys/vm/vm_kern.c =================================================================== --- sys/vm/vm_kern.c +++ sys/vm/vm_kern.c @@ -129,6 +129,8 @@ #endif #define KVA_QUANTUM (1 << KVA_QUANTUM_SHIFT) +extern void uma_startup2(void); + /* * kva_alloc: * @@ -814,6 +816,13 @@ kernel_arena, KVA_QUANTUM); #endif } + + /* + * This must be the very first call so that the virtual address + * space used for early allocations is properly marked used in + * the map. 
+ */ + uma_startup2(); } /* Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c +++ sys/vm/vm_page.c @@ -113,10 +113,6 @@ #include -extern int uma_startup_count(int); -extern void uma_startup(void *, int); -extern int vmem_startup_count(void); - struct vm_domain vm_dom[MAXMEMDOM]; DPCPU_DEFINE_STATIC(struct vm_batchqueue, pqbatch[MAXMEMDOM][PQ_COUNT]); @@ -169,11 +165,6 @@ long vm_page_array_size; long first_page; -static int boot_pages; -SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &boot_pages, 0, - "number of pages allocated for bootstrapping the VM system"); - static TAILQ_HEAD(, vm_page) blacklist_head; static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | @@ -595,48 +586,7 @@ for (i = 0; i < vm_ndomains; i++) vm_page_domain_init(i); - /* - * Allocate memory for use when boot strapping the kernel memory - * allocator. Tell UMA how many zones we are going to create - * before going fully functional. UMA will add its zones. - * - * VM startup zones: vmem, vmem_btag, VM OBJECT, RADIX NODE, MAP, - * KMAP ENTRY, MAP ENTRY, VMSPACE. - */ - boot_pages = uma_startup_count(8); - -#ifndef UMA_MD_SMALL_ALLOC - /* vmem_startup() calls uma_prealloc(). */ - boot_pages += vmem_startup_count(); - /* vm_map_startup() calls uma_prealloc(). */ - boot_pages += howmany(MAX_KMAP, - slab_ipers(sizeof(struct vm_map), UMA_ALIGN_PTR)); - - /* - * Before we are fully boot strapped we need to account for the - * following allocations: - * - * "KMAP ENTRY" from kmem_init() - * "vmem btag" from vmem_startup() - * "vmem" from vmem_create() - * "KMAP" from vm_map_startup() - * - * Each needs at least one page per-domain. - */ - boot_pages += 4 * vm_ndomains; -#endif - /* - * CTFLAG_RDTUN doesn't work during the early boot process, so we must - * manually fetch the value. - */ - TUNABLE_INT_FETCH("vm.boot_pages", &boot_pages); - new_end = end - (boot_pages * UMA_SLAB_SIZE); - new_end = trunc_page(new_end); - mapped = pmap_map(&vaddr, new_end, end, - VM_PROT_READ | VM_PROT_WRITE); - bzero((void *)mapped, end - new_end); - uma_startup((void *)mapped, boot_pages); - + new_end = end; #ifdef WITNESS witness_size = round_page(witness_startup_count()); new_end -= witness_size;
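
The core of the change: startup_alloc() no longer draws from the vm.boot_pages
pool that vm_page_startup() used to carve out.  It asks the page allocator for
wired, physically contiguous pages directly and maps them at the running
bootmem cursor.  A condensed sketch of that path, assembled only from the hunks
above (the KASSERT, the dump_add_page() loop, and M_ZERO handling are elided;
the helper name is illustrative and this is not meant to build on its own):

static void *
startup_alloc_sketch(uma_zone_t zone, vm_size_t bytes, int domain,
    uint8_t *pflag, int wait)
{
	vm_page_t m;
	int pages;

	pages = howmany(bytes, PAGE_SIZE);
	/* Tag the slab so page_free() can route it to startup_free() later. */
	*pflag = UMA_SLAB_BOOT;

	/* Wired, physically contiguous pages straight from the page allocator. */
	m = vm_page_alloc_contig_domain(NULL, 0, domain,
	    malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED, pages,
	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT);
	if (m == NULL)
		return (NULL);

	/* Map at the bootmem cursor; pmap_map() advances bootmem as it goes. */
	return ((void *)pmap_map(&bootmem, VM_PAGE_TO_PHYS(m),
	    VM_PAGE_TO_PHYS(m) + pages * PAGE_SIZE,
	    VM_PROT_READ | VM_PROT_WRITE));
}

Since bootstart and bootmem start at virtual_avail, the boot_pages reservation,
its tunable fetch, and the vm.boot_pages sysctl all drop out of
vm_page_startup() in the vm_page.c hunk.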
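
The free path is now symmetric, which is why keg_drain() no longer needs to
skip UMA_SLAB_BOOT slabs: a boot-time slab is just wired pages that can be
unmapped and handed back.  A sketch of the teardown, again condensed from the
page_free()/startup_free() hunks above rather than a replacement for them
(the per-arch dump_drop_page() calls are reduced to a comment):

static void
startup_free_sketch(void *mem, vm_size_t bytes)
{
	vm_offset_t va;
	vm_page_t m;

	va = (vm_offset_t)mem;
	m = PHYS_TO_VM_PAGE(pmap_kextract(va));
	pmap_remove(kernel_pmap, va, va + bytes);
	for (; bytes != 0; bytes -= PAGE_SIZE, m++) {
		/* dump_drop_page(VM_PAGE_TO_PHYS(m)) where dumps track pages. */
		vm_page_unwire_noq(m);
		vm_page_free(m);
	}
}

page_free() checks for the UMA_SLAB_BOOT flag set by startup_alloc() and, when
it is present, calls the above and returns instead of taking the usual
UMA_SLAB_KERNEL path.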
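
The ordering across the three VM files is what lets a single BOOT_KVA state
replace the old BOOT_STRAPPED/BOOT_PAGEALLOC/BOOT_BUCKETS progression:
vm_mem_init() calls uma_startup1(virtual_avail) while only the boot pmap is
usable, early kegs allocate through startup_alloc() against the bootmem cursor
(single-page kegs use uma_small_alloc() directly on UMA_MD_SMALL_ALLOC
platforms), and kmem_init() calls uma_startup2() once kernel_map is
initialized, before general KVA allocations begin.  At that point the KVA
consumed during bootstrap is reserved retroactively, per the hunk above:

	/* On platforms without a direct map, claim the bootstrap range. */
	(void)vm_map_insert(kernel_map, NULL, 0, bootstart,
	    bootmem - bootstart, VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);

after which zone_foreach_unlocked(zone_kva_available, NULL) flips uk_allocf
from startup_alloc to page_alloc for every keg still using it, so nothing
touches bootmem again.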