Index: sys/amd64/include/vmparam.h
===================================================================
--- sys/amd64/include/vmparam.h
+++ sys/amd64/include/vmparam.h
@@ -76,7 +76,7 @@
  * of the direct mapped segment.  This uses 2MB pages for reduced
  * TLB pressure.
  */
-#define	UMA_MD_SMALL_ALLOC
+/* #define	UMA_MD_SMALL_ALLOC */
 
 /*
  * The physical address space is densely populated.
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -2508,27 +2508,27 @@
  * zone of zones and zone of kegs are accounted separately.
  */
 #define	UMA_BOOT_ZONES	11
-/* Zone of zones and zone of kegs have arbitrary alignment. */
-#define	UMA_BOOT_ALIGN	32
 static int zsize, ksize;
 
 int
 uma_startup_count(int vm_zones)
 {
 	int zones, pages;
+	u_int ppera, ipers;
 	size_t space, size;
 
 	ksize = sizeof(struct uma_keg) +
 	    (sizeof(struct uma_domain) * vm_ndomains);
+	ksize = roundup(ksize, UMA_SUPER_ALIGN);
 	zsize = sizeof(struct uma_zone) +
 	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
+	zsize = roundup(zsize, UMA_SUPER_ALIGN);
 
 	/*
-	 * Memory for the zone of kegs and its keg,
-	 * and for zone of zones.
+	 * Memory for the zone of kegs and its keg, and for zone
+	 * of zones.  Allocated directly in uma_startup().
 	 */
-	pages = howmany(roundup(zsize, CACHE_LINE_SIZE) * 2 +
-	    roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE);
+	pages = howmany(zsize * 2 + ksize, PAGE_SIZE);
 
 #ifdef	UMA_MD_SMALL_ALLOC
 	zones = UMA_BOOT_ZONES;
@@ -2539,25 +2539,30 @@
 	size = slab_sizeof(SLAB_MAX_SETSIZE);
 	space = slab_space(SLAB_MAX_SETSIZE);
 
-	/* Memory for the rest of startup zones, UMA and VM, ... */
 	if (zsize > space) {
 		/* See keg_large_init(). */
-		u_int ppera;
-
-		ppera = howmany(roundup2(zsize, UMA_BOOT_ALIGN), PAGE_SIZE);
-		if (PAGE_SIZE * ppera - roundup2(zsize, UMA_BOOT_ALIGN) < size)
+		ppera = howmany(zsize, PAGE_SIZE);
+		if (PAGE_SIZE * ppera - zsize < size)
 			ppera++;
-		pages += (zones + vm_zones) * ppera;
-	} else if (roundup2(zsize, UMA_BOOT_ALIGN) > space)
-		/* See keg_small_init() special case for uk_ppera = 1. */
-		pages += zones;
-	else
-		pages += howmany(zones,
-		    space / roundup2(zsize, UMA_BOOT_ALIGN));
+		ipers = 1;
+		zones += vm_zones;
+	} else {
+		ipers = space / zsize;
+		ppera = 1;
+	}
+
+	/* Memory for the rest of startup zones, UMA and VM, ... */
+	pages += howmany(zones, ipers) * ppera;
 
 	/* ... and their kegs. Note that zone of zones allocates a keg! */
-	pages += howmany(zones + 1,
-	    space / roundup2(ksize, UMA_BOOT_ALIGN));
+	pages += howmany(zones + 1, space / ksize);
+
+	/*
+	 * Allocate an additional slab for zones and kegs on NUMA
+	 * systems.  The round-robin allocation policy will populate at
+	 * least one slab per-domain.
+	 */
+	pages += (vm_ndomains - 1) * (ppera + 1);
 
 	return (pages);
 }
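To sanity-check the revised estimate, the following standalone userland program mirrors the arithmetic of uma_startup_count() above. The sys/param.h macros are inlined, and zsize, ksize, space, size, vm_zones and vm_ndomains are hypothetical stand-ins (the real values depend on mp_maxid, vm_ndomains and the slab layout), so treat it as an illustrative sketch, not kernel code:

/*
 * Userland sketch of the boot-page estimate; all inputs are
 * hypothetical stand-ins for the kernel values.
 */
#include <stdio.h>

#define	PAGE_SIZE	4096
#define	CACHE_LINE_SIZE	64
#define	UMA_SUPER_ALIGN	(CACHE_LINE_SIZE * 2)	/* amd64/powerpc64 case */
#define	UMA_BOOT_ZONES	11

#define	roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))
#define	howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	size_t zsize = roundup(2000, UMA_SUPER_ALIGN);	/* zone + caches */
	size_t ksize = roundup(600, UMA_SUPER_ALIGN);	/* keg + domains */
	size_t space = 3800;	/* slab_space(SLAB_MAX_SETSIZE) stand-in */
	size_t size = 200;	/* slab_sizeof(SLAB_MAX_SETSIZE) stand-in */
	int vm_zones = 2, vm_ndomains = 2;
	int zones = UMA_BOOT_ZONES;
	int pages, ppera, ipers;

	/* Zone of zones (twice) and zone of kegs, carved out directly. */
	pages = howmany(zsize * 2 + ksize, PAGE_SIZE);

	if (zsize > space) {		/* keg_large_init() path */
		ppera = howmany(zsize, PAGE_SIZE);
		if (PAGE_SIZE * ppera - zsize < size)
			ppera++;
		ipers = 1;
		zones += vm_zones;
	} else {			/* one-page slabs */
		ipers = space / zsize;
		ppera = 1;
	}
	pages += howmany(zones, ipers) * ppera;		/* the zones... */
	pages += howmany(zones + 1, space / ksize);	/* ...their kegs */
	pages += (vm_ndomains - 1) * (ppera + 1);	/* NUMA round-robin */

	printf("estimated boot pages: %d\n", pages);
	return (0);
}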
@@ -2578,11 +2583,11 @@
 	/* Use bootpages memory for the zone of zones and zone of kegs. */
 	m = (uintptr_t)mem;
 	zones = (uma_zone_t)m;
-	m += roundup(zsize, CACHE_LINE_SIZE);
+	m += zsize;
 	kegs = (uma_zone_t)m;
-	m += roundup(zsize, CACHE_LINE_SIZE);
+	m += zsize;
 	masterkeg = (uma_keg_t)m;
-	m += roundup(ksize, CACHE_LINE_SIZE);
+	m += ksize;
 	m = roundup(m, PAGE_SIZE);
 	npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
 	mem = (void *)m;
@@ -2596,7 +2601,7 @@
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = masterkeg;
-	args.align = UMA_BOOT_ALIGN - 1;
+	args.align = UMA_SUPER_ALIGN - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	zone_ctor(kegs, zsize, &args, M_WAITOK);
 
@@ -2610,7 +2615,7 @@
 	args.uminit = zero_init;
 	args.fini = NULL;
 	args.keg = NULL;
-	args.align = UMA_BOOT_ALIGN - 1;
+	args.align = UMA_SUPER_ALIGN - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
 	zone_ctor(zones, zsize, &args, M_WAITOK);
 
@@ -4153,8 +4158,10 @@
 	int64_t nitems;
 	u_int i;
 
-	nitems = counter_u64_fetch(zone->uz_allocs) -
-	    counter_u64_fetch(zone->uz_frees);
+	nitems = 0;
+	if (zone->uz_allocs != EARLY_COUNTER && zone->uz_frees != EARLY_COUNTER)
+		nitems = counter_u64_fetch(zone->uz_allocs) -
+		    counter_u64_fetch(zone->uz_frees);
 	CPU_FOREACH(i)
 		nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs) -
 		    atomic_load_64(&zone->uz_cpu[i].uc_frees);
@@ -4168,7 +4175,9 @@
 	uint64_t nitems;
 	u_int i;
 
-	nitems = counter_u64_fetch(zone->uz_allocs);
+	nitems = 0;
+	if (zone->uz_allocs != EARLY_COUNTER)
+		nitems = counter_u64_fetch(zone->uz_allocs);
 	CPU_FOREACH(i)
 		nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs);
 
@@ -4181,7 +4190,9 @@
 	uint64_t nitems;
 	u_int i;
 
-	nitems = counter_u64_fetch(zone->uz_frees);
+	nitems = 0;
+	if (zone->uz_frees != EARLY_COUNTER)
+		nitems = counter_u64_fetch(zone->uz_frees);
 	CPU_FOREACH(i)
 		nitems += atomic_load_64(&zone->uz_cpu[i].uc_frees);
 
@@ -4289,10 +4300,7 @@
 	KEG_ASSERT_COLD(keg);
 	ZONE_ASSERT_COLD(zone);
 
-	pages = count / keg->uk_ipers;
-	if (pages * keg->uk_ipers < count)
-		pages++;
-	pages *= keg->uk_ppera;
+	pages = howmany(count, keg->uk_ipers) * keg->uk_ppera;
 
 #ifdef UMA_MD_SMALL_ALLOC
 	if (keg->uk_ppera > 1) {
@@ -4334,9 +4342,7 @@
 	int aflags, domain, slabs;
 
 	KEG_GET(zone, keg);
-	slabs = items / keg->uk_ipers;
-	if (slabs * keg->uk_ipers < items)
-		slabs++;
+	slabs = howmany(items, keg->uk_ipers);
 	while (slabs-- > 0) {
 		aflags = M_NOWAIT;
 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h
+++ sys/vm/uma_int.h
@@ -166,14 +166,17 @@
 };
 
 /*
- * align field or structure to cache line
+ * Align field or structure to cache 'sector' in Intel terminology.  This
+ * is more efficient with adjacent line prefetch.
  */
 #if defined(__amd64__) || defined(__powerpc64__)
-#define UMA_ALIGN	__aligned(128)
+#define UMA_SUPER_ALIGN	(CACHE_LINE_SIZE * 2)
#else
-#define UMA_ALIGN	__aligned(CACHE_LINE_SIZE)
+#define UMA_SUPER_ALIGN	CACHE_LINE_SIZE
 #endif
 
+#define UMA_ALIGN	__aligned(UMA_SUPER_ALIGN)
+
 /*
  * The uma_bucket structure is used to queue and manage buckets divorced
  * from per-cpu caches.  They are loaded into uma_cache_bucket structures
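The reason uma_startup() above can drop its per-object roundup(..., CACHE_LINE_SIZE) calls is that zsize and ksize are now pre-rounded to UMA_SUPER_ALIGN, so plain pointer bumps keep every carve-out super-aligned once the backing block is. A minimal userland sketch of that invariant, with hypothetical sizes standing in for the real structures:

/*
 * Sketch, not kernel code: carving the three bootstrap objects out of
 * one super-aligned block.  Because zsize and ksize are pre-rounded to
 * UMA_SUPER_ALIGN, "m += zsize" preserves the alignment the old
 * roundup(..., CACHE_LINE_SIZE) calls used to provide.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define	CACHE_LINE_SIZE	64
#define	UMA_SUPER_ALIGN	(CACHE_LINE_SIZE * 2)

#define	roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))

int
main(void)
{
	static _Alignas(UMA_SUPER_ALIGN) char mem[8 * UMA_SUPER_ALIGN];
	size_t zsize = roundup(200, UMA_SUPER_ALIGN);	/* hypothetical */
	size_t ksize = roundup(100, UMA_SUPER_ALIGN);	/* hypothetical */
	uintptr_t m, zones, kegs, masterkeg;

	m = (uintptr_t)mem;
	zones = m;		/* zone of zones */
	m += zsize;
	kegs = m;		/* zone of kegs */
	m += zsize;
	masterkeg = m;		/* master keg */
	m += ksize;
	(void)m;

	/* Every carve-out stays super-aligned without per-object roundup. */
	assert(zones % UMA_SUPER_ALIGN == 0);
	assert(kegs % UMA_SUPER_ALIGN == 0);
	assert(masterkeg % UMA_SUPER_ALIGN == 0);
	return (0);
}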
@@ -532,7 +535,7 @@
 	KASSERT(uma_zone_get_allocs((z)) == 0,				\
 	    ("zone %s initialization after use.", (z)->uz_name))
 
-#undef UMA_ALIGN
+#undef	UMA_ALIGN
 
 #ifdef _KERNEL
 /* Internal prototypes */
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -613,10 +613,17 @@
 	    slab_ipers(sizeof(struct vm_map), UMA_ALIGN_PTR));
 
 	/*
-	 * Before going fully functional kmem_init() does allocation
-	 * from "KMAP ENTRY" and vmem_create() does allocation from "vmem".
+	 * Before we are fully bootstrapped we need to account for the
+	 * following allocations:
+	 *
+	 * "KMAP ENTRY" from kmem_init()
+	 * "vmem btag" from vmem_startup()
+	 * "vmem" from vmem_create()
+	 * "KMAP" from vm_map_startup()
+	 *
+	 * Each needs at least one page per-domain.
 	 */
-	boot_pages += 2;
+	boot_pages += 4 * vm_ndomains;
 #endif
 	/*
 	 * CTFLAG_RDTUN doesn't work during the early boot process, so we must
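A toy model of the new reserve, assuming the strict round-robin first-touch behavior the comment describes; it only illustrates that four early zones, each touching every domain once, account for the 4 * vm_ndomains pages added above:

/*
 * Toy model of "boot_pages += 4 * vm_ndomains": under round-robin
 * domain selection, the first vm_ndomains slab allocations of a zone
 * each land in a distinct domain, and each needs a fresh boot page.
 */
#include <stdio.h>

#define	EARLY_ZONES	4	/* "KMAP ENTRY", "vmem btag", "vmem", "KMAP" */

int
main(void)
{
	int vm_ndomains, zone, domain, pages;

	for (vm_ndomains = 1; vm_ndomains <= 4; vm_ndomains++) {
		pages = 0;
		for (zone = 0; zone < EARLY_ZONES; zone++)
			for (domain = 0; domain < vm_ndomains; domain++)
				pages++;	/* one page per fresh domain */
		printf("%d domain(s): reserve %d pages\n",
		    vm_ndomains, pages);
	}
	return (0);
}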