Index: sys/kern/kern_malloc.c
===================================================================
--- sys/kern/kern_malloc.c
+++ sys/kern/kern_malloc.c
@@ -237,16 +237,22 @@
 {
 	u_long size;

-	size = vmem_size(kmem_arena, VMEM_ALLOC);
+	size = uma_size();
 	return (sysctl_handle_long(oidp, &size, 0, req));
 }

 static int
 sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS)
 {
-	u_long size;
+	u_long size, limit;

-	size = vmem_size(kmem_arena, VMEM_FREE);
+	/* The sysctl is unsigned, implement as a saturation value. */
+	size = uma_size();
+	limit = uma_limit();
+	if (size > limit)
+		size = 0;
+	else
+		size = limit - size;
 	return (sysctl_handle_long(oidp, &size, 0, req));
 }

@@ -667,19 +673,6 @@
 	return (mem);
 }

-/*
- * Wake the uma reclamation pagedaemon thread when we exhaust KVA.  It
- * will call the lowmem handler and uma_reclaim() callbacks in a
- * context that is safe.
- */
-static void
-kmem_reclaim(vmem_t *vm, int flags)
-{
-
-	uma_reclaim_wakeup();
-	pagedaemon_wakeup();
-}
-
 #ifndef __sparc64__
 CTASSERT(VM_KMEM_SIZE_SCALE >= 1);
 #endif
@@ -757,9 +750,7 @@
 #else
 	tmp = vm_kmem_size;
 #endif
-	vmem_init(kmem_arena, "kmem arena", kva_alloc(tmp), tmp, PAGE_SIZE,
-	    0, 0);
-	vmem_set_reclaim(kmem_arena, kmem_reclaim);
+	uma_set_limit(tmp);

 #ifdef DEBUG_MEMGUARD
 	/*
@@ -767,7 +758,7 @@
 	 * replacement allocator used for detecting tamper-after-free
 	 * scenarios as they occur.  It is only used for debugging.
 	 */
-	memguard_init(kmem_arena);
+	memguard_init(kernel_arena);
 #endif
 }

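With the dedicated kmem arena gone, vm.kmem_map_size reports the bytes under UMA management and vm.kmem_map_free reports the remaining headroom below the soft limit, clamped at zero because the handler is unsigned. A minimal userland sketch (not part of the patch; it assumes the existing vm.kmem_map_size and vm.kmem_map_free sysctl names are unchanged) that reads both values:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	unsigned long size, freesz;
	size_t len;

	len = sizeof(size);
	if (sysctlbyname("vm.kmem_map_size", &size, &len, NULL, 0) != 0)
		return (1);
	len = sizeof(freesz);
	if (sysctlbyname("vm.kmem_map_free", &freesz, &len, NULL, 0) != 0)
		return (1);
	/* size + freesz approximates the soft limit set by uma_set_limit(). */
	printf("UMA-managed kmem: %lu bytes, headroom: %lu bytes\n", size, freesz);
	return (0);
}
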
Index: sys/kern/subr_vmem.c
===================================================================
--- sys/kern/subr_vmem.c
+++ sys/kern/subr_vmem.c
@@ -135,6 +135,7 @@
 	int			vm_nbusytag;
 	vmem_size_t		vm_inuse;
 	vmem_size_t		vm_size;
+	vmem_size_t		vm_limit;

 	/* Used on import. */
 	vmem_import_t		*vm_importfn;
@@ -226,11 +227,11 @@

 /* boot time arena storage. */
 static struct vmem kernel_arena_storage;
-static struct vmem kmem_arena_storage;
 static struct vmem buffer_arena_storage;
 static struct vmem transient_arena_storage;
+/* kernel and kmem arenas are aliased for backwards KPI compat. */
 vmem_t *kernel_arena = &kernel_arena_storage;
-vmem_t *kmem_arena = &kmem_arena_storage;
+vmem_t *kmem_arena = &kernel_arena_storage;
 vmem_t *buffer_arena = &buffer_arena_storage;
 vmem_t *transient_arena = &transient_arena_storage;

@@ -252,11 +253,11 @@
 	VMEM_ASSERT_LOCKED(vm);

 	/*
-	 * Only allow the kmem arena to dip into reserve tags.  It is the
+	 * Only allow the kernel arena to dip into reserve tags.  It is the
 	 * vmem where new tags come from.
 	 */
 	flags &= BT_FLAGS;
-	if (vm != kmem_arena)
+	if (vm != kernel_arena)
 		flags &= ~M_USE_RESERVE;

 	/*
@@ -613,22 +614,22 @@
 {
 	vmem_addr_t addr;

-	*pflag = UMA_SLAB_KMEM;
+	*pflag = UMA_SLAB_KERNEL;

 	/*
 	 * Single thread boundary tag allocation so that the address space
 	 * and memory are added in one atomic operation.
 	 */
 	mtx_lock(&vmem_bt_lock);
-	if (vmem_xalloc(kmem_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
+	if (vmem_xalloc(kernel_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
 	    VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
 	    &addr) == 0) {
-		if (kmem_back(kmem_object, addr, bytes,
+		if (kmem_back(kernel_object, addr, bytes,
 		    M_NOWAIT | M_USE_RESERVE) == 0) {
 			mtx_unlock(&vmem_bt_lock);
 			return ((void *)addr);
 		}
-		vmem_xfree(kmem_arena, addr, bytes);
+		vmem_xfree(kernel_arena, addr, bytes);
 		mtx_unlock(&vmem_bt_lock);
 		/*
 		 * Out of memory, not address space.  This may not even be
@@ -843,6 +844,9 @@
 		size = (align * 2) + size;
 	size = roundup(size, vm->vm_import_quantum);

+	if (vm->vm_limit != 0 && vm->vm_limit < vm->vm_size + size)
+		return ENOMEM;
+
 	/*
 	 * Hide MAXALLOC tags so we're guaranteed to be able to add this
 	 * span and the tag we want to allocate from it.
@@ -976,6 +980,15 @@
 }

 void
+vmem_set_limit(vmem_t *vm, vmem_size_t limit)
+{
+
+	VMEM_LOCK(vm);
+	vm->vm_limit = limit;
+	VMEM_UNLOCK(vm);
+}
+
+void
 vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn)
 {

@@ -1007,6 +1020,7 @@
 	vm->vm_quantum_shift = flsl(quantum) - 1;
 	vm->vm_nbusytag = 0;
 	vm->vm_size = 0;
+	vm->vm_limit = 0;
 	vm->vm_inuse = 0;
 	qc_init(vm, qcache_max);

Index: sys/sys/vmem.h
===================================================================
--- sys/sys/vmem.h
+++ sys/sys/vmem.h
@@ -74,6 +74,12 @@
     vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum);

 /*
+ * Set a limit on the total size of a vmem.
+ */
+
+void vmem_set_limit(vmem_t *vm, vmem_size_t limit);
+
+/*
  * Set a callback for reclaiming memory when space is exhausted:
  */
 void vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn);
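The new vmem_set_limit() caps vm_size, and vmem_import() now fails with ENOMEM once growing the arena would exceed that cap. A kernel-side sketch of a client arena using the KPI (illustration only; "demo_arena" and the demo_import/demo_release callbacks are hypothetical and must be supplied by the caller):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vmem.h>

vmem_import_t demo_import;	/* hypothetical backing allocator */
vmem_release_t demo_release;

static void
demo_limited_arena(void)
{
	vmem_t *arena;
	vmem_addr_t addr;

	arena = vmem_create("demo_arena", 0, 0, PAGE_SIZE, 0, M_WAITOK);
	vmem_set_import(arena, demo_import, demo_release, NULL, PAGE_SIZE);
	vmem_set_limit(arena, 64 * PAGE_SIZE);

	/* Fails once satisfying the request would push vm_size past 64 pages. */
	if (vmem_alloc(arena, PAGE_SIZE, M_BESTFIT | M_NOWAIT, &addr) != 0)
		printf("demo_arena: limit reached\n");
}
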
Index: sys/vm/memguard.c
===================================================================
--- sys/vm/memguard.c
+++ sys/vm/memguard.c
@@ -64,7 +64,7 @@

 static SYSCTL_NODE(_vm, OID_AUTO, memguard, CTLFLAG_RW, NULL, "MemGuard data");
 /*
- * The vm_memguard_divisor variable controls how much of kmem_map should be
+ * The vm_memguard_divisor variable controls how much of kernel_arena should be
  * reserved for MemGuard.
  */
 static u_int vm_memguard_divisor;
@@ -155,7 +155,7 @@

 /*
  * Return a fudged value to be used for vm_kmem_size for allocating
- * the kmem_map.  The memguard memory will be a submap.
+ * the kernel_arena.  The memguard memory will be a submap.
  */
 unsigned long
 memguard_fudge(unsigned long km_size, const struct vm_map *parent_map)
@@ -346,7 +346,7 @@
 	addr = origaddr;
 	if (do_guard)
 		addr += PAGE_SIZE;
-	rv = kmem_back(kmem_object, addr, size_p, flags);
+	rv = kmem_back(kernel_object, addr, size_p, flags);
 	if (rv != KERN_SUCCESS) {
 		vmem_xfree(memguard_arena, origaddr, size_v);
 		memguard_fail_pgs++;
@@ -416,7 +416,7 @@
 	 * vm_map lock to serialize updates to memguard_wasted, since
 	 * we had the lock at increment.
 	 */
-	kmem_unback(kmem_object, addr, size);
+	kmem_unback(kernel_object, addr, size);
 	if (sizev > size)
 		addr -= PAGE_SIZE;
 	vmem_xfree(memguard_arena, addr, sizev);
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h
+++ sys/vm/uma.h
@@ -607,12 +607,11 @@
  * These flags are setable in the allocf and visible in the freef.
  */
 #define UMA_SLAB_BOOT	0x01		/* Slab alloced from boot pages */
-#define UMA_SLAB_KMEM	0x02		/* Slab alloced from kmem_map */
 #define UMA_SLAB_KERNEL	0x04		/* Slab alloced from kernel_map */
 #define UMA_SLAB_PRIV	0x08		/* Slab alloced from priv allocator */
 #define UMA_SLAB_OFFP	0x10		/* Slab is managed separately  */
 #define UMA_SLAB_MALLOC	0x20		/* Slab is a large malloc slab */
-/* 0x40 and 0x80 are available */
+/* 0x02, 0x40 and 0x80 are available */

 /*
  * Used to pre-fill a zone with some number of items
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -145,6 +145,10 @@

 static struct sx uma_drain_lock;

+/* kmem soft limit. */
+static unsigned long uma_kmem_limit;
+static volatile unsigned long uma_kmem_total;
+
 /* Is the VM done starting up? */
 static int booted = 0;
 #define	UMA_STARTUP	1
@@ -283,6 +287,22 @@
 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
     "Warn when UMA zones becomes full");

+/* Adjust bytes under management by UMA. */
+static inline void
+uma_total_dec(unsigned long size)
+{
+
+	atomic_subtract_long(&uma_kmem_total, size);
+}
+
+static inline void
+uma_total_inc(unsigned long size)
+{
+
+	if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
+		uma_reclaim_wakeup();
+}
+
 /*
  * This routine checks to see whether or not it's safe to enable buckets.
  */
@@ -829,6 +849,7 @@
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
+	uma_total_dec(PAGE_SIZE * keg->uk_ppera);
 }

 /*
@@ -933,6 +954,7 @@
 {
 	uma_alloc allocf;
 	uma_slab_t slab;
+	unsigned long size;
 	uint8_t *mem;
 	uint8_t flags;
 	int i;
@@ -943,6 +965,7 @@
 	allocf = keg->uk_allocf;
 	KEG_UNLOCK(keg);

+	size = keg->uk_ppera * PAGE_SIZE;
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
@@ -966,13 +989,14 @@
 		wait |= M_NODUMP;

 	/* zone is passed for legacy reasons. */
-	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
+	mem = allocf(zone, size, &flags, wait);
 	if (mem == NULL) {
 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 		slab = NULL;
 		goto out;
 	}
+	uma_total_inc(size);

 	/* Point the slab into the allocated memory */
 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
@@ -1077,8 +1101,8 @@
 {
 	void *p;	/* Returned page */

-	*pflag = UMA_SLAB_KMEM;
-	p = (void *) kmem_malloc(kmem_arena, bytes, wait);
+	*pflag = UMA_SLAB_KERNEL;
+	p = (void *) kmem_malloc(kernel_arena, bytes, wait);

 	return (p);
 }
@@ -1159,9 +1183,7 @@
 {
 	struct vmem *vmem;

-	if (flags & UMA_SLAB_KMEM)
-		vmem = kmem_arena;
-	else if (flags & UMA_SLAB_KERNEL)
+	if (flags & UMA_SLAB_KERNEL)
 		vmem = kernel_arena;
 	else
 		panic("UMA: page_free used with invalid flags %x", flags);
@@ -3130,31 +3152,33 @@
 	sx_xunlock(&uma_drain_lock);
 }

-static int uma_reclaim_needed;
+static volatile int uma_reclaim_needed;

 void
 uma_reclaim_wakeup(void)
 {

-	uma_reclaim_needed = 1;
-	wakeup(&uma_reclaim_needed);
+	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0)
+		wakeup(uma_reclaim);
 }

 void
 uma_reclaim_worker(void *arg __unused)
 {

-	sx_xlock(&uma_drain_lock);
 	for (;;) {
-		sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
-		    "umarcl", 0);
-		if (uma_reclaim_needed) {
-			uma_reclaim_needed = 0;
-			sx_xunlock(&uma_drain_lock);
-			EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
-			sx_xlock(&uma_drain_lock);
-			uma_reclaim_locked(true);
-		}
+		sx_xlock(&uma_drain_lock);
+		while (uma_reclaim_needed == 0)
+			sx_sleep(uma_reclaim, &uma_drain_lock, PVM, "umarcl",
+			    hz);
+		sx_xunlock(&uma_drain_lock);
+		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
+		sx_xlock(&uma_drain_lock);
+		uma_reclaim_locked(true);
+		atomic_set_int(&uma_reclaim_needed, 0);
+		sx_xunlock(&uma_drain_lock);
+		/* Don't fire more than once per-second. */
+		pause("umarclslp", hz);
 	}
 }

@@ -3192,6 +3216,7 @@
 		slab->us_data = mem;
 		slab->us_flags = flags | UMA_SLAB_MALLOC;
 		slab->us_size = size;
+		uma_total_inc(size);
 	} else {
 		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 	}
@@ -3204,6 +3229,7 @@
 {

 	page_free(slab->us_data, slab->us_size, slab->us_flags);
+	uma_total_dec(slab->us_size);
 	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 }

@@ -3217,6 +3243,27 @@
 			bzero(zpcpu_get_cpu(item, i), zone->uz_size);
 	} else
 		bzero(item, zone->uz_size);
+}
+
+unsigned long
+uma_limit(void)
+{
+
+	return uma_kmem_limit;
+}
+
+void
+uma_set_limit(unsigned long limit)
+{
+	uma_kmem_limit = limit;
+}
+
+
+unsigned long
+uma_size(void)
+{
+
+	return uma_kmem_total;
 }

 void
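uma_total_inc() charges each slab with a single atomic_fetchadd_long() and wakes the reclaim worker only when the running total crosses the soft limit, while uma_reclaim_wakeup() coalesces wakeups by signalling only on the 0 -> 1 transition of uma_reclaim_needed. A userland analogue of that coalescing pattern (illustration only; C11 atomics and pthreads stand in for the kernel primitives):

#include <stdatomic.h>
#include <pthread.h>
#include <unistd.h>

static atomic_int reclaim_needed;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;

static void
reclaim_wakeup(void)
{
	/* fetch_add returns the old value: only the first caller signals. */
	if (atomic_fetch_add(&reclaim_needed, 1) == 0) {
		pthread_mutex_lock(&lock);
		pthread_cond_signal(&cv);	/* stands in for wakeup(9) */
		pthread_mutex_unlock(&lock);
	}
}

static void *
reclaim_worker(void *arg)
{
	for (;;) {
		pthread_mutex_lock(&lock);
		while (atomic_load(&reclaim_needed) == 0)
			pthread_cond_wait(&cv, &lock);	/* like sx_sleep() */
		pthread_mutex_unlock(&lock);
		/* ... drain caches here ... */
		atomic_store(&reclaim_needed, 0);
		sleep(1);	/* mirrors pause("umarclslp", hz) */
	}
	return (arg);
}
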
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h
+++ sys/vm/uma_int.h
@@ -423,6 +423,13 @@
 void *uma_small_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag,
     int wait);
 void uma_small_free(void *mem, vm_size_t size, uint8_t flags);
+
+/* Set a global soft limit on UMA managed memory. */
+void uma_set_limit(unsigned long limit);
+unsigned long uma_limit(void);
+
+/* Return the amount of memory managed by UMA. */
+unsigned long uma_size(void);
 #endif /* _KERNEL */

 #endif /* VM_UMA_INT_H */
Index: sys/vm/vm_kern.c
===================================================================
--- sys/vm/vm_kern.c
+++ sys/vm/vm_kern.c
@@ -162,11 +162,13 @@
 kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, vm_memattr_t memattr)
 {
-	vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
+	vm_object_t object = kernel_object;
 	vm_offset_t addr, i, offset;
 	vm_page_t m;
 	int pflags, tries;

+	KASSERT(vmem == kernel_arena,
+	    ("kmem_alloc_attr: Only kernel_arena is supported."));
 	size = round_page(size);
 	if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
 		return (0);
@@ -218,12 +220,14 @@
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr)
 {
-	vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
+	vm_object_t object = kernel_object;
 	vm_offset_t addr, offset, tmp;
 	vm_page_t end_m, m;
 	u_long npages;
 	int pflags, tries;

+	KASSERT(vmem == kernel_arena,
+	    ("kmem_alloc_contig: Only kernel_arena is supported."));
 	size = round_page(size);
 	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
 		return (0);
@@ -312,12 +316,13 @@
 	vm_offset_t addr;
 	int rv;

+	KASSERT(vmem == kernel_arena,
+	    ("kmem_malloc: Only kernel_arena is supported."));
 	size = round_page(size);
 	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
 		return (0);

-	rv = kmem_back((vmem == kmem_arena) ? kmem_object : kernel_object,
-	    addr, size, flags);
+	rv = kmem_back(kernel_object, addr, size, flags);
 	if (rv != KERN_SUCCESS) {
 		vmem_free(vmem, addr, size);
 		return (0);
@@ -337,8 +342,8 @@
 	vm_page_t m, mpred;
 	int pflags;

-	KASSERT(object == kmem_object || object == kernel_object,
-	    ("kmem_back: only supports kernel objects."));
+	KASSERT(object == kernel_object,
+	    ("kmem_back: only supports kernel object."));

 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
@@ -394,8 +399,8 @@
 	vm_page_t m, next;
 	vm_offset_t end, offset;

-	KASSERT(object == kmem_object || object == kernel_object,
-	    ("kmem_unback: only supports kernel objects."));
+	KASSERT(object == kernel_object,
+	    ("kmem_unback: only supports kernel object."));

 	pmap_remove(kernel_pmap, addr, addr + size);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -420,9 +425,10 @@
 kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
 {

+	KASSERT(vmem == kernel_arena,
+	    ("kmem_free: Only kernel_arena is supported."));
 	size = round_page(size);
-	kmem_unback((vmem == kmem_arena) ? kmem_object : kernel_object,
-	    addr, size);
+	kmem_unback(kernel_object, addr, size);
 	vmem_free(vmem, addr, size);
 }

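With the KASSERTs above, the kmem_* interfaces only accept kernel_arena and kernel_object. A usage sketch (not part of the patch; example_wired_alloc()/example_wired_free() are hypothetical helpers) showing the calling convention the assertions now enforce:

#include <sys/param.h>
#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static vm_offset_t
example_wired_alloc(vm_size_t len)
{

	/* Passing any arena other than kernel_arena would trip the KASSERTs. */
	return (kmem_malloc(kernel_arena, len, M_WAITOK | M_ZERO));
}

static void
example_wired_free(vm_offset_t va, vm_size_t len)
{

	kmem_free(kernel_arena, va, len);
}
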
Index: sys/vm/vm_map.c
===================================================================
--- sys/vm/vm_map.c
+++ sys/vm/vm_map.c
@@ -1187,9 +1187,9 @@
 	vm_inherit_t inheritance;

 	VM_MAP_ASSERT_LOCKED(map);
-	KASSERT((object != kmem_object && object != kernel_object) ||
+	KASSERT(object != kernel_object ||
 	    (cow & MAP_COPY_ON_WRITE) == 0,
-	    ("vm_map_insert: kmem or kernel object and COW"));
+	    ("vm_map_insert: kernel object and COW"));
 	KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0,
 	    ("vm_map_insert: paradoxical MAP_NOFAULT request"));
 	KASSERT((prot & ~max) == 0,
@@ -2988,7 +2988,7 @@
 		VM_OBJECT_WLOCK(object);
 		if (object->ref_count != 1 &&
 		    ((object->flags & (OBJ_NOSPLIT | OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
-		    object == kernel_object || object == kmem_object)) {
+		    object == kernel_object)) {
 			vm_object_collapse(object);

 			/*
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h
+++ sys/vm/vm_object.h
@@ -225,10 +225,10 @@
 extern struct mtx vm_object_list_mtx;	/* lock for object list and count */

 extern struct vm_object kernel_object_store;
-extern struct vm_object kmem_object_store;

+/* kernel and kmem are aliased for backwards KPI compat. */
 #define	kernel_object	(&kernel_object_store)
-#define	kmem_object	(&kmem_object_store)
+#define	kmem_object	(&kernel_object_store)

 #define	VM_OBJECT_ASSERT_LOCKED(object)					\
 	rw_assert(&(object)->lock, RA_LOCKED)
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -142,7 +142,6 @@
 struct mtx vm_object_list_mtx;	/* lock for object list and count */

 struct vm_object kernel_object_store;
-struct vm_object kmem_object_store;

 static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0,
     "VM object stats");
@@ -290,14 +289,6 @@
 #if VM_NRESERVLEVEL > 0
 	kernel_object->flags |= OBJ_COLORED;
 	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
-#endif
-
-	rw_init(&kmem_object->lock, "kmem vm object");
-	_vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS -
-	    VM_MIN_KERNEL_ADDRESS), kmem_object);
-#if VM_NRESERVLEVEL > 0
-	kmem_object->flags |= OBJ_COLORED;
-	kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
 #endif

 	/*
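Because kmem_object now expands to &kernel_object_store and kmem_arena points at kernel_arena_storage, existing consumers compile unchanged and simply operate on the kernel object and arena, which also satisfies the tightened KASSERTs in kmem_back() and kmem_free(). A compatibility sketch (illustration only; example_legacy_backing() is a hypothetical caller):

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static int
example_legacy_backing(vm_offset_t addr, vm_size_t size, int flags)
{

	/* Pre-patch spelling; kmem_object now resolves to kernel_object. */
	return (kmem_back(kmem_object, addr, size, flags));
}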