Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h
+++ sys/vm/uma.h
@@ -441,10 +441,12 @@
 typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
 
 /*
- * Reclaims unused memory
+ * Reclaims unused memory.  If no NUMA domain is specified, memory from all
+ * domains is reclaimed.
  *
  * Arguments:
- *      req     Reclamation request type.
+ *      req     Reclamation request type.
+ *      domain  The target NUMA domain.
 * Returns:
 *      None
 */
@@ -452,7 +454,9 @@
 #define UMA_RECLAIM_DRAIN_CPU   2       /* release bucket and per-CPU caches */
 #define UMA_RECLAIM_TRIM        3       /* trim bucket cache to WSS */
 void uma_reclaim(int req);
+void uma_reclaim_domain(int req, int domain);
 void uma_zone_reclaim(uma_zone_t, int req);
+void uma_zone_reclaim_domain(uma_zone_t, int req, int domain);
 
 /*
  * Sets the alignment mask to be used for all zones requesting cache
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -167,17 +167,20 @@
 static LIST_HEAD(,uma_zone) uma_cachezones =
     LIST_HEAD_INITIALIZER(uma_cachezones);
 
-/* This RW lock protects the keg list */
+/*
+ * Lock for global lists: uma_kegs, uma_cachezones, and the per-keg list of
+ * zones.
+ */
 static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
 
+static struct sx uma_reclaim_lock;
+
 /*
 * First available virtual address for boot time allocations.
 */
 static vm_offset_t bootstart;
 static vm_offset_t bootmem;
 
-static struct sx uma_reclaim_lock;
-
 /*
 * kmem soft limit, initialized by uma_set_limit().  Ensure that early
 * allocations don't trigger a wakeup of the reclaim thread.
@@ -288,7 +291,7 @@
 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
 static void cache_drain(uma_zone_t);
 static void bucket_drain(uma_zone_t, uma_bucket_t);
-static void bucket_cache_reclaim(uma_zone_t zone, bool);
+static void bucket_cache_reclaim(uma_zone_t zone, bool, int);
 static int keg_ctor(void *, int, void *, int);
 static void keg_dtor(void *, int, void *);
 static int zone_ctor(void *, int, void *, int);
@@ -314,7 +317,7 @@
 static void bucket_init(void);
 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
-static void bucket_zone_drain(void);
+static void bucket_zone_drain(int domain);
 static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
 static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
@@ -523,12 +526,13 @@
 }
 
 static void
-bucket_zone_drain(void)
+bucket_zone_drain(int domain)
 {
        struct uma_bucket_zone *ubz;
 
        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
-               uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
+               uma_zone_reclaim_domain(ubz->ubz_zone, UMA_RECLAIM_DRAIN,
+                   domain);
 }
 
 /*
@@ -1215,7 +1219,7 @@
                        bucket_free(zone, bucket, NULL);
                }
        }
-       bucket_cache_reclaim(zone, true);
+       bucket_cache_reclaim(zone, true, UMA_ANYDOMAIN);
 }
 
 static void
@@ -1225,8 +1229,10 @@
        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
                return;
 
+       ZONE_LOCK(zone);
        zone->uz_bucket_size =
            (zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
+       ZONE_UNLOCK(zone);
 }
 
 static void
@@ -1349,7 +1355,7 @@
 }
 
 static void
-bucket_cache_reclaim(uma_zone_t zone, bool drain)
+bucket_cache_reclaim(uma_zone_t zone, bool drain, int domain)
 {
        int i;
 
@@ -1360,8 +1366,12 @@
        if (zone->uz_bucket_size > zone->uz_bucket_size_min)
                zone->uz_bucket_size--;
 
-       for (i = 0; i < vm_ndomains; i++)
-               bucket_cache_reclaim_domain(zone, drain, i);
+       if (domain != UMA_ANYDOMAIN) {
+               bucket_cache_reclaim_domain(zone, drain, domain);
+       } else {
+               for (i = 0; i < vm_ndomains; i++)
+                       bucket_cache_reclaim_domain(zone, drain, i);
+       }
 }
 
 static void
@@ -1464,63 +1474,65 @@
  * Returns nothing.
  */
 static void
-keg_drain(uma_keg_t keg)
+keg_drain(uma_keg_t keg, int domain)
 {
        int i;
 
        if ((keg->uk_flags & UMA_ZONE_NOFREE) != 0)
                return;
 
-       for (i = 0; i < vm_ndomains; i++)
-               keg_drain_domain(keg, i);
+       if (domain != UMA_ANYDOMAIN) {
+               keg_drain_domain(keg, domain);
+       } else {
+               for (i = 0; i < vm_ndomains; i++)
+                       keg_drain_domain(keg, i);
+       }
 }
 
 static void
-zone_reclaim(uma_zone_t zone, int waitok, bool drain)
+zone_reclaim(uma_zone_t zone, int domain, int waitok, bool drain)
 {
        /*
-        * Set draining to interlock with zone_dtor() so we can release our
-        * locks as we go.  Only dtor() should do a WAITOK call since it
-        * is the only call that knows the structure will still be available
-        * when it wakes up.
+        * Count active reclaim operations in order to interlock with
+        * zone_dtor(), which removes the zone from global lists before
+        * attempting to reclaim items itself.
+        *
+        * The zone may be destroyed while sleeping, so only zone_dtor() should
+        * specify M_WAITOK.
         */
        ZONE_LOCK(zone);
-       while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
-               if (waitok == M_NOWAIT)
-                       goto out;
-               msleep(zone, &ZDOM_GET(zone, 0)->uzd_lock, PVM, "zonedrain",
-                   1);
+       if (waitok == M_WAITOK) {
+               while (zone->uz_reclaimers > 0)
+                       msleep(zone, ZONE_LOCKPTR(zone), PVM, "zonedrain", 1);
        }
-       zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
+       zone->uz_reclaimers++;
        ZONE_UNLOCK(zone);
-       bucket_cache_reclaim(zone, drain);
 
-       /*
-        * The DRAINING flag protects us from being freed while
-        * we're running.  Normally the uma_rwlock would protect us but we
-        * must be able to release and acquire the right lock for each keg.
-        */
+       bucket_cache_reclaim(zone, drain, domain);
        if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
-               keg_drain(zone->uz_keg);
+               keg_drain(zone->uz_keg, domain);
        ZONE_LOCK(zone);
-       zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
-       wakeup(zone);
-out:
+       zone->uz_reclaimers--;
+       if (zone->uz_reclaimers == 0)
+               wakeup(zone);
        ZONE_UNLOCK(zone);
 }
 
 static void
-zone_drain(uma_zone_t zone, void *unused)
+zone_drain(uma_zone_t zone, void *arg)
 {
+       int domain;
 
-       zone_reclaim(zone, M_NOWAIT, true);
+       domain = (int)(uintptr_t)arg;
+       zone_reclaim(zone, domain, M_NOWAIT, true);
 }
 
 static void
-zone_trim(uma_zone_t zone, void *unused)
+zone_trim(uma_zone_t zone, void *arg)
 {
+       int domain;
 
-       zone_reclaim(zone, M_NOWAIT, false);
+       domain = (int)(uintptr_t)arg;
+       zone_reclaim(zone, domain, M_NOWAIT, false);
 }
 
 /*
@@ -2784,7 +2796,7 @@
                keg = zone->uz_keg;
                keg->uk_reserve = 0;
        }
-       zone_reclaim(zone, M_WAITOK, true);
+       zone_reclaim(zone, UMA_ANYDOMAIN, M_WAITOK, true);
 
        /*
        * We only destroy kegs from non secondary/non cache zones.
@@ -3054,9 +3066,9 @@
        args.flags = flags;
        args.keg = NULL;
 
-       sx_slock(&uma_reclaim_lock);
+       sx_xlock(&uma_reclaim_lock);
        res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
-       sx_sunlock(&uma_reclaim_lock);
+       sx_xunlock(&uma_reclaim_lock);
 
        return (res);
 }
@@ -3082,9 +3094,9 @@
        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
        args.keg = keg;
 
-       sx_slock(&uma_reclaim_lock);
+       sx_xlock(&uma_reclaim_lock);
        res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
-       sx_sunlock(&uma_reclaim_lock);
+       sx_xunlock(&uma_reclaim_lock);
 
        return (res);
 }
@@ -3125,9 +3137,9 @@
        if (booted == BOOT_SHUTDOWN && zone->uz_fini == NULL &&
            zone->uz_release == zone_release)
                return;
-       sx_slock(&uma_reclaim_lock);
+       sx_xlock(&uma_reclaim_lock);
        zone_free_item(zones, zone, NULL, SKIP_NONE);
-       sx_sunlock(&uma_reclaim_lock);
+       sx_xunlock(&uma_reclaim_lock);
 }
 
 void
@@ -4919,22 +4931,29 @@
 void
 uma_reclaim(int req)
 {
+       uma_reclaim_domain(req, UMA_ANYDOMAIN);
+}
+
+void
+uma_reclaim_domain(int req, int domain)
+{
+       void *arg;
 
-       CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
-       sx_xlock(&uma_reclaim_lock);
        bucket_enable();
 
+       arg = (void *)(uintptr_t)domain;
+       sx_slock(&uma_reclaim_lock);
        switch (req) {
        case UMA_RECLAIM_TRIM:
-               zone_foreach(zone_trim, NULL);
+               zone_foreach(zone_trim, arg);
                break;
        case UMA_RECLAIM_DRAIN:
+               zone_foreach(zone_drain, arg);
+               break;
        case UMA_RECLAIM_DRAIN_CPU:
-               zone_foreach(zone_drain, NULL);
-               if (req == UMA_RECLAIM_DRAIN_CPU) {
-                       pcpu_cache_drain_safe(NULL);
-                       zone_foreach(zone_drain, NULL);
-               }
+               zone_foreach(zone_drain, arg);
+               pcpu_cache_drain_safe(NULL);
+               zone_foreach(zone_drain, arg);
                break;
        default:
                panic("unhandled reclamation request %d", req);
@@ -4945,10 +4964,10 @@
        * we visit again so that we can free pages that are empty once other
        * zones are drained.  We have to do the same for buckets.
        */
-       zone_drain(slabzones[0], NULL);
-       zone_drain(slabzones[1], NULL);
-       bucket_zone_drain();
-       sx_xunlock(&uma_reclaim_lock);
+       zone_drain(slabzones[0], arg);
+       zone_drain(slabzones[1], arg);
+       bucket_zone_drain(domain);
+       sx_sunlock(&uma_reclaim_lock);
 }
 
 static volatile int uma_reclaim_needed;
@@ -4983,17 +5002,25 @@
 void
 uma_zone_reclaim(uma_zone_t zone, int req)
 {
+       uma_zone_reclaim_domain(zone, req, UMA_ANYDOMAIN);
+}
+
+void
+uma_zone_reclaim_domain(uma_zone_t zone, int req, int domain)
+{
+       void *arg;
 
+       arg = (void *)(uintptr_t)domain;
        switch (req) {
        case UMA_RECLAIM_TRIM:
-               zone_trim(zone, NULL);
+               zone_trim(zone, arg);
                break;
        case UMA_RECLAIM_DRAIN:
-               zone_drain(zone, NULL);
+               zone_drain(zone, arg);
                break;
        case UMA_RECLAIM_DRAIN_CPU:
                pcpu_cache_drain_safe(zone);
-               zone_drain(zone, NULL);
+               zone_drain(zone, arg);
                break;
        default:
                panic("unhandled reclamation request %d", req);
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h
+++ sys/vm/uma_int.h
@@ -162,7 +162,6 @@
 #define UMA_ZFLAG_CTORDTOR      0x01000000      /* Zone has ctor/dtor set. */
 #define UMA_ZFLAG_LIMIT         0x02000000      /* Zone has limit set. */
 #define UMA_ZFLAG_CACHE         0x04000000      /* uma_zcache_create()d it */
-#define UMA_ZFLAG_RECLAIMING    0x08000000      /* Running zone_reclaim(). */
 #define UMA_ZFLAG_BUCKET        0x10000000      /* Bucket zone. */
 #define UMA_ZFLAG_INTERNAL      0x20000000      /* No offpage no PCPU. */
 #define UMA_ZFLAG_TRASH         0x40000000      /* Add trash ctor/dtor. */
@@ -175,7 +174,6 @@
     "\37TRASH"                          \
     "\36INTERNAL"                       \
     "\35BUCKET"                         \
-    "\34RECLAIMING"                     \
     "\33CACHE"                          \
     "\32LIMIT"                          \
     "\31CTORDTOR"                       \
@@ -490,7 +488,7 @@
        char            *uz_ctlname;    /* sysctl safe name string. */
        int             uz_namecnt;     /* duplicate name count. */
        uint16_t        uz_bucket_size_min; /* Min number of items in bucket */
-       uint16_t        uz_pad0;
+       uint16_t        uz_reclaimers;  /* pending reclaim operations. */
 
        /* Offset 192, rare read-only. */
        struct sysctl_oid *uz_oid;      /* sysctl oid pointer. */
@@ -582,6 +580,7 @@
 
 #define ZONE_LOCK(z)    ZDOM_LOCK(ZDOM_GET((z), 0))
 #define ZONE_UNLOCK(z)  ZDOM_UNLOCK(ZDOM_GET((z), 0))
+#define ZONE_LOCKPTR(z) (&ZDOM_GET((z), 0)->uzd_lock)
 
 #define ZONE_CROSS_LOCK_INIT(z)                                 \
        mtx_init(&(z)->uz_cross_lock, "UMA Cross", NULL, MTX_DEF)
Index: sys/vm/vm_kern.c
===================================================================
--- sys/vm/vm_kern.c
+++ sys/vm/vm_kern.c
@@ -899,7 +899,6 @@
        EVENTHANDLER_INVOKE(vm_lowmem, i);
        return (0);
 }
-
 SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_MPSAFE |
     CTLFLAG_RW, 0, 0, debug_vm_lowmem, "I",
     "set to trigger vm_lowmem event with given flags");
@@ -911,7 +910,7 @@
 
        i = 0;
        error = sysctl_handle_int(oidp, &i, 0, req);
-       if (error != 0)
+       if (error != 0 || req->newptr == NULL)
                return (error);
        if (i != UMA_RECLAIM_TRIM && i != UMA_RECLAIM_DRAIN &&
            i != UMA_RECLAIM_DRAIN_CPU)
@@ -919,7 +918,31 @@
        uma_reclaim(i);
        return (0);
 }
-
 SYSCTL_PROC(_debug, OID_AUTO, uma_reclaim, CTLTYPE_INT | CTLFLAG_MPSAFE |
     CTLFLAG_RW, 0, 0, debug_uma_reclaim, "I",
     "set to generate request to reclaim uma caches");
+
+static int
+debug_uma_reclaim_domain(SYSCTL_HANDLER_ARGS)
+{
+       int domain, error, request;
+
+       request = 0;
+       error = sysctl_handle_int(oidp, &request, 0, req);
+       if (error != 0 || req->newptr == NULL)
+               return (error);
+
+       domain = request >> 4;
+       request &= 0xf;
+       if (request != UMA_RECLAIM_TRIM && request != UMA_RECLAIM_DRAIN &&
+           request != UMA_RECLAIM_DRAIN_CPU)
+               return (EINVAL);
+       if (domain < 0 || domain >= vm_ndomains)
+               return (EINVAL);
+       uma_reclaim_domain(request, domain);
+       return (0);
+}
+SYSCTL_PROC(_debug, OID_AUTO, uma_reclaim_domain,
+    CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, 0,
+    debug_uma_reclaim_domain, "I",
+    "set to generate request to reclaim a NUMA domain's uma caches");
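
For reviewers, a sketch of how the new entry points compose. The function below is hypothetical and not part of this patch; only the uma_reclaim_domain() and uma_zone_reclaim_domain() calls are the interfaces added above, and the existing calls become wrappers that pass UMA_ANYDOMAIN.

/*
 * Hypothetical caller, not part of this patch: react to memory pressure
 * on a single NUMA domain without touching the caches of other domains.
 */
#include <sys/param.h>
#include <vm/uma.h>

static void
example_domain_lowmem(uma_zone_t zone, int domain)
{
        /* Trim one zone's per-domain bucket cache back to its WSS. */
        uma_zone_reclaim_domain(zone, UMA_RECLAIM_TRIM, domain);

        /* Drain the cached items of every zone, for this domain only. */
        uma_reclaim_domain(UMA_RECLAIM_DRAIN, domain);

        /* The old interface is now equivalent to passing UMA_ANYDOMAIN. */
        uma_reclaim(UMA_RECLAIM_DRAIN);
}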
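The uz_reclaimers counter replaces the single UMA_ZFLAG_RECLAIMING flag because per-domain reclaims may now run concurrently; only zone_dtor(), the lone M_WAITOK caller, must wait for the count to drain to zero before freeing the zone. Below is a simplified, standalone userspace analogue of that interlock using POSIX threads, with illustrative names; the kernel version instead uses a timed msleep() on the zone lock, and the waiter then becomes a reclaimer itself.

#include <pthread.h>

struct zone_sketch {
        pthread_mutex_t lock;
        pthread_cond_t  cv;
        int             reclaimers;     /* mirrors uz_reclaimers */
};

/* M_NOWAIT-style reclaim: never sleeps, just announces itself. */
static void
reclaim(struct zone_sketch *z)
{
        pthread_mutex_lock(&z->lock);
        z->reclaimers++;
        pthread_mutex_unlock(&z->lock);

        /* ... reclaim caches without holding the zone lock ... */

        pthread_mutex_lock(&z->lock);
        if (--z->reclaimers == 0)
                pthread_cond_broadcast(&z->cv); /* the wakeup(zone) analogue */
        pthread_mutex_unlock(&z->lock);
}

/* M_WAITOK-style destructor path: waits until no reclaimer is active. */
static void
dtor_wait(struct zone_sketch *z)
{
        pthread_mutex_lock(&z->lock);
        while (z->reclaimers > 0)
                pthread_cond_wait(&z->cv, &z->lock);
        pthread_mutex_unlock(&z->lock);
}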
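The new debug.uma_reclaim_domain sysctl packs both arguments into one integer, exactly as decoded by debug_uma_reclaim_domain() above: the request type in the low four bits and the target domain in the bits above. For example, sysctl debug.uma_reclaim_domain=0x12 requests UMA_RECLAIM_DRAIN_CPU (2) for domain 1. A minimal userland sketch follows; the UMA_RECLAIM_DRAIN value is mirrored from sys/vm/uma.h and the program is illustrative only.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

#define UMA_RECLAIM_DRAIN       1       /* mirrored from sys/vm/uma.h */

int
main(int argc, char **argv)
{
        int request, domain;

        domain = (argc > 1) ? atoi(argv[1]) : 0;
        /* Low 4 bits: request type; remaining bits: NUMA domain. */
        request = (domain << 4) | UMA_RECLAIM_DRAIN;
        if (sysctlbyname("debug.uma_reclaim_domain", NULL, NULL,
            &request, sizeof(request)) != 0) {
                perror("sysctlbyname");
                return (1);
        }
        printf("drained UMA caches for domain %d\n", domain);
        return (0);
}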