Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F153466653
D23673.id68283.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
31 KB
Referenced Files
None
Subscribers
None
D23673.id68283.diff
View Options
Index: lib/libmemstat/memstat_uma.c
===================================================================
--- lib/libmemstat/memstat_uma.c
+++ lib/libmemstat/memstat_uma.c
@@ -425,12 +425,13 @@
(unsigned long )uz.uz_frees);
mtp->mt_failures = kvm_counter_u64_fetch(kvm,
(unsigned long )uz.uz_fails);
+ mtp->mt_xdomain = kvm_counter_u64_fetch(kvm,
+ (unsigned long )uz.uz_xdomain);
mtp->mt_sleeps = uz.uz_sleeps;
/* See comment above in memstat_sysctl_uma(). */
if (mtp->mt_numallocs < mtp->mt_numfrees)
mtp->mt_numallocs = mtp->mt_numfrees;
- mtp->mt_xdomain = uz.uz_xdomain;
if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
goto skip_percpu;
for (i = 0; i < mp_maxid + 1; i++) {
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -518,6 +518,9 @@
{
struct uma_bucket_zone *ubz;
+ if (bucket->ub_cnt != 0)
+ bucket_drain(zone, bucket);
+
KASSERT(bucket->ub_cnt == 0,
("bucket_free: Freeing a non free bucket."));
KASSERT(bucket->ub_seq == SMR_SEQ_INVALID,
@@ -537,18 +540,60 @@
uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
}
+/*
+ * Lock the given domain of a zone, growing the bucket size on contention.
+ */
+static uma_zone_domain_t
+zone_domain_lock(uma_zone_t zone, int domain)
+{
+	uma_zone_domain_t zdom;
+	bool contended;
+
+	zdom = ZDOM_GET(zone, domain);
+	/* Sample ownership before locking; an owned lock counts as contention. */
+	contended = ZDOM_OWNED(zdom);
+	ZDOM_LOCK(zdom);
+	/* Unsynchronized update; the counter does not need to be precise. */
+	if (contended && zone->uz_bucket_size < zone->uz_bucket_size_max)
+		zone->uz_bucket_size++;
+
+	return (zdom);
+}
+
+/* Return the domain caching the fewest items; a tie involving 'pref' picks it. */
+static int
+zone_domain_lowest(uma_zone_t zone, int pref)
+{
+	long cur, least;
+	int best, i;
+
+	best = 0;
+	least = LONG_MAX;
+	for (i = 0; i < vm_ndomains; i++) {
+		cur = ZDOM_GET(zone, i)->uzd_nitems;
+		if (cur < least) {
+			least = cur;
+			best = i;
+		} else if (cur == least && (best == pref || i == pref))
+			best = pref;
+	}
+
+	return (best);
+}
+
/*
* Attempt to satisfy an allocation by retrieving a full bucket from one of the
* zone's caches. If a bucket is found the zone is not locked on return.
*/
static uma_bucket_t
-zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom)
+zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, bool reclaim)
{
uma_bucket_t bucket;
+ long max;
int i;
bool dtor = false;
- ZONE_LOCK_ASSERT(zone);
+ ZDOM_LOCK_ASSERT(zdom);
if ((bucket = STAILQ_FIRST(&zdom->uzd_buckets)) == NULL)
return (NULL);
@@ -560,14 +605,29 @@
return (NULL);
bucket->ub_seq = SMR_SEQ_INVALID;
dtor = (zone->uz_dtor != NULL) || UMA_ALWAYS_CTORDTOR;
+ if (STAILQ_NEXT(bucket, ub_link) != NULL)
+ zdom->uzd_seq = STAILQ_NEXT(bucket, ub_link)->ub_seq;
}
MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link);
zdom->uzd_nitems -= bucket->ub_cnt;
if (zdom->uzd_imin > zdom->uzd_nitems)
zdom->uzd_imin = zdom->uzd_nitems;
- zone->uz_bkt_count -= bucket->ub_cnt;
- ZONE_UNLOCK(zone);
+ if (reclaim) {
+ long cnt;
+ /*
+ * Shift the bounds of the current WSS interval to avoid
+ * perturbing the estimate.
+ */
+ max = zdom->uzd_imax;
+ do {
+ if (max < bucket->ub_cnt)
+ cnt = 0;
+ else
+ cnt = max - bucket->ub_cnt;
+ } while (atomic_fcmpset_long(&zdom->uzd_imax, &max, cnt) == 0);
+ }
+ ZDOM_UNLOCK(zdom);
if (dtor)
for (i = 0; i < bucket->ub_cnt; i++)
item_dtor(zone, bucket->ub_bucket[i], zone->uz_size,
@@ -579,22 +639,42 @@
/*
* Insert a full bucket into the specified cache. The "ws" parameter indicates
* whether the bucket's contents should be counted as part of the zone's working
- * set.
+ * set. The bucket may be freed if it exceeds the bucket limit.
*/
static void
-zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
+zone_put_bucket(uma_zone_t zone, int domain, uma_bucket_t bucket, void *udata,
const bool ws)
{
+ uma_zone_domain_t zdom;
+ long max;
- ZONE_LOCK_ASSERT(zone);
- KASSERT(!ws || zone->uz_bkt_count < zone->uz_bkt_max,
+ zdom = zone_domain_lock(zone, domain);
+
+ KASSERT(!ws || zdom->uzd_nitems < zone->uz_bkt_max,
("%s: zone %p overflow", __func__, zone));
- STAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
+ /*
+ * Conditionally set the maximum number of items.
+ */
zdom->uzd_nitems += bucket->ub_cnt;
- if (ws && zdom->uzd_imax < zdom->uzd_nitems)
- zdom->uzd_imax = zdom->uzd_nitems;
- zone->uz_bkt_count += bucket->ub_cnt;
+ if (ws) {
+ max = zdom->uzd_imax;
+ do {
+ if (max > zdom->uzd_nitems)
+ break;
+ } while (atomic_fcmpset_long(&zdom->uzd_imax, &max,
+ zdom->uzd_nitems) == 0);
+ }
+ if (__predict_true(zdom->uzd_nitems < zone->uz_bkt_max)) {
+ if (STAILQ_EMPTY(&zdom->uzd_buckets))
+ zdom->uzd_seq = bucket->ub_seq;
+ STAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
+ ZDOM_UNLOCK(zdom);
+ return;
+ }
+ zdom->uzd_nitems -= bucket->ub_cnt;
+ ZDOM_UNLOCK(zdom);
+ bucket_free(zone, bucket, udata);
}
/* Pops an item out of a per-cpu cache bucket. */
@@ -736,6 +816,37 @@
cache_bucket_copy(b2, &b3);
}
+/*
+ * Try to take a bucket from the bucket cache of the given domain of 'zone'.
+ * Returns the bucket, or NULL if an unlocked check or the fetch fails.
+ */
+static uma_bucket_t
+cache_fetch_bucket(uma_zone_t zone, uma_cache_t cache, int domain)
+{
+	uma_zone_domain_t zdom;
+	uma_bucket_t bucket;
+
+	/* Unlocked pre-checks: avoid the lock if possible. */
+	zdom = ZDOM_GET(zone, domain);
+	if (zdom->uzd_nitems == 0)
+		return (NULL);
+	if ((cache_uz_flags(cache) & UMA_ZONE_SMR) != 0 &&
+	    !smr_poll(zone->uz_smr, zdom->uzd_seq, false))
+		return (NULL);
+
+	/* Check the zone's cache of buckets under the domain lock. */
+	zdom = zone_domain_lock(zone, domain);
+	bucket = zone_fetch_bucket(zone, zdom, false);
+	if (bucket == NULL) {
+		/* A failed fetch leaves the domain locked; release it here. */
+		ZDOM_UNLOCK(zdom);
+		return (NULL);
+	}
+	KASSERT(bucket->ub_cnt != 0,
+	    ("uma_zalloc_arg: Returning an empty bucket."));
+	return (bucket);
+}
+
static void
zone_log_warning(uma_zone_t zone)
{
@@ -787,10 +898,12 @@
{
long wss;
+ ZDOM_LOCK(zdom);
MPASS(zdom->uzd_imax >= zdom->uzd_imin);
wss = zdom->uzd_imax - zdom->uzd_imin;
zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5;
+ ZDOM_UNLOCK(zdom);
}
/*
@@ -853,10 +966,8 @@
KEG_UNLOCK(keg, 0);
update_wss:
- ZONE_LOCK(zone);
for (int i = 0; i < vm_ndomains; i++)
- zone_domain_update_wss(&zone->uz_domain[i]);
- ZONE_UNLOCK(zone);
+ zone_domain_update_wss(ZDOM_GET(zone, i));
}
/*
@@ -975,7 +1086,7 @@
{
int i;
- if (bucket == NULL || bucket->ub_cnt == 0)
+ if (bucket->ub_cnt == 0)
return;
if ((zone->uz_flags & UMA_ZONE_SMR) != 0 &&
@@ -1033,20 +1144,16 @@
CPU_FOREACH(cpu) {
cache = &zone->uz_cpu[cpu];
bucket = cache_bucket_unload_alloc(cache);
- if (bucket != NULL) {
- bucket_drain(zone, bucket);
+ if (bucket != NULL)
bucket_free(zone, bucket, NULL);
- }
bucket = cache_bucket_unload_free(cache);
if (bucket != NULL) {
bucket->ub_seq = seq;
- bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
}
bucket = cache_bucket_unload_cross(cache);
if (bucket != NULL) {
bucket->ub_seq = seq;
- bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
}
}
@@ -1060,10 +1167,8 @@
if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
return;
- ZONE_LOCK(zone);
zone->uz_bucket_size =
(zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
- ZONE_UNLOCK(zone);
}
static void
@@ -1078,10 +1183,7 @@
b1 = b2 = b3 = NULL;
critical_enter();
- if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH)
- domain = PCPU_GET(domain);
- else
- domain = 0;
+ domain = PCPU_GET(domain);
cache = &zone->uz_cpu[curcpu];
b1 = cache_bucket_unload_alloc(cache);
@@ -1095,25 +1197,12 @@
}
critical_exit();
- ZONE_LOCK(zone);
- if (b1 != NULL && b1->ub_cnt != 0) {
- zone_put_bucket(zone, &zone->uz_domain[domain], b1, false);
- b1 = NULL;
- }
- if (b2 != NULL && b2->ub_cnt != 0) {
- zone_put_bucket(zone, &zone->uz_domain[domain], b2, false);
- b2 = NULL;
- }
- ZONE_UNLOCK(zone);
-
if (b1 != NULL)
- bucket_free(zone, b1, NULL);
+ zone_put_bucket(zone, domain, b1, NULL, false);
if (b2 != NULL)
- bucket_free(zone, b2, NULL);
- if (b3 != NULL) {
- bucket_drain(zone, b3);
+ zone_put_bucket(zone, domain, b2, NULL, false);
+ if (b3 != NULL)
bucket_free(zone, b3, NULL);
- }
}
/*
@@ -1161,31 +1250,28 @@
{
uma_zone_domain_t zdom;
uma_bucket_t bucket;
- long target, tofree;
+ long target;
int i;
+ /*
+ * Shrink the zone bucket size to ensure that the per-CPU caches
+ * don't grow too large.
+ */
+ if (zone->uz_bucket_size > zone->uz_bucket_size_min)
+ zone->uz_bucket_size--;
+
for (i = 0; i < vm_ndomains; i++) {
/*
* The cross bucket is partially filled and not part of
* the item count. Reclaim it individually here.
*/
- zdom = &zone->uz_domain[i];
+ zdom = ZDOM_GET(zone, i);
ZONE_CROSS_LOCK(zone);
bucket = zdom->uzd_cross;
zdom->uzd_cross = NULL;
ZONE_CROSS_UNLOCK(zone);
- if (bucket != NULL) {
- bucket_drain(zone, bucket);
+ if (bucket != NULL)
bucket_free(zone, bucket, NULL);
- }
-
- /*
- * Shrink the zone bucket size to ensure that the per-CPU caches
- * don't grow too large.
- */
- ZONE_LOCK(zone);
- if (i == 0 && zone->uz_bucket_size > zone->uz_bucket_size_min)
- zone->uz_bucket_size--;
/*
* If we were asked to drain the zone, we are done only once
@@ -1195,29 +1281,17 @@
* then the estimate will grow at the end of this interval and
* we ignore the historical average.
*/
+ ZDOM_LOCK(zdom);
target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
zdom->uzd_imin);
while (zdom->uzd_nitems > target) {
- bucket = STAILQ_FIRST(&zdom->uzd_buckets);
+ bucket = zone_fetch_bucket(zone, zdom, true);
if (bucket == NULL)
break;
- tofree = bucket->ub_cnt;
- STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link);
- zdom->uzd_nitems -= tofree;
-
- /*
- * Shift the bounds of the current WSS interval to avoid
- * perturbing the estimate.
- */
- zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree);
- zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree);
-
- ZONE_UNLOCK(zone);
- bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
- ZONE_LOCK(zone);
+ ZDOM_LOCK(zdom);
}
- ZONE_UNLOCK(zone);
+ ZDOM_UNLOCK(zdom);
}
}
@@ -1311,7 +1385,8 @@
while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
if (waitok == M_NOWAIT)
goto out;
- msleep(zone, &zone->uz_lock, PVM, "zonedrain", 1);
+ msleep(zone, &ZDOM_GET(zone, 0)->uzd_lock, PVM, "zonedrain",
+ 1);
}
zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
ZONE_UNLOCK(zone);
@@ -2198,6 +2273,7 @@
zone->uz_allocs = counter_u64_alloc(M_WAITOK);
zone->uz_frees = counter_u64_alloc(M_WAITOK);
zone->uz_fails = counter_u64_alloc(M_WAITOK);
+ zone->uz_xdomain = counter_u64_alloc(M_WAITOK);
}
static void
@@ -2316,20 +2392,15 @@
"Total zone limit sleeps");
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"bucket_max", CTLFLAG_RD, &zone->uz_bkt_max, 0,
- "Maximum number of items in the bucket cache");
- SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
- "bucket_cnt", CTLFLAG_RD, &zone->uz_bkt_count, 0,
- "Number of items in the bucket cache");
+ "Maximum number of items in each domain's bucket cache");
/*
* Per-domain zone information.
*/
domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
- if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0)
- domains = 1;
for (i = 0; i < domains; i++) {
- zdom = &zone->uz_domain[i];
+ zdom = ZDOM_GET(zone, i);
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD, NULL, "");
SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
@@ -2366,8 +2437,8 @@
SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"fails", CTLFLAG_RD, &zone->uz_fails,
"Number of allocation failures");
- SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
- "xdomain", CTLFLAG_RD, &zone->uz_xdomain, 0,
+ SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "xdomain", CTLFLAG_RD, &zone->uz_xdomain,
"Free calls from the wrong domain");
}
@@ -2414,6 +2485,7 @@
{
struct uma_zone_count cnt;
struct uma_zctor_args *arg = udata;
+ uma_zone_domain_t zdom;
uma_zone_t zone = mem;
uma_zone_t z;
uma_keg_t keg;
@@ -2426,15 +2498,12 @@
zone->uz_init = NULL;
zone->uz_fini = NULL;
zone->uz_sleeps = 0;
- zone->uz_xdomain = 0;
zone->uz_bucket_size = 0;
zone->uz_bucket_size_min = 0;
zone->uz_bucket_size_max = BUCKET_MAX;
zone->uz_flags = (arg->flags & UMA_ZONE_SMR);
zone->uz_warning = NULL;
/* The domain structures follow the cpu structures. */
- zone->uz_domain =
- (struct uma_zone_domain *)&zone->uz_cpu[mp_maxid + 1];
zone->uz_bkt_max = ULONG_MAX;
timevalclear(&zone->uz_ratecheck);
@@ -2443,11 +2512,13 @@
cnt.count = 0;
zone_foreach(zone_count, &cnt);
zone->uz_namecnt = cnt.count;
- ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
ZONE_CROSS_LOCK_INIT(zone);
- for (i = 0; i < vm_ndomains; i++)
- STAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
+ for (i = 0; i < vm_ndomains; i++) {
+ zdom = ZDOM_GET(zone, i);
+ ZDOM_LOCK_INIT(zone, zdom, (arg->flags & UMA_ZONE_MTXCLASS));
+ STAILQ_INIT(&zdom->uzd_buckets);
+ }
#ifdef INVARIANTS
if (arg->uminit == trash_init && arg->fini == trash_fini)
@@ -2599,6 +2670,7 @@
{
uma_zone_t zone;
uma_keg_t keg;
+ int i;
zone = (uma_zone_t)arg;
@@ -2610,13 +2682,8 @@
rw_wlock(&uma_rwlock);
LIST_REMOVE(zone, uz_link);
rw_wunlock(&uma_rwlock);
- /*
- * XXX there are some races here where
- * the zone can be drained but zone lock
- * released and then refilled before we
- * remove it... we dont care for now
- */
zone_reclaim(zone, M_WAITOK, true);
+
/*
* We only destroy kegs from non secondary/non cache zones.
*/
@@ -2630,8 +2697,10 @@
counter_u64_free(zone->uz_allocs);
counter_u64_free(zone->uz_frees);
counter_u64_free(zone->uz_fails);
+ counter_u64_free(zone->uz_xdomain);
free(zone->uz_ctlname, M_UMA);
- ZONE_LOCK_FINI(zone);
+ for (i = 0; i < vm_ndomains; i++)
+ ZDOM_LOCK_FINI(ZDOM_GET(zone, i));
ZONE_CROSS_LOCK_FINI(zone);
}
@@ -3235,7 +3304,6 @@
uma_zone_domain_t zdom;
uma_bucket_t bucket;
int domain;
- bool lockfail;
CRITICAL_ASSERT(curthread);
@@ -3246,7 +3314,7 @@
* SMR Zones can't re-use the free bucket until the sequence has
* expired.
*/
- if ((zone->uz_flags & UMA_ZONE_SMR) == 0 &&
+ if ((cache_uz_flags(cache) & UMA_ZONE_SMR) == 0 &&
cache->uc_freebucket.ucb_cnt != 0) {
cache_bucket_swap(&cache->uc_freebucket,
&cache->uc_allocbucket);
@@ -3257,9 +3325,14 @@
* Discard any empty allocation bucket while we hold no locks.
*/
bucket = cache_bucket_unload_alloc(cache);
+ domain = PCPU_GET(domain);
critical_exit();
- if (bucket != NULL)
+
+ if (bucket != NULL) {
+ KASSERT(bucket->ub_cnt == 0,
+ ("cache_alloc: Entered with non-empty alloc bucket."));
bucket_free(zone, bucket, udata);
+ }
/* Short-circuit for zones without buckets and low memory. */
if (zone->uz_bucket_size == 0 || bucketdisable) {
@@ -3269,60 +3342,17 @@
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
- * we must go back to the zone. This requires the zone lock, so we
+ * we must go back to the zone. This requires the zdom lock, so we
* must drop the critical section, then re-acquire it when we go back
* to the cache. Since the critical section is released, we may be
* preempted or migrate. As such, make sure not to maintain any
* thread-local state specific to the cache from prior to releasing
* the critical section.
*/
- lockfail = 0;
- if (ZONE_TRYLOCK(zone) == 0) {
- /* Record contention to size the buckets. */
- ZONE_LOCK(zone);
- lockfail = 1;
- }
-
- /* See if we lost the race to fill the cache. */
- critical_enter();
- cache = &zone->uz_cpu[curcpu];
- if (cache->uc_allocbucket.ucb_bucket != NULL) {
- ZONE_UNLOCK(zone);
- return (true);
- }
-
- /*
- * Check the zone's cache of buckets.
- */
- if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH) {
- domain = PCPU_GET(domain);
- zdom = &zone->uz_domain[domain];
- } else {
- domain = UMA_ANYDOMAIN;
- zdom = &zone->uz_domain[0];
- }
-
- if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) {
- KASSERT(bucket->ub_cnt != 0,
- ("uma_zalloc_arg: Returning an empty bucket."));
- cache_bucket_load_alloc(cache, bucket);
- return (true);
- }
- /* We are no longer associated with this CPU. */
- critical_exit();
-
- /*
- * We bump the uz count when the cache size is insufficient to
- * handle the working set.
- */
- if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
- zone->uz_bucket_size++;
- ZONE_UNLOCK(zone);
+ bucket = cache_fetch_bucket(zone, cache, domain);
+ if (bucket == NULL)
+ bucket = zone_alloc_bucket(zone, udata, domain, flags);
- /*
- * Fill a bucket and attempt to use it as the alloc bucket.
- */
- bucket = zone_alloc_bucket(zone, udata, domain, flags);
CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
zone->uz_name, zone, bucket);
if (bucket == NULL) {
@@ -3335,24 +3365,24 @@
* initialized bucket to make this less likely or claim
* the memory directly.
*/
- ZONE_LOCK(zone);
critical_enter();
cache = &zone->uz_cpu[curcpu];
if (cache->uc_allocbucket.ucb_bucket == NULL &&
- ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0 ||
+ ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) == 0 ||
domain == PCPU_GET(domain))) {
+ zdom = ZDOM_GET(zone, domain);
+ atomic_add_long(&zdom->uzd_imax, bucket->ub_cnt);
cache_bucket_load_alloc(cache, bucket);
- zdom->uzd_imax += bucket->ub_cnt;
- } else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
- critical_exit();
- ZONE_UNLOCK(zone);
- bucket_drain(zone, bucket);
- bucket_free(zone, bucket, udata);
- critical_enter();
return (true);
- } else
- zone_put_bucket(zone, zdom, bucket, false);
- ZONE_UNLOCK(zone);
+ }
+
+ /*
+ * We lost the race, release this bucket and start over.
+ */
+ critical_exit();
+ zone_put_bucket(zone, domain, bucket, udata, false);
+ critical_enter();
+
return (true);
}
@@ -3745,6 +3775,8 @@
/* Avoid allocs targeting empty domains. */
if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
domain = UMA_ANYDOMAIN;
+ if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0)
+ domain = UMA_ANYDOMAIN;
if (zone->uz_max_items > 0)
maxbucket = zone_alloc_limit(zone, zone->uz_bucket_size,
@@ -4044,7 +4076,7 @@
while (bucket->ub_cnt > 0) {
item = bucket->ub_bucket[bucket->ub_cnt - 1];
domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
- zdom = &zone->uz_domain[domain];
+ zdom = ZDOM_GET(zone, domain);
if (zdom->uzd_cross == NULL) {
zdom->uzd_cross = bucket_alloc(zone, udata, M_NOWAIT);
if (zdom->uzd_cross == NULL)
@@ -4060,29 +4092,16 @@
bucket->ub_cnt--;
}
ZONE_CROSS_UNLOCK(zone);
- if (!STAILQ_EMPTY(&fullbuckets)) {
- ZONE_LOCK(zone);
- while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
- STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
- if (zone->uz_bkt_count >= zone->uz_bkt_max) {
- ZONE_UNLOCK(zone);
- bucket_drain(zone, b);
- bucket_free(zone, b, udata);
- ZONE_LOCK(zone);
- } else {
- domain = _vm_phys_domain(
- pmap_kextract(
- (vm_offset_t)b->ub_bucket[0]));
- zdom = &zone->uz_domain[domain];
- zone_put_bucket(zone, zdom, b, true);
- }
- }
- ZONE_UNLOCK(zone);
- }
- if (bucket->ub_cnt != 0)
- bucket_drain(zone, bucket);
- bucket->ub_seq = SMR_SEQ_INVALID;
+ if (bucket->ub_cnt == 0)
+ bucket->ub_seq = SMR_SEQ_INVALID;
bucket_free(zone, bucket, udata);
+
+ while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
+ STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
+ domain = _vm_phys_domain(pmap_kextract(
+ (vm_offset_t)b->ub_bucket[0]));
+ zone_put_bucket(zone, domain, b, udata, true);
+ }
}
#endif
@@ -4090,7 +4109,6 @@
zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
int domain, int itemdomain)
{
- uma_zone_domain_t zdom;
#ifdef NUMA
/*
@@ -4107,32 +4125,16 @@
/*
* Attempt to save the bucket in the zone's domain bucket cache.
- *
- * We bump the uz count when the cache size is insufficient to
- * handle the working set.
*/
- if (ZONE_TRYLOCK(zone) == 0) {
- /* Record contention to size the buckets. */
- ZONE_LOCK(zone);
- if (zone->uz_bucket_size < zone->uz_bucket_size_max)
- zone->uz_bucket_size++;
- }
-
CTR3(KTR_UMA,
"uma_zfree: zone %s(%p) putting bucket %p on free list",
zone->uz_name, zone, bucket);
/* ub_cnt is pointing to the last free item */
KASSERT(bucket->ub_cnt == bucket->ub_entries,
("uma_zfree: Attempting to insert partial bucket onto the full list.\n"));
- if (zone->uz_bkt_count >= zone->uz_bkt_max) {
- ZONE_UNLOCK(zone);
- bucket_drain(zone, bucket);
- bucket_free(zone, bucket, udata);
- } else {
- zdom = &zone->uz_domain[itemdomain];
- zone_put_bucket(zone, zdom, bucket, true);
- ZONE_UNLOCK(zone);
- }
+ if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0)
+ itemdomain = zone_domain_lowest(zone, itemdomain);
+ zone_put_bucket(zone, itemdomain, bucket, udata, true);
}
/*
@@ -4165,19 +4167,21 @@
* cross bucket if the current domain and itemdomain do not match.
*/
cbucket = &cache->uc_freebucket;
+ domain = PCPU_GET(domain);
#ifdef NUMA
- if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) {
- domain = PCPU_GET(domain);
+ if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) {
if (domain != itemdomain) {
cbucket = &cache->uc_crossbucket;
if (cbucket->ucb_cnt != 0)
- atomic_add_64(&zone->uz_xdomain,
+ counter_u64_add(zone->uz_xdomain,
cbucket->ucb_cnt);
}
} else
#endif
- itemdomain = domain = 0;
+ itemdomain = domain;
bucket = cache_bucket_unload(cbucket);
+ KASSERT(bucket == NULL || bucket->ub_cnt != 0,
+ ("cache_free: Entered with empty free bucket."));
/* We are no longer associated with this CPU. */
critical_exit();
@@ -4213,7 +4217,7 @@
* is already populated we will fall through and attempt to populate
* the free bucket.
*/
- if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) {
+ if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) {
domain = PCPU_GET(domain);
if (domain != itemdomain &&
cache->uc_crossbucket.ucb_bucket == NULL) {
@@ -4408,7 +4412,7 @@
}
if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
zone->uz_bucket_size_min = zone->uz_bucket_size_max;
- zone->uz_bkt_max = nitems;
+ zone->uz_bkt_max = nitems / vm_ndomains;
ZONE_UNLOCK(zone);
}
@@ -4622,7 +4626,6 @@
} else
kva = 0;
- ZONE_LOCK(zone);
MPASS(keg->uk_kva == 0);
keg->uk_kva = kva;
keg->uk_offset = 0;
@@ -4635,7 +4638,6 @@
keg->uk_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
zone->uz_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
zone_update_caches(zone);
- ZONE_UNLOCK(zone);
return (1);
}
@@ -4842,8 +4844,8 @@
}
allocs += counter_u64_fetch(z->uz_allocs);
frees += counter_u64_fetch(z->uz_frees);
+ xdomain += counter_u64_fetch(z->uz_xdomain);
sleeps += z->uz_sleeps;
- xdomain += z->uz_xdomain;
if (cachefreep != NULL)
*cachefreep = cachefree;
if (allocsp != NULL)
@@ -4887,23 +4889,15 @@
for (i = 0; i < vm_ndomains; i++) {
- zdom = &z->uz_domain[i];
+ zdom = ZDOM_GET(z, i);
uth->uth_zone_free += zdom->uzd_nitems;
}
uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
uth->uth_frees = counter_u64_fetch(z->uz_frees);
uth->uth_fails = counter_u64_fetch(z->uz_fails);
+ uth->uth_xdomain = counter_u64_fetch(z->uz_xdomain);
uth->uth_sleeps = z->uz_sleeps;
- uth->uth_xdomain = z->uz_xdomain;
- /*
- * While it is not normally safe to access the cache bucket pointers
- * while not on the CPU that owns the cache, we only allow the pointers
- * to be exchanged without the zone lock held, not invalidated, so
- * accept the possible race associated with bucket exchange during
- * monitoring. Use atomic_load_ptr() to ensure that the bucket pointers
- * are loaded only once.
- */
for (i = 0; i < mp_maxid + 1; i++) {
bzero(&ups[i], sizeof(*ups));
if (internal || CPU_ABSENT(i))
@@ -4964,7 +4958,6 @@
}
LIST_FOREACH(z, &kz->uk_zones, uz_link) {
bzero(&uth, sizeof(uth));
- ZONE_LOCK(z);
strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
uth.uth_align = kz->uk_align;
uth.uth_size = kz->uk_size;
@@ -4989,7 +4982,6 @@
uth.uth_zone_flags = UTH_ZONE_SECONDARY;
uma_vm_zone_stats(&uth, z, &sbuf, ups,
kz->uk_flags & UMA_ZFLAG_INTERNAL);
- ZONE_UNLOCK(z);
(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
for (i = 0; i < mp_maxid + 1; i++)
(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
@@ -4997,11 +4989,9 @@
}
LIST_FOREACH(z, &uma_cachezones, uz_link) {
bzero(&uth, sizeof(uth));
- ZONE_LOCK(z);
strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
uth.uth_size = z->uz_size;
uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
- ZONE_UNLOCK(z);
(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
for (i = 0; i < mp_maxid + 1; i++)
(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
@@ -5260,7 +5250,7 @@
uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps,
xdomain);
for (i = 0; i < vm_ndomains; i++) {
- *cachefree += z->uz_domain[i].uzd_nitems;
+ *cachefree += ZDOM_GET(z, i)->uzd_nitems;
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z)))
*cachefree += kz->uk_domain[i].ud_free_items;
@@ -5354,7 +5344,7 @@
LIST_FOREACH(z, &uma_cachezones, uz_link) {
uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
for (i = 0; i < vm_ndomains; i++)
- cachefree += z->uz_domain[i].uzd_nitems;
+ cachefree += ZDOM_GET(z, i)->uzd_nitems;
db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
z->uz_name, (uintmax_t)z->uz_size,
(intmax_t)(allocs - frees), cachefree,
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h
+++ sys/vm/uma_int.h
@@ -471,6 +471,8 @@
long uzd_imax; /* maximum item count this period */
long uzd_imin; /* minimum item count this period */
long uzd_wss; /* working set size estimate */
+ smr_seq_t uzd_seq; /* Lowest queued seq. */
+ struct mtx uzd_lock; /* Lock for the domain */
} __aligned(CACHE_LINE_SIZE);
typedef struct uma_zone_domain * uma_zone_domain_t;
@@ -480,56 +482,47 @@
*/
struct uma_zone {
/* Offset 0, used in alloc/free fast/medium fast path and const. */
- uma_keg_t uz_keg; /* This zone's keg if !CACHE */
- struct uma_zone_domain *uz_domain; /* per-domain buckets */
uint32_t uz_flags; /* Flags inherited from kegs */
uint32_t uz_size; /* Size inherited from kegs */
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
smr_t uz_smr; /* Safe memory reclaim context. */
uint64_t uz_max_items; /* Maximum number of items to alloc */
+ uint64_t uz_bkt_max; /* Maximum bucket cache size */
uint32_t uz_sleepers; /* Threads sleeping on limit */
uint16_t uz_bucket_size; /* Number of items in full bucket */
uint16_t uz_bucket_size_max; /* Maximum number of bucket items */
+ counter_u64_t uz_xdomain; /* Total number of cross-domain frees */
/* Offset 64, used in bucket replenish. */
+ uma_keg_t uz_keg; /* This zone's keg if !CACHE */
uma_import uz_import; /* Import new memory to cache. */
uma_release uz_release; /* Release memory from cache. */
void *uz_arg; /* Import/release argument. */
uma_init uz_init; /* Initializer for each item */
uma_fini uz_fini; /* Finalizer for each item. */
- void *uz_spare1;
- uint64_t uz_bkt_count; /* Items in bucket cache */
- uint64_t uz_bkt_max; /* Maximum bucket cache size */
+ volatile uint64_t uz_items; /* Total items count & sleepers */
+ uint64_t uz_sleeps; /* Total number of alloc sleeps */
- /* Offset 128 Rare. */
- /*
- * The lock is placed here to avoid adjacent line prefetcher
- * in fast paths and to take up space near infrequently accessed
- * members to reduce alignment overhead.
- */
- struct mtx uz_lock; /* Lock for the zone */
+ /* Offset 128 Rare stats, misc read-only. */
LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
- const char *uz_name; /* Text name of the zone */
- /* The next two fields are used to print a rate-limited warnings. */
- const char *uz_warning; /* Warning to print on failure */
- struct timeval uz_ratecheck; /* Warnings rate-limiting */
- struct task uz_maxaction; /* Task to run when at limit */
- uint16_t uz_bucket_size_min; /* Min number of items in bucket */
-
- struct mtx_padalign uz_cross_lock; /* Cross domain free lock */
-
- /* Offset 256+, stats and misc. */
counter_u64_t uz_allocs; /* Total number of allocations */
counter_u64_t uz_frees; /* Total number of frees */
counter_u64_t uz_fails; /* Total number of alloc failures */
- uint64_t uz_sleeps; /* Total number of alloc sleeps */
- uint64_t uz_xdomain; /* Total number of cross-domain frees */
- volatile uint64_t uz_items; /* Total items count & sleepers */
-
+ const char *uz_name; /* Text name of the zone */
char *uz_ctlname; /* sysctl safe name string. */
- struct sysctl_oid *uz_oid; /* sysctl oid pointer. */
int uz_namecnt; /* duplicate name count. */
+ uint16_t uz_bucket_size_min; /* Min number of items in bucket */
+ uint16_t uz_pad0;
+
+ /* Offset 192, rare read-only. */
+ struct sysctl_oid *uz_oid; /* sysctl oid pointer. */
+ const char *uz_warning; /* Warning to print on failure */
+ struct timeval uz_ratecheck; /* Warnings rate-limiting */
+ struct task uz_maxaction; /* Task to run when at limit */
+
+ /* Offset 256. */
+ struct mtx uz_cross_lock; /* Cross domain free lock */
/*
* This HAS to be the last item because we adjust the zone size
@@ -537,7 +530,7 @@
*/
struct uma_cache uz_cpu[]; /* Per cpu caches */
- /* uz_domain follows here. */
+ /* domains follow here. */
};
/*
@@ -582,25 +575,35 @@
#define KEG_GET(zone, keg) do { \
(keg) = (zone)->uz_keg; \
- KASSERT((void *)(keg) != (void *)&(zone)->uz_lock, \
+ KASSERT((void *)(keg) != NULL, \
("%s: Invalid zone %p type", __func__, (zone))); \
} while (0)
-#define ZONE_LOCK_INIT(z, lc) \
- do { \
- if ((lc)) \
- mtx_init(&(z)->uz_lock, (z)->uz_name, \
- (z)->uz_name, MTX_DEF | MTX_DUPOK); \
- else \
- mtx_init(&(z)->uz_lock, (z)->uz_name, \
- "UMA zone", MTX_DEF | MTX_DUPOK); \
+/* Domains are contiguous after the last CPU */
+#define ZDOM_GET(z, n) \
+ (&((uma_zone_domain_t)&(z)->uz_cpu[mp_maxid + 1])[n])
+
+#define ZDOM_GET_LOCKED(z, n) \
+ ({ uma_zone_domain_t zd = ZDOM_GET((z), (n)); ZDOM_LOCK(zd); zd;})
+
+#define ZDOM_LOCK_INIT(z, zdom, lc) \
+ do { \
+ if ((lc)) \
+ mtx_init(&(zdom)->uzd_lock, (z)->uz_name, \
+ (z)->uz_name, MTX_DEF | MTX_DUPOK); \
+ else \
+ mtx_init(&(zdom)->uzd_lock, (z)->uz_name, \
+ "UMA zone", MTX_DEF | MTX_DUPOK); \
} while (0)
+#define ZDOM_LOCK_FINI(z) mtx_destroy(&(z)->uzd_lock)
+#define ZDOM_LOCK_ASSERT(z) mtx_assert(&(z)->uzd_lock, MA_OWNED)
+
+#define ZDOM_LOCK(z) mtx_lock(&(z)->uzd_lock)
+#define ZDOM_OWNED(z) (mtx_owner(&(z)->uzd_lock) != NULL)
+#define ZDOM_UNLOCK(z) mtx_unlock(&(z)->uzd_lock)
-#define ZONE_LOCK(z) mtx_lock(&(z)->uz_lock)
-#define ZONE_TRYLOCK(z) mtx_trylock(&(z)->uz_lock)
-#define ZONE_UNLOCK(z) mtx_unlock(&(z)->uz_lock)
-#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock)
-#define ZONE_LOCK_ASSERT(z) mtx_assert(&(z)->uz_lock, MA_OWNED)
+#define ZONE_LOCK(z) ZDOM_LOCK(ZDOM_GET((z), 0))
+#define ZONE_UNLOCK(z) ZDOM_UNLOCK(ZDOM_GET((z), 0))
#define ZONE_CROSS_LOCK_INIT(z) \
mtx_init(&(z)->uz_cross_lock, "UMA Cross", NULL, MTX_DEF)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Apr 22, 8:15 AM (19 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31964125
Default Alt Text
D23673.id68283.diff (31 KB)
Attached To
Mode
D23673: Use per-domain locks for the zone layer.
Attached
Detach File
Event Timeline
Log In to Comment