Page Menu | Home | FreeBSD

D23673.id68283.diff
No One | Temporary

D23673.id68283.diff

Index: lib/libmemstat/memstat_uma.c
===================================================================
--- lib/libmemstat/memstat_uma.c
+++ lib/libmemstat/memstat_uma.c
@@ -425,12 +425,13 @@
(unsigned long )uz.uz_frees);
mtp->mt_failures = kvm_counter_u64_fetch(kvm,
(unsigned long )uz.uz_fails);
+ mtp->mt_xdomain = kvm_counter_u64_fetch(kvm,
+ (unsigned long )uz.uz_xdomain);
mtp->mt_sleeps = uz.uz_sleeps;
/* See comment above in memstat_sysctl_uma(). */
if (mtp->mt_numallocs < mtp->mt_numfrees)
mtp->mt_numallocs = mtp->mt_numfrees;
- mtp->mt_xdomain = uz.uz_xdomain;
if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
goto skip_percpu;
for (i = 0; i < mp_maxid + 1; i++) {
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -518,6 +518,9 @@
{
struct uma_bucket_zone *ubz;
+ if (bucket->ub_cnt != 0)
+ bucket_drain(zone, bucket);
+
KASSERT(bucket->ub_cnt == 0,
("bucket_free: Freeing a non free bucket."));
KASSERT(bucket->ub_seq == SMR_SEQ_INVALID,
@@ -537,18 +540,60 @@
uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
}
+/*
+ * Acquire the domain lock; grow the bucket size if the lock was already held.
+ */
+static uma_zone_domain_t
+zone_domain_lock(uma_zone_t zone, int domain)
+{
+ uma_zone_domain_t zdom;
+ bool lockfail;
+
+ zdom = ZDOM_GET(zone, domain);
+ lockfail = false;
+ if (ZDOM_OWNED(zdom))
+ lockfail = true;
+ ZDOM_LOCK(zdom);
+ /* Unsynchronized: uz_bucket_size is a heuristic and need not be precise. */
+ if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
+ zone->uz_bucket_size++;
+ return (zdom);
+}
+
+static int
+zone_domain_lowest(uma_zone_t zone, int pref)
+{
+ long least, nitems;
+ int domain;
+ int i;
+
+ least = LONG_MAX;
+ domain = 0;
+ for (i = 0; i < vm_ndomains; i++) {
+ nitems = ZDOM_GET(zone, i)->uzd_nitems;
+ if (nitems < least) {
+ domain = i;
+ least = nitems;
+ } else if (nitems == least && (i == pref || domain == pref))
+ domain = pref;
+ }
+
+ return (domain);
+}
+
/*
* Attempt to satisfy an allocation by retrieving a full bucket from one of the
* zone's caches. If a bucket is found the zone is not locked on return.
*/
static uma_bucket_t
-zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom)
+zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, bool reclaim)
{
uma_bucket_t bucket;
+ long max;
int i;
bool dtor = false;
- ZONE_LOCK_ASSERT(zone);
+ ZDOM_LOCK_ASSERT(zdom);
if ((bucket = STAILQ_FIRST(&zdom->uzd_buckets)) == NULL)
return (NULL);
@@ -560,14 +605,29 @@
return (NULL);
bucket->ub_seq = SMR_SEQ_INVALID;
dtor = (zone->uz_dtor != NULL) || UMA_ALWAYS_CTORDTOR;
+ if (STAILQ_NEXT(bucket, ub_link) != NULL)
+ zdom->uzd_seq = STAILQ_NEXT(bucket, ub_link)->ub_seq;
}
MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link);
zdom->uzd_nitems -= bucket->ub_cnt;
if (zdom->uzd_imin > zdom->uzd_nitems)
zdom->uzd_imin = zdom->uzd_nitems;
- zone->uz_bkt_count -= bucket->ub_cnt;
- ZONE_UNLOCK(zone);
+ if (reclaim) {
+ long cnt;
+ /*
+ * Shift the bounds of the current WSS interval to avoid
+ * perturbing the estimate.
+ */
+ max = zdom->uzd_imax;
+ do {
+ if (max < bucket->ub_cnt)
+ cnt = 0;
+ else
+ cnt = max - bucket->ub_cnt;
+ } while (atomic_fcmpset_long(&zdom->uzd_imax, &max, cnt) == 0);
+ }
+ ZDOM_UNLOCK(zdom);
if (dtor)
for (i = 0; i < bucket->ub_cnt; i++)
item_dtor(zone, bucket->ub_bucket[i], zone->uz_size,
@@ -579,22 +639,42 @@
/*
* Insert a full bucket into the specified cache. The "ws" parameter indicates
* whether the bucket's contents should be counted as part of the zone's working
- * set.
+ * set. The bucket is freed instead if the domain would reach uz_bkt_max items.
*/
static void
-zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
+zone_put_bucket(uma_zone_t zone, int domain, uma_bucket_t bucket, void *udata,
const bool ws)
{
+ uma_zone_domain_t zdom;
+ long max;
- ZONE_LOCK_ASSERT(zone);
- KASSERT(!ws || zone->uz_bkt_count < zone->uz_bkt_max,
+ zdom = zone_domain_lock(zone, domain);
+
+ KASSERT(!ws || zdom->uzd_nitems < zone->uz_bkt_max,
("%s: zone %p overflow", __func__, zone));
- STAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
+ /*
+ * Account for the new items; if "ws" is set, raise uzd_imax to the new count.
+ */
zdom->uzd_nitems += bucket->ub_cnt;
- if (ws && zdom->uzd_imax < zdom->uzd_nitems)
- zdom->uzd_imax = zdom->uzd_nitems;
- zone->uz_bkt_count += bucket->ub_cnt;
+ if (ws) {
+ max = zdom->uzd_imax;
+ do {
+ if (max > zdom->uzd_nitems)
+ break;
+ } while (atomic_fcmpset_long(&zdom->uzd_imax, &max,
+ zdom->uzd_nitems) == 0);
+ }
+ if (__predict_true(zdom->uzd_nitems < zone->uz_bkt_max)) {
+ if (STAILQ_EMPTY(&zdom->uzd_buckets))
+ zdom->uzd_seq = bucket->ub_seq;
+ STAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
+ ZDOM_UNLOCK(zdom);
+ return;
+ }
+ zdom->uzd_nitems -= bucket->ub_cnt;
+ ZDOM_UNLOCK(zdom);
+ bucket_free(zone, bucket, udata);
}
/* Pops an item out of a per-cpu cache bucket. */
@@ -736,6 +816,37 @@
cache_bucket_copy(b2, &b3);
}
+static uma_bucket_t
+cache_fetch_bucket(uma_zone_t zone, uma_cache_t cache, int domain)
+{
+ uma_zone_domain_t zdom;
+ uma_bucket_t bucket;
+
+ /*
+ * Avoid the lock if possible.
+ */
+ zdom = ZDOM_GET(zone, domain);
+ if (zdom->uzd_nitems == 0)
+ return (NULL);
+
+ if ((cache_uz_flags(cache) & UMA_ZONE_SMR) != 0 &&
+ !smr_poll(zone->uz_smr, zdom->uzd_seq, false))
+ return (NULL);
+
+ /*
+ * Check the zone's cache of buckets.
+ */
+ zdom = zone_domain_lock(zone, domain);
+ if ((bucket = zone_fetch_bucket(zone, zdom, false)) != NULL) {
+ KASSERT(bucket->ub_cnt != 0,
+ ("uma_zalloc_arg: Returning an empty bucket."));
+ return (bucket);
+ }
+ ZDOM_UNLOCK(zdom);
+
+ return (NULL);
+}
+
static void
zone_log_warning(uma_zone_t zone)
{
@@ -787,10 +898,12 @@
{
long wss;
+ ZDOM_LOCK(zdom);
MPASS(zdom->uzd_imax >= zdom->uzd_imin);
wss = zdom->uzd_imax - zdom->uzd_imin;
zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5;
+ ZDOM_UNLOCK(zdom);
}
/*
@@ -853,10 +966,8 @@
KEG_UNLOCK(keg, 0);
update_wss:
- ZONE_LOCK(zone);
for (int i = 0; i < vm_ndomains; i++)
- zone_domain_update_wss(&zone->uz_domain[i]);
- ZONE_UNLOCK(zone);
+ zone_domain_update_wss(ZDOM_GET(zone, i));
}
/*
@@ -975,7 +1086,7 @@
{
int i;
- if (bucket == NULL || bucket->ub_cnt == 0)
+ if (bucket->ub_cnt == 0)
return;
if ((zone->uz_flags & UMA_ZONE_SMR) != 0 &&
@@ -1033,20 +1144,16 @@
CPU_FOREACH(cpu) {
cache = &zone->uz_cpu[cpu];
bucket = cache_bucket_unload_alloc(cache);
- if (bucket != NULL) {
- bucket_drain(zone, bucket);
+ if (bucket != NULL)
bucket_free(zone, bucket, NULL);
- }
bucket = cache_bucket_unload_free(cache);
if (bucket != NULL) {
bucket->ub_seq = seq;
- bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
}
bucket = cache_bucket_unload_cross(cache);
if (bucket != NULL) {
bucket->ub_seq = seq;
- bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
}
}
@@ -1060,10 +1167,8 @@
if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
return;
- ZONE_LOCK(zone);
zone->uz_bucket_size =
(zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
- ZONE_UNLOCK(zone);
}
static void
@@ -1078,10 +1183,7 @@
b1 = b2 = b3 = NULL;
critical_enter();
- if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH)
- domain = PCPU_GET(domain);
- else
- domain = 0;
+ domain = PCPU_GET(domain);
cache = &zone->uz_cpu[curcpu];
b1 = cache_bucket_unload_alloc(cache);
@@ -1095,25 +1197,12 @@
}
critical_exit();
- ZONE_LOCK(zone);
- if (b1 != NULL && b1->ub_cnt != 0) {
- zone_put_bucket(zone, &zone->uz_domain[domain], b1, false);
- b1 = NULL;
- }
- if (b2 != NULL && b2->ub_cnt != 0) {
- zone_put_bucket(zone, &zone->uz_domain[domain], b2, false);
- b2 = NULL;
- }
- ZONE_UNLOCK(zone);
-
if (b1 != NULL)
- bucket_free(zone, b1, NULL);
+ zone_put_bucket(zone, domain, b1, NULL, false);
if (b2 != NULL)
- bucket_free(zone, b2, NULL);
- if (b3 != NULL) {
- bucket_drain(zone, b3);
+ zone_put_bucket(zone, domain, b2, NULL, false);
+ if (b3 != NULL)
bucket_free(zone, b3, NULL);
- }
}
/*
@@ -1161,31 +1250,28 @@
{
uma_zone_domain_t zdom;
uma_bucket_t bucket;
- long target, tofree;
+ long target;
int i;
+ /*
+ * Shrink the zone bucket size to ensure that the per-CPU caches
+ * don't grow too large.
+ */
+ if (zone->uz_bucket_size > zone->uz_bucket_size_min)
+ zone->uz_bucket_size--;
+
for (i = 0; i < vm_ndomains; i++) {
/*
* The cross bucket is partially filled and not part of
* the item count. Reclaim it individually here.
*/
- zdom = &zone->uz_domain[i];
+ zdom = ZDOM_GET(zone, i);
ZONE_CROSS_LOCK(zone);
bucket = zdom->uzd_cross;
zdom->uzd_cross = NULL;
ZONE_CROSS_UNLOCK(zone);
- if (bucket != NULL) {
- bucket_drain(zone, bucket);
+ if (bucket != NULL)
bucket_free(zone, bucket, NULL);
- }
-
- /*
- * Shrink the zone bucket size to ensure that the per-CPU caches
- * don't grow too large.
- */
- ZONE_LOCK(zone);
- if (i == 0 && zone->uz_bucket_size > zone->uz_bucket_size_min)
- zone->uz_bucket_size--;
/*
* If we were asked to drain the zone, we are done only once
@@ -1195,29 +1281,17 @@
* then the estimate will grow at the end of this interval and
* we ignore the historical average.
*/
+ ZDOM_LOCK(zdom);
target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
zdom->uzd_imin);
while (zdom->uzd_nitems > target) {
- bucket = STAILQ_FIRST(&zdom->uzd_buckets);
+ bucket = zone_fetch_bucket(zone, zdom, true);
if (bucket == NULL)
break;
- tofree = bucket->ub_cnt;
- STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link);
- zdom->uzd_nitems -= tofree;
-
- /*
- * Shift the bounds of the current WSS interval to avoid
- * perturbing the estimate.
- */
- zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree);
- zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree);
-
- ZONE_UNLOCK(zone);
- bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
- ZONE_LOCK(zone);
+ ZDOM_LOCK(zdom);
}
- ZONE_UNLOCK(zone);
+ ZDOM_UNLOCK(zdom);
}
}
@@ -1311,7 +1385,8 @@
while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
if (waitok == M_NOWAIT)
goto out;
- msleep(zone, &zone->uz_lock, PVM, "zonedrain", 1);
+ msleep(zone, &ZDOM_GET(zone, 0)->uzd_lock, PVM, "zonedrain",
+ 1);
}
zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
ZONE_UNLOCK(zone);
@@ -2198,6 +2273,7 @@
zone->uz_allocs = counter_u64_alloc(M_WAITOK);
zone->uz_frees = counter_u64_alloc(M_WAITOK);
zone->uz_fails = counter_u64_alloc(M_WAITOK);
+ zone->uz_xdomain = counter_u64_alloc(M_WAITOK);
}
static void
@@ -2316,20 +2392,15 @@
"Total zone limit sleeps");
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"bucket_max", CTLFLAG_RD, &zone->uz_bkt_max, 0,
- "Maximum number of items in the bucket cache");
- SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
- "bucket_cnt", CTLFLAG_RD, &zone->uz_bkt_count, 0,
- "Number of items in the bucket cache");
+ "Maximum number of items in each domain's bucket cache");
/*
* Per-domain zone information.
*/
domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
- if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0)
- domains = 1;
for (i = 0; i < domains; i++) {
- zdom = &zone->uz_domain[i];
+ zdom = ZDOM_GET(zone, i);
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD, NULL, "");
SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
@@ -2366,8 +2437,8 @@
SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"fails", CTLFLAG_RD, &zone->uz_fails,
"Number of allocation failures");
- SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
- "xdomain", CTLFLAG_RD, &zone->uz_xdomain, 0,
+ SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "xdomain", CTLFLAG_RD, &zone->uz_xdomain,
"Free calls from the wrong domain");
}
@@ -2414,6 +2485,7 @@
{
struct uma_zone_count cnt;
struct uma_zctor_args *arg = udata;
+ uma_zone_domain_t zdom;
uma_zone_t zone = mem;
uma_zone_t z;
uma_keg_t keg;
@@ -2426,15 +2498,12 @@
zone->uz_init = NULL;
zone->uz_fini = NULL;
zone->uz_sleeps = 0;
- zone->uz_xdomain = 0;
zone->uz_bucket_size = 0;
zone->uz_bucket_size_min = 0;
zone->uz_bucket_size_max = BUCKET_MAX;
zone->uz_flags = (arg->flags & UMA_ZONE_SMR);
zone->uz_warning = NULL;
/* The domain structures follow the cpu structures. */
- zone->uz_domain =
- (struct uma_zone_domain *)&zone->uz_cpu[mp_maxid + 1];
zone->uz_bkt_max = ULONG_MAX;
timevalclear(&zone->uz_ratecheck);
@@ -2443,11 +2512,13 @@
cnt.count = 0;
zone_foreach(zone_count, &cnt);
zone->uz_namecnt = cnt.count;
- ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
ZONE_CROSS_LOCK_INIT(zone);
- for (i = 0; i < vm_ndomains; i++)
- STAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
+ for (i = 0; i < vm_ndomains; i++) {
+ zdom = ZDOM_GET(zone, i);
+ ZDOM_LOCK_INIT(zone, zdom, (arg->flags & UMA_ZONE_MTXCLASS));
+ STAILQ_INIT(&zdom->uzd_buckets);
+ }
#ifdef INVARIANTS
if (arg->uminit == trash_init && arg->fini == trash_fini)
@@ -2599,6 +2670,7 @@
{
uma_zone_t zone;
uma_keg_t keg;
+ int i;
zone = (uma_zone_t)arg;
@@ -2610,13 +2682,8 @@
rw_wlock(&uma_rwlock);
LIST_REMOVE(zone, uz_link);
rw_wunlock(&uma_rwlock);
- /*
- * XXX there are some races here where
- * the zone can be drained but zone lock
- * released and then refilled before we
- * remove it... we dont care for now
- */
zone_reclaim(zone, M_WAITOK, true);
+
/*
* We only destroy kegs from non secondary/non cache zones.
*/
@@ -2630,8 +2697,10 @@
counter_u64_free(zone->uz_allocs);
counter_u64_free(zone->uz_frees);
counter_u64_free(zone->uz_fails);
+ counter_u64_free(zone->uz_xdomain);
free(zone->uz_ctlname, M_UMA);
- ZONE_LOCK_FINI(zone);
+ for (i = 0; i < vm_ndomains; i++)
+ ZDOM_LOCK_FINI(ZDOM_GET(zone, i));
ZONE_CROSS_LOCK_FINI(zone);
}
@@ -3235,7 +3304,6 @@
uma_zone_domain_t zdom;
uma_bucket_t bucket;
int domain;
- bool lockfail;
CRITICAL_ASSERT(curthread);
@@ -3246,7 +3314,7 @@
* SMR Zones can't re-use the free bucket until the sequence has
* expired.
*/
- if ((zone->uz_flags & UMA_ZONE_SMR) == 0 &&
+ if ((cache_uz_flags(cache) & UMA_ZONE_SMR) == 0 &&
cache->uc_freebucket.ucb_cnt != 0) {
cache_bucket_swap(&cache->uc_freebucket,
&cache->uc_allocbucket);
@@ -3257,9 +3325,14 @@
* Discard any empty allocation bucket while we hold no locks.
*/
bucket = cache_bucket_unload_alloc(cache);
+ domain = PCPU_GET(domain);
critical_exit();
- if (bucket != NULL)
+
+ if (bucket != NULL) {
+ KASSERT(bucket->ub_cnt == 0,
+ ("cache_alloc: Entered with non-empty alloc bucket."));
bucket_free(zone, bucket, udata);
+ }
/* Short-circuit for zones without buckets and low memory. */
if (zone->uz_bucket_size == 0 || bucketdisable) {
@@ -3269,60 +3342,17 @@
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
- * we must go back to the zone. This requires the zone lock, so we
+ * we must go back to the zone. This requires the zdom lock, so we
* must drop the critical section, then re-acquire it when we go back
* to the cache. Since the critical section is released, we may be
* preempted or migrate. As such, make sure not to maintain any
* thread-local state specific to the cache from prior to releasing
* the critical section.
*/
- lockfail = 0;
- if (ZONE_TRYLOCK(zone) == 0) {
- /* Record contention to size the buckets. */
- ZONE_LOCK(zone);
- lockfail = 1;
- }
-
- /* See if we lost the race to fill the cache. */
- critical_enter();
- cache = &zone->uz_cpu[curcpu];
- if (cache->uc_allocbucket.ucb_bucket != NULL) {
- ZONE_UNLOCK(zone);
- return (true);
- }
-
- /*
- * Check the zone's cache of buckets.
- */
- if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH) {
- domain = PCPU_GET(domain);
- zdom = &zone->uz_domain[domain];
- } else {
- domain = UMA_ANYDOMAIN;
- zdom = &zone->uz_domain[0];
- }
-
- if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) {
- KASSERT(bucket->ub_cnt != 0,
- ("uma_zalloc_arg: Returning an empty bucket."));
- cache_bucket_load_alloc(cache, bucket);
- return (true);
- }
- /* We are no longer associated with this CPU. */
- critical_exit();
-
- /*
- * We bump the uz count when the cache size is insufficient to
- * handle the working set.
- */
- if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
- zone->uz_bucket_size++;
- ZONE_UNLOCK(zone);
+ bucket = cache_fetch_bucket(zone, cache, domain);
+ if (bucket == NULL)
+ bucket = zone_alloc_bucket(zone, udata, domain, flags);
- /*
- * Fill a bucket and attempt to use it as the alloc bucket.
- */
- bucket = zone_alloc_bucket(zone, udata, domain, flags);
CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
zone->uz_name, zone, bucket);
if (bucket == NULL) {
@@ -3335,24 +3365,24 @@
* initialized bucket to make this less likely or claim
* the memory directly.
*/
- ZONE_LOCK(zone);
critical_enter();
cache = &zone->uz_cpu[curcpu];
if (cache->uc_allocbucket.ucb_bucket == NULL &&
- ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0 ||
+ ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) == 0 ||
domain == PCPU_GET(domain))) {
+ zdom = ZDOM_GET(zone, domain);
+ atomic_add_long(&zdom->uzd_imax, bucket->ub_cnt);
cache_bucket_load_alloc(cache, bucket);
- zdom->uzd_imax += bucket->ub_cnt;
- } else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
- critical_exit();
- ZONE_UNLOCK(zone);
- bucket_drain(zone, bucket);
- bucket_free(zone, bucket, udata);
- critical_enter();
return (true);
- } else
- zone_put_bucket(zone, zdom, bucket, false);
- ZONE_UNLOCK(zone);
+ }
+
+ /*
+ * We lost the race, release this bucket and start over.
+ */
+ critical_exit();
+ zone_put_bucket(zone, domain, bucket, udata, false);
+ critical_enter();
+
return (true);
}
@@ -3745,6 +3775,8 @@
/* Avoid allocs targeting empty domains. */
if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
domain = UMA_ANYDOMAIN;
+ if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0)
+ domain = UMA_ANYDOMAIN;
if (zone->uz_max_items > 0)
maxbucket = zone_alloc_limit(zone, zone->uz_bucket_size,
@@ -4044,7 +4076,7 @@
while (bucket->ub_cnt > 0) {
item = bucket->ub_bucket[bucket->ub_cnt - 1];
domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
- zdom = &zone->uz_domain[domain];
+ zdom = ZDOM_GET(zone, domain);
if (zdom->uzd_cross == NULL) {
zdom->uzd_cross = bucket_alloc(zone, udata, M_NOWAIT);
if (zdom->uzd_cross == NULL)
@@ -4060,29 +4092,16 @@
bucket->ub_cnt--;
}
ZONE_CROSS_UNLOCK(zone);
- if (!STAILQ_EMPTY(&fullbuckets)) {
- ZONE_LOCK(zone);
- while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
- STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
- if (zone->uz_bkt_count >= zone->uz_bkt_max) {
- ZONE_UNLOCK(zone);
- bucket_drain(zone, b);
- bucket_free(zone, b, udata);
- ZONE_LOCK(zone);
- } else {
- domain = _vm_phys_domain(
- pmap_kextract(
- (vm_offset_t)b->ub_bucket[0]));
- zdom = &zone->uz_domain[domain];
- zone_put_bucket(zone, zdom, b, true);
- }
- }
- ZONE_UNLOCK(zone);
- }
- if (bucket->ub_cnt != 0)
- bucket_drain(zone, bucket);
- bucket->ub_seq = SMR_SEQ_INVALID;
+ if (bucket->ub_cnt == 0)
+ bucket->ub_seq = SMR_SEQ_INVALID;
bucket_free(zone, bucket, udata);
+
+ while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
+ STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
+ domain = _vm_phys_domain(pmap_kextract(
+ (vm_offset_t)b->ub_bucket[0]));
+ zone_put_bucket(zone, domain, b, udata, true);
+ }
}
#endif
@@ -4090,7 +4109,6 @@
zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
int domain, int itemdomain)
{
- uma_zone_domain_t zdom;
#ifdef NUMA
/*
@@ -4107,32 +4125,16 @@
/*
* Attempt to save the bucket in the zone's domain bucket cache.
- *
- * We bump the uz count when the cache size is insufficient to
- * handle the working set.
*/
- if (ZONE_TRYLOCK(zone) == 0) {
- /* Record contention to size the buckets. */
- ZONE_LOCK(zone);
- if (zone->uz_bucket_size < zone->uz_bucket_size_max)
- zone->uz_bucket_size++;
- }
-
CTR3(KTR_UMA,
"uma_zfree: zone %s(%p) putting bucket %p on free list",
zone->uz_name, zone, bucket);
/* ub_cnt is pointing to the last free item */
KASSERT(bucket->ub_cnt == bucket->ub_entries,
("uma_zfree: Attempting to insert partial bucket onto the full list.\n"));
- if (zone->uz_bkt_count >= zone->uz_bkt_max) {
- ZONE_UNLOCK(zone);
- bucket_drain(zone, bucket);
- bucket_free(zone, bucket, udata);
- } else {
- zdom = &zone->uz_domain[itemdomain];
- zone_put_bucket(zone, zdom, bucket, true);
- ZONE_UNLOCK(zone);
- }
+ if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0)
+ itemdomain = zone_domain_lowest(zone, itemdomain);
+ zone_put_bucket(zone, itemdomain, bucket, udata, true);
}
/*
@@ -4165,19 +4167,21 @@
* cross bucket if the current domain and itemdomain do not match.
*/
cbucket = &cache->uc_freebucket;
+ domain = PCPU_GET(domain);
#ifdef NUMA
- if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) {
- domain = PCPU_GET(domain);
+ if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) {
if (domain != itemdomain) {
cbucket = &cache->uc_crossbucket;
if (cbucket->ucb_cnt != 0)
- atomic_add_64(&zone->uz_xdomain,
+ counter_u64_add(zone->uz_xdomain,
cbucket->ucb_cnt);
}
} else
#endif
- itemdomain = domain = 0;
+ itemdomain = domain;
bucket = cache_bucket_unload(cbucket);
+ KASSERT(bucket == NULL || bucket->ub_cnt != 0,
+ ("cache_free: Entered with empty free bucket."));
/* We are no longer associated with this CPU. */
critical_exit();
@@ -4213,7 +4217,7 @@
* is already populated we will fall through and attempt to populate
* the free bucket.
*/
- if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) {
+ if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) {
domain = PCPU_GET(domain);
if (domain != itemdomain &&
cache->uc_crossbucket.ucb_bucket == NULL) {
@@ -4408,7 +4412,7 @@
}
if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
zone->uz_bucket_size_min = zone->uz_bucket_size_max;
- zone->uz_bkt_max = nitems;
+ zone->uz_bkt_max = nitems / vm_ndomains;
ZONE_UNLOCK(zone);
}
@@ -4622,7 +4626,6 @@
} else
kva = 0;
- ZONE_LOCK(zone);
MPASS(keg->uk_kva == 0);
keg->uk_kva = kva;
keg->uk_offset = 0;
@@ -4635,7 +4638,6 @@
keg->uk_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
zone->uz_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
zone_update_caches(zone);
- ZONE_UNLOCK(zone);
return (1);
}
@@ -4842,8 +4844,8 @@
}
allocs += counter_u64_fetch(z->uz_allocs);
frees += counter_u64_fetch(z->uz_frees);
+ xdomain += counter_u64_fetch(z->uz_xdomain);
sleeps += z->uz_sleeps;
- xdomain += z->uz_xdomain;
if (cachefreep != NULL)
*cachefreep = cachefree;
if (allocsp != NULL)
@@ -4887,23 +4889,15 @@
for (i = 0; i < vm_ndomains; i++) {
- zdom = &z->uz_domain[i];
+ zdom = ZDOM_GET(z, i);
uth->uth_zone_free += zdom->uzd_nitems;
}
uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
uth->uth_frees = counter_u64_fetch(z->uz_frees);
uth->uth_fails = counter_u64_fetch(z->uz_fails);
+ uth->uth_xdomain = counter_u64_fetch(z->uz_xdomain);
uth->uth_sleeps = z->uz_sleeps;
- uth->uth_xdomain = z->uz_xdomain;
- /*
- * While it is not normally safe to access the cache bucket pointers
- * while not on the CPU that owns the cache, we only allow the pointers
- * to be exchanged without the zone lock held, not invalidated, so
- * accept the possible race associated with bucket exchange during
- * monitoring. Use atomic_load_ptr() to ensure that the bucket pointers
- * are loaded only once.
- */
for (i = 0; i < mp_maxid + 1; i++) {
bzero(&ups[i], sizeof(*ups));
if (internal || CPU_ABSENT(i))
@@ -4964,7 +4958,6 @@
}
LIST_FOREACH(z, &kz->uk_zones, uz_link) {
bzero(&uth, sizeof(uth));
- ZONE_LOCK(z);
strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
uth.uth_align = kz->uk_align;
uth.uth_size = kz->uk_size;
@@ -4989,7 +4982,6 @@
uth.uth_zone_flags = UTH_ZONE_SECONDARY;
uma_vm_zone_stats(&uth, z, &sbuf, ups,
kz->uk_flags & UMA_ZFLAG_INTERNAL);
- ZONE_UNLOCK(z);
(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
for (i = 0; i < mp_maxid + 1; i++)
(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
@@ -4997,11 +4989,9 @@
}
LIST_FOREACH(z, &uma_cachezones, uz_link) {
bzero(&uth, sizeof(uth));
- ZONE_LOCK(z);
strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
uth.uth_size = z->uz_size;
uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
- ZONE_UNLOCK(z);
(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
for (i = 0; i < mp_maxid + 1; i++)
(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
@@ -5260,7 +5250,7 @@
uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps,
xdomain);
for (i = 0; i < vm_ndomains; i++) {
- *cachefree += z->uz_domain[i].uzd_nitems;
+ *cachefree += ZDOM_GET(z, i)->uzd_nitems;
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z)))
*cachefree += kz->uk_domain[i].ud_free_items;
@@ -5354,7 +5344,7 @@
LIST_FOREACH(z, &uma_cachezones, uz_link) {
uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
for (i = 0; i < vm_ndomains; i++)
- cachefree += z->uz_domain[i].uzd_nitems;
+ cachefree += ZDOM_GET(z, i)->uzd_nitems;
db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
z->uz_name, (uintmax_t)z->uz_size,
(intmax_t)(allocs - frees), cachefree,
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h
+++ sys/vm/uma_int.h
@@ -471,6 +471,8 @@
long uzd_imax; /* maximum item count this period */
long uzd_imin; /* minimum item count this period */
long uzd_wss; /* working set size estimate */
+ smr_seq_t uzd_seq; /* Lowest queued seq. */
+ struct mtx uzd_lock; /* Lock for the domain */
} __aligned(CACHE_LINE_SIZE);
typedef struct uma_zone_domain * uma_zone_domain_t;
@@ -480,56 +482,47 @@
*/
struct uma_zone {
/* Offset 0, used in alloc/free fast/medium fast path and const. */
- uma_keg_t uz_keg; /* This zone's keg if !CACHE */
- struct uma_zone_domain *uz_domain; /* per-domain buckets */
uint32_t uz_flags; /* Flags inherited from kegs */
uint32_t uz_size; /* Size inherited from kegs */
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
smr_t uz_smr; /* Safe memory reclaim context. */
uint64_t uz_max_items; /* Maximum number of items to alloc */
+ uint64_t uz_bkt_max; /* Maximum bucket cache size */
uint32_t uz_sleepers; /* Threads sleeping on limit */
uint16_t uz_bucket_size; /* Number of items in full bucket */
uint16_t uz_bucket_size_max; /* Maximum number of bucket items */
+ counter_u64_t uz_xdomain; /* Total number of cross-domain frees */
/* Offset 64, used in bucket replenish. */
+ uma_keg_t uz_keg; /* This zone's keg if !CACHE */
uma_import uz_import; /* Import new memory to cache. */
uma_release uz_release; /* Release memory from cache. */
void *uz_arg; /* Import/release argument. */
uma_init uz_init; /* Initializer for each item */
uma_fini uz_fini; /* Finalizer for each item. */
- void *uz_spare1;
- uint64_t uz_bkt_count; /* Items in bucket cache */
- uint64_t uz_bkt_max; /* Maximum bucket cache size */
+ volatile uint64_t uz_items; /* Total items count & sleepers */
+ uint64_t uz_sleeps; /* Total number of alloc sleeps */
- /* Offset 128 Rare. */
- /*
- * The lock is placed here to avoid adjacent line prefetcher
- * in fast paths and to take up space near infrequently accessed
- * members to reduce alignment overhead.
- */
- struct mtx uz_lock; /* Lock for the zone */
+ /* Offset 128 Rare stats, misc read-only. */
LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
- const char *uz_name; /* Text name of the zone */
- /* The next two fields are used to print a rate-limited warnings. */
- const char *uz_warning; /* Warning to print on failure */
- struct timeval uz_ratecheck; /* Warnings rate-limiting */
- struct task uz_maxaction; /* Task to run when at limit */
- uint16_t uz_bucket_size_min; /* Min number of items in bucket */
-
- struct mtx_padalign uz_cross_lock; /* Cross domain free lock */
-
- /* Offset 256+, stats and misc. */
counter_u64_t uz_allocs; /* Total number of allocations */
counter_u64_t uz_frees; /* Total number of frees */
counter_u64_t uz_fails; /* Total number of alloc failures */
- uint64_t uz_sleeps; /* Total number of alloc sleeps */
- uint64_t uz_xdomain; /* Total number of cross-domain frees */
- volatile uint64_t uz_items; /* Total items count & sleepers */
-
+ const char *uz_name; /* Text name of the zone */
char *uz_ctlname; /* sysctl safe name string. */
- struct sysctl_oid *uz_oid; /* sysctl oid pointer. */
int uz_namecnt; /* duplicate name count. */
+ uint16_t uz_bucket_size_min; /* Min number of items in bucket */
+ uint16_t uz_pad0;
+
+ /* Offset 192, rare read-only. */
+ struct sysctl_oid *uz_oid; /* sysctl oid pointer. */
+ const char *uz_warning; /* Warning to print on failure */
+ struct timeval uz_ratecheck; /* Warnings rate-limiting */
+ struct task uz_maxaction; /* Task to run when at limit */
+
+ /* Offset 256. */
+ struct mtx uz_cross_lock; /* Cross domain free lock */
/*
* This HAS to be the last item because we adjust the zone size
@@ -537,7 +530,7 @@
*/
struct uma_cache uz_cpu[]; /* Per cpu caches */
- /* uz_domain follows here. */
+ /* domains follow here. */
};
/*
@@ -582,25 +575,35 @@
#define KEG_GET(zone, keg) do { \
(keg) = (zone)->uz_keg; \
- KASSERT((void *)(keg) != (void *)&(zone)->uz_lock, \
+ KASSERT((void *)(keg) != NULL, \
("%s: Invalid zone %p type", __func__, (zone))); \
} while (0)
-#define ZONE_LOCK_INIT(z, lc) \
- do { \
- if ((lc)) \
- mtx_init(&(z)->uz_lock, (z)->uz_name, \
- (z)->uz_name, MTX_DEF | MTX_DUPOK); \
- else \
- mtx_init(&(z)->uz_lock, (z)->uz_name, \
- "UMA zone", MTX_DEF | MTX_DUPOK); \
+/* The domain structures are contiguous after the last per-CPU cache. */
+#define ZDOM_GET(z, n) \
+ (&((uma_zone_domain_t)&(z)->uz_cpu[mp_maxid + 1])[n])
+
+#define ZDOM_GET_LOCKED(z, n) \
+ ({ uma_zone_domain_t zd = ZDOM_GET((z), (n)); ZDOM_LOCK(zd); zd;})
+
+#define ZDOM_LOCK_INIT(z, zdom, lc) \
+ do { \
+ if ((lc)) \
+ mtx_init(&(zdom)->uzd_lock, (z)->uz_name, \
+ (z)->uz_name, MTX_DEF | MTX_DUPOK); \
+ else \
+ mtx_init(&(zdom)->uzd_lock, (z)->uz_name, \
+ "UMA zone", MTX_DEF | MTX_DUPOK); \
} while (0)
+#define ZDOM_LOCK_FINI(z) mtx_destroy(&(z)->uzd_lock)
+#define ZDOM_LOCK_ASSERT(z) mtx_assert(&(z)->uzd_lock, MA_OWNED)
+
+#define ZDOM_LOCK(z) mtx_lock(&(z)->uzd_lock)
+#define ZDOM_OWNED(z) (mtx_owner(&(z)->uzd_lock) != NULL)
+#define ZDOM_UNLOCK(z) mtx_unlock(&(z)->uzd_lock)
-#define ZONE_LOCK(z) mtx_lock(&(z)->uz_lock)
-#define ZONE_TRYLOCK(z) mtx_trylock(&(z)->uz_lock)
-#define ZONE_UNLOCK(z) mtx_unlock(&(z)->uz_lock)
-#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock)
-#define ZONE_LOCK_ASSERT(z) mtx_assert(&(z)->uz_lock, MA_OWNED)
+#define ZONE_LOCK(z) ZDOM_LOCK(ZDOM_GET((z), 0))
+#define ZONE_UNLOCK(z) ZDOM_UNLOCK(ZDOM_GET((z), 0))
#define ZONE_CROSS_LOCK_INIT(z) \
mtx_init(&(z)->uz_cross_lock, "UMA Cross", NULL, MTX_DEF)

File Metadata

Mime Type
text/plain
Expires
Wed, Apr 22, 8:15 AM (19 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31964125
Default Alt Text
D23673.id68283.diff (31 KB)

Event Timeline