
D7538.diff.slw-alx2

Authored By: devnull_e-moe.ru
Dec 19 2017, 10:43 AM
Size: 8 KB

Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 326930)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (working copy)
@@ -4199,6 +4199,8 @@
}
}
+static long needfree = 0;
+
typedef enum free_memory_reason_t {
FMR_UNKNOWN,
FMR_NEEDFREE,
@@ -4223,13 +4225,21 @@
*/
int64_t arc_swapfs_reserve = 64;
+typedef enum free_memory_measure_t {
+ FMM_EXCLUDE_ZONE_CACHE,
+ FMM_INCLUDE_ZONE_CACHE
+} free_memory_measure_t;
+
/*
* Return the amount of memory that can be consumed before reclaim will be
* needed. Positive if there is sufficient free memory, negative indicates
* the amount of memory that needs to be freed up.
*/
+
+static int64_t arc_check_uma_cache(int64_t lowest);
+
static int64_t
-arc_available_memory(void)
+arc_available_memory(free_memory_measure_t zone_measure)
{
int64_t lowest = INT64_MAX;
int64_t n;
@@ -4236,11 +4246,23 @@
free_memory_reason_t r = FMR_UNKNOWN;
#ifdef _KERNEL
+ if (needfree > 0) {
+ n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+ needfree = n > 0 ? n : 0;
+ }
+ if (needfree > 0) {
+ n = PAGESIZE * (-needfree);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_NEEDFREE;
+ }
+ }
+
/*
* Cooperate with pagedaemon when it's time for it to scan
* and reclaim some pages.
*/
- n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
+ n = PAGESIZE * ((int64_t)freemem - (int64_t)zfs_arc_free_target - (int64_t)vm_cnt.v_free_reserved);
if (n < lowest) {
lowest = n;
r = FMR_LOTSFREE;
@@ -4345,6 +4367,16 @@
}
}
+ /* Some memory may be sitting in UMA zone cache elements; in that case
+ * the ARC is not really under memory pressure and can still grow.
+ * zone_measure == FMM_INCLUDE_ZONE_CACHE flags this case.
+ */
+ if (lowest < 0 && zone_measure == FMM_INCLUDE_ZONE_CACHE) {
+ lowest = arc_check_uma_cache(lowest);
+ if (lowest >= 0)
+ r = FMR_UNKNOWN;
+ }
+
#else /* _KERNEL */
/* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
@@ -4366,7 +4398,7 @@
static boolean_t
arc_reclaim_needed(void)
{
- return (arc_available_memory() < 0);
+ return (arc_available_memory(FMM_INCLUDE_ZONE_CACHE) < 0);
}
extern kmem_cache_t *zio_buf_cache[];
@@ -4426,6 +4458,66 @@
DTRACE_PROBE(arc__kmem_reap_end);
}
+int sysctl_drain_cache = 1;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, drain_uma_cache, CTLFLAG_RW, &sysctl_drain_cache, 0, "drain per-CPU UMA cache");
+
+
+#ifdef _KERNEL
+static int64_t
+arc_check_uma_cache(int64_t lowest)
+{
+ int iter = 4;
+ int step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+ int n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+
+ while (n >= 0) {
+ lowest += uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+ if (lowest >= 0)
+ return lowest;
+ n -= step;
+ if(--iter == 0) {
+ if (step > 1) step >>= 1;
+ iter = 4;
+ }
+ }
+ return lowest;
+}
+#endif
+
+static void
+arc_drain_uma_cache(uint64_t target)
+{
+ int iter = 4;
+ int step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+ int n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+ size_t free_size;
+
+ DTRACE_PROBE2(arc__drain_uma_cache_start, uint64_t, target, uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+#ifdef _KERNEL
+ free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+ if (target <= free_size)
+ return;
+ while (n >= 0) {
+ free_size = uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+ if (free_size) {
+ if (sysctl_drain_cache)
+ uma_reclaim_zone_cache(zio_data_buf_cache[n]->kc_zone);
+ kmem_cache_reap_now(zio_data_buf_cache[n]);
+ DTRACE_PROBE3(arc__drain_uma_cache_zone, char *, zio_data_buf_cache[n]->kc_name, size_t, free_size, uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+ free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+ if (target <= free_size)
+ break;
+ }
+ n -= step;
+ if(--iter == 0) {
+ if (step > 1) step >>= 1;
+ iter = 4;
+ }
+ }
+#endif
+ DTRACE_PROBE(arc__drain_uma_cache_end);
+}
+
/*
* Threads can block in arc_get_data_impl() waiting for this thread to evict
* enough data and signal them to proceed. When this happens, the threads in
@@ -4477,7 +4569,8 @@
*/
evicted = arc_adjust();
- int64_t free_memory = arc_available_memory();
+ int64_t free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
+ DTRACE_PROBE2(arc__reclaim_adj, uint64_t, evicted, int64_t, free_memory);
if (free_memory < 0) {
arc_no_grow = B_TRUE;
@@ -4489,18 +4582,35 @@
*/
growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
+#ifdef _KERNEL
+ if (arc_check_uma_cache(free_memory) >= 0)
+ arc_drain_uma_cache((uint64_t)freemem * PAGESIZE - free_memory);
+#else
arc_kmem_reap_now();
-
+#endif
+
/*
* If we are still low on memory, shrink the ARC
* so that we have arc_shrink_min free space.
*/
- free_memory = arc_available_memory();
+ free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
int64_t to_free =
(arc_c >> arc_shrink_shift) - free_memory;
+ DTRACE_PROBE3(arc__reclaim_tst, int64_t, to_free, int64_t, free_memory, long, needfree);
if (to_free > 0) {
+#ifdef _KERNEL
+ to_free = MAX(to_free, ptob(needfree));
+ uint64_t free_target =
+ (uint64_t)freemem * PAGESIZE - free_memory;
+#endif
arc_shrink(to_free);
+#ifdef _KERNEL
+ arc_drain_uma_cache(free_target);
+#else
+ arc_kmem_reap_now();
+#endif
+ DTRACE_PROBE(arc__reclaim_shr);
}
} else if (free_memory < arc_c >> arc_no_grow_shift) {
arc_no_grow = B_TRUE;
@@ -4520,6 +4630,9 @@
* infinite loop.
*/
if (arc_size <= arc_c || evicted == 0) {
+#ifdef _KERNEL
+ needfree = 0;
+#endif
/*
* We're either no longer overflowing, or we
* can't evict anything more, so we should wake
@@ -6292,18 +6405,14 @@
static void
arc_lowmem(void *arg __unused, int howto __unused)
{
+ int64_t n;
mutex_enter(&arc_reclaim_lock);
+ /* XXX: Memory deficit should be passed as argument. */
+ n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+ needfree = (n>0) ? n : vm_cnt.v_free_target >> 8;
DTRACE_PROBE1(arc__needfree, int64_t, ((int64_t)freemem - zfs_arc_free_target) * PAGESIZE);
cv_signal(&arc_reclaim_thread_cv);
-
- /*
- * It is unsafe to block here in arbitrary threads, because we can come
- * here from ARC itself and may hold ARC locks and thus risk a deadlock
- * with ARC reclaim thread.
- */
- if (curproc == pageproc)
- (void) cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock);
mutex_exit(&arc_reclaim_lock);
}
#endif
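
The walk order used by arc_check_uma_cache() and arc_drain_uma_cache() above is easy to miss when reading the hunks: both scan zio_data_buf_cache[] from the largest buffer size downward and halve the stride every four probes, so the big zones, which can hide the most cached memory, are inspected first and the scan gets finer toward the small sizes. The stand-alone userland sketch below only reproduces that loop to show which indices get probed; it is not part of the patch, and it assumes the stock values SPA_MINBLOCKSHIFT == 9 and SPA_MAXBLOCKSHIFT == 24.

/* Userland sketch of the zone-walk pattern; not kernel code. */
#include <stdio.h>

#define SPA_MINBLOCKSHIFT       9       /* 512-byte minimum block */
#define SPA_MAXBLOCKSHIFT       24      /* 16 MB maximum block (assumed stock value) */
#define SPA_MAXBLOCKSIZE        (1ULL << SPA_MAXBLOCKSHIFT)

int
main(void)
{
        int iter = 4;
        int step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
        int n = (int)(SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
        int visited = 0;

        while (n >= 0) {
                /* Index n serves buffers of up to (n + 1) << SPA_MINBLOCKSHIFT bytes. */
                printf("probe index %5d (buffer size up to %8llu bytes)\n",
                    n, (unsigned long long)(n + 1) << SPA_MINBLOCKSHIFT);
                visited++;
                n -= step;
                /* Halve the stride every four probes: large zones first, finer later. */
                if (--iter == 0) {
                        if (step > 1)
                                step >>= 1;
                        iter = 4;
                }
        }
        printf("probed %d of %d zone indices\n",
            visited, (int)(SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT));
        return (0);
}

With those constants the walk touches 56 of the 32768 indices, heavily weighted toward the multi-megabyte zones.
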
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h (revision 326930)
+++ sys/vm/uma.h (working copy)
@@ -449,6 +449,16 @@
void uma_reclaim(void);
/*
+ * Reclaims unused per-CPU cache memory from the specified zone
+ *
+ * Arguments:
+ * zone The zone for cleanup
+ * Returns:
+ * None
+ */
+void uma_reclaim_zone_cache(uma_zone_t zone);
+
+/*
* Sets the alignment mask to be used for all zones requesting cache
* alignment. Should be called by MD boot code prior to starting VM/UMA.
*
@@ -546,6 +556,18 @@
int uma_zone_get_cur(uma_zone_t zone);
/*
+ * Obtains the approximate total size, in bytes, of free items in a zone
+ *
+ * Arguments:
+ * zone The zone to obtain the current free size from
+ *
+ * Return:
+ * size_t The approximate total size, in bytes, of free items in the zone
+ */
+size_t uma_zone_get_free_size(uma_zone_t zone);
+
+
+/*
* The following two routines (uma_zone_set_init/fini)
* are used to set the backend init/fini pair which acts on an
* object as it becomes allocated and is placed in a slab within
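
To make the two prototypes added to uma.h above concrete, here is a hedged, kernel-only sketch of how a caller might pair them, mirroring the way arc_drain_uma_cache() in this diff uses them: query the approximate idle bytes first, then push the per-CPU buckets back into the zone so that a following drain (done in the patch via kmem_cache_reap_now()) can release the pages. The function name and threshold are placeholders, not part of the patch.

/* Hypothetical caller; builds only inside the kernel with this patch applied. */
#include <sys/param.h>
#include <vm/uma.h>

static void
maybe_drain_zone_cache(uma_zone_t zone, size_t want_bytes)
{
        size_t idle;

        /* Approximate bytes held by free keg items plus per-CPU buckets. */
        idle = uma_zone_get_free_size(zone);
        if (idle >= want_bytes) {
                /* Flush per-CPU buckets back to the zone for a later drain. */
                uma_reclaim_zone_cache(zone);
        }
}
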
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c (revision 326930)
+++ sys/vm/uma_core.c (working copy)
@@ -2992,6 +2992,39 @@
}
/* See uma.h */
+size_t
+uma_zone_get_free_size(uma_zone_t zone)
+{
+ uma_klink_t kl;
+ uma_bucket_t bucket;
+ int64_t nitems;
+ u_int i;
+
+ ZONE_LOCK(zone);
+ nitems = 0;
+ if(!(zone->uz_flags & UMA_ZONE_SECONDARY)) {
+ LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
+ nitems += kl->kl_keg->uk_free;
+ }
+ }
+ CPU_FOREACH(i) {
+ /*
+ * See the comment in sysctl_vm_zone_stats() regarding the
+ * safety of accessing the per-cpu caches. With the zone lock
+ * held, it is safe, but can potentially result in stale data.
+ */
+ bucket = zone->uz_cpu[i].uc_allocbucket;
+ if (bucket != NULL)
+ nitems += bucket->ub_cnt;
+ bucket = zone->uz_cpu[i].uc_freebucket;
+ if (bucket != NULL)
+ nitems += bucket->ub_cnt;
+ }
+ ZONE_UNLOCK(zone);
+ return (nitems * zone->uz_size);
+}
+
+/* See uma.h */
void
uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
@@ -3157,6 +3190,14 @@
}
/* See uma.h */
+void
+uma_reclaim_zone_cache(uma_zone_t zone)
+{
+ bucket_enable();
+ cache_drain_safe(zone);
+}
+
+/* See uma.h */
static void
uma_reclaim_locked(bool kmem_danger)
{
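
As a quick sanity check of what the new uma_zone_get_free_size() reports: for a non-secondary zone it sums the keg's free slab items with whatever sits in each CPU's alloc and free buckets, then multiplies by the zone's item size to get bytes. A tiny userland illustration with made-up numbers (none of these values come from the patch):

/* Userland arithmetic illustration only; values are invented. */
#include <stdio.h>

int
main(void)
{
        size_t uz_size = 131072;        /* item size, e.g. a 128 KB zio_data_buf zone */
        unsigned int uk_free = 10;      /* free items on the keg's slabs */
        unsigned int bucket_items = 6;  /* items in per-CPU alloc/free buckets, all CPUs */

        /* (10 + 6) * 131072 = 2097152 bytes reported as "free size". */
        printf("reported free size: %zu bytes\n",
            (uk_free + bucket_items) * uz_size);
        return (0);
}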
