D7538.id42437.diff
D7538: Correct adaptation ZFS ARC memory pressure to FreeBSD

Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -4254,6 +4254,8 @@
}
}
+static long needfree = 0;
+
typedef enum free_memory_reason_t {
FMR_UNKNOWN,
FMR_NEEDFREE,
@@ -4278,13 +4280,21 @@
*/
int64_t arc_swapfs_reserve = 64;
+typedef enum free_memory_measure_t {
+ FMM_EXCLUDE_ZONE_CACHE,
+ FMM_INCLUDE_ZONE_CACHE
+} free_memory_measure_t;
+
/*
* Return the amount of memory that can be consumed before reclaim will be
* needed. Positive if there is sufficient free memory, negative indicates
* the amount of memory that needs to be freed up.
*/
+
+static int64_t arc_check_uma_cache(int64_t lowest);
+
static int64_t
-arc_available_memory(void)
+arc_available_memory(free_memory_measure_t zone_measure)
{
int64_t lowest = INT64_MAX;
int64_t n;
@@ -4292,14 +4302,16 @@
#ifdef _KERNEL
#ifdef __FreeBSD__
- /*
- * Cooperate with pagedaemon when it's time for it to scan
- * and reclaim some pages.
- */
- n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
- if (n < lowest) {
- lowest = n;
- r = FMR_LOTSFREE;
+ if (needfree > 0) {
+ n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+ needfree = n > 0 ? n : 0;
+ }
+ if (needfree > 0) {
+ n = PAGESIZE * (-needfree);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_NEEDFREE;
+ }
}
#else
@@ -4410,6 +4422,16 @@
}
}
+ /*
+ * Some memory can be held in UMA zone cache elements; in that case
+ * the ARC is not under memory pressure and can still grow.
+ * zone_measure == FMM_INCLUDE_ZONE_CACHE requests this accounting.
+ */
+ if (lowest < 0 && zone_measure == FMM_INCLUDE_ZONE_CACHE) {
+ lowest = arc_check_uma_cache(lowest);
+ if (lowest >= 0)
+ r = FMR_UNKNOWN;
+ }
+
#else /* _KERNEL */
/* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
@@ -4431,7 +4453,7 @@
static boolean_t
arc_reclaim_needed(void)
{
- return (arc_available_memory() < 0);
+ return (arc_available_memory(FMM_INCLUDE_ZONE_CACHE) < 0);
}
extern kmem_cache_t *zio_buf_cache[];
@@ -4501,6 +4523,69 @@
DTRACE_PROBE(arc__kmem_reap_end);
}
+int sysctl_drain_cache = 1;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, drain_uma_cache, CTLFLAG_RW, &sysctl_drain_cache, 0, "drain per-CPU UMA cache");
+
+
+#ifdef _KERNEL
+static int64_t
+arc_check_uma_cache(int64_t lowest)
+{
+ int iter = 4;
+ int step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+ int n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+
+ while (n >= 0) {
+ lowest += uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+ if (lowest >= 0)
+ return (lowest);
+ n -= step;
+ if (--iter == 0) {
+ if (step > 1)
+ step >>= 1;
+ iter = 4;
+ }
+ }
+ lowest += uma_zone_get_free_size(abd_chunk_cache->kc_zone);
+ return (lowest);
+}
+#endif
+
+static void
+arc_drain_uma_cache(uint64_t target)
+{
+ int iter = 4;
+ int step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+ int n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+ size_t free_size;
+
+ DTRACE_PROBE2(arc__drain_uma_cache_start, uint64_t, target, uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+#ifdef _KERNEL
+ free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+ if (target <= free_size)
+ return;
+ while (n >= 0) {
+ free_size = uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+ if (free_size) {
+ if (sysctl_drain_cache)
+ uma_reclaim_zone_cache(zio_data_buf_cache[n]->kc_zone);
+ kmem_cache_reap_soon(zio_data_buf_cache[n]);
+ DTRACE_PROBE3(arc__drain_uma_cache_zone, char *, zio_data_buf_cache[n]->kc_name, size_t, free_size, uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+ free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+ if (target <= free_size)
+ break;
+ }
+ n -= step;
+ if (--iter == 0) {
+ if (step > 1)
+ step >>= 1;
+ iter = 4;
+ }
+ }
+ if (target > free_size)
+ kmem_cache_reap_soon(abd_chunk_cache);
+#endif
+ DTRACE_PROBE(arc__drain_uma_cache_end);
+}
+
/*
* Threads can block in arc_get_data_impl() waiting for this thread to evict
* enough data and signal them to proceed. When this happens, the threads in
@@ -4554,7 +4639,8 @@
*/
evicted = arc_adjust();
- int64_t free_memory = arc_available_memory();
+ int64_t free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
+ DTRACE_PROBE2(arc__reclaim_adj, uint64_t, evicted, int64_t, free_memory);
if (free_memory < 0) {
hrtime_t curtime = gethrtime();
arc_no_grow = B_TRUE;
@@ -4574,26 +4660,38 @@
* reaping caches, while we're near arc_c_min.
*/
if (curtime >= kmem_reap_time) {
+#ifdef _KERNEL
+ if (arc_check_uma_cache(free_memory) >= 0)
+ arc_drain_uma_cache((uint64_t)freemem * PAGESIZE - free_memory);
+#else
arc_kmem_reap_now();
+#endif
kmem_reap_time = gethrtime() +
MSEC2NSEC(arc_kmem_cache_reap_retry_ms);
}
-
+
/*
* If we are still low on memory, shrink the ARC
* so that we have arc_shrink_min free space.
*/
- free_memory = arc_available_memory();
+ free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
int64_t to_free =
(arc_c >> arc_shrink_shift) - free_memory;
+ DTRACE_PROBE3(arc__reclaim_tst, int64_t, to_free, int64_t, free_memory, long, needfree);
if (to_free > 0) {
#ifdef _KERNEL
-#ifdef illumos
to_free = MAX(to_free, ptob(needfree));
-#endif
+ uint64_t free_target =
+ (uint64_t)freemem * PAGESIZE - free_memory;
#endif
arc_shrink(to_free);
+#ifdef _KERNEL
+ arc_drain_uma_cache(free_target);
+#else
+ arc_kmem_reap_now();
+#endif
+ DTRACE_PROBE(arc__reclaim_shr);
}
} else if (free_memory < arc_c >> arc_no_grow_shift) {
arc_no_grow = B_TRUE;
@@ -6450,18 +6548,14 @@
static void
arc_lowmem(void *arg __unused, int howto __unused)
{
+ int64_t n;
mutex_enter(&arc_reclaim_lock);
- DTRACE_PROBE1(arc__needfree, int64_t, ((int64_t)freemem - zfs_arc_free_target) * PAGESIZE);
+ /* XXX: The memory deficit should be passed in as an argument. */
+ n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+ needfree = (n > 0) ? n : vm_cnt.v_free_target >> 8;
+ DTRACE_PROBE2(arc__needfree, int64_t, ((int64_t)freemem - zfs_arc_free_target) * PAGESIZE, int64_t, needfree);
cv_signal(&arc_reclaim_thread_cv);
-
- /*
- * It is unsafe to block here in arbitrary threads, because we can come
- * here from ARC itself and may hold ARC locks and thus risk a deadlock
- * with ARC reclaim thread.
- */
- if (curproc == pageproc)
- (void) cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock);
mutex_exit(&arc_reclaim_lock);
}
#endif
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h
+++ sys/vm/uma.h
@@ -454,6 +454,16 @@
void uma_reclaim(void);
/*
+ * Reclaims unused per-CPU cache memory from the specified zone
+ *
+ * Arguments:
+ * zone The zone for cleanup
+ * Returns:
+ * None
+ */
+void uma_reclaim_zone_cache(uma_zone_t zone);
+
+/*
* Sets the alignment mask to be used for all zones requesting cache
* alignment. Should be called by MD boot code prior to starting VM/UMA.
*
@@ -551,6 +561,18 @@
int uma_zone_get_cur(uma_zone_t zone);
/*
+ * Obtains the approximate total size, in bytes, of free items in a zone
+ *
+ * Arguments:
+ * zone The zone to query for free item size
+ *
+ * Returns:
+ * size_t The approximate total size, in bytes, of free items in the zone
+ */
+size_t uma_zone_get_free_size(uma_zone_t zone);
+
+
+/*
* The following two routines (uma_zone_set_init/fini)
* are used to set the backend init/fini pair which acts on an
* object as it becomes allocated and is placed in a slab within
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -3006,6 +3006,39 @@
}
/* See uma.h */
+size_t
+uma_zone_get_free_size(uma_zone_t zone)
+{
+ uma_klink_t kl;
+ uma_bucket_t bucket;
+ int64_t nitems;
+ u_int i;
+
+ ZONE_LOCK(zone);
+ nitems = 0;
+ if (!(zone->uz_flags & UMA_ZONE_SECONDARY)) {
+ LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
+ nitems += kl->kl_keg->uk_free;
+ }
+ }
+ CPU_FOREACH(i) {
+ /*
+ * See the comment in sysctl_vm_zone_stats() regarding the
+ * safety of accessing the per-cpu caches. With the zone lock
+ * held, it is safe, but can potentially result in stale data.
+ */
+ bucket = zone->uz_cpu[i].uc_allocbucket;
+ if (bucket != NULL)
+ nitems += bucket->ub_cnt;
+ bucket = zone->uz_cpu[i].uc_freebucket;
+ if (bucket != NULL)
+ nitems += bucket->ub_cnt;
+ }
+ ZONE_UNLOCK(zone);
+ return (nitems * zone->uz_size);
+}
+
+/* See uma.h */
void
uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
@@ -3171,6 +3204,14 @@
}
/* See uma.h */
+void
+uma_reclaim_zone_cache(uma_zone_t zone)
+{
+ bucket_enable();
+ cache_drain_safe(zone);
+}
+
+/* See uma.h */
static void
uma_reclaim_locked(bool kmem_danger)
{