
D7538.diff.slw-alx2

Authored By: devnull_e-moe.ru
Dec 19 2017, 10:43 AM
Size: 8 KB

Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 326930)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (working copy)
@@ -4199,6 +4199,8 @@
}
}
+static long needfree = 0;
+
typedef enum free_memory_reason_t {
FMR_UNKNOWN,
FMR_NEEDFREE,
@@ -4223,13 +4225,21 @@
*/
int64_t arc_swapfs_reserve = 64;
+typedef enum free_memory_measure_t {
+ FMM_EXCLUDE_ZONE_CACHE,
+ FMM_INCLUDE_ZONE_CACHE
+} free_memory_measure_t;
+
/*
* Return the amount of memory that can be consumed before reclaim will be
* needed. Positive if there is sufficient free memory, negative indicates
* the amount of memory that needs to be freed up.
*/
+
+static int64_t arc_check_uma_cache(int64_t lowest);
+
static int64_t
-arc_available_memory(void)
+arc_available_memory(free_memory_measure_t zone_measure)
{
int64_t lowest = INT64_MAX;
int64_t n;
@@ -4236,11 +4246,23 @@
free_memory_reason_t r = FMR_UNKNOWN;
#ifdef _KERNEL
+ if (needfree > 0) {
+ n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+ needfree = n > 0 ? n : 0;
+ }
+ if (needfree > 0) {
+ n = PAGESIZE * (-needfree);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_NEEDFREE;
+ }
+ }
+
/*
* Cooperate with pagedaemon when it's time for it to scan
* and reclaim some pages.
*/
- n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
+ n = PAGESIZE * ((int64_t)freemem - (int64_t)zfs_arc_free_target - (int64_t)vm_cnt.v_free_reserved);
if (n < lowest) {
lowest = n;
r = FMR_LOTSFREE;
@@ -4345,6 +4367,16 @@
}
}
+ /* Some memory may be sitting in UMA zone cache elements; in that case
+ * the ARC is not really under memory pressure and can still grow.
+ * zone_measure == FMM_INCLUDE_ZONE_CACHE flags this case.
+ */
+ if (lowest < 0 && zone_measure == FMM_INCLUDE_ZONE_CACHE) {
+ lowest = arc_check_uma_cache(lowest);
+ if (lowest >= 0)
+ r = FMR_UNKNOWN;
+ }
+
#else /* _KERNEL */
/* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
@@ -4366,7 +4398,7 @@
static boolean_t
arc_reclaim_needed(void)
{
- return (arc_available_memory() < 0);
+ return (arc_available_memory(FMM_INCLUDE_ZONE_CACHE) < 0);
}
extern kmem_cache_t *zio_buf_cache[];
@@ -4426,6 +4458,66 @@
DTRACE_PROBE(arc__kmem_reap_end);
}
+int sysctl_drain_cache = 1;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, drain_uma_cache, CTLFLAG_RW, &sysctl_drain_cache, 0, "drain per-CPU UMA cache");
+
+
+#ifdef _KERNEL
+static int64_t
+arc_check_uma_cache(int64_t lowest)
+{
+ int iter = 4;
+ int step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+ int n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+
+ while (n >= 0) {
+ lowest += uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+ if (lowest >= 0)
+ return lowest;
+ n -= step;
+ if(--iter == 0) {
+ if (step > 1) step >>= 1;
+ iter = 4;
+ }
+ }
+ return lowest;
+}
+#endif
+
+static void
+arc_drain_uma_cache(uint64_t target)
+{
+ int iter = 4;
+ int step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+ int n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+ size_t free_size;
+
+ DTRACE_PROBE2(arc__drain_uma_cache_start, uint64_t, target, uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+#ifdef _KERNEL
+ free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+ if (target <= free_size)
+ return;
+ while (n >= 0) {
+ free_size = uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+ if (free_size) {
+ if (sysctl_drain_cache)
+ uma_reclaim_zone_cache(zio_data_buf_cache[n]->kc_zone);
+ kmem_cache_reap_now(zio_data_buf_cache[n]);
+ DTRACE_PROBE3(arc__drain_uma_cache_zone, char *, zio_data_buf_cache[n]->kc_name, size_t, free_size, uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+ free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+ if (target <= free_size)
+ break;
+ }
+ n -= step;
+ if(--iter == 0) {
+ if (step > 1) step >>= 1;
+ iter = 4;
+ }
+ }
+#endif
+ DTRACE_PROBE(arc__drain_uma_cache_end);
+}
+
/*
* Threads can block in arc_get_data_impl() waiting for this thread to evict
* enough data and signal them to proceed. When this happens, the threads in
@@ -4477,7 +4569,8 @@
*/
evicted = arc_adjust();
- int64_t free_memory = arc_available_memory();
+ int64_t free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
+ DTRACE_PROBE2(arc__reclaim_adj, uint64_t, evicted, int64_t, free_memory);
if (free_memory < 0) {
arc_no_grow = B_TRUE;
@@ -4489,18 +4582,35 @@
*/
growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
+#ifdef _KERNEL
+ if (arc_check_uma_cache(free_memory) >= 0)
+ arc_drain_uma_cache((uint64_t)freemem * PAGESIZE - free_memory);
+#else
arc_kmem_reap_now();
-
+#endif
+
/*
* If we are still low on memory, shrink the ARC
* so that we have arc_shrink_min free space.
*/
- free_memory = arc_available_memory();
+ free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
int64_t to_free =
(arc_c >> arc_shrink_shift) - free_memory;
+ DTRACE_PROBE3(arc__reclaim_tst, int64_t, to_free, int64_t, free_memory, long, needfree);
if (to_free > 0) {
+#ifdef _KERNEL
+ to_free = MAX(to_free, ptob(needfree));
+ uint64_t free_target =
+ (uint64_t)freemem * PAGESIZE - free_memory;
+#endif
arc_shrink(to_free);
+#ifdef _KERNEL
+ arc_drain_uma_cache(free_target);
+#else
+ arc_kmem_reap_now();
+#endif
+ DTRACE_PROBE(arc__reclaim_shr);
}
} else if (free_memory < arc_c >> arc_no_grow_shift) {
arc_no_grow = B_TRUE;
@@ -4520,6 +4630,9 @@
* infinite loop.
*/
if (arc_size <= arc_c || evicted == 0) {
+#ifdef _KERNEL
+ needfree = 0;
+#endif
/*
* We're either no longer overflowing, or we
* can't evict anything more, so we should wake
@@ -6292,18 +6405,14 @@
static void
arc_lowmem(void *arg __unused, int howto __unused)
{
+ int64_t n;
mutex_enter(&arc_reclaim_lock);
+ /* XXX: Memory deficit should be passed as argument. */
+ n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+ needfree = (n>0) ? n : vm_cnt.v_free_target >> 8;
DTRACE_PROBE1(arc__needfree, int64_t, ((int64_t)freemem - zfs_arc_free_target) * PAGESIZE);
cv_signal(&arc_reclaim_thread_cv);
-
- /*
- * It is unsafe to block here in arbitrary threads, because we can come
- * here from ARC itself and may hold ARC locks and thus risk a deadlock
- * with ARC reclaim thread.
- */
- if (curproc == pageproc)
- (void) cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock);
mutex_exit(&arc_reclaim_lock);
}
#endif
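
The walk order used by arc_check_uma_cache() and arc_drain_uma_cache() above is easy to miss when reading the hunks: both scan zio_data_buf_cache[] from the largest buffer size downward and halve the stride every four probes, so the big zones, which can hide the most cached memory, are inspected first and the scan gets finer toward the small sizes. The stand-alone userland sketch below only reproduces that loop to show which indices get probed; it is not part of the patch, and it assumes the stock values SPA_MINBLOCKSHIFT == 9 and SPA_MAXBLOCKSHIFT == 24.

/* Userland sketch of the zone-walk pattern; not kernel code. */
#include <stdio.h>

#define SPA_MINBLOCKSHIFT       9       /* 512-byte minimum block */
#define SPA_MAXBLOCKSHIFT       24      /* 16 MB maximum block (assumed stock value) */
#define SPA_MAXBLOCKSIZE        (1ULL << SPA_MAXBLOCKSHIFT)

int
main(void)
{
        int iter = 4;
        int step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
        int n = (int)(SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
        int visited = 0;

        while (n >= 0) {
                /* Index n serves buffers of up to (n + 1) << SPA_MINBLOCKSHIFT bytes. */
                printf("probe index %5d (buffer size up to %8llu bytes)\n",
                    n, (unsigned long long)(n + 1) << SPA_MINBLOCKSHIFT);
                visited++;
                n -= step;
                /* Halve the stride every four probes: large zones first, finer later. */
                if (--iter == 0) {
                        if (step > 1)
                                step >>= 1;
                        iter = 4;
                }
        }
        printf("probed %d of %d zone indices\n",
            visited, (int)(SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT));
        return (0);
}

With those constants the walk touches 56 of the 32768 indices, heavily weighted toward the multi-megabyte zones.
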
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h (revision 326930)
+++ sys/vm/uma.h (working copy)
@@ -449,6 +449,16 @@
void uma_reclaim(void);
/*
+ * Reclaims unused per-CPU cache memory from the specified zone
+ *
+ * Arguments:
+ * zone The zone for cleanup
+ * Returns:
+ * None
+ */
+void uma_reclaim_zone_cache(uma_zone_t zone);
+
+/*
* Sets the alignment mask to be used for all zones requesting cache
* alignment. Should be called by MD boot code prior to starting VM/UMA.
*
@@ -546,6 +556,18 @@
int uma_zone_get_cur(uma_zone_t zone);
/*
+ * Obtains the approximate total size, in bytes, of free items in a zone
+ *
+ * Arguments:
+ * zone The zone to obtain the current free size from
+ *
+ * Return:
+ * size_t The approximate total size, in bytes, of free items in the zone
+ */
+size_t uma_zone_get_free_size(uma_zone_t zone);
+
+
+/*
* The following two routines (uma_zone_set_init/fini)
* are used to set the backend init/fini pair which acts on an
* object as it becomes allocated and is placed in a slab within
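
To make the two prototypes added to uma.h above concrete, here is a hedged, kernel-only sketch of how a caller might pair them, mirroring the way arc_drain_uma_cache() in this diff uses them: query the approximate idle bytes first, then push the per-CPU buckets back into the zone so that a following drain (done in the patch via kmem_cache_reap_now()) can release the pages. The function name and threshold are placeholders, not part of the patch.

/* Hypothetical caller; builds only inside the kernel with this patch applied. */
#include <sys/param.h>
#include <vm/uma.h>

static void
maybe_drain_zone_cache(uma_zone_t zone, size_t want_bytes)
{
        size_t idle;

        /* Approximate bytes held by free keg items plus per-CPU buckets. */
        idle = uma_zone_get_free_size(zone);
        if (idle >= want_bytes) {
                /* Flush per-CPU buckets back to the zone for a later drain. */
                uma_reclaim_zone_cache(zone);
        }
}
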
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c (revision 326930)
+++ sys/vm/uma_core.c (working copy)
@@ -2992,6 +2992,39 @@
}
/* See uma.h */
+size_t
+uma_zone_get_free_size(uma_zone_t zone)
+{
+ uma_klink_t kl;
+ uma_bucket_t bucket;
+ int64_t nitems;
+ u_int i;
+
+ ZONE_LOCK(zone);
+ nitems = 0;
+ if(!(zone->uz_flags & UMA_ZONE_SECONDARY)) {
+ LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
+ nitems += kl->kl_keg->uk_free;
+ }
+ }
+ CPU_FOREACH(i) {
+ /*
+ * See the comment in sysctl_vm_zone_stats() regarding the
+ * safety of accessing the per-cpu caches. With the zone lock
+ * held, it is safe, but can potentially result in stale data.
+ */
+ bucket = zone->uz_cpu[i].uc_allocbucket;
+ if (bucket != NULL)
+ nitems += bucket->ub_cnt;
+ bucket = zone->uz_cpu[i].uc_freebucket;
+ if (bucket != NULL)
+ nitems += bucket->ub_cnt;
+ }
+ ZONE_UNLOCK(zone);
+ return (nitems * zone->uz_size);
+}
+
+/* See uma.h */
void
uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
@@ -3157,6 +3190,14 @@
}
/* See uma.h */
+void
+uma_reclaim_zone_cache(uma_zone_t zone)
+{
+ bucket_enable();
+ cache_drain_safe(zone);
+}
+
+/* See uma.h */
static void
uma_reclaim_locked(bool kmem_danger)
{
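
As a quick sanity check of what the new uma_zone_get_free_size() reports: for a non-secondary zone it sums the keg's free slab items with whatever sits in each CPU's alloc and free buckets, then multiplies by the zone's item size to get bytes. A tiny userland illustration with made-up numbers (none of these values come from the patch):

/* Userland arithmetic illustration only; values are invented. */
#include <stdio.h>

int
main(void)
{
        size_t uz_size = 131072;        /* item size, e.g. a 128 KB zio_data_buf zone */
        unsigned int uk_free = 10;      /* free items on the keg's slabs */
        unsigned int bucket_items = 6;  /* items in per-CPU alloc/free buckets, all CPUs */

        /* (10 + 6) * 131072 = 2097152 bytes reported as "free size". */
        printf("reported free size: %zu bytes\n",
            (uk_free + bucket_items) * uz_size);
        return (0);
}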
