Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	(revision 326930)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	(working copy)
@@ -4199,6 +4199,8 @@
 	}
 }
 
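+/*
+ * Page deficit (in pages) recorded by the low-memory event handler and
+ * consumed by arc_available_memory() and the ARC reclaim thread.
+ */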
+static long needfree = 0;
+
 typedef enum free_memory_reason_t {
 	FMR_UNKNOWN,
 	FMR_NEEDFREE,
@@ -4223,13 +4225,21 @@
  */
 int64_t arc_swapfs_reserve = 64;
 
+typedef enum free_memory_measure_t {
+	FMM_EXCLUDE_ZONE_CACHE,
+	FMM_INCLUDE_ZONE_CACHE
+} free_memory_measure_t;
+
+static int64_t arc_check_uma_cache(int64_t lowest);
+
 /*
  * Return the amount of memory that can be consumed before reclaim will be
  * needed.  Positive if there is sufficient free memory, negative indicates
  * the amount of memory that needs to be freed up.
  */
 static int64_t
-arc_available_memory(void)
+arc_available_memory(free_memory_measure_t zone_measure)
 {
 	int64_t lowest = INT64_MAX;
 	int64_t n;
@@ -4236,11 +4246,23 @@
 	free_memory_reason_t r = FMR_UNKNOWN;
 
 #ifdef _KERNEL
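+	/*
+	 * Re-evaluate the deficit recorded by the low-memory handler and
+	 * clear it once the free page target has been reached.
+	 */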
+	if (needfree > 0) {
+		n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+		needfree = n > 0 ? n : 0;
+	}
+	if (needfree > 0) {
+		n = PAGESIZE * (-needfree);
+		if (n < lowest) {
+			lowest = n;
+			r = FMR_NEEDFREE;
+		}
+	}
+
 	/*
 	 * Cooperate with pagedaemon when it's time for it to scan
 	 * and reclaim some pages.
 	 */
-	n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
+	n = PAGESIZE * ((int64_t)freemem - (int64_t)zfs_arc_free_target -
+	    (int64_t)vm_cnt.v_free_reserved);
 	if (n < lowest) {
 		lowest = n;
 		r = FMR_LOTSFREE;
@@ -4345,6 +4367,16 @@
 		}
 	}
 
+	/*
+	 * Some of the apparent shortfall may be sitting in the UMA zone
+	 * caches.  If so, the ARC is not really under memory pressure and
+	 * may keep growing.  Callers request this check by passing
+	 * FMM_INCLUDE_ZONE_CACHE.
+	 */
+	if (lowest < 0 && zone_measure == FMM_INCLUDE_ZONE_CACHE) {
+		lowest = arc_check_uma_cache(lowest);
+		if (lowest >= 0)
+			r = FMR_UNKNOWN;
+	}
+
 #else	/* _KERNEL */
 	/* Every 100 calls, free a small amount */
 	if (spa_get_random(100) == 0)
@@ -4366,7 +4398,7 @@
 static boolean_t
 arc_reclaim_needed(void)
 {
-	return (arc_available_memory() < 0);
+	return (arc_available_memory(FMM_INCLUDE_ZONE_CACHE) < 0);
 }
 
 extern kmem_cache_t	*zio_buf_cache[];
@@ -4426,6 +4458,66 @@
 	DTRACE_PROBE(arc__kmem_reap_end);
 }
 
+static int sysctl_drain_cache = 1;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, drain_uma_cache, CTLFLAG_RW,
+    &sysctl_drain_cache, 0,
+    "Drain per-CPU UMA caches when reclaiming ARC memory");
+
+#ifdef _KERNEL
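+/*
+ * Estimate whether the free items cached in the zio_data_buf UMA zones are
+ * enough to cover the shortfall 'lowest' (a negative byte count).  Walk the
+ * zones from the largest block size down and return as soon as the running
+ * sum turns non-negative.
+ */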
+static int64_t
+arc_check_uma_cache(int64_t lowest)
+{
+	int			iter = 4;
+	int			step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+	int			n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+
+	while (n >= 0) {
+		lowest += uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+		if (lowest >= 0)
+			return (lowest);
+		n -= step;
+		if (--iter == 0) {
+			if (step > 1)
+				step >>= 1;
+			iter = 4;
+		}
+	}
+	return (lowest);
+}
+#endif
+
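+/*
+ * Drain cached items from the zio_data_buf UMA zones, largest block size
+ * first, until the system free page count reaches 'target' bytes or all
+ * zones have been visited.
+ */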
+#ifdef _KERNEL
+static void
+arc_drain_uma_cache(uint64_t target)
+{
+	int			iter = 4;
+	int			step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+	int			n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+	size_t			free_size;
+
+	DTRACE_PROBE2(arc__drain_uma_cache_start, uint64_t, target,
+	    uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+	free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+	if (target <= free_size)
+		return;
+	while (n >= 0) {
+		free_size = uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+		if (free_size) {
+			if (sysctl_drain_cache)
+				uma_reclaim_zone_cache(zio_data_buf_cache[n]->kc_zone);
+			kmem_cache_reap_now(zio_data_buf_cache[n]);
+			DTRACE_PROBE3(arc__drain_uma_cache_zone, char *,
+			    zio_data_buf_cache[n]->kc_name, size_t, free_size,
+			    uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+			free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+			if (target <= free_size)
+				break;
+		}
+		n -= step;
+		if (--iter == 0) {
+			if (step > 1)
+				step >>= 1;
+			iter = 4;
+		}
+	}
+	DTRACE_PROBE(arc__drain_uma_cache_end);
+}
+#endif
+
 /*
  * Threads can block in arc_get_data_impl() waiting for this thread to evict
  * enough data and signal them to proceed. When this happens, the threads in
@@ -4477,7 +4569,8 @@
 		 */
 		evicted = arc_adjust();
 
-		int64_t free_memory = arc_available_memory();
+		int64_t free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
+		DTRACE_PROBE2(arc__reclaim_adj, uint64_t, evicted,
+		    int64_t, free_memory);
 		if (free_memory < 0) {
 
 			arc_no_grow = B_TRUE;
@@ -4489,18 +4582,35 @@
 			 */
 			growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
 
+#ifdef _KERNEL
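+			/*
+			 * If the UMA zone caches hold enough free memory to
+			 * cover the deficit, drain them toward the page free
+			 * target instead of doing a full kmem reap.
+			 */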
+			if (arc_check_uma_cache(free_memory) >= 0)
+				arc_drain_uma_cache((uint64_t)freemem *
+				    PAGESIZE - free_memory);
+#else
 			arc_kmem_reap_now();
-
+#endif
+
 			/*
 			 * If we are still low on memory, shrink the ARC
 			 * so that we have arc_shrink_min free space.
 			 */
-			free_memory = arc_available_memory();
+			free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
 
 			int64_t to_free =
 			    (arc_c >> arc_shrink_shift) - free_memory;
+			DTRACE_PROBE3(arc__reclaim_tst, int64_t, to_free,
+			    int64_t, free_memory, long, needfree);
 			if (to_free > 0) {
+#ifdef _KERNEL
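+				/* Free at least the deficit recorded by the low-memory handler. */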
+				to_free = MAX(to_free, ptob(needfree));
+				uint64_t free_target =
+				    (uint64_t)freemem * PAGESIZE - free_memory;
+#endif
 				arc_shrink(to_free);
+#ifdef _KERNEL
+				arc_drain_uma_cache(free_target);
+#else
+				arc_kmem_reap_now();
+#endif
+				DTRACE_PROBE(arc__reclaim_shr);
 			}
 		} else if (free_memory < arc_c >> arc_no_grow_shift) {
 			arc_no_grow = B_TRUE;
@@ -4520,6 +4630,9 @@
 		 * infinite loop.
 		 */
 		if (arc_size <= arc_c || evicted == 0) {
+#ifdef _KERNEL
+			needfree = 0;
+#endif
 			/*
 			 * We're either no longer overflowing, or we
 			 * can't evict anything more, so we should wake
@@ -6292,18 +6405,14 @@
 static void
 arc_lowmem(void *arg __unused, int howto __unused)
 {
+	int64_t n;
 
 	mutex_enter(&arc_reclaim_lock);
+	/* XXX: Memory deficit should be passed as argument. */
+	n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
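+	/* With no measured deficit, still free a token 1/256 of the target. */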
+	needfree = (n > 0) ? n : vm_cnt.v_free_target >> 8;
 	DTRACE_PROBE1(arc__needfree, int64_t, ((int64_t)freemem - zfs_arc_free_target) * PAGESIZE);
 	cv_signal(&arc_reclaim_thread_cv);
-
-	/*
-	 * It is unsafe to block here in arbitrary threads, because we can come
-	 * here from ARC itself and may hold ARC locks and thus risk a deadlock
-	 * with ARC reclaim thread.
-	 */
-	if (curproc == pageproc)
-		(void) cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock);
 	mutex_exit(&arc_reclaim_lock);
 }
 #endif
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h	(revision 326930)
+++ sys/vm/uma.h	(working copy)
@@ -449,6 +449,16 @@
 void uma_reclaim(void);
 
 /*
+ * Reclaims unused per-CPU cache memory from the specified zone
+ *
+ * Arguments:
+ *	zone  The zone for cleanup
+ * Returns:
+ *	None
+ */
+void uma_reclaim_zone_cache(uma_zone_t zone);
+
+/*
  * Sets the alignment mask to be used for all zones requesting cache
  * alignment.  Should be called by MD boot code prior to starting VM/UMA.
  *
@@ -546,6 +556,18 @@
 int uma_zone_get_cur(uma_zone_t zone);
 
 /*
+ * Obtains the approximate size, in bytes, of the free items cached in a zone
+ *
+ * Arguments:
+ *	zone  The zone to obtain the current free size from
+ *
+ * Return:
+ *	size_t  The approximate size, in bytes, of free items cached in the zone
+ */
+size_t uma_zone_get_free_size(uma_zone_t zone);
+
+/*
  * The following two routines (uma_zone_set_init/fini)
  * are used to set the backend init/fini pair which acts on an
  * object as it becomes allocated and is placed in a slab within
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c	(revision 326930)
+++ sys/vm/uma_core.c	(working copy)
@@ -2992,6 +2992,39 @@
 }
 
 /* See uma.h */
+size_t
+uma_zone_get_free_size(uma_zone_t zone)
+{
+	uma_klink_t kl;
+	uma_bucket_t bucket;
+	int64_t nitems;
+	u_int i;
+
+	ZONE_LOCK(zone);
+	nitems = 0;
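+	/* Count items sitting free on the zone's kegs (primary zones only). */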
+	if (!(zone->uz_flags & UMA_ZONE_SECONDARY)) {
+		LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
+			nitems += kl->kl_keg->uk_free;
+		}
+	}
+	CPU_FOREACH(i) {
+		/*
+		 * See the comment in sysctl_vm_zone_stats() regarding the
+		 * safety of accessing the per-cpu caches. With the zone lock
+		 * held, it is safe, but can potentially result in stale data.
+		 */
+		bucket = zone->uz_cpu[i].uc_allocbucket;
+		if (bucket != NULL)
+			nitems += bucket->ub_cnt;
+		bucket = zone->uz_cpu[i].uc_freebucket;
+		if (bucket != NULL)
+			nitems += bucket->ub_cnt;
+	}
+	ZONE_UNLOCK(zone);
+	return (nitems * zone->uz_size);
+}
+
+/* See uma.h */
 void
 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 {
@@ -3157,6 +3190,14 @@
 }
 
 /* See uma.h */
+void
+uma_reclaim_zone_cache(uma_zone_t zone)
+{
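+	/*
+	 * Re-evaluate the bucket-disable state, then flush the zone's
+	 * per-CPU caches back into the zone.
+	 */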
+	bucket_enable();
+	cache_drain_safe(zone);
+}
+
+/* See uma.h */
 static void
 uma_reclaim_locked(bool kmem_danger)
 {
