Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -3851,6 +3851,11 @@
  */
 int64_t arc_swapfs_reserve = 64;
 
+typedef enum free_memory_measure_t {
+	FMM_EXCLUDE_ZONE_CACHE,
+	FMM_INCLUDE_ZONE_CACHE
+} free_memory_measure_t;
+
 /*
  * Return the amount of memory that can be consumed before reclaim will be
  * needed.  Positive if there is sufficient free memory, negative indicates
@@ -3857,7 +3862,7 @@
  * the amount of memory that needs to be freed up.
  */
 static int64_t
-arc_available_memory(void)
+arc_available_memory(free_memory_measure_t zone_measure)
 {
 	int64_t lowest = INT64_MAX;
 	int64_t n;
@@ -3981,6 +3986,26 @@
 		}
 	}
 
+	/* Some memory may sit in UMA zone cache elements; in that case
+	 * the ARC is not truly under memory pressure and may still grow.
+	 * Passing zone_measure == FMM_INCLUDE_ZONE_CACHE counts it here.
+	 */
+	if (lowest < 0 && zone_measure == FMM_INCLUDE_ZONE_CACHE) {
+		kmem_cache_t *prev_data_cache = NULL;
+		extern kmem_cache_t *zio_data_buf_cache[];
+
+		for (n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1; n >= 0; n--) {
+			if (zio_data_buf_cache[n] == prev_data_cache)
+				continue;
+			prev_data_cache = zio_data_buf_cache[n];
+			lowest += uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+			if (lowest >= 0) {
+				r = FMR_UNKNOWN;
+				break;
+			}
+		}
+	}
+
 #else	/* _KERNEL */
 	/* Every 100 calls, free a small amount */
 	if (spa_get_random(100) == 0)
@@ -4002,7 +4027,7 @@
 static boolean_t
 arc_reclaim_needed(void)
 {
-	return (arc_available_memory() < 0);
+	return (arc_available_memory(FMM_INCLUDE_ZONE_CACHE) < 0);
 }
 
 extern kmem_cache_t	*zio_buf_cache[];
@@ -4009,6 +4034,28 @@
 extern kmem_cache_t	*zio_data_buf_cache[];
 extern kmem_cache_t	*range_seg_cache;
 
+static void
+arc_drain_uma_cache(uint64_t target)
+{
+	int i;
+	kmem_cache_t *prev_data_cache = NULL;
+
+	DTRACE_PROBE(arc__drain_uma_cache_start);
+#ifdef _KERNEL
+	for (i = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1; i >= 0; i--) {
+		if (zio_data_buf_cache[i] == prev_data_cache)
+			continue;
+		prev_data_cache = zio_data_buf_cache[i];
+		if (uma_zone_get_free_size(zio_data_buf_cache[i]->kc_zone) == 0)
+			continue;
+		uma_reclaim_zone_cache(zio_data_buf_cache[i]->kc_zone);
+		if ((uint64_t)vm_cnt.v_free_count * PAGESIZE >= target)
+			break;
+	}
+#endif
+	DTRACE_PROBE(arc__drain_uma_cache_end);
+}
+
 static __noinline void
 arc_kmem_reap_now(void)
 {
@@ -4111,8 +4158,10 @@
 	 */
 	evicted = arc_adjust();
 
-	int64_t free_memory = arc_available_memory();
+	int64_t free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
 	if (free_memory < 0) {
+		uint64_t free_target =
+		    (uint64_t)freemem * PAGESIZE - free_memory;
 
 		arc_no_grow = B_TRUE;
 		arc_warm = B_TRUE;
@@ -4124,12 +4173,13 @@
 		growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
 
 		arc_kmem_reap_now();
+		arc_drain_uma_cache(free_target);
 
 		/*
 		 * If we are still low on memory, shrink the ARC
 		 * so that we have arc_shrink_min free space.
 		 */
-		free_memory = arc_available_memory();
+		free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
 
 		int64_t to_free =
 		    (arc_c >> arc_shrink_shift) - free_memory;
@@ -4138,6 +4188,8 @@
 			to_free = MAX(to_free, ptob(needfree));
 #endif
 			arc_shrink(to_free);
+			arc_kmem_reap_now();
+			arc_drain_uma_cache(free_target);
 		}
 	} else if (free_memory < arc_c >> arc_no_grow_shift) {
 		arc_no_grow = B_TRUE;
@@ -5884,20 +5936,14 @@
 static void
 arc_lowmem(void *arg __unused, int howto __unused)
 {
+	int64_t n;
 
 	mutex_enter(&arc_reclaim_lock);
 	/* XXX: Memory deficit should be passed as argument. */
-	needfree = btoc(arc_c >> arc_shrink_shift);
+	n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+	needfree = (n > 0) ? n : vm_cnt.v_free_target >> 8;
 	DTRACE_PROBE(arc__needfree);
 	cv_signal(&arc_reclaim_thread_cv);
-
-	/*
-	 * It is unsafe to block here in arbitrary threads, because we can come
-	 * here from ARC itself and may hold ARC locks and thus risk a deadlock
-	 * with ARC reclaim thread.
-	 */
-	if (curproc == pageproc)
-		(void) cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock);
 	mutex_exit(&arc_reclaim_lock);
 }
 #endif
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h
+++ sys/vm/uma.h
@@ -448,6 +448,16 @@
 void uma_reclaim(void);
 
 /*
+ * Reclaims unused per-CPU cache memory from the specified zone.
+ *
+ * Arguments:
+ *	zone  The zone whose per-CPU caches should be drained
+ * Returns:
+ *	Nothing
+ */
+void uma_reclaim_zone_cache(uma_zone_t zone);
+
+/*
  * Sets the alignment mask to be used for all zones requesting cache
  * alignment.  Should be called by MD boot code prior to starting VM/UMA.
  *
@@ -545,6 +555,18 @@
 int uma_zone_get_cur(uma_zone_t zone);
 
 /*
+ * Obtains the approximate total size, in bytes, of free items in a zone.
+ *
+ * Arguments:
+ *	zone  The zone to query
+ *
+ * Returns:
+ *	size_t  The approximate size, in bytes, of the zone's free items
+ */
+size_t uma_zone_get_free_size(uma_zone_t zone);
+
+
+/*
  * The following two routines (uma_zone_set_init/fini)
  * are used to set the backend init/fini pair which acts on an
  * object as it becomes allocated and is placed in a slab within
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -2984,6 +2984,39 @@
 }
 
 /* See uma.h */
+size_t
+uma_zone_get_free_size(uma_zone_t zone)
+{
+	uma_klink_t kl;
+	uma_bucket_t bucket;
+	int64_t nitems;
+	u_int i;
+
+	ZONE_LOCK(zone);
+	nitems = 0;
+	if (!(zone->uz_flags & UMA_ZONE_SECONDARY)) {
+		LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
+			nitems += kl->kl_keg->uk_free;
+		}
+	}
+	CPU_FOREACH(i) {
+		/*
+		 * See the comment in sysctl_vm_zone_stats() regarding the
+		 * safety of accessing the per-cpu caches.  With the zone lock
+		 * held, it is safe, but can potentially result in stale data.
+		 */
+		bucket = zone->uz_cpu[i].uc_allocbucket;
+		if (bucket != NULL)
+			nitems += bucket->ub_cnt;
+		bucket = zone->uz_cpu[i].uc_freebucket;
+		if (bucket != NULL)
+			nitems += bucket->ub_cnt;
+	}
+	ZONE_UNLOCK(zone);
+	return (nitems * zone->uz_size);
+}
+
+/* See uma.h */
 void
 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 {
@@ -3148,6 +3181,14 @@
 }
 
 /* See uma.h */
+void
+uma_reclaim_zone_cache(uma_zone_t zone)
+{
+	bucket_enable();
+	cache_drain_safe(zone);
+}
+
+/* See uma.h */
 static void
 uma_reclaim_locked(bool kmem_danger)
 {
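
Reviewer note: below is a minimal usage sketch, not part of the patch, showing how
the two new UMA primitives are intended to compose: uma_zone_get_free_size() as
the cheap query under the zone lock, uma_reclaim_zone_cache() as the per-CPU
bucket drain. The consumer function name "my_zone_trim", its zone argument, and
the 32 MB threshold are hypothetical, chosen only for illustration.

/*
 * Illustrative sketch only; "my_zone_trim" and the threshold are made up.
 * Mirrors the pattern arc_drain_uma_cache() uses in the patch: query the
 * idle size first, drain only when the zone actually caches memory.
 */
static void
my_zone_trim(uma_zone_t zone)
{
	size_t idle;

	/* Approximate bytes held in the zone's free items and buckets. */
	idle = uma_zone_get_free_size(zone);

	/*
	 * cache_drain_safe() has to visit every CPU's buckets, so only
	 * pay that cost when a meaningful amount is sitting idle.
	 */
	if (idle >= 32 * 1024 * 1024)
		uma_reclaim_zone_cache(zone);
}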