Changeset View
Standalone View
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 549 Lines • ▼ Show 20 Lines | typedef struct arc_state { | ||||
arc_state_type_t arcs_state; | arc_state_type_t arcs_state; | ||||
} arc_state_t; | } arc_state_t; | ||||
/* | /* | ||||
* Percentage that can be consumed by dnodes of ARC meta buffers. | * Percentage that can be consumed by dnodes of ARC meta buffers. | ||||
*/ | */ | ||||
int zfs_arc_meta_prune = 10000; | int zfs_arc_meta_prune = 10000; | ||||
unsigned long zfs_arc_dnode_limit_percent = 10; | unsigned long zfs_arc_dnode_limit_percent = 10; | ||||
int zfs_arc_meta_strategy = ARC_STRATEGY_META_ONLY; | int zfs_arc_meta_strategy = ARC_STRATEGY_META_BALANCED; | ||||
int zfs_arc_meta_adjust_restarts = 4096; | int zfs_arc_meta_adjust_restarts = 4096; | ||||
#define ARC_BALANCED_MIN 8*1024UL*1024UL*1024UL | |||||
markj: The description says 4GB. Where does the limit come from?
The value should be wrapped in parens. | |||||
mmacy [Author, Unsubmitted, Done] Inline Actions: I misremembered, it's obviously 8G. The value is completely arbitrary. It's just that problems have been observed on low-memory systems, but that may just have been an artifact of a buggy port. mmacy: I misremembered, it's obviously 8G. The value is completely arbitrary. It's just that problems… | |||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_meta_strategy, CTLFLAG_RWTUN, | SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_meta_strategy, CTLFLAG_RWTUN, | ||||
&zfs_arc_meta_strategy, 0, | &zfs_arc_meta_strategy, 0, | ||||
"ARC metadata reclamation strategy " | "ARC metadata reclamation strategy " | ||||
"(0 = metadata only, 1 = balance data and metadata)"); | "(0 = metadata only, 1 = balance data and metadata)"); | ||||
/* The 6 states: */ | /* The 6 states: */ | ||||
static arc_state_t ARC_anon; | static arc_state_t ARC_anon; | ||||
▲ Show 20 Lines • Show All 3,544 Lines • ▼ Show 20 Lines | while (refcount_count(&state->arcs_esize[type]) != 0) { | ||||
if (!retry) | if (!retry) | ||||
break; | break; | ||||
} | } | ||||
return (evicted); | return (evicted); | ||||
} | } | ||||
#if defined(__FreeBSD__) && defined(_KERNEL) | |||||
extern struct vfsops zfs_vfsops; | |||||
/* | /* | ||||
* Helper function for arc_prune_async() it is responsible for safely | * Helper function for arc_prune_async() it is responsible for safely | ||||
* handling the execution of a registered arc_prune_func_t. | * handling the execution of a registered arc_prune_func_t. | ||||
*/ | */ | ||||
static void | static void | ||||
arc_prune_task(void *arg) | |||||
{ | |||||
int64_t nr_scan = *(int64_t*)arg; | |||||
[Not Done] Inline Actions: nr_scan is a misleading name. vnlru_free() will free the requested number of vnodes, and may scan more than that. markj: nr_scan is a misleading name. vnlru_free() will free the requested number of vnodes, and may… | |||||
free(arg, M_TEMP); | |||||
vnlru_free(nr_scan, &zfs_vfsops); | |||||
markj [Unsubmitted, Not Done] Inline Actions: My reading of the ZoL code is that it tries to shrink all caches attached to the filesystem. vnlru_free() doesn't have the same effect; there are various UMA zones that you might want to try to exert pressure on as well, namei_zone for instance. Note that vnlru_proc() calls uma_reclaim() for this reason (though that is admittedly overkill). Maybe it's sufficient to just call vnlru_free(), but a comment should relate this to what happens on Linux. markj: My reading of the ZoL code is that it tries to shrink all caches attached to the filesystem. | |||||
} | |||||
/* | |||||
* Notify registered consumers they must drop holds on a portion of the ARC | |||||
* buffered they reference. This provides a mechanism to ensure the ARC can | |||||
* honor the arc_meta_limit and reclaim otherwise pinned ARC buffers. This | |||||
* is analogous to dnlc_reduce_cache() but more generic. | |||||
* | |||||
* This operation is performed asynchronously so it may be safely called | |||||
* in the context of the arc_reclaim_thread(). A reference is taken here | |||||
* for each registered arc_prune_t and the arc_prune_task() is responsible | |||||
* for releasing it once the registered arc_prune_func_t has completed. | |||||
*/ | |||||
static void | |||||
arc_prune_async(int64_t adjust) | |||||
{ | |||||
int64_t *adjustptr; | |||||
if ((adjustptr = malloc(sizeof(int64_t), M_TEMP, M_NOWAIT)) == NULL) | |||||
return; | |||||
*adjustptr = adjust; | |||||
taskq_dispatch(arc_prune_taskq, arc_prune_task, adjustptr, TQ_SLEEP); | |||||
ARCSTAT_BUMP(arcstat_prune); | |||||
} | |||||
#else | |||||
/* | |||||
* Helper function for arc_prune_async() it is responsible for safely | |||||
* handling the execution of a registered arc_prune_func_t. | |||||
*/ | |||||
static void | |||||
arc_prune_task(void *ptr) | arc_prune_task(void *ptr) | ||||
{ | { | ||||
arc_prune_t *ap = (arc_prune_t *)ptr; | arc_prune_t *ap = (arc_prune_t *)ptr; | ||||
arc_prune_func_t *func = ap->p_pfunc; | arc_prune_func_t *func = ap->p_pfunc; | ||||
if (func != NULL) | if (func != NULL) | ||||
func(ap->p_adjust, ap->p_private); | func(ap->p_adjust, ap->p_private); | ||||
Show All 29 Lines | if (taskq_dispatch(arc_prune_taskq, arc_prune_task, | ||||
ap, TQ_SLEEP) == TASKQID_INVALID) { | ap, TQ_SLEEP) == TASKQID_INVALID) { | ||||
refcount_remove(&ap->p_refcnt, ap->p_pfunc); | refcount_remove(&ap->p_refcnt, ap->p_pfunc); | ||||
continue; | continue; | ||||
} | } | ||||
ARCSTAT_BUMP(arcstat_prune); | ARCSTAT_BUMP(arcstat_prune); | ||||
} | } | ||||
mutex_exit(&arc_prune_mtx); | mutex_exit(&arc_prune_mtx); | ||||
} | } | ||||
#endif | |||||
/* | /* | ||||
* Evict the specified number of bytes from the state specified, | * Evict the specified number of bytes from the state specified, | ||||
* restricting eviction to the spa and type given. This function | * restricting eviction to the spa and type given. This function | ||||
* prevents us from trying to evict more from a state's list than | * prevents us from trying to evict more from a state's list than | ||||
* is "evictable", and to skip evicting altogether when passed a | * is "evictable", and to skip evicting altogether when passed a | ||||
* negative value for "bytes". In contrast, arc_evict_state() will | * negative value for "bytes". In contrast, arc_evict_state() will | ||||
* evict everything it can, when passed a negative value for "bytes". | * evict everything it can, when passed a negative value for "bytes". | ||||
▲ Show 20 Lines • Show All 150 Lines • ▼ Show 20 Lines | arc_adjust_meta_only(uint64_t meta_used) | ||||
total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA); | total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA); | ||||
return (total_evicted); | return (total_evicted); | ||||
} | } | ||||
static uint64_t | static uint64_t | ||||
arc_adjust_meta(uint64_t meta_used) | arc_adjust_meta(uint64_t meta_used) | ||||
{ | { | ||||
if (zfs_arc_meta_strategy == ARC_STRATEGY_META_ONLY) | if (zfs_arc_meta_strategy == ARC_STRATEGY_META_ONLY | ||||
#ifdef __FreeBSD__ | |||||
|| (zfs_arc_max && (zfs_arc_max < ARC_BALANCED_MIN)) | |||||
markj [Unsubmitted, Not Done] Inline Actions: Why apply this limit? markj: Why apply this limit? | |||||
mmacy [Author, Unsubmitted, Done] Inline Actions: The thinking is that for systems below some threshold we maintain the legacy behavior. If you think we can test it without the threshold, and can suggest all the zones to apply pressure to and how to apply somewhat more fine-grained pressure, that would certainly be better. mmacy: The thinking is that for systems below some threshold we maintain the legacy behavior. If you… | |||||
markj [Unsubmitted, Not Done] Inline Actions: Sure, I understand what the code is doing, but I don't know what problems you observed or why this threshold is supposed to make sense, so I have nothing useful to offer. There is no list of such zones, nor any way to be more fine-grained, though I'm hoping to fix the latter soon. That's why vnlru_proc() just calls uma_reclaim(). markj: Sure, I understand what the code is doing, but I don't know what problems you observed or why… | |||||
#endif | |||||
) | |||||
return (arc_adjust_meta_only(meta_used)); | return (arc_adjust_meta_only(meta_used)); | ||||
else | else | ||||
return (arc_adjust_meta_balanced(meta_used)); | return (arc_adjust_meta_balanced(meta_used)); | ||||
} | } | ||||
/* | /* | ||||
* Return the type of the oldest buffer in the given arc state | * Return the type of the oldest buffer in the given arc state | ||||
* | * | ||||
▲ Show 20 Lines • Show All 4,211 Lines • Show Last 20 Lines |
The description says 4GB. Where does the limit come from?
The value should be wrapped in parens.