Changeset View
Standalone View
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
Show First 20 Lines • Show All 229 Lines • ▼ Show 20 Lines | |||||
uint64_t zfs_arc_meta_min = 0; | uint64_t zfs_arc_meta_min = 0; | ||||
int zfs_arc_grow_retry = 0; | int zfs_arc_grow_retry = 0; | ||||
int zfs_arc_shrink_shift = 0; | int zfs_arc_shrink_shift = 0; | ||||
int zfs_arc_p_min_shift = 0; | int zfs_arc_p_min_shift = 0; | ||||
int zfs_disable_dup_eviction = 0; | int zfs_disable_dup_eviction = 0; | ||||
uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ | uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ | ||||
u_int zfs_arc_free_target = 0; | u_int zfs_arc_free_target = 0; | ||||
/* Absolute min for arc min / max is 16MB. */ | |||||
static uint64_t arc_abs_min = 16 << 20; | |||||
static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); | static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); | ||||
static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS); | static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS); | ||||
static int sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS); | |||||
static int sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS); | |||||
#ifdef _KERNEL | #if defined(__FreeBSD__) && defined(_KERNEL) | ||||
static void | static void | ||||
arc_free_target_init(void *unused __unused) | arc_free_target_init(void *unused __unused) | ||||
{ | { | ||||
zfs_arc_free_target = vm_pageout_wakeup_thresh; | zfs_arc_free_target = vm_pageout_wakeup_thresh; | ||||
} | } | ||||
SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, | SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, | ||||
arc_free_target_init, NULL); | arc_free_target_init, NULL); | ||||
TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); | TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); | ||||
TUNABLE_QUAD("vfs.zfs.arc_meta_min", &zfs_arc_meta_min); | TUNABLE_QUAD("vfs.zfs.arc_meta_min", &zfs_arc_meta_min); | ||||
TUNABLE_INT("vfs.zfs.arc_shrink_shift", &zfs_arc_shrink_shift); | TUNABLE_INT("vfs.zfs.arc_shrink_shift", &zfs_arc_shrink_shift); | ||||
SYSCTL_DECL(_vfs_zfs); | SYSCTL_DECL(_vfs_zfs); | ||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, | SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, CTLTYPE_U64 | CTLFLAG_RWTUN, | ||||
"Maximum ARC size"); | 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_max, "QU", "Maximum ARC size"); | ||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_RDTUN, &zfs_arc_min, 0, | SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, CTLTYPE_U64 | CTLFLAG_RWTUN, | ||||
"Minimum ARC size"); | 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_min, "QU", "Minimum ARC size"); | ||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, | SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, | ||||
&zfs_arc_average_blocksize, 0, | &zfs_arc_average_blocksize, 0, | ||||
"ARC average blocksize"); | "ARC average blocksize"); | ||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_shrink_shift, CTLFLAG_RW, | SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_shrink_shift, CTLFLAG_RW, | ||||
&arc_shrink_shift, 0, | &arc_shrink_shift, 0, | ||||
"log2(fraction of arc to reclaim)"); | "log2(fraction of arc to reclaim)"); | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 607 Lines • ▼ Show 20 Lines | struct arc_buf_hdr { | ||||
uint64_t b_spa; | uint64_t b_spa; | ||||
/* L2ARC fields. Undefined when not in L2ARC. */ | /* L2ARC fields. Undefined when not in L2ARC. */ | ||||
l2arc_buf_hdr_t b_l2hdr; | l2arc_buf_hdr_t b_l2hdr; | ||||
/* L1ARC fields. Undefined when in l2arc_only state */ | /* L1ARC fields. Undefined when in l2arc_only state */ | ||||
l1arc_buf_hdr_t b_l1hdr; | l1arc_buf_hdr_t b_l1hdr; | ||||
}; | }; | ||||
#ifdef _KERNEL | #if defined(__FreeBSD__) && defined(_KERNEL) | ||||
static int | static int | ||||
sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS) | sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
uint64_t val; | uint64_t val; | ||||
int err; | int err; | ||||
val = arc_meta_limit; | val = arc_meta_limit; | ||||
err = sysctl_handle_64(oidp, &val, 0, req); | err = sysctl_handle_64(oidp, &val, 0, req); | ||||
if (err != 0 || req->newptr == NULL) | if (err != 0 || req->newptr == NULL) | ||||
return (err); | return (err); | ||||
if (val <= 0 || val > arc_c_max) | if (val <= 0 || val > arc_c_max) | ||||
return (EINVAL); | return (EINVAL); | ||||
arc_meta_limit = val; | arc_meta_limit = val; | ||||
return (0); | return (0); | ||||
} | } | ||||
static int | |||||
sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
uint64_t val; | |||||
int err; | |||||
val = zfs_arc_max; | |||||
err = sysctl_handle_64(oidp, &val, 0, req); | |||||
if (err != 0 || req->newptr == NULL) | |||||
return (err); | |||||
if (val <= 0 || val < arc_abs_min || val > kmem_size()) | |||||
return (EINVAL); | |||||
if (val < arc_c_min) | |||||
return (EINVAL); | |||||
if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit) | |||||
return (EINVAL); | |||||
arc_c_max = val; | |||||
arc_c = arc_c_max; | |||||
arc_p = (arc_c >> 1); | |||||
if (zfs_arc_meta_limit == 0) { | |||||
/* limit meta-data to 1/4 of the arc capacity */ | |||||
arc_meta_limit = arc_c_max / 4; | |||||
} | |||||
/* if kmem_flags are set, lets try to use less memory */ | |||||
if (kmem_debugging()) | |||||
arc_c = arc_c / 2; | |||||
zfs_arc_max = arc_c; | |||||
allanjude: It might be worth calling arc_kmem_reap_now() or kmem_reap() if the user is lowering the arc_max… | |||||
Not Done Inline Actions: When I tested, it cleaned up quickly without that, so I'm not sure it's actually needed. smh: When I tested, it cleaned up quickly without that, so I'm not sure it's actually needed. | |||||
Not Done Inline Actions: In my case it did not. Frequently read files in ARC, before: Mem: 90M Active, 25M Inact, 61G Wired, 64G Free. So I lowered the arc_max: Mem: 90M Active, 25M Inact, 61G Wired, 64G Free. So the ARC shrank, but no additional memory was freed. However, if I run some sysctls I added: Mem: 125M Active, 26M Inact, 38G Wired, 87G Free. A great deal of the 'Wired' memory moves to free. This does even more: Mem: 90M Active, 25M Inact, 17G Wired, 107G Free. It is not clear to me why in arc_kmem_reap_now() we only call kmem_reap() in the i386 case. So I think we should either make lowering the arc_max call this, or make it a separate sysctl similar to the temporary one I created. It is not instantaneous like changing arc_max is, so maybe separate makes more sense. I just thought I'd mention this here, since usually the point of lowering arc_max is to free memory for another purpose. allanjude: In my case it did not:
Frequently read files in ARC:
before:
Mem: 90M Active, 25M Inact, 61G… | |||||
return (0); | |||||
} | |||||
static int | |||||
sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
uint64_t val; | |||||
int err; | |||||
val = zfs_arc_min; | |||||
err = sysctl_handle_64(oidp, &val, 0, req); | |||||
if (err != 0 || req->newptr == NULL) | |||||
return (err); | |||||
if (val <= 0 || val < arc_abs_min || val > arc_c_max) | |||||
return (EINVAL); | |||||
arc_c_min = val; | |||||
if (zfs_arc_meta_min == 0) | |||||
arc_meta_min = arc_c_min / 2; | |||||
if (arc_c < arc_c_min) | |||||
arc_c = arc_c_min; | |||||
zfs_arc_min = arc_c_min; | |||||
return (0); | |||||
} | |||||
#endif | #endif | ||||
static arc_buf_t *arc_eviction_list; | static arc_buf_t *arc_eviction_list; | ||||
static arc_buf_hdr_t arc_eviction_hdr; | static arc_buf_hdr_t arc_eviction_hdr; | ||||
#define GHOST_STATE(state) \ | #define GHOST_STATE(state) \ | ||||
((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \ | ((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \ | ||||
(state) == arc_l2c_only) | (state) == arc_l2c_only) | ||||
▲ Show 20 Lines • Show All 4,394 Lines • ▼ Show 20 Lines | #ifdef _KERNEL | ||||
/* | /* | ||||
* On architectures where the physical memory can be larger | * On architectures where the physical memory can be larger | ||||
* than the addressable space (intel in 32-bit mode), we may | * than the addressable space (intel in 32-bit mode), we may | ||||
* need to limit the cache to 1/8 of VM size. | * need to limit the cache to 1/8 of VM size. | ||||
*/ | */ | ||||
arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8); | arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8); | ||||
#endif | #endif | ||||
#endif /* illumos */ | #endif /* illumos */ | ||||
/* set min cache to 1/32 of all memory, or 16MB, whichever is more */ | /* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */ | ||||
arc_c_min = MAX(arc_c / 4, 16 << 20); | arc_c_min = MAX(arc_c / 4, arc_abs_min); | ||||
/* set max to 1/2 of all memory, or all but 1GB, whichever is more */ | /* set max to 1/2 of all memory, or all but 1GB, whichever is more */ | ||||
if (arc_c * 8 >= 1 << 30) | if (arc_c * 8 >= 1 << 30) | ||||
arc_c_max = (arc_c * 8) - (1 << 30); | arc_c_max = (arc_c * 8) - (1 << 30); | ||||
else | else | ||||
arc_c_max = arc_c_min; | arc_c_max = arc_c_min; | ||||
arc_c_max = MAX(arc_c * 5, arc_c_max); | arc_c_max = MAX(arc_c * 5, arc_c_max); | ||||
/* | /* | ||||
* In userland, there's only the memory pressure that we artificially | * In userland, there's only the memory pressure that we artificially | ||||
* create (see arc_available_memory()). Don't let arc_c get too | * create (see arc_available_memory()). Don't let arc_c get too | ||||
* small, because it can cause transactions to be larger than | * small, because it can cause transactions to be larger than | ||||
* arc_c, causing arc_tempreserve_space() to fail. | * arc_c, causing arc_tempreserve_space() to fail. | ||||
*/ | */ | ||||
#ifndef _KERNEL | #ifndef _KERNEL | ||||
arc_c_min = arc_c_max / 2; | arc_c_min = arc_c_max / 2; | ||||
#endif | #endif | ||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
/* | /* | ||||
* Allow the tunables to override our calculations if they are | * Allow the tunables to override our calculations if they are | ||||
* reasonable (ie. over 16MB) | * reasonable. | ||||
*/ | */ | ||||
if (zfs_arc_max > 16 << 20 && zfs_arc_max < kmem_size()) | if (zfs_arc_max > arc_abs_min && zfs_arc_max < kmem_size()) | ||||
arc_c_max = zfs_arc_max; | arc_c_max = zfs_arc_max; | ||||
if (zfs_arc_min > 16 << 20 && zfs_arc_min <= arc_c_max) | if (zfs_arc_min > arc_abs_min && zfs_arc_min <= arc_c_max) | ||||
arc_c_min = zfs_arc_min; | arc_c_min = zfs_arc_min; | ||||
#endif | #endif | ||||
arc_c = arc_c_max; | arc_c = arc_c_max; | ||||
arc_p = (arc_c >> 1); | arc_p = (arc_c >> 1); | ||||
/* limit meta-data to 1/4 of the arc capacity */ | /* limit meta-data to 1/4 of the arc capacity */ | ||||
arc_meta_limit = arc_c_max / 4; | arc_meta_limit = arc_c_max / 4; | ||||
▲ Show 20 Lines • Show All 1,685 Lines • Show Last 20 Lines |
It might be worth calling arc_kmem_reap_now() or kmem_reap() if the user is lowering the arc_max (as they obviously want memory back).
This cleans up and usually results in lowering the amount of 'wired' memory.
I am going to quickly experiment with that and get back to you.