Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F136652415
D702.id1737.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
D702.id1737.diff
View Options
Index: sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
===================================================================
--- sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
+++ sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
@@ -126,42 +126,6 @@
}
SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL);
-/*
- * The return values from kmem_free_* are only valid once the pagedaemon
- * has been initialised, before then they return 0.
- *
- * To ensure the returns are valid the caller can use a SYSINIT with
- * subsystem set to SI_SUB_KTHREAD_PAGE and an order of at least
- * SI_ORDER_SECOND.
- */
-u_int
-kmem_free_target(void)
-{
-
- return (vm_cnt.v_free_target);
-}
-
-u_int
-kmem_free_min(void)
-{
-
- return (vm_cnt.v_free_min);
-}
-
-u_int
-kmem_free_count(void)
-{
-
- return (vm_cnt.v_free_count + vm_cnt.v_cache_count);
-}
-
-u_int
-kmem_page_count(void)
-{
-
- return (vm_cnt.v_page_count);
-}
-
uint64_t
kmem_size(void)
{
@@ -169,13 +133,6 @@
return (kmem_size_val);
}
-uint64_t
-kmem_used(void)
-{
-
- return (vmem_size(kmem_arena, VMEM_ALLOC));
-}
-
static int
kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
{
Index: sys/cddl/compat/opensolaris/sys/kmem.h
===================================================================
--- sys/cddl/compat/opensolaris/sys/kmem.h
+++ sys/cddl/compat/opensolaris/sys/kmem.h
@@ -66,17 +66,6 @@
void *zfs_kmem_alloc(size_t size, int kmflags);
void zfs_kmem_free(void *buf, size_t size);
uint64_t kmem_size(void);
-uint64_t kmem_used(void);
-u_int kmem_page_count(void);
-
-/*
- * The return values from kmem_free_* are only valid once the pagedaemon
- * has been initialised, before then they return 0.
- */
-u_int kmem_free_count(void);
-u_int kmem_free_target(void);
-u_int kmem_free_min(void);
-
kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
@@ -88,6 +77,9 @@
int kmem_debugging(void);
void *calloc(size_t n, size_t s);
+#define freemem (vm_cnt.v_free_count + vm_cnt.v_cache_count)
+#define minfree vm_cnt.v_free_min
+#define heap_arena kmem_arena
#define kmem_alloc(size, kmflags) zfs_kmem_alloc((size), (kmflags))
#define kmem_zalloc(size, kmflags) zfs_kmem_alloc((size), (kmflags) | M_ZERO)
#define kmem_free(buf, size) zfs_kmem_free((buf), (size))
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -138,6 +138,7 @@
#include <sys/sdt.h>
#include <vm/vm_pageout.h>
+#include <machine/vmparam.h>
#ifdef illumos
#ifndef _KERNEL
@@ -201,7 +202,7 @@
int zfs_arc_p_min_shift = 0;
int zfs_disable_dup_eviction = 0;
uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
-u_int zfs_arc_free_target = (1 << 19); /* default before pagedaemon init only */
+u_int zfs_arc_free_target = 0;
static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS);
@@ -210,11 +211,10 @@
arc_free_target_init(void *unused __unused)
{
- zfs_arc_free_target = kmem_free_target();
+ zfs_arc_free_target = vm_pageout_wakeup_thresh;
}
SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
arc_free_target_init, NULL);
-#endif
TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
SYSCTL_DECL(_vfs_zfs);
@@ -245,15 +245,16 @@
if (err != 0 || req->newptr == NULL)
return (err);
- if (val < kmem_free_min())
+ if (val < minfree)
return (EINVAL);
- if (val > kmem_page_count())
+ if (val > vm_cnt.v_page_count)
return (EINVAL);
zfs_arc_free_target = val;
return (0);
}
+#endif
/*
* Note that buffers can be in one of 6 states:
@@ -2462,8 +2463,8 @@
if (arc_c > arc_c_min) {
uint64_t to_free;
- DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t,
- arc_c_min);
+ DTRACE_PROBE4(arc__shrink, uint64_t, arc_c, uint64_t,
+ arc_c_min, uint64_t, arc_p, uint64_t, to_free);
#ifdef _KERNEL
to_free = arc_c >> arc_shrink_shift;
#else
@@ -2479,6 +2480,10 @@
arc_c = MAX(arc_size, arc_c_min);
if (arc_p > arc_c)
arc_p = (arc_c >> 1);
+
+ DTRACE_PROBE2(arc__shrunk, uint64_t, arc_c, uint64_t,
+ arc_p);
+
ASSERT(arc_c >= arc_c_min);
ASSERT((int64_t)arc_p >= 0);
}
@@ -2503,18 +2508,13 @@
return (1);
}
- if (kmem_free_count() < zfs_arc_free_target) {
- DTRACE_PROBE2(arc__reclaim_freetarget, uint64_t,
- kmem_free_count(), uint64_t, zfs_arc_free_target);
- return (1);
- }
-
/*
* Cooperate with pagedaemon when it's time for it to scan
* and reclaim some pages.
*/
- if (vm_paging_needed()) {
- DTRACE_PROBE(arc__reclaim_paging);
+ if (freemem < zfs_arc_free_target) {
+ DTRACE_PROBE2(arc__reclaim_freemem, uint64_t,
+ freemem, uint64_t, zfs_arc_free_target);
return (1);
}
@@ -2544,7 +2544,18 @@
if (availrmem < swapfs_minfree + swapfs_reserve + extra)
return (1);
-#if defined(__i386)
+ /*
+ * Check that we have enough availrmem that memory locking (e.g., via
+ * mlock(3C) or memcntl(2)) can still succeed. (pages_pp_maximum
+ * stores the number of pages that cannot be locked; when availrmem
+ * drops below pages_pp_maximum, page locking mechanisms such as
+ * page_pp_lock() will fail.)
+ */
+ if (availrmem <= pages_pp_maximum)
+ return (1);
+
+#endif /* sun */
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
/*
* If we're on an i386 platform, it's possible that we'll exhaust the
* kernel heap space before we ever run out of available physical
@@ -2556,25 +2567,33 @@
* heap is allocated. (Or, in the calculation, if less than 1/4th is
* free)
*/
- if (btop(vmem_size(heap_arena, VMEM_FREE)) <
- (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
- return (1);
-#endif
-#else /* sun */
-#ifdef __i386__
- /* i386 has KVA limits that the raw page counts above don't consider */
- if (kmem_used() > (kmem_size() * 3) / 4) {
+ if (vmem_size(heap_arena, VMEM_FREE) <
+ (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) {
DTRACE_PROBE2(arc__reclaim_used, uint64_t,
- kmem_used(), uint64_t, (kmem_size() * 3) / 4);
+ vmem_size(heap_arena, VMEM_FREE), uint64_t,
+ (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2);
return (1);
}
#endif
+#ifdef sun
+ /*
+ * If zio data pages are being allocated out of a separate heap segment,
+ * then enforce that the size of available vmem for this arena remains
+ * above about 1/16th free.
+ *
+ * Note: The 1/16th arena free requirement was put in place
+ * to aggressively evict memory from the arc in order to avoid
+ * memory fragmentation issues.
+ */
+ if (zio_arena != NULL &&
+ vmem_size(zio_arena, VMEM_FREE) <
+ (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
+ return (1);
#endif /* sun */
-
-#else
+#else /* _KERNEL */
if (spa_get_random(100) == 0)
return (1);
-#endif
+#endif /* _KERNEL */
DTRACE_PROBE(arc__reclaim_no);
return (0);
@@ -2583,13 +2602,14 @@
extern kmem_cache_t *zio_buf_cache[];
extern kmem_cache_t *zio_data_buf_cache[];
-static void
+static void __used
arc_kmem_reap_now(arc_reclaim_strategy_t strat)
{
size_t i;
kmem_cache_t *prev_cache = NULL;
kmem_cache_t *prev_data_cache = NULL;
+ DTRACE_PROBE(arc__kmem_reap_start);
#ifdef _KERNEL
if (arc_meta_used >= arc_meta_limit) {
/*
@@ -2625,6 +2645,16 @@
}
kmem_cache_reap_now(buf_cache);
kmem_cache_reap_now(hdr_cache);
+
+#ifdef sun
+ /*
+ * Ask the vmem arena to reclaim unused memory from its
+ * quantum caches.
+ */
+ if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
+ vmem_qcache_reap(zio_arena);
+#endif
+ DTRACE_PROBE(arc__kmem_reap_end);
}
static void
@@ -2642,6 +2672,7 @@
if (arc_no_grow) {
if (last_reclaim == ARC_RECLAIM_CONS) {
+ DTRACE_PROBE(arc__reclaim_aggr_no_grow);
last_reclaim = ARC_RECLAIM_AGGR;
} else {
last_reclaim = ARC_RECLAIM_CONS;
@@ -2649,6 +2680,7 @@
} else {
arc_no_grow = TRUE;
last_reclaim = ARC_RECLAIM_AGGR;
+ DTRACE_PROBE(arc__reclaim_aggr);
membar_producer();
}
@@ -2753,6 +2785,7 @@
* cache size, increment the target cache size
*/
if (arc_size > arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
+ DTRACE_PROBE1(arc__inc_adapt, int, bytes);
atomic_add_64(&arc_c, (int64_t)bytes);
if (arc_c > arc_c_max)
arc_c = arc_c_max;
@@ -2774,20 +2807,6 @@
if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
return (1);
-#ifdef sun
-#ifdef _KERNEL
- /*
- * If zio data pages are being allocated out of a separate heap segment,
- * then enforce that the size of available vmem for this area remains
- * above about 1/32nd free.
- */
- if (type == ARC_BUFC_DATA && zio_arena != NULL &&
- vmem_size(zio_arena, VMEM_FREE) <
- (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
- return (1);
-#endif
-#endif /* sun */
-
if (arc_reclaim_needed())
return (1);
@@ -3946,20 +3965,16 @@
arc_memory_throttle(uint64_t reserve, uint64_t txg)
{
#ifdef _KERNEL
- uint64_t available_memory =
- ptoa((uintmax_t)vm_cnt.v_free_count + vm_cnt.v_cache_count);
+ uint64_t available_memory = ptob(freemem);
static uint64_t page_load = 0;
static uint64_t last_txg = 0;
-#ifdef sun
-#if defined(__i386)
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
available_memory =
- MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
+ MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE)));
#endif
-#endif /* sun */
- if (vm_cnt.v_free_count + vm_cnt.v_cache_count >
- (uint64_t)physmem * arc_lotsfree_percent / 100)
+ if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100)
return (0);
if (txg > last_txg) {
@@ -3972,7 +3987,7 @@
* continue to let page writes occur as quickly as possible.
*/
if (curproc == pageproc) {
- if (page_load > available_memory / 4)
+ if (page_load > MAX(ptob(minfree), available_memory) / 4)
return (SET_ERROR(ERESTART));
/* Note: reserve is inflated, so we deflate */
page_load += reserve / 8;
@@ -4000,8 +4015,10 @@
int error;
uint64_t anon_size;
- if (reserve > arc_c/4 && !arc_no_grow)
+ if (reserve > arc_c/4 && !arc_no_grow) {
arc_c = MIN(arc_c_max, reserve * 4);
+ DTRACE_PROBE1(arc__set_reserve, uint64_t, arc_c);
+ }
if (reserve > arc_c)
return (SET_ERROR(ENOMEM));
@@ -4055,6 +4072,7 @@
mutex_enter(&arc_lowmem_lock);
mutex_enter(&arc_reclaim_thr_lock);
needfree = 1;
+ DTRACE_PROBE(arc__needfree);
cv_signal(&arc_reclaim_thr_cv);
/*
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -76,6 +76,7 @@
__FBSDID("$FreeBSD$");
#include "opt_vm.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -89,6 +90,7 @@
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/vnode.h>
@@ -133,6 +135,10 @@
SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start,
&page_kp);
+SDT_PROVIDER_DEFINE(vm);
+SDT_PROBE_DEFINE(vm, , , vm__lowmem_cache);
+SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
+
#if !defined(NO_SWAPPING)
/* the kernel process "vm_daemon"*/
static void vm_daemon(void);
@@ -667,6 +673,7 @@
* may acquire locks and/or sleep, so they can only be invoked
* when "tries" is greater than zero.
*/
+ SDT_PROBE0(vm, , , vm__lowmem_cache);
EVENTHANDLER_INVOKE(vm_lowmem, 0);
/*
@@ -916,10 +923,11 @@
* some. We rate limit to avoid thrashing.
*/
if (vmd == &vm_dom[0] && pass > 0 &&
- lowmem_ticks + (lowmem_period * hz) < ticks) {
+ (ticks - lowmem_ticks) / hz >= lowmem_period) {
/*
* Decrease registered cache sizes.
*/
+ SDT_PROBE0(vm, , , vm__lowmem_scan);
EVENTHANDLER_INVOKE(vm_lowmem, 0);
/*
* We do this explicitly after the caches have been
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Nov 19, 6:23 PM (19 h, 44 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25662458
Default Alt Text
D702.id1737.diff (11 KB)
Attached To
Mode
D702: Improved i386 support in arc_memory_throttle
Attached
Detach File
Event Timeline
Log In to Comment