D702.id1737.diff

Index: sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
===================================================================
--- sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
+++ sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
@@ -126,42 +126,6 @@
}
SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL);
-/*
- * The return values from kmem_free_* are only valid once the pagedaemon
- * has been initialised, before then they return 0.
- *
- * To ensure the returns are valid the caller can use a SYSINIT with
- * subsystem set to SI_SUB_KTHREAD_PAGE and an order of at least
- * SI_ORDER_SECOND.
- */
-u_int
-kmem_free_target(void)
-{
-
- return (vm_cnt.v_free_target);
-}
-
-u_int
-kmem_free_min(void)
-{
-
- return (vm_cnt.v_free_min);
-}
-
-u_int
-kmem_free_count(void)
-{
-
- return (vm_cnt.v_free_count + vm_cnt.v_cache_count);
-}
-
-u_int
-kmem_page_count(void)
-{
-
- return (vm_cnt.v_page_count);
-}
-
uint64_t
kmem_size(void)
{
@@ -169,13 +133,6 @@
return (kmem_size_val);
}
-uint64_t
-kmem_used(void)
-{
-
- return (vmem_size(kmem_arena, VMEM_ALLOC));
-}
-
static int
kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
{
Index: sys/cddl/compat/opensolaris/sys/kmem.h
===================================================================
--- sys/cddl/compat/opensolaris/sys/kmem.h
+++ sys/cddl/compat/opensolaris/sys/kmem.h
@@ -66,17 +66,6 @@
void *zfs_kmem_alloc(size_t size, int kmflags);
void zfs_kmem_free(void *buf, size_t size);
uint64_t kmem_size(void);
-uint64_t kmem_used(void);
-u_int kmem_page_count(void);
-
-/*
- * The return values from kmem_free_* are only valid once the pagedaemon
- * has been initialised, before then they return 0.
- */
-u_int kmem_free_count(void);
-u_int kmem_free_target(void);
-u_int kmem_free_min(void);
-
kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
@@ -88,6 +77,9 @@
int kmem_debugging(void);
void *calloc(size_t n, size_t s);
+#define freemem (vm_cnt.v_free_count + vm_cnt.v_cache_count)
+#define minfree vm_cnt.v_free_min
+#define heap_arena kmem_arena
#define kmem_alloc(size, kmflags) zfs_kmem_alloc((size), (kmflags))
#define kmem_zalloc(size, kmflags) zfs_kmem_alloc((size), (kmflags) | M_ZERO)
#define kmem_free(buf, size) zfs_kmem_free((buf), (size))
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -138,6 +138,7 @@
#include <sys/sdt.h>
#include <vm/vm_pageout.h>
+#include <machine/vmparam.h>
#ifdef illumos
#ifndef _KERNEL
@@ -201,7 +202,7 @@
int zfs_arc_p_min_shift = 0;
int zfs_disable_dup_eviction = 0;
uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
-u_int zfs_arc_free_target = (1 << 19); /* default before pagedaemon init only */
+u_int zfs_arc_free_target = 0;
static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS);
@@ -210,11 +211,10 @@
arc_free_target_init(void *unused __unused)
{
- zfs_arc_free_target = kmem_free_target();
+ zfs_arc_free_target = vm_pageout_wakeup_thresh;
}
SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
arc_free_target_init, NULL);
-#endif
TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
SYSCTL_DECL(_vfs_zfs);
@@ -245,15 +245,16 @@
if (err != 0 || req->newptr == NULL)
return (err);
- if (val < kmem_free_min())
+ if (val < minfree)
return (EINVAL);
- if (val > kmem_page_count())
+ if (val > vm_cnt.v_page_count)
return (EINVAL);
zfs_arc_free_target = val;
return (0);
}
+#endif
/*
* Note that buffers can be in one of 6 states:
@@ -2462,8 +2463,8 @@
if (arc_c > arc_c_min) {
uint64_t to_free;
- DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t,
- arc_c_min);
+ DTRACE_PROBE4(arc__shrink, uint64_t, arc_c, uint64_t,
+ arc_c_min, uint64_t, arc_p, uint64_t, to_free);
#ifdef _KERNEL
to_free = arc_c >> arc_shrink_shift;
#else
@@ -2479,6 +2480,10 @@
arc_c = MAX(arc_size, arc_c_min);
if (arc_p > arc_c)
arc_p = (arc_c >> 1);
+
+ DTRACE_PROBE2(arc__shrunk, uint64_t, arc_c, uint64_t,
+ arc_p);
+
ASSERT(arc_c >= arc_c_min);
ASSERT((int64_t)arc_p >= 0);
}
@@ -2503,18 +2508,13 @@
return (1);
}
- if (kmem_free_count() < zfs_arc_free_target) {
- DTRACE_PROBE2(arc__reclaim_freetarget, uint64_t,
- kmem_free_count(), uint64_t, zfs_arc_free_target);
- return (1);
- }
-
/*
* Cooperate with pagedaemon when it's time for it to scan
* and reclaim some pages.
*/
- if (vm_paging_needed()) {
- DTRACE_PROBE(arc__reclaim_paging);
+ if (freemem < zfs_arc_free_target) {
+ DTRACE_PROBE2(arc__reclaim_freemem, uint64_t,
+ freemem, uint64_t, zfs_arc_free_target);
return (1);
}
@@ -2544,7 +2544,18 @@
if (availrmem < swapfs_minfree + swapfs_reserve + extra)
return (1);
-#if defined(__i386)
+ /*
+ * Check that we have enough availrmem that memory locking (e.g., via
+ * mlock(3C) or memcntl(2)) can still succeed. (pages_pp_maximum
+ * stores the number of pages that cannot be locked; when availrmem
+ * drops below pages_pp_maximum, page locking mechanisms such as
+ * page_pp_lock() will fail.)
+ */
+ if (availrmem <= pages_pp_maximum)
+ return (1);
+
+#endif /* sun */
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
/*
* If we're on an i386 platform, it's possible that we'll exhaust the
* kernel heap space before we ever run out of available physical
@@ -2556,25 +2567,33 @@
* heap is allocated. (Or, in the calculation, if less than 1/4th is
* free)
*/
- if (btop(vmem_size(heap_arena, VMEM_FREE)) <
- (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
- return (1);
-#endif
-#else /* sun */
-#ifdef __i386__
- /* i386 has KVA limits that the raw page counts above don't consider */
- if (kmem_used() > (kmem_size() * 3) / 4) {
+ if (vmem_size(heap_arena, VMEM_FREE) <
+ (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) {
DTRACE_PROBE2(arc__reclaim_used, uint64_t,
- kmem_used(), uint64_t, (kmem_size() * 3) / 4);
+ vmem_size(heap_arena, VMEM_FREE), uint64_t,
+ (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2);
return (1);
}
#endif
+#ifdef sun
+ /*
+ * If zio data pages are being allocated out of a separate heap segment,
+ * then enforce that the size of available vmem for this arena remains
+ * above about 1/16th free.
+ *
+ * Note: The 1/16th arena free requirement was put in place
+ * to aggressively evict memory from the arc in order to avoid
+ * memory fragmentation issues.
+ */
+ if (zio_arena != NULL &&
+ vmem_size(zio_arena, VMEM_FREE) <
+ (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
+ return (1);
#endif /* sun */
-
-#else
+#else /* _KERNEL */
if (spa_get_random(100) == 0)
return (1);
-#endif
+#endif /* _KERNEL */
DTRACE_PROBE(arc__reclaim_no);
return (0);
@@ -2583,13 +2602,14 @@
extern kmem_cache_t *zio_buf_cache[];
extern kmem_cache_t *zio_data_buf_cache[];
-static void
+static void __used
arc_kmem_reap_now(arc_reclaim_strategy_t strat)
{
size_t i;
kmem_cache_t *prev_cache = NULL;
kmem_cache_t *prev_data_cache = NULL;
+ DTRACE_PROBE(arc__kmem_reap_start);
#ifdef _KERNEL
if (arc_meta_used >= arc_meta_limit) {
/*
@@ -2625,6 +2645,16 @@
}
kmem_cache_reap_now(buf_cache);
kmem_cache_reap_now(hdr_cache);
+
+#ifdef sun
+ /*
+ * Ask the vmem arena to reclaim unused memory from its
+ * quantum caches.
+ */
+ if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
+ vmem_qcache_reap(zio_arena);
+#endif
+ DTRACE_PROBE(arc__kmem_reap_end);
}
static void
@@ -2642,6 +2672,7 @@
if (arc_no_grow) {
if (last_reclaim == ARC_RECLAIM_CONS) {
+ DTRACE_PROBE(arc__reclaim_aggr_no_grow);
last_reclaim = ARC_RECLAIM_AGGR;
} else {
last_reclaim = ARC_RECLAIM_CONS;
@@ -2649,6 +2680,7 @@
} else {
arc_no_grow = TRUE;
last_reclaim = ARC_RECLAIM_AGGR;
+ DTRACE_PROBE(arc__reclaim_aggr);
membar_producer();
}
@@ -2753,6 +2785,7 @@
* cache size, increment the target cache size
*/
if (arc_size > arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
+ DTRACE_PROBE1(arc__inc_adapt, int, bytes);
atomic_add_64(&arc_c, (int64_t)bytes);
if (arc_c > arc_c_max)
arc_c = arc_c_max;
@@ -2774,20 +2807,6 @@
if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
return (1);
-#ifdef sun
-#ifdef _KERNEL
- /*
- * If zio data pages are being allocated out of a separate heap segment,
- * then enforce that the size of available vmem for this area remains
- * above about 1/32nd free.
- */
- if (type == ARC_BUFC_DATA && zio_arena != NULL &&
- vmem_size(zio_arena, VMEM_FREE) <
- (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
- return (1);
-#endif
-#endif /* sun */
-
if (arc_reclaim_needed())
return (1);
@@ -3946,20 +3965,16 @@
arc_memory_throttle(uint64_t reserve, uint64_t txg)
{
#ifdef _KERNEL
- uint64_t available_memory =
- ptoa((uintmax_t)vm_cnt.v_free_count + vm_cnt.v_cache_count);
+ uint64_t available_memory = ptob(freemem);
static uint64_t page_load = 0;
static uint64_t last_txg = 0;
-#ifdef sun
-#if defined(__i386)
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
available_memory =
- MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
+ MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE)));
#endif
-#endif /* sun */
- if (vm_cnt.v_free_count + vm_cnt.v_cache_count >
- (uint64_t)physmem * arc_lotsfree_percent / 100)
+ if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100)
return (0);
if (txg > last_txg) {
@@ -3972,7 +3987,7 @@
* continue to let page writes occur as quickly as possible.
*/
if (curproc == pageproc) {
- if (page_load > available_memory / 4)
+ if (page_load > MAX(ptob(minfree), available_memory) / 4)
return (SET_ERROR(ERESTART));
/* Note: reserve is inflated, so we deflate */
page_load += reserve / 8;
@@ -4000,8 +4015,10 @@
int error;
uint64_t anon_size;
- if (reserve > arc_c/4 && !arc_no_grow)
+ if (reserve > arc_c/4 && !arc_no_grow) {
arc_c = MIN(arc_c_max, reserve * 4);
+ DTRACE_PROBE1(arc__set_reserve, uint64_t, arc_c);
+ }
if (reserve > arc_c)
return (SET_ERROR(ENOMEM));
@@ -4055,6 +4072,7 @@
mutex_enter(&arc_lowmem_lock);
mutex_enter(&arc_reclaim_thr_lock);
needfree = 1;
+ DTRACE_PROBE(arc__needfree);
cv_signal(&arc_reclaim_thr_cv);
/*
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -76,6 +76,7 @@
__FBSDID("$FreeBSD$");
#include "opt_vm.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -89,6 +90,7 @@
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/vnode.h>
@@ -133,6 +135,10 @@
SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start,
&page_kp);
+SDT_PROVIDER_DEFINE(vm);
+SDT_PROBE_DEFINE(vm, , , vm__lowmem_cache);
+SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
+
#if !defined(NO_SWAPPING)
/* the kernel process "vm_daemon"*/
static void vm_daemon(void);
@@ -667,6 +673,7 @@
* may acquire locks and/or sleep, so they can only be invoked
* when "tries" is greater than zero.
*/
+ SDT_PROBE0(vm, , , vm__lowmem_cache);
EVENTHANDLER_INVOKE(vm_lowmem, 0);
/*
@@ -916,10 +923,11 @@
* some. We rate limit to avoid thrashing.
*/
if (vmd == &vm_dom[0] && pass > 0 &&
- lowmem_ticks + (lowmem_period * hz) < ticks) {
+ (ticks - lowmem_ticks) / hz >= lowmem_period) {
/*
* Decrease registered cache sizes.
*/
+ SDT_PROBE0(vm, , , vm__lowmem_scan);
EVENTHANDLER_INVOKE(vm_lowmem, 0);
/*
* We do this explicitly after the caches have been
