diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4 index 1c5f753d411c..4a529c43b5b0 100644 --- a/config/kernel-shrink.m4 +++ b/config/kernel-shrink.m4 @@ -1,221 +1,263 @@ dnl # dnl # 3.1 API change dnl # The super_block structure now stores a per-filesystem shrinker. dnl # This interface is preferable because it can be used to specifically dnl # target only the zfs filesystem for pruning. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK], [ ZFS_LINUX_TEST_SRC([super_block_s_shrink], [ #include int shrink(struct shrinker *s, struct shrink_control *sc) { return 0; } static const struct super_block sb __attribute__ ((unused)) = { .s_shrink.seeks = DEFAULT_SEEKS, .s_shrink.batch = 0, }; ],[]) ]) dnl # dnl # 6.7 API change dnl # s_shrink is now a pointer. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR], [ ZFS_LINUX_TEST_SRC([super_block_s_shrink_ptr], [ #include unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } static struct shrinker shrinker = { .count_objects = shrinker_cb, .scan_objects = shrinker_cb, .seeks = DEFAULT_SEEKS, }; static const struct super_block sb __attribute__ ((unused)) = { .s_shrink = &shrinker, }; ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK], [ AC_MSG_CHECKING([whether super_block has s_shrink]) ZFS_LINUX_TEST_RESULT([super_block_s_shrink], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK, 1, [have super_block s_shrink]) ],[ AC_MSG_RESULT(no) AC_MSG_CHECKING([whether super_block has s_shrink pointer]) ZFS_LINUX_TEST_RESULT([super_block_s_shrink_ptr], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK_PTR, 1, [have super_block s_shrink pointer]) ],[ AC_MSG_RESULT(no) ZFS_LINUX_TEST_ERROR([sb->s_shrink()]) ]) ]) ]) dnl # dnl # 3.12 API change dnl # The nid member was added to struct shrink_control to support dnl # NUMA-aware shrinkers. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID], [ ZFS_LINUX_TEST_SRC([shrink_control_nid], [ #include ],[ struct shrink_control sc __attribute__ ((unused)); unsigned long scnidsize __attribute__ ((unused)) = sizeof(sc.nid); ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ AC_MSG_CHECKING([whether shrink_control has nid]) ZFS_LINUX_TEST_RESULT([shrink_control_nid], [ AC_MSG_RESULT(yes) AC_DEFINE(SHRINK_CONTROL_HAS_NID, 1, [struct shrink_control has nid]) ],[ AC_MSG_RESULT(no) ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ ZFS_LINUX_TEST_SRC([register_shrinker_vararg], [ #include unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { .count_objects = shrinker_cb, .scan_objects = shrinker_cb, .seeks = DEFAULT_SEEKS, }; register_shrinker(&cache_shrinker, "vararg-reg-shrink-test"); ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [ #include int shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { .shrink = shrinker_cb, .seeks = DEFAULT_SEEKS, }; register_shrinker(&cache_shrinker); ]) ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control_split], [ #include unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { .count_objects = shrinker_cb, .scan_objects = shrinker_cb, .seeks = DEFAULT_SEEKS, }; register_shrinker(&cache_shrinker); ]) ]) +dnl # +dnl # 6.7 API change +dnl # register_shrinker has been replaced by shrinker_register. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER], [ + ZFS_LINUX_TEST_SRC([shrinker_register], [ + #include + unsigned long shrinker_cb(struct shrinker *shrink, + struct shrink_control *sc) { return 0; } + ],[ + struct shrinker cache_shrinker = { + .count_objects = shrinker_cb, + .scan_objects = shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + shrinker_register(&cache_shrinker); + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ dnl # dnl # 6.0 API change dnl # register_shrinker() becomes a var-arg function that takes dnl # a printf-style format string as args > 0 dnl # AC_MSG_CHECKING([whether new var-arg register_shrinker() exists]) ZFS_LINUX_TEST_RESULT([register_shrinker_vararg], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_REGISTER_SHRINKER_VARARG, 1, [register_shrinker is vararg]) dnl # We assume that the split shrinker callback exists if the dnl # vararg register_shrinker() exists, because the latter is dnl # a much more recent addition, and the macro test for the dnl # var-arg version only works if the callback is split AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, [cs->count_objects exists]) ],[ AC_MSG_RESULT(no) dnl # dnl # 3.0 - 3.11 API change dnl # cs->shrink(struct shrinker *, struct shrink_control *sc) dnl # AC_MSG_CHECKING([whether new 2-argument shrinker exists]) ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SINGLE_SHRINKER_CALLBACK, 1, [new shrinker callback wants 2 args]) ],[ AC_MSG_RESULT(no) dnl # dnl # 3.12 API change, dnl # cs->shrink() is logically split in to dnl # cs->count_objects() and cs->scan_objects() dnl # - AC_MSG_CHECKING([if cs->count_objects callback exists]) + AC_MSG_CHECKING( + [whether cs->count_objects callback exists]) ZFS_LINUX_TEST_RESULT( - [shrinker_cb_shrink_control_split],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, - [cs->count_objects exists]) + [shrinker_cb_shrink_control_split],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, + 
[cs->count_objects exists]) ],[ + AC_MSG_RESULT(no) + + AC_MSG_CHECKING( + [whether shrinker_register exists]) + ZFS_LINUX_TEST_RESULT([shrinker_register], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SHRINKER_REGISTER, 1, + [shrinker_register exists]) + + dnl # We assume that the split shrinker + dnl # callback exists if + dnl # shrinker_register() exists, + dnl # because the latter is a much more + dnl # recent addition, and the macro + dnl # test for shrinker_register() only + dnl # works if the callback is split + AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, + 1, [cs->count_objects exists]) + ],[ + AC_MSG_RESULT(no) ZFS_LINUX_TEST_ERROR([shrinker]) + ]) ]) ]) ]) ]) dnl # dnl # 2.6.39 API change, dnl # Shrinker adjust to use common shrink_control structure. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT], [ ZFS_LINUX_TEST_SRC([shrink_control_struct], [ #include ],[ struct shrink_control sc __attribute__ ((unused)); sc.nr_to_scan = 0; sc.gfp_mask = GFP_KERNEL; ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [ AC_MSG_CHECKING([whether struct shrink_control exists]) ZFS_LINUX_TEST_RESULT([shrink_control_struct], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SHRINK_CONTROL_STRUCT, 1, [struct shrink_control exists]) ],[ ZFS_LINUX_TEST_ERROR([shrink_control]) ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [ ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG + ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER ]) AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [ ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID ZFS_AC_KERNEL_SHRINKER_CALLBACK ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT ]) diff --git a/include/os/linux/spl/sys/shrinker.h b/include/os/linux/spl/sys/shrinker.h index d472754be4f4..bca4c850694a 100644 --- a/include/os/linux/spl/sys/shrinker.h +++ b/include/os/linux/spl/sys/shrinker.h @@ 
-1,113 +1,85 @@ /* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Brian Behlendorf . * UCRL-CODE-235197 * * This file is part of the SPL, Solaris Porting Layer. * * The SPL is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * The SPL is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . */ #ifndef _SPL_SHRINKER_H #define _SPL_SHRINKER_H #include #include /* * Due to frequent changes in the shrinker API the following - * compatibility wrappers should be used. They are as follows: + * compatibility wrapper should be used. * - * SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost); + * shrinker = spl_register_shrinker(name, countfunc, scanfunc, seek_cost); + * spl_unregister_shrinker(shrinker); * - * SPL_SHRINKER_DECLARE is used to declare a shrinker with the name varname, - * which is passed to spl_register_shrinker()/spl_unregister_shrinker(). + * spl_register_shrinker is used to create and register a shrinker with the + * given name. * The countfunc returns the number of free-able objects. * The scanfunc returns the number of objects that were freed. * The callbacks can return SHRINK_STOP if further calls can't make any more * progress. Note that a return value of SHRINK_EMPTY is currently not * supported. 
* * Example: * * static unsigned long * my_count(struct shrinker *shrink, struct shrink_control *sc) * { * ...calculate number of objects in the cache... * * return (number of objects in the cache); * } * * static unsigned long * my_scan(struct shrinker *shrink, struct shrink_control *sc) * { * ...scan objects in the cache and reclaim them... * } * - * SPL_SHRINKER_DECLARE(my_shrinker, my_count, my_scan, DEFAULT_SEEKS); + * static struct shrinker *my_shrinker; * * void my_init_func(void) { - * spl_register_shrinker(&my_shrinker); + * my_shrinker = spl_register_shrinker("my-shrinker", + * my_count, my_scan, DEFAULT_SEEKS); + * } + * + * void my_fini_func(void) { + * spl_unregister_shrinker(my_shrinker); * } */ -#ifdef HAVE_REGISTER_SHRINKER_VARARG -#define spl_register_shrinker(x) register_shrinker(x, "zfs-arc-shrinker") -#else -#define spl_register_shrinker(x) register_shrinker(x) -#endif -#define spl_unregister_shrinker(x) unregister_shrinker(x) +typedef unsigned long (*spl_shrinker_cb) + (struct shrinker *, struct shrink_control *); -/* - * Linux 3.0 to 3.11 Shrinker API Compatibility. - */ -#if defined(HAVE_SINGLE_SHRINKER_CALLBACK) -#define SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost) \ -static int \ -__ ## varname ## _wrapper(struct shrinker *shrink, struct shrink_control *sc)\ -{ \ - if (sc->nr_to_scan != 0) { \ - (void) scanfunc(shrink, sc); \ - } \ - return (countfunc(shrink, sc)); \ -} \ - \ -static struct shrinker varname = { \ - .shrink = __ ## varname ## _wrapper, \ - .seeks = seek_cost, \ -} +struct shrinker *spl_register_shrinker(const char *name, + spl_shrinker_cb countfunc, spl_shrinker_cb scanfunc, int seek_cost); +void spl_unregister_shrinker(struct shrinker *); +#ifndef SHRINK_STOP +/* 3.0-3.11 compatibility */ #define SHRINK_STOP (-1) - -/* - * Linux 3.12 and later Shrinker API Compatibility. 
- */
-#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
-#define SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost) \
-static struct shrinker varname = { \
-	.count_objects = countfunc, \
-	.scan_objects = scanfunc, \
-	.seeks = seek_cost, \
-}
-
-#else
-/*
- * Linux 2.x to 2.6.22, or a newer shrinker API has been introduced.
- */
-#error "Unknown shrinker callback"
 #endif
 
 #endif /* SPL_SHRINKER_H */
diff --git a/module/os/linux/spl/Makefile.in b/module/os/linux/spl/Makefile.in
index b2325f91b4a7..ad2dc6e3eccc 100644
--- a/module/os/linux/spl/Makefile.in
+++ b/module/os/linux/spl/Makefile.in
@@ -1,17 +1,18 @@
 $(MODULE)-objs += ../os/linux/spl/spl-atomic.o
 $(MODULE)-objs += ../os/linux/spl/spl-condvar.o
 $(MODULE)-objs += ../os/linux/spl/spl-cred.o
 $(MODULE)-objs += ../os/linux/spl/spl-err.o
 $(MODULE)-objs += ../os/linux/spl/spl-generic.o
 $(MODULE)-objs += ../os/linux/spl/spl-kmem.o
 $(MODULE)-objs += ../os/linux/spl/spl-kmem-cache.o
 $(MODULE)-objs += ../os/linux/spl/spl-kstat.o
 $(MODULE)-objs += ../os/linux/spl/spl-proc.o
 $(MODULE)-objs += ../os/linux/spl/spl-procfs-list.o
+$(MODULE)-objs += ../os/linux/spl/spl-shrinker.o
 $(MODULE)-objs += ../os/linux/spl/spl-taskq.o
 $(MODULE)-objs += ../os/linux/spl/spl-thread.o
 $(MODULE)-objs += ../os/linux/spl/spl-trace.o
 $(MODULE)-objs += ../os/linux/spl/spl-tsd.o
 $(MODULE)-objs += ../os/linux/spl/spl-vmem.o
 $(MODULE)-objs += ../os/linux/spl/spl-xdr.o
 $(MODULE)-objs += ../os/linux/spl/spl-zlib.o
diff --git a/module/os/linux/spl/spl-shrinker.c b/module/os/linux/spl/spl-shrinker.c
new file mode 100644
index 000000000000..d5c8da471cbb
--- /dev/null
+++ b/module/os/linux/spl/spl-shrinker.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2007 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf .
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL. If not, see .
+ *
+ * Solaris Porting Layer (SPL) Shrinker Implementation.
+ */
+
+#include
+#include
+
+#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
+/* 3.0-3.11: single shrink() callback, which we wrap to carry both functions */
+struct spl_shrinker_wrap {
+	struct shrinker shrinker;
+	spl_shrinker_cb countfunc;
+	spl_shrinker_cb scanfunc;
+};
+
+static int
+spl_shrinker_single_cb(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct spl_shrinker_wrap *sw = (struct spl_shrinker_wrap *)shrinker;
+
+	if (sc->nr_to_scan != 0)
+		(void) sw->scanfunc(&sw->shrinker, sc);
+	return (sw->countfunc(&sw->shrinker, sc));
+}
+#endif
+
+/*
+ * Create a shrinker with the given count/scan callbacks and seek cost and
+ * register it with the kernel. Returns the registered shrinker, to be
+ * released with spl_unregister_shrinker(), or NULL if it could not be
+ * allocated or registered.
+ */
+struct shrinker *
+spl_register_shrinker(const char *name, spl_shrinker_cb countfunc,
+    spl_shrinker_cb scanfunc, int seek_cost)
+{
+	struct shrinker *shrinker;
+
+	/* allocate shrinker */
+#if defined(HAVE_SHRINKER_REGISTER)
+	/*
+	 * 6.7: kernel will allocate the shrinker for us. The name parameter
+	 * is a printf-style format, so pass our name as data, not as format.
+	 */
+	shrinker = shrinker_alloc(0, "%s", name);
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	/* 3.12-6.6: we allocate the shrinker */
+	shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP);
+#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+	/* 3.0-3.11: allocate a wrapper */
+	struct spl_shrinker_wrap *sw =
+	    kmem_zalloc(sizeof (struct spl_shrinker_wrap), KM_SLEEP);
+	shrinker = &sw->shrinker;
+#else
+	/* 2.x-2.6.22, or a newer shrinker API has been introduced. */
+#error "Unknown shrinker API"
+#endif
+
+	if (shrinker == NULL)
+		return (NULL);
+
+	/* set callbacks */
+#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
+	sw->countfunc = countfunc;
+	sw->scanfunc = scanfunc;
+	shrinker->shrink = spl_shrinker_single_cb;
+#else
+	shrinker->count_objects = countfunc;
+	shrinker->scan_objects = scanfunc;
+#endif
+
+	/* set params */
+	shrinker->seeks = seek_cost;
+
+	/* register with kernel */
+#if defined(HAVE_SHRINKER_REGISTER)
+	shrinker_register(shrinker);
+#elif defined(HAVE_REGISTER_SHRINKER_VARARG)
+	/*
+	 * 6.0-6.6: the name is a printf-style format, so pass it as data.
+	 * configure defines HAVE_SPLIT_SHRINKER_CALLBACK together with
+	 * HAVE_REGISTER_SHRINKER_VARARG, so this shrinker was kmem_zalloc'd.
+	 */
+	if (register_shrinker(shrinker, "%s", name) != 0) {
+		kmem_free(shrinker, sizeof (struct shrinker));
+		return (NULL);
+	}
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	/* 3.12-5.x: registration can fail; don't hand back a dead shrinker */
+	if (register_shrinker(shrinker) != 0) {
+		kmem_free(shrinker, sizeof (struct shrinker));
+		return (NULL);
+	}
+#else
+	/*
+	 * 3.0-3.11: NOTE(review): register_shrinker() is believed to return
+	 * void on these kernels, leaving nothing to check - confirm.
+	 */
+	register_shrinker(shrinker);
+#endif
+
+	return (shrinker);
+}
+EXPORT_SYMBOL(spl_register_shrinker);
+
+/*
+ * Unregister a shrinker returned by spl_register_shrinker() and release
+ * the memory that was allocated for it.
+ */
+void
+spl_unregister_shrinker(struct shrinker *shrinker)
+{
+#if defined(HAVE_SHRINKER_REGISTER)
+	shrinker_free(shrinker);
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	unregister_shrinker(shrinker);
+	kmem_free(shrinker, sizeof (struct shrinker));
+#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+	unregister_shrinker(shrinker);
+	kmem_free(shrinker, sizeof (struct spl_shrinker_wrap));
+#else
+#error "Unknown shrinker API"
+#endif
+}
+EXPORT_SYMBOL(spl_unregister_shrinker);
diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c
index fc76fe0e0b5c..19540221d688 100644
--- a/module/os/linux/zfs/arc_os.c
+++ b/module/os/linux/zfs/arc_os.c
@@ -1,545 +1,548 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
* * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, Joyent, Inc. * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #include #include #include #include #include #endif #include #include #include #include #include #include #include /* * This is a limit on how many pages the ARC shrinker makes available for * eviction in response to one page allocation attempt. Note that in * practice, the kernel's shrinker can ask us to evict up to about 4x this * for one allocation attempt. * * The default limit of 10,000 (in practice, 160MB per allocation attempt * with 4K pages) limits the amount of time spent attempting to reclaim ARC * memory to less than 100ms per allocation attempt, even with a small * average compressed block size of ~8KB. * * See also the comment in arc_shrinker_count(). * Set to 0 to disable limit. */ int zfs_arc_shrinker_limit = 10000; #ifdef CONFIG_MEMORY_HOTPLUG static struct notifier_block arc_hotplug_callback_mem_nb; #endif /* * Return a default max arc size based on the amount of physical memory. */ uint64_t arc_default_max(uint64_t min, uint64_t allmem) { /* Default to 1/2 of all memory. */ return (MAX(allmem / 2, min)); } #ifdef _KERNEL /* * Return maximum amount of memory that we could possibly use. 
Reduced * to half of all memory in user space which is primarily used for testing. */ uint64_t arc_all_memory(void) { #ifdef CONFIG_HIGHMEM return (ptob(zfs_totalram_pages - zfs_totalhigh_pages)); #else return (ptob(zfs_totalram_pages)); #endif /* CONFIG_HIGHMEM */ } /* * Return the amount of memory that is considered free. In user space * which is primarily used for testing we pretend that free memory ranges * from 0-20% of all memory. */ uint64_t arc_free_memory(void) { #ifdef CONFIG_HIGHMEM struct sysinfo si; si_meminfo(&si); return (ptob(si.freeram - si.freehigh)); #else return (ptob(nr_free_pages() + nr_inactive_file_pages())); #endif /* CONFIG_HIGHMEM */ } /* * Return the amount of memory that can be consumed before reclaim will be * needed. Positive if there is sufficient free memory, negative indicates * the amount of memory that needs to be freed up. */ int64_t arc_available_memory(void) { return (arc_free_memory() - arc_sys_free); } static uint64_t arc_evictable_memory(void) { int64_t asize = aggsum_value(&arc_sums.arcstat_size); uint64_t arc_clean = zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) + zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) + zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_DATA]) + zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]); uint64_t arc_dirty = MAX((int64_t)asize - (int64_t)arc_clean, 0); /* * Scale reported evictable memory in proportion to page cache, cap * at specified min/max. */ uint64_t min = (ptob(nr_file_pages()) / 100) * zfs_arc_pc_percent; min = MAX(arc_c_min, MIN(arc_c_max, min)); if (arc_dirty >= min) return (arc_clean); return (MAX((int64_t)asize - (int64_t)min, 0)); } /* * The _count() function returns the number of free-able objects. * The _scan() function returns the number of objects that were freed. 
*/ static unsigned long arc_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { /* * __GFP_FS won't be set if we are called from ZFS code (see * kmem_flags_convert(), which removes it). To avoid a deadlock, we * don't allow evicting in this case. We return 0 rather than * SHRINK_STOP so that the shrinker logic doesn't accumulate a * deficit against us. */ if (!(sc->gfp_mask & __GFP_FS)) { return (0); } /* * This code is reached in the "direct reclaim" case, where the * kernel (outside ZFS) is trying to allocate a page, and the system * is low on memory. * * The kernel's shrinker code doesn't understand how many pages the * ARC's callback actually frees, so it may ask the ARC to shrink a * lot for one page allocation. This is problematic because it may * take a long time, thus delaying the page allocation, and because * it may force the ARC to unnecessarily shrink very small. * * Therefore, we limit the amount of data that we say is evictable, * which limits the amount that the shrinker will ask us to evict for * one page allocation attempt. * * In practice, we may be asked to shrink 4x the limit to satisfy one * page allocation, before the kernel's shrinker code gives up on us. * When that happens, we rely on the kernel code to find the pages * that we freed before invoking the OOM killer. This happens in * __alloc_pages_slowpath(), which retries and finds the pages we * freed when it calls get_page_from_freelist(). * * See also the comment above zfs_arc_shrinker_limit. */ int64_t limit = zfs_arc_shrinker_limit != 0 ? 
zfs_arc_shrinker_limit : INT64_MAX; return (MIN(limit, btop((int64_t)arc_evictable_memory()))); } static unsigned long arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { ASSERT((sc->gfp_mask & __GFP_FS) != 0); /* The arc is considered warm once reclaim has occurred */ if (unlikely(arc_warm == B_FALSE)) arc_warm = B_TRUE; /* * Evict the requested number of pages by reducing arc_c and waiting * for the requested amount of data to be evicted. */ arc_reduce_target_size(ptob(sc->nr_to_scan)); arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE); if (current->reclaim_state != NULL) #ifdef HAVE_RECLAIM_STATE_RECLAIMED current->reclaim_state->reclaimed += sc->nr_to_scan; #else current->reclaim_state->reclaimed_slab += sc->nr_to_scan; #endif /* * We are experiencing memory pressure which the arc_evict_zthr was * unable to keep up with. Set arc_no_grow to briefly pause arc * growth to avoid compounding the memory pressure. */ arc_no_grow = B_TRUE; /* * When direct reclaim is observed it usually indicates a rapid * increase in memory pressure. This occurs because the kswapd * threads were unable to asynchronously keep enough free memory * available. */ if (current_is_kswapd()) { ARCSTAT_BUMP(arcstat_memory_indirect_count); } else { ARCSTAT_BUMP(arcstat_memory_direct_count); } return (sc->nr_to_scan); } -SPL_SHRINKER_DECLARE(arc_shrinker, - arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS); +static struct shrinker *arc_shrinker = NULL; int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) { uint64_t free_memory = arc_free_memory(); if (free_memory > arc_all_memory() * arc_lotsfree_percent / 100) return (0); if (txg > spa->spa_lowmem_last_txg) { spa->spa_lowmem_last_txg = txg; spa->spa_lowmem_page_load = 0; } /* * If we are in pageout, we know that memory is already tight, * the arc is already going to be evicting, so we just want to * continue to let page writes occur as quickly as possible. 
*/ if (current_is_kswapd()) { if (spa->spa_lowmem_page_load > MAX(arc_sys_free / 4, free_memory) / 4) { DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim); return (SET_ERROR(ERESTART)); } /* Note: reserve is inflated, so we deflate */ atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8); return (0); } else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) { /* memory is low, delay before restarting */ ARCSTAT_INCR(arcstat_memory_throttle_count, 1); DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim); return (SET_ERROR(EAGAIN)); } spa->spa_lowmem_page_load = 0; return (0); } static void arc_set_sys_free(uint64_t allmem) { /* * The ARC tries to keep at least this much memory available for the * system. This gives the ARC time to shrink in response to memory * pressure, before running completely out of memory and invoking the * direct-reclaim ARC shrinker. * * This should be more than twice high_wmark_pages(), so that * arc_wait_for_eviction() will wait until at least the * high_wmark_pages() are free (see arc_evict_state_impl()). * * Note: Even when the system is very low on memory, the kernel's * shrinker code may only ask for one "batch" of pages (512KB) to be * evicted. If concurrent allocations consume these pages, there may * still be insufficient free pages, and the OOM killer takes action. * * By setting arc_sys_free large enough, and having * arc_wait_for_eviction() wait until there is at least arc_sys_free/2 * free memory, it is much less likely that concurrent allocations can * consume all the memory that was evicted before checking for * OOM. * * It's hard to iterate the zones from a linux kernel module, which * makes it difficult to determine the watermark dynamically. Instead * we compute the maximum high watermark for this system, based * on the amount of memory, assuming default parameters on Linux kernel * 5.3. */ /* * Base wmark_low is 4 * the square root of Kbytes of RAM. */ long wmark = 4 * int_sqrt(allmem/1024) * 1024; /* * Clamp to between 128K and 64MB. 
*/ wmark = MAX(wmark, 128 * 1024); wmark = MIN(wmark, 64 * 1024 * 1024); /* * watermark_boost can increase the wmark by up to 150%. */ wmark += wmark * 150 / 100; /* * arc_sys_free needs to be more than 2x the watermark, because * arc_wait_for_eviction() waits for half of arc_sys_free. Bump this up * to 3x to ensure we're above it. */ arc_sys_free = wmark * 3 + allmem / 32; } void arc_lowmem_init(void) { uint64_t allmem = arc_all_memory(); /* * Register a shrinker to support synchronous (direct) memory * reclaim from the arc. This is done to prevent kswapd from * swapping out pages when it is preferable to shrink the arc. */ - spl_register_shrinker(&arc_shrinker); + arc_shrinker = spl_register_shrinker("zfs-arc-shrinker", + arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS); + VERIFY(arc_shrinker); + arc_set_sys_free(allmem); } void arc_lowmem_fini(void) { - spl_unregister_shrinker(&arc_shrinker); + spl_unregister_shrinker(arc_shrinker); + arc_shrinker = NULL; } int param_set_arc_long(const char *buf, zfs_kernel_param_t *kp) { int error; error = param_set_long(buf, kp); if (error < 0) return (SET_ERROR(error)); arc_tuning_update(B_TRUE); return (0); } int param_set_arc_min(const char *buf, zfs_kernel_param_t *kp) { return (param_set_arc_long(buf, kp)); } int param_set_arc_max(const char *buf, zfs_kernel_param_t *kp) { return (param_set_arc_long(buf, kp)); } int param_set_arc_int(const char *buf, zfs_kernel_param_t *kp) { int error; error = param_set_int(buf, kp); if (error < 0) return (SET_ERROR(error)); arc_tuning_update(B_TRUE); return (0); } #ifdef CONFIG_MEMORY_HOTPLUG /* ARGSUSED */ static int arc_hotplug_callback(struct notifier_block *self, unsigned long action, void *arg) { uint64_t allmem = arc_all_memory(); if (action != MEM_ONLINE) return (NOTIFY_OK); arc_set_limits(allmem); #ifdef __LP64__ if (zfs_dirty_data_max_max == 0) zfs_dirty_data_max_max = MIN(4ULL * 1024 * 1024 * 1024, allmem * zfs_dirty_data_max_max_percent / 100); #else if 
(zfs_dirty_data_max_max == 0) zfs_dirty_data_max_max = MIN(1ULL * 1024 * 1024 * 1024, allmem * zfs_dirty_data_max_max_percent / 100); #endif arc_set_sys_free(allmem); return (NOTIFY_OK); } #endif void arc_register_hotplug(void) { #ifdef CONFIG_MEMORY_HOTPLUG arc_hotplug_callback_mem_nb.notifier_call = arc_hotplug_callback; /* There is no significance to the value 100 */ arc_hotplug_callback_mem_nb.priority = 100; register_memory_notifier(&arc_hotplug_callback_mem_nb); #endif } void arc_unregister_hotplug(void) { #ifdef CONFIG_MEMORY_HOTPLUG unregister_memory_notifier(&arc_hotplug_callback_mem_nb); #endif } #else /* _KERNEL */ int64_t arc_available_memory(void) { int64_t lowest = INT64_MAX; /* Every 100 calls, free a small amount */ if (random_in_range(100) == 0) lowest = -1024; return (lowest); } int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) { return (0); } uint64_t arc_all_memory(void) { return (ptob(physmem) / 2); } uint64_t arc_free_memory(void) { return (random_in_range(arc_all_memory() * 20 / 100)); } void arc_register_hotplug(void) { } void arc_unregister_hotplug(void) { } #endif /* _KERNEL */ /* * Helper function for arc_prune_async() it is responsible for safely * handling the execution of a registered arc_prune_func_t. */ static void arc_prune_task(void *ptr) { arc_prune_t *ap = (arc_prune_t *)ptr; arc_prune_func_t *func = ap->p_pfunc; if (func != NULL) func(ap->p_adjust, ap->p_private); zfs_refcount_remove(&ap->p_refcnt, func); } /* * Notify registered consumers they must drop holds on a portion of the ARC * buffered they reference. This provides a mechanism to ensure the ARC can * honor the arc_meta_limit and reclaim otherwise pinned ARC buffers. This * is analogous to dnlc_reduce_cache() but more generic. * * This operation is performed asynchronously so it may be safely called * in the context of the arc_reclaim_thread(). 
A reference is taken here * for each registered arc_prune_t and the arc_prune_task() is responsible * for releasing it once the registered arc_prune_func_t has completed. */ void arc_prune_async(int64_t adjust) { arc_prune_t *ap; mutex_enter(&arc_prune_mtx); for (ap = list_head(&arc_prune_list); ap != NULL; ap = list_next(&arc_prune_list, ap)) { if (zfs_refcount_count(&ap->p_refcnt) >= 2) continue; zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc); ap->p_adjust = adjust; if (taskq_dispatch(arc_prune_taskq, arc_prune_task, ap, TQ_SLEEP) == TASKQID_INVALID) { zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc); continue; } ARCSTAT_BUMP(arcstat_prune); } mutex_exit(&arc_prune_mtx); } /* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW, "Limit on number of pages that ARC shrinker can reclaim at once"); /* END CSTYLED */