diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4 index 1c5f753d411c..4a529c43b5b0 100644 --- a/config/kernel-shrink.m4 +++ b/config/kernel-shrink.m4 @@ -1,221 +1,263 @@ dnl # dnl # 3.1 API change dnl # The super_block structure now stores a per-filesystem shrinker. dnl # This interface is preferable because it can be used to specifically dnl # target only the zfs filesystem for pruning. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK], [ ZFS_LINUX_TEST_SRC([super_block_s_shrink], [ #include int shrink(struct shrinker *s, struct shrink_control *sc) { return 0; } static const struct super_block sb __attribute__ ((unused)) = { .s_shrink.seeks = DEFAULT_SEEKS, .s_shrink.batch = 0, }; ],[]) ]) dnl # dnl # 6.7 API change dnl # s_shrink is now a pointer. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR], [ ZFS_LINUX_TEST_SRC([super_block_s_shrink_ptr], [ #include unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } static struct shrinker shrinker = { .count_objects = shrinker_cb, .scan_objects = shrinker_cb, .seeks = DEFAULT_SEEKS, }; static const struct super_block sb __attribute__ ((unused)) = { .s_shrink = &shrinker, }; ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK], [ AC_MSG_CHECKING([whether super_block has s_shrink]) ZFS_LINUX_TEST_RESULT([super_block_s_shrink], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK, 1, [have super_block s_shrink]) ],[ AC_MSG_RESULT(no) AC_MSG_CHECKING([whether super_block has s_shrink pointer]) ZFS_LINUX_TEST_RESULT([super_block_s_shrink_ptr], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK_PTR, 1, [have super_block s_shrink pointer]) ],[ AC_MSG_RESULT(no) ZFS_LINUX_TEST_ERROR([sb->s_shrink()]) ]) ]) ]) dnl # dnl # 3.12 API change dnl # The nid member was added to struct shrink_control to support dnl # NUMA-aware shrinkers. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID], [ ZFS_LINUX_TEST_SRC([shrink_control_nid], [ #include ],[ struct shrink_control sc __attribute__ ((unused)); unsigned long scnidsize __attribute__ ((unused)) = sizeof(sc.nid); ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ AC_MSG_CHECKING([whether shrink_control has nid]) ZFS_LINUX_TEST_RESULT([shrink_control_nid], [ AC_MSG_RESULT(yes) AC_DEFINE(SHRINK_CONTROL_HAS_NID, 1, [struct shrink_control has nid]) ],[ AC_MSG_RESULT(no) ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ ZFS_LINUX_TEST_SRC([register_shrinker_vararg], [ #include unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { .count_objects = shrinker_cb, .scan_objects = shrinker_cb, .seeks = DEFAULT_SEEKS, }; register_shrinker(&cache_shrinker, "vararg-reg-shrink-test"); ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [ #include int shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { .shrink = shrinker_cb, .seeks = DEFAULT_SEEKS, }; register_shrinker(&cache_shrinker); ]) ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control_split], [ #include unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { .count_objects = shrinker_cb, .scan_objects = shrinker_cb, .seeks = DEFAULT_SEEKS, }; register_shrinker(&cache_shrinker); ]) ]) +dnl # +dnl # 6.7 API change +dnl # register_shrinker has been replaced by shrinker_register. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER], [ + ZFS_LINUX_TEST_SRC([shrinker_register], [ + #include + unsigned long shrinker_cb(struct shrinker *shrink, + struct shrink_control *sc) { return 0; } + ],[ + struct shrinker cache_shrinker = { + .count_objects = shrinker_cb, + .scan_objects = shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + shrinker_register(&cache_shrinker); + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ dnl # dnl # 6.0 API change dnl # register_shrinker() becomes a var-arg function that takes dnl # a printf-style format string as args > 0 dnl # AC_MSG_CHECKING([whether new var-arg register_shrinker() exists]) ZFS_LINUX_TEST_RESULT([register_shrinker_vararg], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_REGISTER_SHRINKER_VARARG, 1, [register_shrinker is vararg]) dnl # We assume that the split shrinker callback exists if the dnl # vararg register_shrinker() exists, because the latter is dnl # a much more recent addition, and the macro test for the dnl # var-arg version only works if the callback is split AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, [cs->count_objects exists]) ],[ AC_MSG_RESULT(no) dnl # dnl # 3.0 - 3.11 API change dnl # cs->shrink(struct shrinker *, struct shrink_control *sc) dnl # AC_MSG_CHECKING([whether new 2-argument shrinker exists]) ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SINGLE_SHRINKER_CALLBACK, 1, [new shrinker callback wants 2 args]) ],[ AC_MSG_RESULT(no) dnl # dnl # 3.12 API change, dnl # cs->shrink() is logically split in to dnl # cs->count_objects() and cs->scan_objects() dnl # - AC_MSG_CHECKING([if cs->count_objects callback exists]) + AC_MSG_CHECKING( + [whether cs->count_objects callback exists]) ZFS_LINUX_TEST_RESULT( - [shrinker_cb_shrink_control_split],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, - [cs->count_objects exists]) + [shrinker_cb_shrink_control_split],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, + 
[cs->count_objects exists]) ],[ + AC_MSG_RESULT(no) + + AC_MSG_CHECKING( + [whether shrinker_register exists]) + ZFS_LINUX_TEST_RESULT([shrinker_register], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SHRINKER_REGISTER, 1, + [shrinker_register exists]) + + dnl # We assume that the split shrinker + dnl # callback exists if + dnl # shrinker_register() exists, + dnl # because the latter is a much more + dnl # recent addition, and the macro + dnl # test for shrinker_register() only + dnl # works if the callback is split + AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, + 1, [cs->count_objects exists]) + ],[ + AC_MSG_RESULT(no) ZFS_LINUX_TEST_ERROR([shrinker]) + ]) ]) ]) ]) ]) dnl # dnl # 2.6.39 API change, dnl # Shrinker adjust to use common shrink_control structure. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT], [ ZFS_LINUX_TEST_SRC([shrink_control_struct], [ #include ],[ struct shrink_control sc __attribute__ ((unused)); sc.nr_to_scan = 0; sc.gfp_mask = GFP_KERNEL; ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [ AC_MSG_CHECKING([whether struct shrink_control exists]) ZFS_LINUX_TEST_RESULT([shrink_control_struct], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SHRINK_CONTROL_STRUCT, 1, [struct shrink_control exists]) ],[ ZFS_LINUX_TEST_ERROR([shrink_control]) ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [ ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG + ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER ]) AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [ ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID ZFS_AC_KERNEL_SHRINKER_CALLBACK ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT ]) diff --git a/include/os/linux/spl/sys/shrinker.h b/include/os/linux/spl/sys/shrinker.h index d472754be4f4..bca4c850694a 100644 --- a/include/os/linux/spl/sys/shrinker.h +++ b/include/os/linux/spl/sys/shrinker.h @@ 
-1,113 +1,85 @@ /* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Brian Behlendorf . * UCRL-CODE-235197 * * This file is part of the SPL, Solaris Porting Layer. * * The SPL is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * The SPL is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . */ #ifndef _SPL_SHRINKER_H #define _SPL_SHRINKER_H #include #include /* * Due to frequent changes in the shrinker API the following - * compatibility wrappers should be used. They are as follows: + * compatibility wrapper should be used. * - * SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost); + * shrinker = spl_register_shrinker(name, countfunc, scanfunc, seek_cost); + * spl_unregister_shrinker(shrinker); * - * SPL_SHRINKER_DECLARE is used to declare a shrinker with the name varname, - * which is passed to spl_register_shrinker()/spl_unregister_shrinker(). + * spl_register_shrinker is used to create and register a shrinker with the + * given name. * The countfunc returns the number of free-able objects. * The scanfunc returns the number of objects that were freed. * The callbacks can return SHRINK_STOP if further calls can't make any more * progress. Note that a return value of SHRINK_EMPTY is currently not * supported. 
* * Example: * * static unsigned long * my_count(struct shrinker *shrink, struct shrink_control *sc) * { * ...calculate number of objects in the cache... * * return (number of objects in the cache); * } * * static unsigned long * my_scan(struct shrinker *shrink, struct shrink_control *sc) * { * ...scan objects in the cache and reclaim them... * } * - * SPL_SHRINKER_DECLARE(my_shrinker, my_count, my_scan, DEFAULT_SEEKS); + * static struct shrinker *my_shrinker; * * void my_init_func(void) { - * spl_register_shrinker(&my_shrinker); + * my_shrinker = spl_register_shrinker("my-shrinker", + * my_count, my_scan, DEFAULT_SEEKS); + * } + * + * void my_fini_func(void) { + * spl_unregister_shrinker(my_shrinker); * } */ -#ifdef HAVE_REGISTER_SHRINKER_VARARG -#define spl_register_shrinker(x) register_shrinker(x, "zfs-arc-shrinker") -#else -#define spl_register_shrinker(x) register_shrinker(x) -#endif -#define spl_unregister_shrinker(x) unregister_shrinker(x) +typedef unsigned long (*spl_shrinker_cb) + (struct shrinker *, struct shrink_control *); -/* - * Linux 3.0 to 3.11 Shrinker API Compatibility. - */ -#if defined(HAVE_SINGLE_SHRINKER_CALLBACK) -#define SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost) \ -static int \ -__ ## varname ## _wrapper(struct shrinker *shrink, struct shrink_control *sc)\ -{ \ - if (sc->nr_to_scan != 0) { \ - (void) scanfunc(shrink, sc); \ - } \ - return (countfunc(shrink, sc)); \ -} \ - \ -static struct shrinker varname = { \ - .shrink = __ ## varname ## _wrapper, \ - .seeks = seek_cost, \ -} +struct shrinker *spl_register_shrinker(const char *name, + spl_shrinker_cb countfunc, spl_shrinker_cb scanfunc, int seek_cost); +void spl_unregister_shrinker(struct shrinker *); +#ifndef SHRINK_STOP +/* 3.0-3.11 compatibility */ #define SHRINK_STOP (-1) - -/* - * Linux 3.12 and later Shrinker API Compatibility. 
- */ -#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) -#define SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost) \ -static struct shrinker varname = { \ - .count_objects = countfunc, \ - .scan_objects = scanfunc, \ - .seeks = seek_cost, \ -} - -#else -/* - * Linux 2.x to 2.6.22, or a newer shrinker API has been introduced. - */ -#error "Unknown shrinker callback" #endif #endif /* SPL_SHRINKER_H */ diff --git a/module/Kbuild.in b/module/Kbuild.in index e34b9fab9efc..fb22bfe733c0 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -1,503 +1,504 @@ # When integrated in to a monolithic kernel the spl module must appear # first. This ensures its module initialization function is run before # any of the other module initialization functions which depend on it. ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement ZFS_MODULE_CFLAGS += -Wmissing-prototypes ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@ ifneq ($(KBUILD_EXTMOD),) zfs_include = @abs_top_srcdir@/include icp_include = @abs_srcdir@/icp/include zstd_include = @abs_srcdir@/zstd/include ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h ZFS_MODULE_CFLAGS += -I@abs_top_builddir@/include src = @abs_srcdir@ obj = @abs_builddir@ else zfs_include = $(srctree)/include/zfs icp_include = $(srctree)/$(src)/icp/include zstd_include = $(srctree)/$(src)/zstd/include ZFS_MODULE_CFLAGS += -include $(zfs_include)/zfs_config.h endif ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/kernel ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/spl ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/zfs ZFS_MODULE_CFLAGS += -I$(zfs_include) ZFS_MODULE_CPPFLAGS += -D_KERNEL ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@ # KASAN enables -Werror=frame-larger-than=1024, which # breaks oh so many parts of our build. ifeq ($(CONFIG_KASAN),y) ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than= endif # Generated binary search code is particularly bad with this optimization. 
# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c # is not affected when unrolling is done. # Disable it until the following upstream issue is resolved: # https://github.com/llvm/llvm-project/issues/62790 ifeq ($(CONFIG_X86),y) ifeq ($(CONFIG_CC_IS_CLANG),y) CFLAGS_zfs/dsl_scan.o += -mllvm -x86-cmov-converter=false CFLAGS_zfs/metaslab.o += -mllvm -x86-cmov-converter=false CFLAGS_zfs/range_tree.o += -mllvm -x86-cmov-converter=false CFLAGS_zfs/zap_micro.o += -mllvm -x86-cmov-converter=false endif endif ifneq ($(KBUILD_EXTMOD),) @CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include @CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@ endif asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) ifeq ($(CONFIG_ARM64),y) CFLAGS_REMOVE_zcommon/zfs_fletcher_aarch64_neon.o += -mgeneral-regs-only CFLAGS_REMOVE_zfs/vdev_raidz_math_aarch64_neon.o += -mgeneral-regs-only CFLAGS_REMOVE_zfs/vdev_raidz_math_aarch64_neonx2.o += -mgeneral-regs-only endif # Suppress unused-value warnings in sparc64 architecture headers ccflags-$(CONFIG_SPARC64) += -Wno-unused-value obj-$(CONFIG_ZFS) := spl.o zfs.o SPL_OBJS := \ spl-atomic.o \ spl-condvar.o \ spl-cred.o \ spl-err.o \ spl-generic.o \ spl-kmem-cache.o \ spl-kmem.o \ spl-kstat.o \ spl-proc.o \ spl-procfs-list.o \ + spl-shrinker.o \ spl-taskq.o \ spl-thread.o \ spl-trace.o \ spl-tsd.o \ spl-vmem.o \ spl-xdr.o \ spl-zlib.o \ spl-zone.o spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS)) zfs-objs += avl/avl.o ICP_OBJS := \ algs/aes/aes_impl.o \ algs/aes/aes_impl_generic.o \ algs/aes/aes_modes.o \ algs/blake3/blake3.o \ algs/blake3/blake3_generic.o \ algs/blake3/blake3_impl.o \ algs/edonr/edonr.o \ algs/modes/cbc.o \ algs/modes/ccm.o \ algs/modes/ctr.o \ algs/modes/ecb.o \ algs/modes/gcm.o \ algs/modes/gcm_generic.o \ algs/modes/modes.o \ algs/sha2/sha2_generic.o \ algs/sha2/sha256_impl.o \ algs/sha2/sha512_impl.o \ algs/skein/skein.o \ 
algs/skein/skein_block.o \ algs/skein/skein_iv.o \ api/kcf_cipher.o \ api/kcf_ctxops.o \ api/kcf_mac.o \ core/kcf_callprov.o \ core/kcf_mech_tabs.o \ core/kcf_prov_lib.o \ core/kcf_prov_tabs.o \ core/kcf_sched.o \ illumos-crypto.o \ io/aes.o \ io/sha2_mod.o \ io/skein_mod.o \ spi/kcf_spi.o ICP_OBJS_X86_64 := \ asm-x86_64/aes/aes_aesni.o \ asm-x86_64/aes/aes_amd64.o \ asm-x86_64/aes/aeskey.o \ asm-x86_64/blake3/blake3_avx2.o \ asm-x86_64/blake3/blake3_avx512.o \ asm-x86_64/blake3/blake3_sse2.o \ asm-x86_64/blake3/blake3_sse41.o \ asm-x86_64/sha2/sha256-x86_64.o \ asm-x86_64/sha2/sha512-x86_64.o \ asm-x86_64/modes/aesni-gcm-x86_64.o \ asm-x86_64/modes/gcm_pclmulqdq.o \ asm-x86_64/modes/ghash-x86_64.o ICP_OBJS_X86 := \ algs/aes/aes_impl_aesni.o \ algs/aes/aes_impl_x86-64.o \ algs/modes/gcm_pclmulqdq.o ICP_OBJS_ARM := \ asm-arm/sha2/sha256-armv7.o \ asm-arm/sha2/sha512-armv7.o ICP_OBJS_ARM64 := \ asm-aarch64/blake3/b3_aarch64_sse2.o \ asm-aarch64/blake3/b3_aarch64_sse41.o \ asm-aarch64/sha2/sha256-armv8.o \ asm-aarch64/sha2/sha512-armv8.o ICP_OBJS_PPC_PPC64 := \ asm-ppc64/blake3/b3_ppc64le_sse2.o \ asm-ppc64/blake3/b3_ppc64le_sse41.o \ asm-ppc64/sha2/sha256-p8.o \ asm-ppc64/sha2/sha512-p8.o \ asm-ppc64/sha2/sha256-ppc.o \ asm-ppc64/sha2/sha512-ppc.o zfs-objs += $(addprefix icp/,$(ICP_OBJS)) zfs-$(CONFIG_X86) += $(addprefix icp/,$(ICP_OBJS_X86)) zfs-$(CONFIG_UML_X86)+= $(addprefix icp/,$(ICP_OBJS_X86)) zfs-$(CONFIG_X86_64) += $(addprefix icp/,$(ICP_OBJS_X86_64)) zfs-$(CONFIG_ARM) += $(addprefix icp/,$(ICP_OBJS_ARM)) zfs-$(CONFIG_ARM64) += $(addprefix icp/,$(ICP_OBJS_ARM64)) zfs-$(CONFIG_PPC) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64)) zfs-$(CONFIG_PPC64) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64)) $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \ $(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include) -I$(zfs_include)/os/linux/spl -I$(zfs_include) $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \ 
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include) -I$(zfs_include)/os/linux/spl -I$(zfs_include) # Suppress objtool "return with modified stack frame" warnings. OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y # Suppress objtool "unsupported stack pointer realignment" warnings. # See #6950 for the reasoning. OBJECT_FILES_NON_STANDARD_sha256-x86_64.o := y OBJECT_FILES_NON_STANDARD_sha512-x86_64.o := y LUA_OBJS := \ lapi.o \ lauxlib.o \ lbaselib.o \ lcode.o \ lcompat.o \ lcorolib.o \ lctype.o \ ldebug.o \ ldo.o \ lfunc.o \ lgc.o \ llex.o \ lmem.o \ lobject.o \ lopcodes.o \ lparser.o \ lstate.o \ lstring.o \ lstrlib.o \ ltable.o \ ltablib.o \ ltm.o \ lvm.o \ lzio.o \ setjmp/setjmp.o zfs-objs += $(addprefix lua/,$(LUA_OBJS)) NVPAIR_OBJS := \ fnvpair.o \ nvpair.o \ nvpair_alloc_fixed.o \ nvpair_alloc_spl.o zfs-objs += $(addprefix nvpair/,$(NVPAIR_OBJS)) UNICODE_OBJS := \ u8_textprep.o \ uconv.o zfs-objs += $(addprefix unicode/,$(UNICODE_OBJS)) ZCOMMON_OBJS := \ cityhash.o \ zfeature_common.o \ zfs_comutil.o \ zfs_deleg.o \ zfs_fletcher.o \ zfs_fletcher_superscalar.o \ zfs_fletcher_superscalar4.o \ zfs_namecheck.o \ zfs_prop.o \ zpool_prop.o \ zprop_common.o ZCOMMON_OBJS_X86 := \ zfs_fletcher_avx512.o \ zfs_fletcher_intel.o \ zfs_fletcher_sse.o ZCOMMON_OBJS_ARM64 := \ zfs_fletcher_aarch64_neon.o zfs-objs += $(addprefix zcommon/,$(ZCOMMON_OBJS)) zfs-$(CONFIG_X86) += $(addprefix zcommon/,$(ZCOMMON_OBJS_X86)) zfs-$(CONFIG_UML_X86)+= $(addprefix zcommon/,$(ZCOMMON_OBJS_X86)) zfs-$(CONFIG_ARM64) += $(addprefix zcommon/,$(ZCOMMON_OBJS_ARM64)) # Zstd uses -O3 by default, so we should follow ZFS_ZSTD_FLAGS := -O3 # -fno-tree-vectorize gets set for gcc in zstd/common/compiler.h # Set it for other compilers, too. 
ZFS_ZSTD_FLAGS += -fno-tree-vectorize # SSE register return with SSE disabled if -march=znverX is passed ZFS_ZSTD_FLAGS += -U__BMI__ # Quiet warnings about frame size due to unused code in unmodified zstd lib ZFS_ZSTD_FLAGS += -Wframe-larger-than=20480 ZSTD_OBJS := \ zfs_zstd.o \ zstd_sparc.o ZSTD_UPSTREAM_OBJS := \ lib/common/entropy_common.o \ lib/common/error_private.o \ lib/common/fse_decompress.o \ lib/common/pool.o \ lib/common/zstd_common.o \ lib/compress/fse_compress.o \ lib/compress/hist.o \ lib/compress/huf_compress.o \ lib/compress/zstd_compress.o \ lib/compress/zstd_compress_literals.o \ lib/compress/zstd_compress_sequences.o \ lib/compress/zstd_compress_superblock.o \ lib/compress/zstd_double_fast.o \ lib/compress/zstd_fast.o \ lib/compress/zstd_lazy.o \ lib/compress/zstd_ldm.o \ lib/compress/zstd_opt.o \ lib/decompress/huf_decompress.o \ lib/decompress/zstd_ddict.o \ lib/decompress/zstd_decompress.o \ lib/decompress/zstd_decompress_block.o zfs-objs += $(addprefix zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) # Disable aarch64 neon SIMD instructions for kernel mode $(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -I$(zstd_include) $(ZFS_ZSTD_FLAGS) $(addprefix $(obj)/zstd/,$(ZSTD_OBJS) $(ZSTD_UPSTREAM_OBJS)) : asflags-y += -I$(zstd_include) $(addprefix $(obj)/zstd/,$(ZSTD_UPSTREAM_OBJS)) : ccflags-y += -include $(zstd_include)/aarch64_compat.h -include $(zstd_include)/zstd_compat_wrapper.h -Wp,-w $(obj)/zstd/zfs_zstd.o : ccflags-y += -include $(zstd_include)/zstd_compat_wrapper.h ZFS_OBJS := \ abd.o \ aggsum.o \ arc.o \ blake3_zfs.o \ blkptr.o \ bplist.o \ bpobj.o \ bptree.o \ bqueue.o \ brt.o \ btree.o \ dataset_kstats.o \ dbuf.o \ dbuf_stats.o \ ddt.o \ ddt_zap.o \ dmu.o \ dmu_diff.o \ dmu_object.o \ dmu_objset.o \ dmu_recv.o \ dmu_redact.o \ dmu_send.o \ dmu_traverse.o \ dmu_tx.o \ dmu_zfetch.o \ dnode.o \ dnode_sync.o \ dsl_bookmark.o \ dsl_crypt.o \ dsl_dataset.o \ dsl_deadlist.o \ dsl_deleg.o \ dsl_destroy.o \ dsl_dir.o \ 
dsl_pool.o \ dsl_prop.o \ dsl_scan.o \ dsl_synctask.o \ dsl_userhold.o \ edonr_zfs.o \ fm.o \ gzip.o \ hkdf.o \ lz4.o \ lz4_zfs.o \ lzjb.o \ metaslab.o \ mmp.o \ multilist.o \ objlist.o \ pathname.o \ range_tree.o \ refcount.o \ rrwlock.o \ sa.o \ sha2_zfs.o \ skein_zfs.o \ spa.o \ spa_checkpoint.o \ spa_config.o \ spa_errlog.o \ spa_history.o \ spa_log_spacemap.o \ spa_misc.o \ spa_stats.o \ space_map.o \ space_reftree.o \ txg.o \ uberblock.o \ unique.o \ vdev.o \ vdev_draid.o \ vdev_draid_rand.o \ vdev_indirect.o \ vdev_indirect_births.o \ vdev_indirect_mapping.o \ vdev_initialize.o \ vdev_label.o \ vdev_mirror.o \ vdev_missing.o \ vdev_queue.o \ vdev_raidz.o \ vdev_raidz_math.o \ vdev_raidz_math_scalar.o \ vdev_rebuild.o \ vdev_removal.o \ vdev_root.o \ vdev_trim.o \ zap.o \ zap_leaf.o \ zap_micro.o \ zcp.o \ zcp_get.o \ zcp_global.o \ zcp_iter.o \ zcp_set.o \ zcp_synctask.o \ zfeature.o \ zfs_byteswap.o \ zfs_chksum.o \ zfs_fm.o \ zfs_fuid.o \ zfs_impl.o \ zfs_ioctl.o \ zfs_log.o \ zfs_onexit.o \ zfs_quota.o \ zfs_ratelimit.o \ zfs_replay.o \ zfs_rlock.o \ zfs_sa.o \ zfs_vnops.o \ zil.o \ zio.o \ zio_checksum.o \ zio_compress.o \ zio_inject.o \ zle.o \ zrlock.o \ zthr.o \ zvol.o ZFS_OBJS_OS := \ abd_os.o \ arc_os.o \ mmp_os.o \ policy.o \ qat.o \ qat_compress.o \ qat_crypt.o \ spa_misc_os.o \ trace.o \ vdev_disk.o \ vdev_file.o \ vdev_label_os.o \ zfs_acl.o \ zfs_ctldir.o \ zfs_debug.o \ zfs_dir.o \ zfs_file_os.o \ zfs_ioctl_os.o \ zfs_racct.o \ zfs_sysfs.o \ zfs_uio.o \ zfs_vfsops.o \ zfs_vnops_os.o \ zfs_znode.o \ zio_crypt.o \ zpl_ctldir.o \ zpl_export.o \ zpl_file.o \ zpl_file_range.o \ zpl_inode.o \ zpl_super.o \ zpl_xattr.o \ zvol_os.o ZFS_OBJS_X86 := \ vdev_raidz_math_avx2.o \ vdev_raidz_math_avx512bw.o \ vdev_raidz_math_avx512f.o \ vdev_raidz_math_sse2.o \ vdev_raidz_math_ssse3.o ZFS_OBJS_ARM64 := \ vdev_raidz_math_aarch64_neon.o \ vdev_raidz_math_aarch64_neonx2.o ZFS_OBJS_PPC_PPC64 := \ vdev_raidz_math_powerpc_altivec.o zfs-objs += $(addprefix 
zfs/,$(ZFS_OBJS)) $(addprefix os/linux/zfs/,$(ZFS_OBJS_OS)) zfs-$(CONFIG_X86) += $(addprefix zfs/,$(ZFS_OBJS_X86)) zfs-$(CONFIG_UML_X86)+= $(addprefix zfs/,$(ZFS_OBJS_X86)) zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64)) zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64)) zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64)) UBSAN_SANITIZE_zap_leaf.o := n UBSAN_SANITIZE_zap_micro.o := n UBSAN_SANITIZE_sa.o := n # Suppress incorrect warnings from versions of objtool which are not # aware of x86 EVEX prefix instructions used for AVX512. OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y ifeq ($(CONFIG_ALTIVEC),y) $(obj)/zfs/vdev_raidz_math_powerpc_altivec.o : c_flags += -maltivec endif diff --git a/module/os/linux/spl/spl-shrinker.c b/module/os/linux/spl/spl-shrinker.c new file mode 100644 index 000000000000..d5c8da471cbb --- /dev/null +++ b/module/os/linux/spl/spl-shrinker.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. + * Copyright (C) 2007 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * UCRL-CODE-235197 + * + * This file is part of the SPL, Solaris Porting Layer. + * + * The SPL is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * The SPL is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with the SPL. If not, see . 
+ *
+ * Solaris Porting Layer (SPL) Shrinker Implementation.
+ */
+
+#include
+#include
+
+#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
+/* 3.0-3.11: single shrink() callback, which we wrap to carry both functions */
+struct spl_shrinker_wrap {
+	struct shrinker shrinker;	/* first member: callback casts back */
+	spl_shrinker_cb countfunc;
+	spl_shrinker_cb scanfunc;
+};
+
+static int
+spl_shrinker_single_cb(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct spl_shrinker_wrap *sw = (struct spl_shrinker_wrap *)shrinker;
+
+	if (sc->nr_to_scan != 0)	/* scan only when asked to */
+		(void) sw->scanfunc(&sw->shrinker, sc);
+	return (sw->countfunc(&sw->shrinker, sc));
+}
+#endif
+
+struct shrinker *
+spl_register_shrinker(const char *name, spl_shrinker_cb countfunc,
+    spl_shrinker_cb scanfunc, int seek_cost)
+{
+	struct shrinker *shrinker;
+
+	/* allocate shrinker; a NULL result is returned to the caller below */
+#if defined(HAVE_SHRINKER_REGISTER)
+	/* 6.7: the kernel allocates it; name is data, so pass it via "%s" */
+	shrinker = shrinker_alloc(0, "%s", name);
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	/* 3.12-6.6: we allocate the shrinker */
+	shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP);
+#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+	/* 3.0-3.11: allocate a wrapper */
+	struct spl_shrinker_wrap *sw =
+	    kmem_zalloc(sizeof (struct spl_shrinker_wrap), KM_SLEEP);
+	shrinker = &sw->shrinker;
+#else
+	/* 2.x-2.6.22, or a newer shrinker API has been introduced. */
+#error "Unknown shrinker API"
+#endif
+
+	if (shrinker == NULL)
+		return (NULL);
+
+	/* set callbacks */
+#ifdef HAVE_SINGLE_SHRINKER_CALLBACK
+	sw->countfunc = countfunc;
+	sw->scanfunc = scanfunc;
+	shrinker->shrink = spl_shrinker_single_cb;
+#else
+	shrinker->count_objects = countfunc;
+	shrinker->scan_objects = scanfunc;
+#endif
+
+	/* set params */
+	shrinker->seeks = seek_cost;
+
+	/* register with kernel; name is an argument, never the format */
+#if defined(HAVE_SHRINKER_REGISTER)
+	shrinker_register(shrinker);
+#elif defined(HAVE_REGISTER_SHRINKER_VARARG)
+	register_shrinker(shrinker, "%s", name);
+#else
+	register_shrinker(shrinker);
+#endif
+
+	return (shrinker);
+}
+EXPORT_SYMBOL(spl_register_shrinker);
+
+void
+spl_unregister_shrinker(struct shrinker *shrinker)
+{
+#if defined(HAVE_SHRINKER_REGISTER)
+	shrinker_free(shrinker);
+#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+	unregister_shrinker(shrinker);
+	kmem_free(shrinker, sizeof (struct shrinker));
+#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK)
+	unregister_shrinker(shrinker);
+	kmem_free(shrinker, sizeof (struct spl_shrinker_wrap));
+#else
+#error "Unknown shrinker API"
+#endif
+}
+EXPORT_SYMBOL(spl_unregister_shrinker);
diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c
index 55cdbba5b5eb..02dd80c06062 100644
--- a/module/os/linux/zfs/arc_os.c
+++ b/module/os/linux/zfs/arc_os.c
@@ -1,499 +1,502 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, Joyent, Inc. * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #include #include #include #include #include #endif #include #include #include #include #include #include #include /* * This is a limit on how many pages the ARC shrinker makes available for * eviction in response to one page allocation attempt. Note that in * practice, the kernel's shrinker can ask us to evict up to about 4x this * for one allocation attempt. * * The default limit of 10,000 (in practice, 160MB per allocation attempt * with 4K pages) limits the amount of time spent attempting to reclaim ARC * memory to less than 100ms per allocation attempt, even with a small * average compressed block size of ~8KB. * * See also the comment in arc_shrinker_count(). * Set to 0 to disable limit. */ int zfs_arc_shrinker_limit = 10000; #ifdef CONFIG_MEMORY_HOTPLUG static struct notifier_block arc_hotplug_callback_mem_nb; #endif /* * Return a default max arc size based on the amount of physical memory. * This may be overridden by tuning the zfs_arc_max module parameter. */ uint64_t arc_default_max(uint64_t min, uint64_t allmem) { uint64_t size; if (allmem >= 1 << 30) size = allmem - (1 << 30); else size = min; return (MAX(allmem * 5 / 8, size)); } #ifdef _KERNEL /* * Return maximum amount of memory that we could possibly use. 
Reduced * to half of all memory in user space which is primarily used for testing. */ uint64_t arc_all_memory(void) { #ifdef CONFIG_HIGHMEM return (ptob(zfs_totalram_pages - zfs_totalhigh_pages)); #else return (ptob(zfs_totalram_pages)); #endif /* CONFIG_HIGHMEM */ } /* * Return the amount of memory that is considered free. In user space * which is primarily used for testing we pretend that free memory ranges * from 0-20% of all memory. */ uint64_t arc_free_memory(void) { #ifdef CONFIG_HIGHMEM struct sysinfo si; si_meminfo(&si); return (ptob(si.freeram - si.freehigh)); #else return (ptob(nr_free_pages() + nr_inactive_file_pages())); #endif /* CONFIG_HIGHMEM */ } /* * Return the amount of memory that can be consumed before reclaim will be * needed. Positive if there is sufficient free memory, negative indicates * the amount of memory that needs to be freed up. */ int64_t arc_available_memory(void) { return (arc_free_memory() - arc_sys_free); } static uint64_t arc_evictable_memory(void) { int64_t asize = aggsum_value(&arc_sums.arcstat_size); uint64_t arc_clean = zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) + zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) + zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_DATA]) + zfs_refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]); uint64_t arc_dirty = MAX((int64_t)asize - (int64_t)arc_clean, 0); /* * Scale reported evictable memory in proportion to page cache, cap * at specified min/max. */ uint64_t min = (ptob(nr_file_pages()) / 100) * zfs_arc_pc_percent; min = MAX(arc_c_min, MIN(arc_c_max, min)); if (arc_dirty >= min) return (arc_clean); return (MAX((int64_t)asize - (int64_t)min, 0)); } /* * The _count() function returns the number of free-able objects. * The _scan() function returns the number of objects that were freed. 
*/
static unsigned long
arc_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
	/*
	 * __GFP_FS won't be set if we are called from ZFS code (see
	 * kmem_flags_convert(), which removes it).  To avoid a deadlock, we
	 * don't allow evicting in this case.  We return 0 rather than
	 * SHRINK_STOP so that the shrinker logic doesn't accumulate a
	 * deficit against us.
	 */
	if (!(sc->gfp_mask & __GFP_FS)) {
		return (0);
	}

	/*
	 * This code is reached in the "direct reclaim" case, where the
	 * kernel (outside ZFS) is trying to allocate a page, and the system
	 * is low on memory.
	 *
	 * The kernel's shrinker code doesn't understand how many pages the
	 * ARC's callback actually frees, so it may ask the ARC to shrink a
	 * lot for one page allocation. This is problematic because it may
	 * take a long time, thus delaying the page allocation, and because
	 * it may force the ARC to unnecessarily shrink very small.
	 *
	 * Therefore, we limit the amount of data that we say is evictable,
	 * which limits the amount that the shrinker will ask us to evict for
	 * one page allocation attempt.
	 *
	 * In practice, we may be asked to shrink 4x the limit to satisfy one
	 * page allocation, before the kernel's shrinker code gives up on us.
	 * When that happens, we rely on the kernel code to find the pages
	 * that we freed before invoking the OOM killer.  This happens in
	 * __alloc_pages_slowpath(), which retries and finds the pages we
	 * freed when it calls get_page_from_freelist().
	 *
	 * See also the comment above zfs_arc_shrinker_limit.
	 */
	/* A limit of 0 means "no limit", represented here as INT64_MAX. */
	int64_t limit = zfs_arc_shrinker_limit != 0 ?
	    zfs_arc_shrinker_limit : INT64_MAX;
	/* Report the smaller of the limit and btop() of the evictable size. */
	return (MIN(limit, btop((int64_t)arc_evictable_memory())));
}

/*
 * Shrinker scan callback.  Lowers the ARC target size by sc->nr_to_scan
 * pages (converted with ptob()), waits for that much to be evicted, credits
 * the same count to the current task's reclaim_state when one is attached,
 * sets arc_no_grow, and bumps the direct or indirect reclaim statistic.
 * Always returns sc->nr_to_scan.
 */
static unsigned long
arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	ASSERT((sc->gfp_mask & __GFP_FS) != 0);

	/* The arc is considered warm once reclaim has occurred */
	if (unlikely(arc_warm == B_FALSE))
		arc_warm = B_TRUE;

	/*
	 * Evict the requested number of pages by reducing arc_c and waiting
	 * for the requested amount of data to be evicted.
	 */
	arc_reduce_target_size(ptob(sc->nr_to_scan));
	arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
	/* The field name depends on HAVE_RECLAIM_STATE_RECLAIMED. */
	if (current->reclaim_state != NULL)
#ifdef	HAVE_RECLAIM_STATE_RECLAIMED
		current->reclaim_state->reclaimed += sc->nr_to_scan;
#else
		current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
#endif

	/*
	 * We are experiencing memory pressure which the arc_evict_zthr was
	 * unable to keep up with. Set arc_no_grow to briefly pause arc
	 * growth to avoid compounding the memory pressure.
	 */
	arc_no_grow = B_TRUE;

	/*
	 * When direct reclaim is observed it usually indicates a rapid
	 * increase in memory pressure.  This occurs because the kswapd
	 * threads were unable to asynchronously keep enough free memory
	 * available.
	 */
	if (current_is_kswapd()) {
		ARCSTAT_BUMP(arcstat_memory_indirect_count);
	} else {
		ARCSTAT_BUMP(arcstat_memory_direct_count);
	}

	return (sc->nr_to_scan);
}

/*
 * ARC shrinker declaration.  The lines marked '-' and '+' here and further
 * down are the removed and added sides of the change this hunk carries.
 */
-SPL_SHRINKER_DECLARE(arc_shrinker,
-    arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
+static struct shrinker *arc_shrinker = NULL;

/*
 * Decide whether the caller must back off because free memory is low.
 * Returns 0 when the caller may proceed, ERESTART when the calling thread
 * is kswapd and the page load recorded for this txg is over its threshold,
 * and EAGAIN when page load is outstanding and arc_reclaim_needed() reports
 * that reclaim is needed.  Errors are passed through SET_ERROR().
 */
int
arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
{
	uint64_t free_memory = arc_free_memory();

	/* Plenty of free memory: nothing to throttle. */
	if (free_memory > arc_all_memory() * arc_lotsfree_percent / 100)
		return (0);

	/* First call for a newer txg: restart the page-load accounting. */
	if (txg > spa->spa_lowmem_last_txg) {
		spa->spa_lowmem_last_txg = txg;
		spa->spa_lowmem_page_load = 0;
	}
	/*
	 * If we are in pageout, we know that memory is already tight,
	 * the arc is already going to be evicting, so we just want to
	 * continue to let page writes occur as quickly as possible.
	 */
	if (current_is_kswapd()) {
		if (spa->spa_lowmem_page_load >
		    MAX(arc_sys_free / 4, free_memory) / 4) {
			DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
			return (SET_ERROR(ERESTART));
		}
		/* Note: reserve is inflated, so we deflate */
		atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8);
		return (0);
	} else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) {
		/* memory is low, delay before restarting */
		ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
		DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
		return (SET_ERROR(EAGAIN));
	}
	spa->spa_lowmem_page_load = 0;
	return (0);
}

/*
 * Set arc_sys_free from the total memory size `allmem`: three times an
 * estimated maximum watermark plus allmem / 32.
 */
static void
arc_set_sys_free(uint64_t allmem)
{
	/*
	 * The ARC tries to keep at least this much memory available for the
	 * system.  This gives the ARC time to shrink in response to memory
	 * pressure, before running completely out of memory and invoking the
	 * direct-reclaim ARC shrinker.
	 *
	 * This should be more than twice high_wmark_pages(), so that
	 * arc_wait_for_eviction() will wait until at least the
	 * high_wmark_pages() are free (see arc_evict_state_impl()).
	 *
	 * Note: Even when the system is very low on memory, the kernel's
	 * shrinker code may only ask for one "batch" of pages (512KB) to be
	 * evicted.  If concurrent allocations consume these pages, there may
	 * still be insufficient free pages, and the OOM killer takes action.
	 *
	 * By setting arc_sys_free large enough, and having
	 * arc_wait_for_eviction() wait until there is at least arc_sys_free/2
	 * free memory, it is much less likely that concurrent allocations can
	 * consume all the memory that was evicted before checking for
	 * OOM.
	 *
	 * It's hard to iterate the zones from a linux kernel module, which
	 * makes it difficult to determine the watermark dynamically. Instead
	 * we compute the maximum high watermark for this system, based
	 * on the amount of memory, assuming default parameters on Linux kernel
	 * 5.3.
	 */

	/*
	 * Base wmark_low is 4 * the square root of Kbytes of RAM.
	 */
	long wmark = 4 * int_sqrt(allmem/1024) * 1024;

	/*
	 * Clamp to between 128K and 64MB.
	 */
	wmark = MAX(wmark, 128 * 1024);
	wmark = MIN(wmark, 64 * 1024 * 1024);

	/*
	 * watermark_boost can increase the wmark by up to 150%.
	 */
	wmark += wmark * 150 / 100;

	/*
	 * arc_sys_free needs to be more than 2x the watermark, because
	 * arc_wait_for_eviction() waits for half of arc_sys_free.  Bump this up
	 * to 3x to ensure we're above it.
	 */
	arc_sys_free = wmark * 3 + allmem / 32;
}

/*
 * Register the ARC shrinker and initialise arc_sys_free from the current
 * value of arc_all_memory().
 */
void
arc_lowmem_init(void)
{
	uint64_t allmem = arc_all_memory();

	/*
	 * Register a shrinker to support synchronous (direct) memory
	 * reclaim from the arc.  This is done to prevent kswapd from
	 * swapping out pages when it is preferable to shrink the arc.
	 */
-	spl_register_shrinker(&arc_shrinker);
+	arc_shrinker = spl_register_shrinker("zfs-arc-shrinker",
+	    arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS);
+	VERIFY(arc_shrinker);
+
	arc_set_sys_free(allmem);
}

/*
 * Unregister the ARC shrinker; the added side also clears the stored handle.
 */
void
arc_lowmem_fini(void)
{
-	spl_unregister_shrinker(&arc_shrinker);
+	spl_unregister_shrinker(arc_shrinker);
+	arc_shrinker = NULL;
}

/*
 * Setter for 64-bit ARC module parameters: stores the value through
 * spl_param_set_u64() and then calls arc_tuning_update(B_TRUE).  Returns 0
 * on success, or the negative error from the store wrapped in SET_ERROR().
 */
int
param_set_arc_u64(const char *buf, zfs_kernel_param_t *kp)
{
	int error;

	error = spl_param_set_u64(buf, kp);
	if (error < 0)
		return (SET_ERROR(error));

	arc_tuning_update(B_TRUE);

	return (0);
}

/* Setter for the ARC minimum; forwards to param_set_arc_u64(). */
int
param_set_arc_min(const char *buf, zfs_kernel_param_t *kp)
{
	return (param_set_arc_u64(buf, kp));
}

/* Setter for the ARC maximum; forwards to param_set_arc_u64(). */
int
param_set_arc_max(const char *buf, zfs_kernel_param_t *kp)
{
	return (param_set_arc_u64(buf, kp));
}

/*
 * Setter for int ARC module parameters: stores the value through
 * param_set_int() and then calls arc_tuning_update(B_TRUE).  Returns 0 on
 * success, or the negative error from the store wrapped in SET_ERROR().
 */
int
param_set_arc_int(const char *buf, zfs_kernel_param_t *kp)
{
	int error;

	error = param_set_int(buf, kp);
	if (error < 0)
		return (SET_ERROR(error));

	arc_tuning_update(B_TRUE);

	return (0);
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory notifier callback.  For MEM_ONLINE it recomputes the ARC limits,
 * fills in zfs_dirty_data_max_max if it is still 0, and recomputes
 * arc_sys_free; every other action is ignored.  Always returns NOTIFY_OK.
 */
static int
arc_hotplug_callback(struct notifier_block *self, unsigned long action,
    void *arg)
{
	(void) self, (void) arg;
	uint64_t allmem = arc_all_memory();
	if (action != MEM_ONLINE)
		return (NOTIFY_OK);

	arc_set_limits(allmem);

	/* The default is capped at 4 GiB with __LP64__, at 1 GiB otherwise. */
#ifdef __LP64__
	if (zfs_dirty_data_max_max == 0)
		zfs_dirty_data_max_max = MIN(4ULL * 1024 * 1024 * 1024,
		    allmem * zfs_dirty_data_max_max_percent / 100);
#else
	if (zfs_dirty_data_max_max == 0)
		zfs_dirty_data_max_max = MIN(1ULL * 1024 * 1024 * 1024,
		    allmem * zfs_dirty_data_max_max_percent / 100);
#endif

	arc_set_sys_free(allmem);
	return (NOTIFY_OK);
}
#endif

/*
 * Register arc_hotplug_callback() as a memory notifier.  Does nothing
 * unless CONFIG_MEMORY_HOTPLUG is defined.
 */
void
arc_register_hotplug(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	arc_hotplug_callback_mem_nb.notifier_call = arc_hotplug_callback;
	/* There is no significance to the value 100 */
	arc_hotplug_callback_mem_nb.priority = 100;
	register_memory_notifier(&arc_hotplug_callback_mem_nb);
#endif
}

/*
 * Remove the memory notifier installed by arc_register_hotplug().  Does
 * nothing unless CONFIG_MEMORY_HOTPLUG is defined.
 */
void
arc_unregister_hotplug(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	unregister_memory_notifier(&arc_hotplug_callback_mem_nb);
#endif
}
#else /* _KERNEL */
/*
 * Non-kernel stand-in: reports INT64_MAX available, except that it reports
 * -1024 whenever random_in_range(100) returns 0.
 */
int64_t
arc_available_memory(void)
{
	int64_t lowest = INT64_MAX;

	/* Every 100 calls, free a small amount */
	if (random_in_range(100) == 0)
		lowest = -1024;

	return (lowest);
}

/* Non-kernel stand-in: never throttles; all arguments are ignored. */
int
arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
{
	(void) spa, (void) reserve, (void) txg;
	return (0);
}

/* Non-kernel stand-in: half of ptob(physmem). */
uint64_t
arc_all_memory(void)
{
	return (ptob(physmem) / 2);
}

/*
 * Non-kernel stand-in: the result of random_in_range() called with 20% of
 * arc_all_memory().
 */
uint64_t
arc_free_memory(void)
{
	return (random_in_range(arc_all_memory() * 20 / 100));
}

/* Non-kernel stand-in: nothing to register. */
void
arc_register_hotplug(void)
{
}

/* Non-kernel stand-in: nothing to unregister. */
void
arc_unregister_hotplug(void)
{
}
#endif /* _KERNEL */

/* Module parameter declaration for shrinker_limit (zfs_arc_ prefix). */
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
	"Limit on number of pages that ARC shrinker can reclaim at once");