diff --git a/config/kernel-drop-inode.m4 b/config/kernel-drop-inode.m4 new file mode 100644 index 000000000000..6f2b12cadc02 --- /dev/null +++ b/config/kernel-drop-inode.m4 @@ -0,0 +1,24 @@ +dnl # +dnl # 6.18 API change +dnl # - generic_drop_inode() renamed to inode_generic_drop() +dnl # - generic_delete_inode() renamed to inode_just_drop() +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GENERIC_DROP], [ + ZFS_LINUX_TEST_SRC([inode_generic_drop], [ + #include + ],[ + struct inode *ip = NULL; + inode_generic_drop(ip); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_INODE_GENERIC_DROP], [ + AC_MSG_CHECKING([whether inode_generic_drop() exists]) + ZFS_LINUX_TEST_RESULT([inode_generic_drop], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_GENERIC_DROP, 1, + [inode_generic_drop() exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 8484bcfb1612..40b7de739882 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -1,1068 +1,1070 @@ dnl # dnl # Default ZFS kernel configuration dnl # AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ AM_COND_IF([BUILD_LINUX], [ dnl # Setup the kernel build environment. ZFS_AC_KERNEL ZFS_AC_QAT dnl # Sanity checks for module building and CONFIG_* defines ZFS_AC_KERNEL_CONFIG_DEFINED ZFS_AC_MODULE_SYMVERS dnl # Sequential ZFS_LINUX_TRY_COMPILE tests ZFS_AC_KERNEL_FPU_HEADER ZFS_AC_KERNEL_OBJTOOL_HEADER ZFS_AC_KERNEL_MISC_MINOR ZFS_AC_KERNEL_DECLARE_EVENT_CLASS dnl # Parallel ZFS_LINUX_TEST_SRC / ZFS_LINUX_TEST_RESULT tests ZFS_AC_KERNEL_TEST_SRC ZFS_AC_KERNEL_TEST_RESULT AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" ]) AC_SUBST(KERNEL_MAKE) ]) ]) dnl # dnl # Generate and compile all of the kernel API test cases to determine dnl # which interfaces are available. By invoking the kernel build system dnl # only once the compilation can be done in parallel significantly dnl # speeding up the process. dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_TYPES ZFS_AC_KERNEL_SRC_OBJTOOL ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE ZFS_AC_KERNEL_SRC_PDE_DATA ZFS_AC_KERNEL_SRC_GENERIC_FADVISE ZFS_AC_KERNEL_SRC_SCHED ZFS_AC_KERNEL_SRC_USLEEP_RANGE ZFS_AC_KERNEL_SRC_VMALLOC_PAGE_KERNEL ZFS_AC_KERNEL_SRC_INODE_TIMES ZFS_AC_KERNEL_SRC_PROC_OPERATIONS ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_SRC_BIO ZFS_AC_KERNEL_SRC_BLKDEV ZFS_AC_KERNEL_SRC_BLK_QUEUE ZFS_AC_KERNEL_SRC_GENHD_FLAGS ZFS_AC_KERNEL_SRC_REVALIDATE_DISK ZFS_AC_KERNEL_SRC_GET_DISK_RO ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_SRC_XATTR ZFS_AC_KERNEL_SRC_ACL ZFS_AC_KERNEL_SRC_INODE_SETATTR ZFS_AC_KERNEL_SRC_INODE_GETATTR ZFS_AC_KERNEL_SRC_SHOW_OPTIONS ZFS_AC_KERNEL_SRC_SHRINKER ZFS_AC_KERNEL_SRC_MKDIR ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS ZFS_AC_KERNEL_SRC_CREATE ZFS_AC_KERNEL_SRC_PERMISSION ZFS_AC_KERNEL_SRC_TMPFILE ZFS_AC_KERNEL_SRC_AUTOMOUNT ZFS_AC_KERNEL_SRC_COMMIT_METADATA ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED ZFS_AC_KERNEL_SRC_DENTRY ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT ZFS_AC_KERNEL_SRC_SB_DYING ZFS_AC_KERNEL_SRC_SET_NLINK ZFS_AC_KERNEL_SRC_SGET ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO ZFS_AC_KERNEL_SRC_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_SRC_VFS_MIGRATEPAGE ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_SRC_VFS_READPAGES ZFS_AC_KERNEL_SRC_VFS_WRITEPAGE ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_SRC_VFS_IOV_ITER ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_SRC_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT ZFS_AC_KERNEL_SRC_FPU ZFS_AC_KERNEL_SRC_FMODE_T ZFS_AC_KERNEL_SRC_KUIDGID_T ZFS_AC_KERNEL_SRC_KUID_HELPERS ZFS_AC_KERNEL_SRC_RENAME ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES ZFS_AC_KERNEL_SRC_PERCPU ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR ZFS_AC_KERNEL_SRC_MKNOD ZFS_AC_KERNEL_SRC_SYMLINK ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS ZFS_AC_KERNEL_SRC_SIGINFO ZFS_AC_KERNEL_SRC_SYSFS ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_SRC_STRLCPY ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_SRC_PAGEMAP_READAHEAD_PAGE ZFS_AC_KERNEL_SRC_ADD_DISK ZFS_AC_KERNEL_SRC_KTHREAD ZFS_AC_KERNEL_SRC_ZERO_PAGE ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_SRC_IDMAP_MNT_API ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS ZFS_AC_KERNEL_SRC_IATTR_VFSID ZFS_AC_KERNEL_SRC_WRITEBACK ZFS_AC_KERNEL_SRC_RECLAIMED ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_SZ ZFS_AC_KERNEL_SRC_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ ZFS_AC_KERNEL_SRC_SYNC_BDEV ZFS_AC_KERNEL_SRC_MM_PAGE_FLAGS ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING ZFS_AC_KERNEL_SRC_FILE ZFS_AC_KERNEL_SRC_PIN_USER_PAGES ZFS_AC_KERNEL_SRC_TIMER ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE ZFS_AC_KERNEL_SRC_NAMESPACE + ZFS_AC_KERNEL_SRC_INODE_GENERIC_DROP case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE ;; riscv*) ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE ;; esac AC_MSG_CHECKING([for available kernel interfaces]) ZFS_LINUX_TEST_COMPILE_ALL([kabi]) AC_MSG_RESULT([done]) ]) dnl # dnl # Check results of kernel interface tests. dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_TYPES ZFS_AC_KERNEL_ACCESS_OK_TYPE ZFS_AC_KERNEL_OBJTOOL ZFS_AC_KERNEL_PDE_DATA ZFS_AC_KERNEL_GENERIC_FADVISE ZFS_AC_KERNEL_SCHED ZFS_AC_KERNEL_USLEEP_RANGE ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL ZFS_AC_KERNEL_INODE_TIMES ZFS_AC_KERNEL_PROC_OPERATIONS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BIO ZFS_AC_KERNEL_BLKDEV ZFS_AC_KERNEL_BLK_QUEUE ZFS_AC_KERNEL_GENHD_FLAGS ZFS_AC_KERNEL_REVALIDATE_DISK ZFS_AC_KERNEL_GET_DISK_RO ZFS_AC_KERNEL_DISCARD_GRANULARITY ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_XATTR ZFS_AC_KERNEL_ACL ZFS_AC_KERNEL_INODE_SETATTR ZFS_AC_KERNEL_INODE_GETATTR ZFS_AC_KERNEL_SHOW_OPTIONS ZFS_AC_KERNEL_SHRINKER ZFS_AC_KERNEL_MKDIR ZFS_AC_KERNEL_LOOKUP_FLAGS ZFS_AC_KERNEL_CREATE ZFS_AC_KERNEL_PERMISSION ZFS_AC_KERNEL_TMPFILE ZFS_AC_KERNEL_AUTOMOUNT ZFS_AC_KERNEL_COMMIT_METADATA ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED ZFS_AC_KERNEL_DENTRY ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT ZFS_AC_KERNEL_SB_DYING ZFS_AC_KERNEL_SET_NLINK ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_VFS_READ_FOLIO ZFS_AC_KERNEL_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_VFS_MIGRATEPAGE ZFS_AC_KERNEL_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_VFS_READPAGES ZFS_AC_KERNEL_VFS_WRITEPAGE ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_VFS_IOV_ITER ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_MAKE_REQUEST_FN ZFS_AC_KERNEL_GENERIC_IO_ACCT ZFS_AC_KERNEL_FPU ZFS_AC_KERNEL_FMODE_T ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_RENAME ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_TOTALHIGH_PAGES ZFS_AC_KERNEL_PERCPU ZFS_AC_KERNEL_GENERIC_FILLATTR ZFS_AC_KERNEL_MKNOD ZFS_AC_KERNEL_SYMLINK ZFS_AC_KERNEL_BIO_MAX_SEGS ZFS_AC_KERNEL_SIGINFO ZFS_AC_KERNEL_SYSFS ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_STRLCPY ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_PAGEMAP_READAHEAD_PAGE ZFS_AC_KERNEL_ADD_DISK ZFS_AC_KERNEL_KTHREAD ZFS_AC_KERNEL_ZERO_PAGE ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_IDMAP_MNT_API ZFS_AC_KERNEL_IDMAP_NO_USERNS ZFS_AC_KERNEL_IATTR_VFSID ZFS_AC_KERNEL_WRITEBACK ZFS_AC_KERNEL_RECLAIMED ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_REGISTER_SYSCTL_SZ ZFS_AC_KERNEL_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_COPY_SPLICE_READ ZFS_AC_KERNEL_SYNC_BDEV ZFS_AC_KERNEL_MM_PAGE_FLAGS ZFS_AC_KERNEL_MM_PAGE_SIZE ZFS_AC_KERNEL_MM_PAGE_MAPPING ZFS_AC_KERNEL_1ARG_ASSIGN_STR ZFS_AC_KERNEL_FILE ZFS_AC_KERNEL_PIN_USER_PAGES ZFS_AC_KERNEL_TIMER ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR ZFS_AC_KERNEL_SOPS_FREE_INODE ZFS_AC_KERNEL_NAMESPACE + ZFS_AC_KERNEL_INODE_GENERIC_DROP case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE ;; riscv*) ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE ;; esac ]) dnl # dnl # Detect name used for Module.symvers file in kernel dnl # AC_DEFUN([ZFS_AC_MODULE_SYMVERS], [ modpost=$LINUX/scripts/Makefile.modpost AC_MSG_CHECKING([kernel file name for module symbols]) AS_IF([test "x$enable_linux_builtin" != xyes -a -f "$modpost"], [ AS_IF([grep -q Modules.symvers $modpost], [ LINUX_SYMBOLS=Modules.symvers ], [ LINUX_SYMBOLS=Module.symvers ]) AS_IF([test ! -f "$LINUX_OBJ/$LINUX_SYMBOLS"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution *** is installed. If you are building with a custom kernel, make sure *** the kernel is configured, built, and the '--with-linux=PATH' *** configure option refers to the location of the kernel source. ]) ]) ], [ LINUX_SYMBOLS=NONE ]) AC_MSG_RESULT($LINUX_SYMBOLS) AC_SUBST(LINUX_SYMBOLS) ]) dnl # dnl # Detect the kernel to be built against dnl # dnl # Most modern Linux distributions have separate locations for bare dnl # source (source) and prebuilt (build) files. Additionally, there are dnl # `source` and `build` symlinks in `/lib/modules/$(KERNEL_VERSION)` dnl # pointing to them. The directory search order is now: dnl # dnl # - `configure` command line values if both `--with-linux` and dnl # `--with-linux-obj` were defined dnl # dnl # - If only `--with-linux` was defined, `--with-linux-obj` is assumed dnl # to have the same value as `--with-linux` dnl # dnl # - If neither `--with-linux` nor `--with-linux-obj` were defined dnl # autodetection is used: dnl # dnl # - `/lib/modules/$(uname -r)/{source,build}` respectively, if exist. dnl # dnl # - If only `/lib/modules/$(uname -r)/build` exists, it is assumed dnl # to be both source and build directory. dnl # dnl # - The first directory in `/lib/modules` with the highest version dnl # number according to `sort -V` which contains both `source` and dnl # `build` symlinks/directories. If module directory contains only dnl # `build` component, it is assumed to be both source and build dnl # directory. dnl # dnl # - Last resort: the first directory matching `/usr/src/kernels/*` dnl # and `/usr/src/linux-*` with the highest version number according dnl # to `sort -V` is assumed to be both source and build directory. dnl # AC_DEFUN([ZFS_AC_KERNEL], [ AC_ARG_WITH([linux], AS_HELP_STRING([--with-linux=PATH], [Path to kernel source]), [kernelsrc="$withval"]) AC_ARG_WITH(linux-obj, AS_HELP_STRING([--with-linux-obj=PATH], [Path to kernel build objects]), [kernelbuild="$withval"]) AC_MSG_CHECKING([kernel source and build directories]) AS_IF([test -n "$kernelsrc" && test -z "$kernelbuild"], [ kernelbuild="$kernelsrc" ], [test -z "$kernelsrc"], [ AS_IF([test -e "/lib/modules/$(uname -r)/source" && \ test -e "/lib/modules/$(uname -r)/build"], [ src="/lib/modules/$(uname -r)/source" build="/lib/modules/$(uname -r)/build" ], [test -e "/lib/modules/$(uname -r)/build"], [ build="/lib/modules/$(uname -r)/build" src="$build" ], [ src= for d in $(ls -1d /lib/modules/* 2>/dev/null | sort -Vr); do if test -e "$d/source" && test -e "$d/build"; then src="$d/source" build="$d/build" break fi if test -e "$d/build"; then src="$d/build" build="$d/build" break fi done # the least reliable method if test -z "$src"; then src=$(ls -1d /usr/src/kernels/* /usr/src/linux-* \ 2>/dev/null | grep -v obj | sort -Vr | head -1) build="$src" fi ]) AS_IF([test -n "$src" && test -e "$src"], [ kernelsrc=$(readlink -e "$src") ], [ kernelsrc="[Not found]" ]) AS_IF([test -n "$build" && test -e "$build"], [ kernelbuild=$(readlink -e "$build") ], [ kernelbuild="[Not found]" ]) ], [ AS_IF([test "$kernelsrc" = "NONE"], [ kernsrcver=NONE ]) withlinux=yes ]) AC_MSG_RESULT([done]) AC_MSG_CHECKING([kernel source directory]) AC_MSG_RESULT([$kernelsrc]) AC_MSG_CHECKING([kernel build directory]) AC_MSG_RESULT([$kernelbuild]) AS_IF([test ! -d "$kernelsrc" || test ! -d "$kernelbuild"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution *** is installed and then try again. If that fails, you can specify the *** location of the kernel source and build with the '--with-linux=PATH' and *** '--with-linux-obj=PATH' options respectively.]) ]) AC_MSG_CHECKING([kernel source version]) utsrelease1=$kernelbuild/include/linux/version.h utsrelease2=$kernelbuild/include/linux/utsrelease.h utsrelease3=$kernelbuild/include/generated/utsrelease.h AS_IF([test -r $utsrelease1 && grep -qF UTS_RELEASE $utsrelease1], [ utsrelease=$utsrelease1 ], [test -r $utsrelease2 && grep -qF UTS_RELEASE $utsrelease2], [ utsrelease=$utsrelease2 ], [test -r $utsrelease3 && grep -qF UTS_RELEASE $utsrelease3], [ utsrelease=$utsrelease3 ]) AS_IF([test -n "$utsrelease"], [ kernsrcver=$($AWK '/UTS_RELEASE/ { gsub(/"/, "", $[3]); print $[3] }' $utsrelease) AS_IF([test -z "$kernsrcver"], [ AC_MSG_RESULT([Not found]) AC_MSG_ERROR([ *** Cannot determine kernel version. ]) ]) ], [ AC_MSG_RESULT([Not found]) if test "x$enable_linux_builtin" != xyes; then AC_MSG_ERROR([ *** Cannot find UTS_RELEASE definition. ]) else AC_MSG_ERROR([ *** Cannot find UTS_RELEASE definition. *** Please run 'make prepare' inside the kernel source tree.]) fi ]) AC_MSG_RESULT([$kernsrcver]) AX_COMPARE_VERSION([$kernsrcver], [ge], [$ZFS_META_KVER_MIN], [], [ AC_MSG_ERROR([ *** Cannot build against kernel version $kernsrcver. *** The minimum supported kernel version is $ZFS_META_KVER_MIN. ]) ]) AC_ARG_ENABLE([linux-experimental], AS_HELP_STRING([--enable-linux-experimental], [Allow building against some unsupported kernel versions])) AX_COMPARE_VERSION([$kernsrcver], [ge], [$ZFS_META_KVER_MAX], [ AX_COMPARE_VERSION([$kernsrcver], [eq2], [$ZFS_META_KVER_MAX], [ kern_max_version_ok=yes ], [ kern_max_version_ok=no ]) ], [ kern_max_version_ok=yes ]) AS_IF([test "x$kern_max_version_ok" != "xyes"], [ AS_IF([test "x$enable_linux_experimental" == "xyes"], [ AC_DEFINE(HAVE_LINUX_EXPERIMENTAL, 1, [building against unsupported kernel version]) ], [ AC_MSG_ERROR([ *** Cannot build against kernel version $kernsrcver. *** The maximum supported kernel version is $ZFS_META_KVER_MAX. ]) ]) ]) LINUX=${kernelsrc} LINUX_OBJ=${kernelbuild} LINUX_VERSION=${kernsrcver} AC_SUBST(LINUX) AC_SUBST(LINUX_OBJ) AC_SUBST(LINUX_VERSION) dnl # create a relatively unique numeric checksum based on the kernel dnl # version and path. this is included in the cache key below, dnl # allowing different cached values for different kernels _zfs_linux_cache_checksum=$(echo ${kernelsrc} {$kernelbuild} ${kernsrcver} | cksum | cut -f1 -d' ') ]) AC_DEFUN([ZFS_AC_KERNEL_VERSION_WARNING], [ AS_IF([test "x$enable_linux_experimental" = "xyes" && \ test "x$kern_max_version_ok" != "xyes"], [ AC_MSG_WARN([ You are building OpenZFS against Linux version $kernsrcver. This combination is considered EXPERIMENTAL by the OpenZFS project. Even if it appears to build and run correctly, there may be bugs that can cause SERIOUS DATA LOSS. YOU HAVE BEEN WARNED! If you choose to continue, we'd appreciate if you could report your results on the OpenZFS issue tracker at: https://github.com/openzfs/zfs/issues/new Your feedback will help us prepare a new OpenZFS release that supports this version of Linux. ]) ]) ]) dnl # dnl # Detect the QAT module to be built against, QAT provides hardware dnl # acceleration for data compression: dnl # dnl # https://01.org/intel-quickassist-technology dnl # dnl # 1) Download and install QAT driver from the above link dnl # 2) Start QAT driver in your system: dnl # service qat_service start dnl # 3) Enable QAT in ZFS, e.g.: dnl # ./configure --with-qat=/QAT1.6 dnl # make dnl # 4) Set GZIP compression in ZFS dataset: dnl # zfs set compression = gzip dnl # dnl # Then the data written to this ZFS pool is compressed by QAT accelerator dnl # automatically, and de-compressed by QAT when read from the pool. dnl # dnl # 1) Get QAT hardware statistics with: dnl # cat /proc/icp_dh895xcc_dev/qat dnl # 2) To disable QAT: dnl # insmod zfs.ko zfs_qat_disable=1 dnl # AC_DEFUN([ZFS_AC_QAT], [ AC_ARG_WITH([qat], AS_HELP_STRING([--with-qat=PATH], [Path to qat source]), AS_IF([test "$withval" = "yes"], AC_MSG_ERROR([--with-qat=PATH requires a PATH]), [qatsrc="$withval"])) AC_ARG_WITH([qat-obj], AS_HELP_STRING([--with-qat-obj=PATH], [Path to qat build objects]), [qatbuild="$withval"]) AS_IF([test ! -z "${qatsrc}"], [ AC_MSG_CHECKING([qat source directory]) AC_MSG_RESULT([$qatsrc]) QAT_SRC="${qatsrc}/quickassist" AS_IF([ test ! -e "$QAT_SRC/include/cpa.h"], [ AC_MSG_ERROR([ *** Please make sure the qat driver package is installed *** and specify the location of the qat source with the *** '--with-qat=PATH' option then try again. Failed to *** find cpa.h in: ${QAT_SRC}/include]) ]) ]) AS_IF([test ! -z "${qatsrc}"], [ AC_MSG_CHECKING([qat build directory]) AS_IF([test -z "$qatbuild"], [ qatbuild="${qatsrc}/build" ]) AC_MSG_RESULT([$qatbuild]) QAT_OBJ=${qatbuild} AS_IF([ ! test -e "$QAT_OBJ/icp_qa_al.ko" && ! test -e "$QAT_OBJ/qat_api.ko"], [ AC_MSG_ERROR([ *** Please make sure the qat driver is installed then try again. *** Failed to find icp_qa_al.ko or qat_api.ko in: $QAT_OBJ]) ]) AC_SUBST(QAT_SRC) AC_SUBST(QAT_OBJ) AC_DEFINE(HAVE_QAT, 1, [qat is enabled and existed]) ]) dnl # dnl # Detect the name used for the QAT Module.symvers file. dnl # AS_IF([test ! -z "${qatsrc}"], [ AC_MSG_CHECKING([qat file for module symbols]) QAT_SYMBOLS=$QAT_SRC/lookaside/access_layer/src/Module.symvers AS_IF([test -r $QAT_SYMBOLS], [ AC_MSG_RESULT([$QAT_SYMBOLS]) AC_SUBST(QAT_SYMBOLS) ],[ AC_MSG_ERROR([ *** Please make sure the qat driver is installed then try again. *** Failed to find Module.symvers in: $QAT_SYMBOLS ]) ]) ]) ]) dnl # dnl # ZFS_LINUX_CONFTEST_H dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_H], [ test -d build/$2 || mkdir -p build/$2 cat - <<_ACEOF >build/$2/$2.h $1 _ACEOF ]) dnl # dnl # ZFS_LINUX_CONFTEST_C dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_C], [ test -d build/$2 || mkdir -p build/$2 cat confdefs.h - <<_ACEOF >build/$2/$2.c $1 _ACEOF ]) dnl # dnl # ZFS_LINUX_CONFTEST_MAKEFILE dnl # dnl # $1 - test case name dnl # $2 - add to top-level Makefile dnl # $3 - additional build flags dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_MAKEFILE], [ test -d build || mkdir -p build test -d build/$1 || mkdir -p build/$1 file=build/$1/Makefile dnl # Example command line to manually build source. cat - <<_ACEOF >$file # Example command line to manually build source # make modules -C $LINUX_OBJ $ARCH_UM M=$PWD/build/$1 ccflags-y := -Werror $FRAME_LARGER_THAN _ACEOF dnl # Additional custom CFLAGS as requested. m4_ifval($3, [echo "ccflags-y += $3" >>$file], []) dnl # Test case source echo "obj-m := $1.o" >>$file AS_IF([test "x$2" = "xyes"], [echo "obj-m += $1/" >>build/Makefile], []) ]) dnl # dnl # ZFS_LINUX_TEST_PROGRAM(C)([PROLOGUE], [BODY]) dnl # m4_define([ZFS_LINUX_TEST_PROGRAM], [ #include $1 int main (void) { $2 ; return 0; } MODULE_DESCRIPTION("conftest"); MODULE_AUTHOR(ZFS_META_AUTHOR); MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE); MODULE_LICENSE($3); ]) dnl # dnl # ZFS_LINUX_TEST_REMOVE dnl # dnl # Removes the specified test source and results. dnl # AC_DEFUN([ZFS_LINUX_TEST_REMOVE], [ test -d build/$1 && rm -Rf build/$1 test -f build/Makefile && sed '/$1/d' build/Makefile ]) dnl # dnl # ZFS_LINUX_COMPILE dnl # dnl # $1 - build dir dnl # $2 - test command dnl # $3 - pass command dnl # $4 - fail command dnl # $5 - set KBUILD_MODPOST_NOFINAL='yes' dnl # $6 - set KBUILD_MODPOST_WARN='yes' dnl # dnl # Used internally by ZFS_LINUX_TEST_{COMPILE,MODPOST} dnl # AC_DEFUN([ZFS_LINUX_COMPILE], [ AC_ARG_VAR([KERNEL_CC], [C compiler for building kernel modules]) AC_ARG_VAR([KERNEL_LD], [Linker for building kernel modules]) AC_ARG_VAR([KERNEL_LLVM], [Binary option to build kernel modules with LLVM/CLANG toolchain]) AC_ARG_VAR([KERNEL_CROSS_COMPILE], [Cross compile prefix for kernel module builds]) AC_ARG_VAR([KERNEL_ARCH], [Architecture to build kernel modules for]) AC_TRY_COMMAND([ KBUILD_MODPOST_NOFINAL="$5" KBUILD_MODPOST_WARN="$6" make modules -k -j$TEST_JOBS ${KERNEL_CC:+CC=$KERNEL_CC} ${KERNEL_LD:+LD=$KERNEL_LD} ${KERNEL_LLVM:+LLVM=$KERNEL_LLVM} CONFIG_MODULES=y CFLAGS_MODULE=-DCONFIG_MODULES ${KERNEL_CROSS_COMPILE:+CROSS_COMPILE=$KERNEL_CROSS_COMPILE} ${KERNEL_ARCH:+ARCH=$KERNEL_ARCH} -C $LINUX_OBJ $ARCH_UM M=$PWD/$1 >$1/build.log 2>&1]) AS_IF([AC_TRY_COMMAND([$2])], [$3], [$4]) ]) dnl # dnl # ZFS_LINUX_TEST_COMPILE dnl # dnl # Perform a full compile excluding the final modpost phase. dnl # AC_DEFUN([ZFS_LINUX_TEST_COMPILE], [ ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ mv $2/Makefile $2/Makefile.compile.$1 mv $2/build.log $2/build.log.$1 ],[ AC_MSG_ERROR([ *** Unable to compile test source to determine kernel interfaces.]) ], [yes], []) ]) dnl # dnl # ZFS_LINUX_TEST_MODPOST dnl # dnl # Perform a full compile including the modpost phase. This may dnl # be an incremental build if the objects have already been built. dnl # AC_DEFUN([ZFS_LINUX_TEST_MODPOST], [ ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ mv $2/Makefile $2/Makefile.modpost.$1 cat $2/build.log >>build/build.log.$1 ],[ AC_MSG_ERROR([ *** Unable to modpost test source to determine kernel interfaces.]) ], [], [yes]) ]) dnl # dnl # Perform the compilation of the test cases in two phases. dnl # dnl # Phase 1) attempt to build the object files for all of the tests dnl # defined by the ZFS_LINUX_TEST_SRC macro. But do not dnl # perform the final modpost stage. dnl # dnl # Phase 2) disable all tests which failed the initial compilation, dnl # then invoke the final modpost step for the remaining tests. dnl # dnl # This allows us efficiently build the test cases in parallel while dnl # remaining resilient to build failures which are expected when dnl # detecting the available kernel interfaces. dnl # dnl # The maximum allowed parallelism can be controlled by setting the dnl # TEST_JOBS environment variable. Otherwise, it default to $(nproc). dnl # AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [ dnl # Phase 1 - Compilation only, final linking is skipped. ZFS_LINUX_TEST_COMPILE([$1], [build]) dnl # dnl # Phase 2 - When building external modules disable test cases dnl # which failed to compile and invoke modpost to verify the dnl # final linking. dnl # dnl # Test names suffixed with '_license' call modpost independently dnl # to ensure that a single incompatibility does not result in the dnl # modpost phase exiting early. This check is not performed on dnl # every symbol since the majority are compatible and doing so dnl # would significantly slow down this phase. dnl # dnl # When configuring for builtin (--enable-linux-builtin) dnl # fake the linking step artificially create the expected .ko dnl # files for tests which did compile. This is required for dnl # kernels which do not have loadable module support or have dnl # not yet been built. dnl # AS_IF([test "x$enable_linux_builtin" = "xno"], [ for dir in $(awk '/^obj-m/ { print [$]3 }' \ build/Makefile.compile.$1); do name=${dir%/} AS_IF([test -f build/$name/$name.o], [ AS_IF([test "${name##*_}" = "license"], [ ZFS_LINUX_TEST_MODPOST([$1], [build/$name]) echo "obj-n += $dir" >>build/Makefile ], [ echo "obj-m += $dir" >>build/Makefile ]) ], [ echo "obj-n += $dir" >>build/Makefile ]) done ZFS_LINUX_TEST_MODPOST([$1], [build]) ], [ for dir in $(awk '/^obj-m/ { print [$]3 }' \ build/Makefile.compile.$1); do name=${dir%/} AS_IF([test -f build/$name/$name.o], [ touch build/$name/$name.ko ]) done ]) ]) dnl # dnl # ZFS_LINUX_TEST_SRC dnl # dnl # $1 - name dnl # $2 - global dnl # $3 - source dnl # $4 - extra cflags dnl # $5 - check license-compatibility dnl # dnl # Check if the test source is buildable at all and then if it is dnl # license compatible. dnl # dnl # N.B because all of the test cases are compiled in parallel they dnl # must never depend on the results of previous tests. Each test dnl # needs to be entirely independent. dnl # AC_DEFUN([ZFS_LINUX_TEST_SRC], [ cachevar="zfs_cv_kernel_[$1]_$_zfs_linux_cache_checksum" eval "cacheval=\$$cachevar" AS_IF([test "x$cacheval" = "x"], [ ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]], [["Dual BSD/GPL"]])], [$1]) ZFS_LINUX_CONFTEST_MAKEFILE([$1], [yes], [$4]) AS_IF([ test -n "$5" ], [ ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM( [[$2]], [[$3]], [[$5]])], [$1_license]) ZFS_LINUX_CONFTEST_MAKEFILE([$1_license], [yes], [$4]) ]) ]) ]) dnl # dnl # ZFS_LINUX_TEST_RESULT dnl # dnl # $1 - name of a test source (ZFS_LINUX_TEST_SRC) dnl # $2 - run on success (valid .ko generated) dnl # $3 - run on failure (unable to compile) dnl # AC_DEFUN([ZFS_LINUX_TEST_RESULT], [ cachevar="zfs_cv_kernel_[$1]_$_zfs_linux_cache_checksum" AC_CACHE_VAL([$cachevar], [ AS_IF([test -d build/$1], [ AS_IF([test -f build/$1/$1.ko], [ eval "$cachevar=yes" ], [ eval "$cachevar=no" ]) ], [ AC_MSG_ERROR([ *** No matching source for the "$1" test, check that *** both the test source and result macros refer to the same name. ]) ]) ]) eval "cacheval=\$$cachevar" AS_IF([test "x$cacheval" = "xyes"], [$2], [$3]) ]) dnl # dnl # ZFS_LINUX_TEST_ERROR dnl # dnl # Generic error message which can be used when none of the expected dnl # kernel interfaces were detected. dnl # AC_DEFUN([ZFS_LINUX_TEST_ERROR], [ AC_MSG_ERROR([ *** None of the expected "$1" interfaces were detected. *** This may be because your kernel version is newer than what is *** supported, or you are using a patched custom kernel with *** incompatible modifications. *** *** ZFS Version: $ZFS_META_ALIAS *** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX ]) ]) dnl # dnl # ZFS_LINUX_TEST_RESULT_SYMBOL dnl # dnl # Like ZFS_LINUX_TEST_RESULT except ZFS_CHECK_SYMBOL_EXPORT is called to dnl # verify symbol exports, unless --enable-linux-builtin was provided to dnl # configure. dnl # AC_DEFUN([ZFS_LINUX_TEST_RESULT_SYMBOL], [ cachevar="zfs_cv_kernel_[$1]_$_zfs_linux_cache_checksum" AC_CACHE_VAL([$cachevar], [ AS_IF([ ! test -f build/$1/$1.ko], [ eval "$cachevar=no" ], [ AS_IF([test "x$enable_linux_builtin" != "xyes"], [ ZFS_CHECK_SYMBOL_EXPORT([$2], [$3], [ eval "$cachevar=yes" ], [ eval "$cachevar=no" ]) ], [ eval "$cachevar=yes" ]) ]) ]) eval "cacheval=\$$cachevar" AS_IF([test "x$cacheval" = "xyes"], [$4], [$5]) ]) dnl # dnl # ZFS_LINUX_COMPILE_IFELSE dnl # AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [ ZFS_LINUX_TEST_REMOVE([conftest]) m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1], [conftest])]) m4_ifvaln([$5], [ZFS_LINUX_CONFTEST_H([$5], [conftest])], [ZFS_LINUX_CONFTEST_H([], [conftest])]) ZFS_LINUX_CONFTEST_MAKEFILE([conftest], [no], [m4_ifvaln([$5], [-I$PWD/build/conftest], [])]) ZFS_LINUX_COMPILE([build/conftest], [$2], [$3], [$4], [], []) ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE dnl # dnl # $1 - global dnl # $2 - source dnl # $3 - run on success (valid .ko generated) dnl # $4 - run on failure (unable to compile) dnl # dnl # When configuring as builtin (--enable-linux-builtin) for kernels dnl # without loadable module support (CONFIG_MODULES=n) only the object dnl # file is created. See ZFS_LINUX_TEST_COMPILE_ALL for details. dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [ AS_IF([test "x$enable_linux_builtin" = "xyes"], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.o], [$3], [$4]) ], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.ko], [$3], [$4]) ]) ]) dnl # dnl # ZFS_CHECK_SYMBOL_EXPORT dnl # dnl # Check if a symbol is exported on not by consulting the symbols dnl # file, or optionally the source code. dnl # AC_DEFUN([ZFS_CHECK_SYMBOL_EXPORT], [ grep -q -E '[[[:space:]]]$1[[[:space:]]]' \ $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null rc=$? if test $rc -ne 0; then export=0 for file in $2; do grep -q -E "EXPORT_SYMBOL.*($1)" \ "$LINUX/$file" 2>/dev/null rc=$? if test $rc -eq 0; then export=1 break; fi done if test $export -eq 0; then : $4 else : $3 fi else : $3 fi ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL dnl # dnl # Like ZFS_LINUX_TRY_COMPILER except ZFS_CHECK_SYMBOL_EXPORT is called dnl # to verify symbol exports, unless --enable-linux-builtin was provided dnl # to configure. dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [ ZFS_LINUX_TRY_COMPILE([$1], [$2], [rc=0], [rc=1]) if test $rc -ne 0; then : $6 else if test "x$enable_linux_builtin" != xyes; then ZFS_CHECK_SYMBOL_EXPORT([$3], [$4], [rc=0], [rc=1]) fi if test $rc -ne 0; then : $6 else : $5 fi fi ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE_HEADER dnl # like ZFS_LINUX_TRY_COMPILE, except the contents conftest.h are dnl # provided via the fifth parameter dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [ AS_IF([test "x$enable_linux_builtin" = "xyes"], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.o], [$3], [$4], [$5]) ], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.ko], [$3], [$4], [$5]) ]) ]) dnl # dnl # AS_VERSION_COMPARE_LE dnl # like AS_VERSION_COMPARE_LE, but runs $3 if (and only if) $1 <= $2 dnl # AS_VERSION_COMPARE_LE (version-1, version-2, [action-if-less-or-equal], [action-if-greater]) dnl # AC_DEFUN([AS_VERSION_COMPARE_LE], [ AS_VERSION_COMPARE([$1], [$2], [$3], [$3], [$4]) ]) dnl # dnl # ZFS_LINUX_REQUIRE_API dnl # like ZFS_LINUX_TEST_ERROR, except only fails if the kernel is dnl # at least some specified version. dnl # AC_DEFUN([ZFS_LINUX_REQUIRE_API], [ AS_VERSION_COMPARE_LE([$2], [$kernsrcver], [ AC_MSG_ERROR([ *** None of the expected "$1" interfaces were detected. This *** interface is expected for kernels version "$2" and above. *** This may be because your kernel version is newer than what is *** supported, or you are using a patched custom kernel with *** incompatible modifications. Newer kernels may have incompatible *** APIs. *** *** ZFS Version: $ZFS_META_ALIAS *** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX ]) ], [ AC_MSG_RESULT(no) ]) ]) diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h index cbf14e28371f..d9dc904bc322 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -1,265 +1,272 @@ // SPDX-License-Identifier: CDDL-1.0 /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (C) 2011 Lawrence Livermore National Security, LLC. * Copyright (C) 2015 Jörg Thalheim. + * Copyright (c) 2025, Rob Norris */ #ifndef _ZFS_VFS_H #define _ZFS_VFS_H #include #include #include #include /* * 4.14 adds SB_* flag definitions, define them to MS_* equivalents * if not set. */ #ifndef SB_RDONLY #define SB_RDONLY MS_RDONLY #endif #ifndef SB_SILENT #define SB_SILENT MS_SILENT #endif #ifndef SB_ACTIVE #define SB_ACTIVE MS_ACTIVE #endif #ifndef SB_POSIXACL #define SB_POSIXACL MS_POSIXACL #endif #ifndef SB_MANDLOCK #define SB_MANDLOCK MS_MANDLOCK #endif #ifndef SB_NOATIME #define SB_NOATIME MS_NOATIME #endif #if defined(SEEK_HOLE) && defined(SEEK_DATA) static inline loff_t lseek_execute( struct file *filp, struct inode *inode, loff_t offset, loff_t maxsize) { #ifdef FMODE_UNSIGNED_OFFSET if (offset < 0 && !(filp->f_mode & FMODE_UNSIGNED_OFFSET)) #else if (offset < 0 && !(filp->f_op->fop_flags & FOP_UNSIGNED_OFFSET)) #endif return (-EINVAL); if (offset > maxsize) return (-EINVAL); if (offset != filp->f_pos) { spin_lock(&filp->f_lock); filp->f_pos = offset; #ifdef HAVE_FILE_F_VERSION filp->f_version = 0; #endif spin_unlock(&filp->f_lock); } return (offset); } #endif /* SEEK_HOLE && SEEK_DATA */ #if defined(CONFIG_FS_POSIX_ACL) /* * These functions safely approximates the behavior of posix_acl_release() * which cannot be used because it calls the GPL-only symbol kfree_rcu(). * The in-kernel version, which can access the RCU, frees the ACLs after * the grace period expires. Because we're unsure how long that grace * period may be this implementation conservatively delays for 60 seconds. * This is several orders of magnitude larger than expected grace period. * At 60 seconds the kernel will also begin issuing RCU stall warnings. */ #include void zpl_posix_acl_release_impl(struct posix_acl *); static inline void zpl_posix_acl_release(struct posix_acl *acl) { if ((acl == NULL) || (acl == ACL_NOT_CACHED)) return; if (refcount_dec_and_test(&acl->a_refcount)) zpl_posix_acl_release_impl(acl); } #endif /* CONFIG_FS_POSIX_ACL */ static inline uid_t zfs_uid_read_impl(struct inode *ip) { return (from_kuid(kcred->user_ns, ip->i_uid)); } static inline uid_t zfs_uid_read(struct inode *ip) { return (zfs_uid_read_impl(ip)); } static inline gid_t zfs_gid_read_impl(struct inode *ip) { return (from_kgid(kcred->user_ns, ip->i_gid)); } static inline gid_t zfs_gid_read(struct inode *ip) { return (zfs_gid_read_impl(ip)); } static inline void zfs_uid_write(struct inode *ip, uid_t uid) { ip->i_uid = make_kuid(kcred->user_ns, uid); } static inline void zfs_gid_write(struct inode *ip, gid_t gid) { ip->i_gid = make_kgid(kcred->user_ns, gid); } /* * 3.15 API change */ #ifndef RENAME_NOREPLACE #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ #endif #ifndef RENAME_EXCHANGE #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ #endif #ifndef RENAME_WHITEOUT #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ #endif /* * 4.9 API change */ #if !(defined(HAVE_SETATTR_PREPARE_NO_USERNS) || \ defined(HAVE_SETATTR_PREPARE_USERNS) || \ defined(HAVE_SETATTR_PREPARE_IDMAP)) static inline int setattr_prepare(struct dentry *dentry, struct iattr *ia) { return (inode_change_ok(dentry->d_inode, ia)); } #endif /* * 4.11 API change * These macros are defined by kernel 4.11. We define them so that the same * code builds under kernels < 4.11 and >= 4.11. The macros are set to 0 so * that it will create obvious failures if they are accidentally used when built * against a kernel >= 4.11. */ #ifndef STATX_BASIC_STATS #define STATX_BASIC_STATS 0 #endif #ifndef AT_STATX_SYNC_AS_STAT #define AT_STATX_SYNC_AS_STAT 0 #endif /* * 4.11 API change * 4.11 takes struct path *, < 4.11 takes vfsmount * */ #if defined(HAVE_PATH_IOPS_GETATTR) #define ZPL_GETATTR_WRAPPER(func) \ static int \ func(const struct path *path, struct kstat *stat, u32 request_mask, \ unsigned int query_flags) \ { \ return (func##_impl(path, stat, request_mask, query_flags)); \ } #elif defined(HAVE_USERNS_IOPS_GETATTR) #define ZPL_GETATTR_WRAPPER(func) \ static int \ func(struct user_namespace *user_ns, const struct path *path, \ struct kstat *stat, u32 request_mask, unsigned int query_flags) \ { \ return (func##_impl(user_ns, path, stat, request_mask, \ query_flags)); \ } #elif defined(HAVE_IDMAP_IOPS_GETATTR) #define ZPL_GETATTR_WRAPPER(func) \ static int \ func(struct mnt_idmap *user_ns, const struct path *path, \ struct kstat *stat, u32 request_mask, unsigned int query_flags) \ { \ return (func##_impl(user_ns, path, stat, request_mask, \ query_flags)); \ } #else #error #endif /* * Returns true when called in the context of a 32-bit system call. */ static inline int zpl_is_32bit_api(void) { #ifdef CONFIG_COMPAT return (in_compat_syscall()); #else return (BITS_PER_LONG == 32); #endif } /* * 5.12 API change * To support id-mapped mounts, generic_fillattr() was modified to * accept a new struct user_namespace* as its first arg. * * 6.3 API change * generic_fillattr() first arg is changed to struct mnt_idmap * * * 6.6 API change * generic_fillattr() gets new second arg request_mask, a u32 type * */ #ifdef HAVE_GENERIC_FILLATTR_IDMAP #define zpl_generic_fillattr(idmap, ip, sp) \ generic_fillattr(idmap, ip, sp) #elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK) #define zpl_generic_fillattr(idmap, rqm, ip, sp) \ generic_fillattr(idmap, rqm, ip, sp) #elif defined(HAVE_GENERIC_FILLATTR_USERNS) #define zpl_generic_fillattr(user_ns, ip, sp) \ generic_fillattr(user_ns, ip, sp) #else #define zpl_generic_fillattr(user_ns, ip, sp) generic_fillattr(ip, sp) #endif +#ifdef HAVE_INODE_GENERIC_DROP +/* 6.18 API change. These were renamed, alias the old names to the new. */ +#define generic_delete_inode(ip) inode_just_drop(ip) +#define generic_drop_inode(ip) inode_generic_drop(ip) +#endif + #endif /* _ZFS_VFS_H */ diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index 444948d03cb3..347b352506e5 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -1,570 +1,572 @@ // SPDX-License-Identifier: CDDL-1.0 /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2011, Lawrence Livermore National Security, LLC. * Copyright (c) 2023, Datto Inc. All rights reserved. * Copyright (c) 2025, Klara, Inc. + * Copyright (c) 2025, Rob Norris */ #include #include #include #include #include #include #include +#include /* * What to do when the last reference to an inode is released. If 0, the kernel * will cache it on the superblock. If 1, the inode will be freed immediately. * See zpl_drop_inode(). */ int zfs_delete_inode = 0; /* * What to do when the last reference to a dentry is released. If 0, the kernel * will cache it until the entry (file) is destroyed. If 1, the dentry will be * marked for cleanup, at which time its inode reference will be released. See * zpl_dentry_delete(). */ int zfs_delete_dentry = 0; static struct inode * zpl_inode_alloc(struct super_block *sb) { struct inode *ip; VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0); inode_set_iversion(ip, 1); return (ip); } #ifdef HAVE_SOPS_FREE_INODE static void zpl_inode_free(struct inode *ip) { ASSERT0(atomic_read(&ip->i_count)); zfs_inode_free(ip); } #endif static void zpl_inode_destroy(struct inode *ip) { ASSERT0(atomic_read(&ip->i_count)); zfs_inode_destroy(ip); } /* * Called from __mark_inode_dirty() to reflect that something in the * inode has changed. We use it to ensure the znode system attributes * are always strictly update to date with respect to the inode. */ static void zpl_dirty_inode(struct inode *ip, int flags) { fstrans_cookie_t cookie; cookie = spl_fstrans_mark(); zfs_dirty_inode(ip, flags); spl_fstrans_unmark(cookie); } /* * ->drop_inode() is called when the last reference to an inode is released. * Its return value indicates if the inode should be destroyed immediately, or * cached on the superblock structure. * * By default (zfs_delete_inode=0), we call generic_drop_inode(), which returns * "destroy immediately" if the inode is unhashed and has no links (roughly: no * longer exists on disk). On datasets with millions of rarely-accessed files, * this can cause a large amount of memory to be "pinned" by cached inodes, * which in turn pin their associated dnodes and dbufs, until the kernel starts * reporting memory pressure and requests OpenZFS release some memory (see * zfs_prune()). * - * When set to 1, we call generic_delete_node(), which always returns "destroy + * When set to 1, we call generic_delete_inode(), which always returns "destroy * immediately", resulting in inodes being destroyed immediately, releasing * their associated dnodes and dbufs to the dbuf cached and the ARC to be * evicted as normal. * * Note that the "last reference" doesn't always mean the last _userspace_ * reference; the dentry cache also holds a reference, so "busy" inodes will * still be kept alive that way (subject to dcache tuning). */ static int zpl_drop_inode(struct inode *ip) { if (zfs_delete_inode) return (generic_delete_inode(ip)); return (generic_drop_inode(ip)); } /* * The ->evict_inode() callback must minimally truncate the inode pages, * and call clear_inode(). For 2.6.35 and later kernels this will * simply update the inode state, with the sync occurring before the * truncate in evict(). For earlier kernels clear_inode() maps to * end_writeback() which is responsible for completing all outstanding * write back. In either case, once this is done it is safe to cleanup * any remaining inode specific data via zfs_inactive(). * remaining filesystem specific data. */ static void zpl_evict_inode(struct inode *ip) { fstrans_cookie_t cookie; cookie = spl_fstrans_mark(); truncate_setsize(ip, 0); clear_inode(ip); zfs_inactive(ip); spl_fstrans_unmark(cookie); } static void zpl_put_super(struct super_block *sb) { fstrans_cookie_t cookie; int error; cookie = spl_fstrans_mark(); error = -zfs_umount(sb); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); } /* * zfs_sync() is the underlying implementation for the sync(2) and syncfs(2) * syscalls, via sb->s_op->sync_fs(). * * Before kernel 5.17 (torvalds/linux@5679897eb104), syncfs() -> * sync_filesystem() would ignore the return from sync_fs(), instead only * considing the error from syncing the underlying block device (sb->s_dev). * Since OpenZFS doesn't _have_ an underlying block device, there's no way for * us to report a sync directly. * * However, in 5.8 (torvalds/linux@735e4ae5ba28) the superblock gained an extra * error store `s_wb_err`, to carry errors seen on page writeback since the * last call to syncfs(). If sync_filesystem() does not return an error, any * existing writeback error on the superblock will be used instead (and cleared * either way). We don't use this (page writeback is a different thing for us), * so for 5.8-5.17 we can use that instead to get syncfs() to return the error. * * Before 5.8, we have no other good options - no matter what happens, the * userspace program will be told the call has succeeded, and so we must make * it so, Therefore, when we are asked to wait for sync to complete (wait == * 1), if zfs_sync() has returned an error we have no choice but to block, * regardless of the reason. * * The 5.17 change was backported to the 5.10, 5.15 and 5.16 series, and likely * to some vendor kernels. Meanwhile, s_wb_err is still in use in 6.15 (the * mainline Linux series at time of writing), and has likely been backported to * vendor kernels before 5.8. We don't really want to use a workaround when we * don't have to, but we can't really detect whether or not sync_filesystem() * will return our errors (without a difficult runtime test anyway). So, we use * a static version check: any kernel reporting its version as 5.17+ will use a * direct error return, otherwise, we'll either use s_wb_err if it was detected * at configure (5.8-5.16 + vendor backports). If it's unavailable, we will * block to ensure the correct semantics. * * See https://github.com/openzfs/zfs/issues/17416 for further discussion. */ static int zpl_sync_fs(struct super_block *sb, int wait) { fstrans_cookie_t cookie; cred_t *cr = CRED(); int error; crhold(cr); cookie = spl_fstrans_mark(); error = -zfs_sync(sb, wait, cr); #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0) #ifdef HAVE_SUPER_BLOCK_S_WB_ERR if (error && wait) errseq_set(&sb->s_wb_err, error); #else if (error && wait) { zfsvfs_t *zfsvfs = sb->s_fs_info; ASSERT3P(zfsvfs, !=, NULL); if (zfs_enter(zfsvfs, FTAG) == 0) { txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); zfs_exit(zfsvfs, FTAG); error = 0; } } #endif #endif /* < 5.17.0 */ spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); return (error); } static int zpl_statfs(struct dentry *dentry, struct kstatfs *statp) { fstrans_cookie_t cookie; int error; cookie = spl_fstrans_mark(); error = -zfs_statvfs(dentry->d_inode, statp); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); /* * If required by a 32-bit system call, dynamically scale the * block size up to 16MiB and decrease the block counts. This * allows for a maximum size of 64EiB to be reported. The file * counts must be artificially capped at 2^32-1. */ if (unlikely(zpl_is_32bit_api())) { while (statp->f_blocks > UINT32_MAX && statp->f_bsize < SPA_MAXBLOCKSIZE) { statp->f_frsize <<= 1; statp->f_bsize <<= 1; statp->f_blocks >>= 1; statp->f_bfree >>= 1; statp->f_bavail >>= 1; } uint64_t usedobjs = statp->f_files - statp->f_ffree; statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs); statp->f_files = statp->f_ffree + usedobjs; } return (error); } static int zpl_remount_fs(struct super_block *sb, int *flags, char *data) { zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data }; fstrans_cookie_t cookie; int error; cookie = spl_fstrans_mark(); error = -zfs_remount(sb, flags, &zm); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); return (error); } static int __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs) { int error; if ((error = zpl_enter(zfsvfs, FTAG)) != 0) return (error); char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dmu_objset_name(zfsvfs->z_os, fsname); for (int i = 0; fsname[i] != 0; i++) { /* * Spaces in the dataset name must be converted to their * octal escape sequence for getmntent(3) to correctly * parse then fsname portion of /proc/self/mounts. */ if (fsname[i] == ' ') { seq_puts(seq, "\\040"); } else { seq_putc(seq, fsname[i]); } } kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); zpl_exit(zfsvfs, FTAG); return (0); } static int zpl_show_devname(struct seq_file *seq, struct dentry *root) { return (__zpl_show_devname(seq, root->d_sb->s_fs_info)); } static int __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs) { seq_printf(seq, ",%s", zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr"); #ifdef CONFIG_FS_POSIX_ACL switch (zfsvfs->z_acl_type) { case ZFS_ACLTYPE_POSIX: seq_puts(seq, ",posixacl"); break; default: seq_puts(seq, ",noacl"); break; } #endif /* CONFIG_FS_POSIX_ACL */ switch (zfsvfs->z_case) { case ZFS_CASE_SENSITIVE: seq_puts(seq, ",casesensitive"); break; case ZFS_CASE_INSENSITIVE: seq_puts(seq, ",caseinsensitive"); break; default: seq_puts(seq, ",casemixed"); break; } return (0); } static int zpl_show_options(struct seq_file *seq, struct dentry *root) { return (__zpl_show_options(seq, root->d_sb->s_fs_info)); } static int zpl_fill_super(struct super_block *sb, void *data, int silent) { zfs_mnt_t *zm = (zfs_mnt_t *)data; fstrans_cookie_t cookie; int error; cookie = spl_fstrans_mark(); error = -zfs_domount(sb, zm, silent); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); return (error); } static int zpl_test_super(struct super_block *s, void *data) { zfsvfs_t *zfsvfs = s->s_fs_info; objset_t *os = data; /* * If the os doesn't match the z_os in the super_block, assume it is * not a match. Matching would imply a multimount of a dataset. It is * possible that during a multimount, there is a simultaneous operation * that changes the z_os, e.g., rollback, where the match will be * missed, but in that case the user will get an EBUSY. */ return (zfsvfs != NULL && os == zfsvfs->z_os); } static struct super_block * zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) { struct super_block *s; objset_t *os; boolean_t issnap = B_FALSE; int err; err = dmu_objset_hold(zm->mnt_osname, FTAG, &os); if (err) return (ERR_PTR(-err)); /* * The dsl pool lock must be released prior to calling sget(). * It is possible sget() may block on the lock in grab_super() * while deactivate_super() holds that same lock and waits for * a txg sync. If the dsl_pool lock is held over sget() * this can prevent the pool sync and cause a deadlock. */ dsl_dataset_long_hold(dmu_objset_ds(os), FTAG); dsl_pool_rele(dmu_objset_pool(os), FTAG); s = sget(fs_type, zpl_test_super, set_anon_super, flags, os); /* * Recheck with the lock held to prevent mounting the wrong dataset * since z_os can be stale when the teardown lock is held. * * We can't do this in zpl_test_super in since it's under spinlock and * also s_umount lock is not held there so it would race with * zfs_umount and zfsvfs can be freed. */ if (!IS_ERR(s) && s->s_fs_info != NULL) { zfsvfs_t *zfsvfs = s->s_fs_info; if (zpl_enter(zfsvfs, FTAG) == 0) { if (os != zfsvfs->z_os) err = -SET_ERROR(EBUSY); issnap = zfsvfs->z_issnap; zpl_exit(zfsvfs, FTAG); } else { err = -SET_ERROR(EBUSY); } } dsl_dataset_long_rele(dmu_objset_ds(os), FTAG); dsl_dataset_rele(dmu_objset_ds(os), FTAG); if (IS_ERR(s)) return (ERR_CAST(s)); if (err) { deactivate_locked_super(s); return (ERR_PTR(err)); } if (s->s_root == NULL) { err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0); if (err) { deactivate_locked_super(s); return (ERR_PTR(err)); } s->s_flags |= SB_ACTIVE; } else if (!issnap && ((flags ^ s->s_flags) & SB_RDONLY)) { /* * Skip ro check for snap since snap is always ro regardless * ro flag is passed by mount or not. */ deactivate_locked_super(s); return (ERR_PTR(-EBUSY)); } return (s); } static struct dentry * zpl_mount(struct file_system_type *fs_type, int flags, const char *osname, void *data) { zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data }; struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm); if (IS_ERR(sb)) return (ERR_CAST(sb)); return (dget(sb->s_root)); } static void zpl_kill_sb(struct super_block *sb) { zfs_preumount(sb); kill_anon_super(sb); } void zpl_prune_sb(uint64_t nr_to_scan, void *arg) { struct super_block *sb = (struct super_block *)arg; int objects = 0; /* * Ensure the superblock is not in the process of being torn down. */ #ifdef HAVE_SB_DYING if (down_read_trylock(&sb->s_umount)) { if (!(sb->s_flags & SB_DYING) && sb->s_root && (sb->s_flags & SB_BORN)) { (void) zfs_prune(sb, nr_to_scan, &objects); } up_read(&sb->s_umount); } #else if (down_read_trylock(&sb->s_umount)) { if (!hlist_unhashed(&sb->s_instances) && sb->s_root && (sb->s_flags & SB_BORN)) { (void) zfs_prune(sb, nr_to_scan, &objects); } up_read(&sb->s_umount); } #endif } const struct super_operations zpl_super_operations = { .alloc_inode = zpl_inode_alloc, #ifdef HAVE_SOPS_FREE_INODE .free_inode = zpl_inode_free, #endif .destroy_inode = zpl_inode_destroy, .dirty_inode = zpl_dirty_inode, .write_inode = NULL, .drop_inode = zpl_drop_inode, .evict_inode = zpl_evict_inode, .put_super = zpl_put_super, .sync_fs = zpl_sync_fs, .statfs = zpl_statfs, .remount_fs = zpl_remount_fs, .show_devname = zpl_show_devname, .show_options = zpl_show_options, .show_stats = NULL, }; /* * ->d_delete() is called when the last reference to a dentry is released. Its * return value indicates if the dentry should be destroyed immediately, or * retained in the dentry cache. * * By default (zfs_delete_dentry=0) the kernel will always cache unused * entries. Each dentry holds an inode reference, so cached dentries can hold * the final inode reference indefinitely, leading to the inode and its related * data being pinned (see zpl_drop_inode()). * * When set to 1, we signal that the dentry should be destroyed immediately and * never cached. This reduces memory usage, at the cost of higher overheads to * lookup a file, as the inode and its underlying data (dnode/dbuf) need to be * reloaded and reinflated. * * Note that userspace does not have direct control over dentry references and * reclaim; rather, this is part of the kernel's caching and reclaim subsystems * (eg vm.vfs_cache_pressure). */ static int zpl_dentry_delete(const struct dentry *dentry) { return (zfs_delete_dentry ? 1 : 0); } const struct dentry_operations zpl_dentry_operations = { .d_delete = zpl_dentry_delete, }; struct file_system_type zpl_fs_type = { .owner = THIS_MODULE, .name = ZFS_DRIVER, #if defined(HAVE_IDMAP_MNT_API) .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, #else .fs_flags = FS_USERNS_MOUNT, #endif .mount = zpl_mount, .kill_sb = zpl_kill_sb, }; ZFS_MODULE_PARAM(zfs, zfs_, delete_inode, INT, ZMOD_RW, "Delete inodes as soon as the last reference is released."); ZFS_MODULE_PARAM(zfs, zfs_, delete_dentry, INT, ZMOD_RW, "Delete dentries from dentry cache as soon as the last reference is " "released.");