diff --git a/config/kernel-sb-wb-err.m4 b/config/kernel-sb-wb-err.m4 new file mode 100644 index 000000000000..814d2ca5323b --- /dev/null +++ b/config/kernel-sb-wb-err.m4 @@ -0,0 +1,27 @@ +# dnl +# dnl 5.8 (735e4ae5ba28) introduced a superblock scoped errseq_t to use to +# dnl record writeback errors for syncfs() to return. Up until 5.17, when +# dnl sync_fs errors were returned directly, this is the only way for us to +# dnl report an error from syncfs(). +# dnl +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR], [ + ZFS_LINUX_TEST_SRC([super_block_s_wb_err], [ + #include + + static const struct super_block + sb __attribute__ ((unused)) = { + .s_wb_err = 0, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR], [ + AC_MSG_CHECKING([whether super_block has s_wb_err]) + ZFS_LINUX_TEST_RESULT([super_block_s_wb_err], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SUPER_BLOCK_S_WB_ERR, 1, + [have super_block s_wb_err]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index b933475e9e70..c99aed357fb7 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -1,1056 +1,1058 @@ dnl # dnl # Default ZFS kernel configuration dnl # AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ AM_COND_IF([BUILD_LINUX], [ dnl # Setup the kernel build environment. ZFS_AC_KERNEL ZFS_AC_QAT dnl # Sanity checks for module building and CONFIG_* defines ZFS_AC_KERNEL_CONFIG_DEFINED ZFS_AC_MODULE_SYMVERS dnl # Sequential ZFS_LINUX_TRY_COMPILE tests ZFS_AC_KERNEL_FPU_HEADER ZFS_AC_KERNEL_OBJTOOL_HEADER ZFS_AC_KERNEL_MISC_MINOR ZFS_AC_KERNEL_DECLARE_EVENT_CLASS dnl # Parallel ZFS_LINUX_TEST_SRC / ZFS_LINUX_TEST_RESULT tests ZFS_AC_KERNEL_TEST_SRC ZFS_AC_KERNEL_TEST_RESULT AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" ]) AC_SUBST(KERNEL_MAKE) ]) ]) dnl # dnl # Generate and compile all of the kernel API test cases to determine dnl # which interfaces are available. 
By invoking the kernel build system dnl # only once the compilation can be done in parallel significantly dnl # speeding up the process. dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_TYPES ZFS_AC_KERNEL_SRC_OBJTOOL ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE ZFS_AC_KERNEL_SRC_PDE_DATA ZFS_AC_KERNEL_SRC_GENERIC_FADVISE ZFS_AC_KERNEL_SRC_SCHED ZFS_AC_KERNEL_SRC_USLEEP_RANGE ZFS_AC_KERNEL_SRC_VMALLOC_PAGE_KERNEL ZFS_AC_KERNEL_SRC_INODE_TIMES ZFS_AC_KERNEL_SRC_PROC_OPERATIONS ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_SRC_BIO ZFS_AC_KERNEL_SRC_BLKDEV ZFS_AC_KERNEL_SRC_BLK_QUEUE ZFS_AC_KERNEL_SRC_GENHD_FLAGS ZFS_AC_KERNEL_SRC_REVALIDATE_DISK ZFS_AC_KERNEL_SRC_GET_DISK_RO ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_SRC_XATTR ZFS_AC_KERNEL_SRC_ACL ZFS_AC_KERNEL_SRC_INODE_SETATTR ZFS_AC_KERNEL_SRC_INODE_GETATTR ZFS_AC_KERNEL_SRC_SHOW_OPTIONS ZFS_AC_KERNEL_SRC_SHRINKER ZFS_AC_KERNEL_SRC_MKDIR ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS ZFS_AC_KERNEL_SRC_CREATE ZFS_AC_KERNEL_SRC_PERMISSION ZFS_AC_KERNEL_SRC_TMPFILE ZFS_AC_KERNEL_SRC_AUTOMOUNT ZFS_AC_KERNEL_SRC_COMMIT_METADATA ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT ZFS_AC_KERNEL_SRC_SB_DYING ZFS_AC_KERNEL_SRC_SET_NLINK ZFS_AC_KERNEL_SRC_SGET ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO ZFS_AC_KERNEL_SRC_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_SRC_VFS_MIGRATEPAGE ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_SRC_VFS_READPAGES ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_SRC_VFS_IOV_ITER ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_SRC_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE 
ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT ZFS_AC_KERNEL_SRC_FPU ZFS_AC_KERNEL_SRC_FMODE_T ZFS_AC_KERNEL_SRC_KUIDGID_T ZFS_AC_KERNEL_SRC_KUID_HELPERS ZFS_AC_KERNEL_SRC_RENAME ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES ZFS_AC_KERNEL_SRC_PERCPU ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR ZFS_AC_KERNEL_SRC_MKNOD ZFS_AC_KERNEL_SRC_SYMLINK ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS ZFS_AC_KERNEL_SRC_SIGINFO ZFS_AC_KERNEL_SRC_SYSFS ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_SRC_STRLCPY ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_SRC_ADD_DISK ZFS_AC_KERNEL_SRC_KTHREAD ZFS_AC_KERNEL_SRC_ZERO_PAGE ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_SRC_IDMAP_MNT_API ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS ZFS_AC_KERNEL_SRC_IATTR_VFSID ZFS_AC_KERNEL_SRC_WRITEPAGE_T ZFS_AC_KERNEL_SRC_RECLAIMED ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_SZ ZFS_AC_KERNEL_SRC_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ ZFS_AC_KERNEL_SRC_SYNC_BDEV ZFS_AC_KERNEL_SRC_MM_PAGE_FLAGS ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING ZFS_AC_KERNEL_SRC_FILE ZFS_AC_KERNEL_SRC_PIN_USER_PAGES ZFS_AC_KERNEL_SRC_TIMER + ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE ;; riscv*) ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE ;; esac AC_MSG_CHECKING([for available kernel interfaces]) ZFS_LINUX_TEST_COMPILE_ALL([kabi]) AC_MSG_RESULT([done]) ]) dnl # dnl # Check results of kernel interface tests. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_TYPES ZFS_AC_KERNEL_ACCESS_OK_TYPE ZFS_AC_KERNEL_OBJTOOL ZFS_AC_KERNEL_PDE_DATA ZFS_AC_KERNEL_GENERIC_FADVISE ZFS_AC_KERNEL_SCHED ZFS_AC_KERNEL_USLEEP_RANGE ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL ZFS_AC_KERNEL_INODE_TIMES ZFS_AC_KERNEL_PROC_OPERATIONS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BIO ZFS_AC_KERNEL_BLKDEV ZFS_AC_KERNEL_BLK_QUEUE ZFS_AC_KERNEL_GENHD_FLAGS ZFS_AC_KERNEL_REVALIDATE_DISK ZFS_AC_KERNEL_GET_DISK_RO ZFS_AC_KERNEL_DISCARD_GRANULARITY ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_XATTR ZFS_AC_KERNEL_ACL ZFS_AC_KERNEL_INODE_SETATTR ZFS_AC_KERNEL_INODE_GETATTR ZFS_AC_KERNEL_SHOW_OPTIONS ZFS_AC_KERNEL_SHRINKER ZFS_AC_KERNEL_MKDIR ZFS_AC_KERNEL_LOOKUP_FLAGS ZFS_AC_KERNEL_CREATE ZFS_AC_KERNEL_PERMISSION ZFS_AC_KERNEL_TMPFILE ZFS_AC_KERNEL_AUTOMOUNT ZFS_AC_KERNEL_COMMIT_METADATA ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT ZFS_AC_KERNEL_SB_DYING ZFS_AC_KERNEL_SET_NLINK ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_VFS_READ_FOLIO ZFS_AC_KERNEL_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_VFS_MIGRATEPAGE ZFS_AC_KERNEL_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_VFS_READPAGES ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_VFS_IOV_ITER ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_MAKE_REQUEST_FN ZFS_AC_KERNEL_GENERIC_IO_ACCT ZFS_AC_KERNEL_FPU ZFS_AC_KERNEL_FMODE_T ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_RENAME ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_TOTALHIGH_PAGES ZFS_AC_KERNEL_PERCPU ZFS_AC_KERNEL_GENERIC_FILLATTR ZFS_AC_KERNEL_MKNOD ZFS_AC_KERNEL_SYMLINK ZFS_AC_KERNEL_BIO_MAX_SEGS 
ZFS_AC_KERNEL_SIGINFO ZFS_AC_KERNEL_SYSFS ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_STRLCPY ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_ADD_DISK ZFS_AC_KERNEL_KTHREAD ZFS_AC_KERNEL_ZERO_PAGE ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_IDMAP_MNT_API ZFS_AC_KERNEL_IDMAP_NO_USERNS ZFS_AC_KERNEL_IATTR_VFSID ZFS_AC_KERNEL_WRITEPAGE_T ZFS_AC_KERNEL_RECLAIMED ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_REGISTER_SYSCTL_SZ ZFS_AC_KERNEL_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_COPY_SPLICE_READ ZFS_AC_KERNEL_SYNC_BDEV ZFS_AC_KERNEL_MM_PAGE_FLAGS ZFS_AC_KERNEL_MM_PAGE_SIZE ZFS_AC_KERNEL_MM_PAGE_MAPPING ZFS_AC_KERNEL_1ARG_ASSIGN_STR ZFS_AC_KERNEL_FILE ZFS_AC_KERNEL_PIN_USER_PAGES ZFS_AC_KERNEL_TIMER + ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE ;; riscv*) ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE ;; esac ]) dnl # dnl # Detect name used for Module.symvers file in kernel dnl # AC_DEFUN([ZFS_AC_MODULE_SYMVERS], [ modpost=$LINUX/scripts/Makefile.modpost AC_MSG_CHECKING([kernel file name for module symbols]) AS_IF([test "x$enable_linux_builtin" != xyes -a -f "$modpost"], [ AS_IF([grep -q Modules.symvers $modpost], [ LINUX_SYMBOLS=Modules.symvers ], [ LINUX_SYMBOLS=Module.symvers ]) AS_IF([test ! -f "$LINUX_OBJ/$LINUX_SYMBOLS"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution *** is installed. If you are building with a custom kernel, make sure *** the kernel is configured, built, and the '--with-linux=PATH' *** configure option refers to the location of the kernel source. ]) ]) ], [ LINUX_SYMBOLS=NONE ]) AC_MSG_RESULT($LINUX_SYMBOLS) AC_SUBST(LINUX_SYMBOLS) ]) dnl # dnl # Detect the kernel to be built against dnl # dnl # Most modern Linux distributions have separate locations for bare dnl # source (source) and prebuilt (build) files. 
Additionally, there are dnl # `source` and `build` symlinks in `/lib/modules/$(KERNEL_VERSION)` dnl # pointing to them. The directory search order is now: dnl # dnl # - `configure` command line values if both `--with-linux` and dnl # `--with-linux-obj` were defined dnl # dnl # - If only `--with-linux` was defined, `--with-linux-obj` is assumed dnl # to have the same value as `--with-linux` dnl # dnl # - If neither `--with-linux` nor `--with-linux-obj` were defined dnl # autodetection is used: dnl # dnl # - `/lib/modules/$(uname -r)/{source,build}` respectively, if exist. dnl # dnl # - If only `/lib/modules/$(uname -r)/build` exists, it is assumed dnl # to be both source and build directory. dnl # dnl # - The first directory in `/lib/modules` with the highest version dnl # number according to `sort -V` which contains both `source` and dnl # `build` symlinks/directories. If module directory contains only dnl # `build` component, it is assumed to be both source and build dnl # directory. dnl # dnl # - Last resort: the first directory matching `/usr/src/kernels/*` dnl # and `/usr/src/linux-*` with the highest version number according dnl # to `sort -V` is assumed to be both source and build directory. 
dnl # AC_DEFUN([ZFS_AC_KERNEL], [ AC_ARG_WITH([linux], AS_HELP_STRING([--with-linux=PATH], [Path to kernel source]), [kernelsrc="$withval"]) AC_ARG_WITH(linux-obj, AS_HELP_STRING([--with-linux-obj=PATH], [Path to kernel build objects]), [kernelbuild="$withval"]) AC_MSG_CHECKING([kernel source and build directories]) AS_IF([test -n "$kernelsrc" && test -z "$kernelbuild"], [ kernelbuild="$kernelsrc" ], [test -z "$kernelsrc"], [ AS_IF([test -e "/lib/modules/$(uname -r)/source" && \ test -e "/lib/modules/$(uname -r)/build"], [ src="/lib/modules/$(uname -r)/source" build="/lib/modules/$(uname -r)/build" ], [test -e "/lib/modules/$(uname -r)/build"], [ build="/lib/modules/$(uname -r)/build" src="$build" ], [ src= for d in $(ls -1d /lib/modules/* 2>/dev/null | sort -Vr); do if test -e "$d/source" && test -e "$d/build"; then src="$d/source" build="$d/build" break fi if test -e "$d/build"; then src="$d/build" build="$d/build" break fi done # the least reliable method if test -z "$src"; then src=$(ls -1d /usr/src/kernels/* /usr/src/linux-* \ 2>/dev/null | grep -v obj | sort -Vr | head -1) build="$src" fi ]) AS_IF([test -n "$src" && test -e "$src"], [ kernelsrc=$(readlink -e "$src") ], [ kernelsrc="[Not found]" ]) AS_IF([test -n "$build" && test -e "$build"], [ kernelbuild=$(readlink -e "$build") ], [ kernelbuild="[Not found]" ]) ], [ AS_IF([test "$kernelsrc" = "NONE"], [ kernsrcver=NONE ]) withlinux=yes ]) AC_MSG_RESULT([done]) AC_MSG_CHECKING([kernel source directory]) AC_MSG_RESULT([$kernelsrc]) AC_MSG_CHECKING([kernel build directory]) AC_MSG_RESULT([$kernelbuild]) AS_IF([test ! -d "$kernelsrc" || test ! -d "$kernelbuild"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution *** is installed and then try again. 
If that fails, you can specify the *** location of the kernel source and build with the '--with-linux=PATH' and *** '--with-linux-obj=PATH' options respectively.]) ]) AC_MSG_CHECKING([kernel source version]) utsrelease1=$kernelbuild/include/linux/version.h utsrelease2=$kernelbuild/include/linux/utsrelease.h utsrelease3=$kernelbuild/include/generated/utsrelease.h AS_IF([test -r $utsrelease1 && grep -qF UTS_RELEASE $utsrelease1], [ utsrelease=$utsrelease1 ], [test -r $utsrelease2 && grep -qF UTS_RELEASE $utsrelease2], [ utsrelease=$utsrelease2 ], [test -r $utsrelease3 && grep -qF UTS_RELEASE $utsrelease3], [ utsrelease=$utsrelease3 ]) AS_IF([test -n "$utsrelease"], [ kernsrcver=$($AWK '/UTS_RELEASE/ { gsub(/"/, "", $[3]); print $[3] }' $utsrelease) AS_IF([test -z "$kernsrcver"], [ AC_MSG_RESULT([Not found]) AC_MSG_ERROR([ *** Cannot determine kernel version. ]) ]) ], [ AC_MSG_RESULT([Not found]) if test "x$enable_linux_builtin" != xyes; then AC_MSG_ERROR([ *** Cannot find UTS_RELEASE definition. ]) else AC_MSG_ERROR([ *** Cannot find UTS_RELEASE definition. *** Please run 'make prepare' inside the kernel source tree.]) fi ]) AC_MSG_RESULT([$kernsrcver]) AX_COMPARE_VERSION([$kernsrcver], [ge], [$ZFS_META_KVER_MIN], [], [ AC_MSG_ERROR([ *** Cannot build against kernel version $kernsrcver. *** The minimum supported kernel version is $ZFS_META_KVER_MIN. ]) ]) AC_ARG_ENABLE([linux-experimental], AS_HELP_STRING([--enable-linux-experimental], [Allow building against some unsupported kernel versions])) AX_COMPARE_VERSION([$kernsrcver], [ge], [$ZFS_META_KVER_MAX], [ AX_COMPARE_VERSION([$kernsrcver], [eq2], [$ZFS_META_KVER_MAX], [ kern_max_version_ok=yes ], [ kern_max_version_ok=no ]) ], [ kern_max_version_ok=yes ]) AS_IF([test "x$kern_max_version_ok" != "xyes"], [ AS_IF([test "x$enable_linux_experimental" == "xyes"], [ AC_DEFINE(HAVE_LINUX_EXPERIMENTAL, 1, [building against unsupported kernel version]) ], [ AC_MSG_ERROR([ *** Cannot build against kernel version $kernsrcver. 
*** The maximum supported kernel version is $ZFS_META_KVER_MAX. ]) ]) ]) LINUX=${kernelsrc} LINUX_OBJ=${kernelbuild} LINUX_VERSION=${kernsrcver} AC_SUBST(LINUX) AC_SUBST(LINUX_OBJ) AC_SUBST(LINUX_VERSION) dnl # create a relatively unique numeric checksum based on the kernel dnl # version and path. this is included in the cache key below, dnl # allowing different cached values for different kernels _zfs_linux_cache_checksum=$(echo ${kernelsrc} {$kernelbuild} ${kernsrcver} | cksum | cut -f1 -d' ') ]) AC_DEFUN([ZFS_AC_KERNEL_VERSION_WARNING], [ AS_IF([test "x$enable_linux_experimental" = "xyes" && \ test "x$kern_max_version_ok" != "xyes"], [ AC_MSG_WARN([ You are building OpenZFS against Linux version $kernsrcver. This combination is considered EXPERIMENTAL by the OpenZFS project. Even if it appears to build and run correctly, there may be bugs that can cause SERIOUS DATA LOSS. YOU HAVE BEEN WARNED! If you choose to continue, we'd appreciate if you could report your results on the OpenZFS issue tracker at: https://github.com/openzfs/zfs/issues/new Your feedback will help us prepare a new OpenZFS release that supports this version of Linux. ]) ]) ]) dnl # dnl # Detect the QAT module to be built against, QAT provides hardware dnl # acceleration for data compression: dnl # dnl # https://01.org/intel-quickassist-technology dnl # dnl # 1) Download and install QAT driver from the above link dnl # 2) Start QAT driver in your system: dnl # service qat_service start dnl # 3) Enable QAT in ZFS, e.g.: dnl # ./configure --with-qat=/QAT1.6 dnl # make dnl # 4) Set GZIP compression in ZFS dataset: dnl # zfs set compression = gzip dnl # dnl # Then the data written to this ZFS pool is compressed by QAT accelerator dnl # automatically, and de-compressed by QAT when read from the pool. 
dnl # dnl # 1) Get QAT hardware statistics with: dnl # cat /proc/icp_dh895xcc_dev/qat dnl # 2) To disable QAT: dnl # insmod zfs.ko zfs_qat_disable=1 dnl # AC_DEFUN([ZFS_AC_QAT], [ AC_ARG_WITH([qat], AS_HELP_STRING([--with-qat=PATH], [Path to qat source]), AS_IF([test "$withval" = "yes"], AC_MSG_ERROR([--with-qat=PATH requires a PATH]), [qatsrc="$withval"])) AC_ARG_WITH([qat-obj], AS_HELP_STRING([--with-qat-obj=PATH], [Path to qat build objects]), [qatbuild="$withval"]) AS_IF([test ! -z "${qatsrc}"], [ AC_MSG_CHECKING([qat source directory]) AC_MSG_RESULT([$qatsrc]) QAT_SRC="${qatsrc}/quickassist" AS_IF([ test ! -e "$QAT_SRC/include/cpa.h"], [ AC_MSG_ERROR([ *** Please make sure the qat driver package is installed *** and specify the location of the qat source with the *** '--with-qat=PATH' option then try again. Failed to *** find cpa.h in: ${QAT_SRC}/include]) ]) ]) AS_IF([test ! -z "${qatsrc}"], [ AC_MSG_CHECKING([qat build directory]) AS_IF([test -z "$qatbuild"], [ qatbuild="${qatsrc}/build" ]) AC_MSG_RESULT([$qatbuild]) QAT_OBJ=${qatbuild} AS_IF([ ! test -e "$QAT_OBJ/icp_qa_al.ko" && ! test -e "$QAT_OBJ/qat_api.ko"], [ AC_MSG_ERROR([ *** Please make sure the qat driver is installed then try again. *** Failed to find icp_qa_al.ko or qat_api.ko in: $QAT_OBJ]) ]) AC_SUBST(QAT_SRC) AC_SUBST(QAT_OBJ) AC_DEFINE(HAVE_QAT, 1, [qat is enabled and existed]) ]) dnl # dnl # Detect the name used for the QAT Module.symvers file. dnl # AS_IF([test ! -z "${qatsrc}"], [ AC_MSG_CHECKING([qat file for module symbols]) QAT_SYMBOLS=$QAT_SRC/lookaside/access_layer/src/Module.symvers AS_IF([test -r $QAT_SYMBOLS], [ AC_MSG_RESULT([$QAT_SYMBOLS]) AC_SUBST(QAT_SYMBOLS) ],[ AC_MSG_ERROR([ *** Please make sure the qat driver is installed then try again. 
*** Failed to find Module.symvers in: $QAT_SYMBOLS ]) ]) ]) ]) dnl # dnl # ZFS_LINUX_CONFTEST_H dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_H], [ test -d build/$2 || mkdir -p build/$2 cat - <<_ACEOF >build/$2/$2.h $1 _ACEOF ]) dnl # dnl # ZFS_LINUX_CONFTEST_C dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_C], [ test -d build/$2 || mkdir -p build/$2 cat confdefs.h - <<_ACEOF >build/$2/$2.c $1 _ACEOF ]) dnl # dnl # ZFS_LINUX_CONFTEST_MAKEFILE dnl # dnl # $1 - test case name dnl # $2 - add to top-level Makefile dnl # $3 - additional build flags dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_MAKEFILE], [ test -d build || mkdir -p build test -d build/$1 || mkdir -p build/$1 file=build/$1/Makefile dnl # Example command line to manually build source. cat - <<_ACEOF >$file # Example command line to manually build source # make modules -C $LINUX_OBJ $ARCH_UM M=$PWD/build/$1 ccflags-y := -Werror $FRAME_LARGER_THAN _ACEOF dnl # Additional custom CFLAGS as requested. m4_ifval($3, [echo "ccflags-y += $3" >>$file], []) dnl # Test case source echo "obj-m := $1.o" >>$file AS_IF([test "x$2" = "xyes"], [echo "obj-m += $1/" >>build/Makefile], []) ]) dnl # dnl # ZFS_LINUX_TEST_PROGRAM(C)([PROLOGUE], [BODY]) dnl # m4_define([ZFS_LINUX_TEST_PROGRAM], [ #include $1 int main (void) { $2 ; return 0; } MODULE_DESCRIPTION("conftest"); MODULE_AUTHOR(ZFS_META_AUTHOR); MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE); MODULE_LICENSE($3); ]) dnl # dnl # ZFS_LINUX_TEST_REMOVE dnl # dnl # Removes the specified test source and results. 
dnl # AC_DEFUN([ZFS_LINUX_TEST_REMOVE], [ test -d build/$1 && rm -Rf build/$1 test -f build/Makefile && sed '/$1/d' build/Makefile ]) dnl # dnl # ZFS_LINUX_COMPILE dnl # dnl # $1 - build dir dnl # $2 - test command dnl # $3 - pass command dnl # $4 - fail command dnl # $5 - set KBUILD_MODPOST_NOFINAL='yes' dnl # $6 - set KBUILD_MODPOST_WARN='yes' dnl # dnl # Used internally by ZFS_LINUX_TEST_{COMPILE,MODPOST} dnl # AC_DEFUN([ZFS_LINUX_COMPILE], [ AC_ARG_VAR([KERNEL_CC], [C compiler for building kernel modules]) AC_ARG_VAR([KERNEL_LD], [Linker for building kernel modules]) AC_ARG_VAR([KERNEL_LLVM], [Binary option to build kernel modules with LLVM/CLANG toolchain]) AC_ARG_VAR([KERNEL_CROSS_COMPILE], [Cross compile prefix for kernel module builds]) AC_ARG_VAR([KERNEL_ARCH], [Architecture to build kernel modules for]) AC_TRY_COMMAND([ KBUILD_MODPOST_NOFINAL="$5" KBUILD_MODPOST_WARN="$6" make modules -k -j$TEST_JOBS ${KERNEL_CC:+CC=$KERNEL_CC} ${KERNEL_LD:+LD=$KERNEL_LD} ${KERNEL_LLVM:+LLVM=$KERNEL_LLVM} CONFIG_MODULES=y CFLAGS_MODULE=-DCONFIG_MODULES ${KERNEL_CROSS_COMPILE:+CROSS_COMPILE=$KERNEL_CROSS_COMPILE} ${KERNEL_ARCH:+ARCH=$KERNEL_ARCH} -C $LINUX_OBJ $ARCH_UM M=$PWD/$1 >$1/build.log 2>&1]) AS_IF([AC_TRY_COMMAND([$2])], [$3], [$4]) ]) dnl # dnl # ZFS_LINUX_TEST_COMPILE dnl # dnl # Perform a full compile excluding the final modpost phase. dnl # AC_DEFUN([ZFS_LINUX_TEST_COMPILE], [ ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ mv $2/Makefile $2/Makefile.compile.$1 mv $2/build.log $2/build.log.$1 ],[ AC_MSG_ERROR([ *** Unable to compile test source to determine kernel interfaces.]) ], [yes], []) ]) dnl # dnl # ZFS_LINUX_TEST_MODPOST dnl # dnl # Perform a full compile including the modpost phase. This may dnl # be an incremental build if the objects have already been built. 
dnl # AC_DEFUN([ZFS_LINUX_TEST_MODPOST], [ ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ mv $2/Makefile $2/Makefile.modpost.$1 cat $2/build.log >>build/build.log.$1 ],[ AC_MSG_ERROR([ *** Unable to modpost test source to determine kernel interfaces.]) ], [], [yes]) ]) dnl # dnl # Perform the compilation of the test cases in two phases. dnl # dnl # Phase 1) attempt to build the object files for all of the tests dnl # defined by the ZFS_LINUX_TEST_SRC macro. But do not dnl # perform the final modpost stage. dnl # dnl # Phase 2) disable all tests which failed the initial compilation, dnl # then invoke the final modpost step for the remaining tests. dnl # dnl # This allows us efficiently build the test cases in parallel while dnl # remaining resilient to build failures which are expected when dnl # detecting the available kernel interfaces. dnl # dnl # The maximum allowed parallelism can be controlled by setting the dnl # TEST_JOBS environment variable. Otherwise, it default to $(nproc). dnl # AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [ dnl # Phase 1 - Compilation only, final linking is skipped. ZFS_LINUX_TEST_COMPILE([$1], [build]) dnl # dnl # Phase 2 - When building external modules disable test cases dnl # which failed to compile and invoke modpost to verify the dnl # final linking. dnl # dnl # Test names suffixed with '_license' call modpost independently dnl # to ensure that a single incompatibility does not result in the dnl # modpost phase exiting early. This check is not performed on dnl # every symbol since the majority are compatible and doing so dnl # would significantly slow down this phase. dnl # dnl # When configuring for builtin (--enable-linux-builtin) dnl # fake the linking step artificially create the expected .ko dnl # files for tests which did compile. This is required for dnl # kernels which do not have loadable module support or have dnl # not yet been built. 
dnl # AS_IF([test "x$enable_linux_builtin" = "xno"], [ for dir in $(awk '/^obj-m/ { print [$]3 }' \ build/Makefile.compile.$1); do name=${dir%/} AS_IF([test -f build/$name/$name.o], [ AS_IF([test "${name##*_}" = "license"], [ ZFS_LINUX_TEST_MODPOST([$1], [build/$name]) echo "obj-n += $dir" >>build/Makefile ], [ echo "obj-m += $dir" >>build/Makefile ]) ], [ echo "obj-n += $dir" >>build/Makefile ]) done ZFS_LINUX_TEST_MODPOST([$1], [build]) ], [ for dir in $(awk '/^obj-m/ { print [$]3 }' \ build/Makefile.compile.$1); do name=${dir%/} AS_IF([test -f build/$name/$name.o], [ touch build/$name/$name.ko ]) done ]) ]) dnl # dnl # ZFS_LINUX_TEST_SRC dnl # dnl # $1 - name dnl # $2 - global dnl # $3 - source dnl # $4 - extra cflags dnl # $5 - check license-compatibility dnl # dnl # Check if the test source is buildable at all and then if it is dnl # license compatible. dnl # dnl # N.B because all of the test cases are compiled in parallel they dnl # must never depend on the results of previous tests. Each test dnl # needs to be entirely independent. 
dnl # AC_DEFUN([ZFS_LINUX_TEST_SRC], [ cachevar="zfs_cv_kernel_[$1]_$_zfs_linux_cache_checksum" eval "cacheval=\$$cachevar" AS_IF([test "x$cacheval" = "x"], [ ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]], [["Dual BSD/GPL"]])], [$1]) ZFS_LINUX_CONFTEST_MAKEFILE([$1], [yes], [$4]) AS_IF([ test -n "$5" ], [ ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM( [[$2]], [[$3]], [[$5]])], [$1_license]) ZFS_LINUX_CONFTEST_MAKEFILE([$1_license], [yes], [$4]) ]) ]) ]) dnl # dnl # ZFS_LINUX_TEST_RESULT dnl # dnl # $1 - name of a test source (ZFS_LINUX_TEST_SRC) dnl # $2 - run on success (valid .ko generated) dnl # $3 - run on failure (unable to compile) dnl # AC_DEFUN([ZFS_LINUX_TEST_RESULT], [ cachevar="zfs_cv_kernel_[$1]_$_zfs_linux_cache_checksum" AC_CACHE_VAL([$cachevar], [ AS_IF([test -d build/$1], [ AS_IF([test -f build/$1/$1.ko], [ eval "$cachevar=yes" ], [ eval "$cachevar=no" ]) ], [ AC_MSG_ERROR([ *** No matching source for the "$1" test, check that *** both the test source and result macros refer to the same name. ]) ]) ]) eval "cacheval=\$$cachevar" AS_IF([test "x$cacheval" = "xyes"], [$2], [$3]) ]) dnl # dnl # ZFS_LINUX_TEST_ERROR dnl # dnl # Generic error message which can be used when none of the expected dnl # kernel interfaces were detected. dnl # AC_DEFUN([ZFS_LINUX_TEST_ERROR], [ AC_MSG_ERROR([ *** None of the expected "$1" interfaces were detected. *** This may be because your kernel version is newer than what is *** supported, or you are using a patched custom kernel with *** incompatible modifications. *** *** ZFS Version: $ZFS_META_ALIAS *** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX ]) ]) dnl # dnl # ZFS_LINUX_TEST_RESULT_SYMBOL dnl # dnl # Like ZFS_LINUX_TEST_RESULT except ZFS_CHECK_SYMBOL_EXPORT is called to dnl # verify symbol exports, unless --enable-linux-builtin was provided to dnl # configure. 
dnl # AC_DEFUN([ZFS_LINUX_TEST_RESULT_SYMBOL], [ cachevar="zfs_cv_kernel_[$1]_$_zfs_linux_cache_checksum" AC_CACHE_VAL([$cachevar], [ AS_IF([ ! test -f build/$1/$1.ko], [ eval "$cachevar=no" ], [ AS_IF([test "x$enable_linux_builtin" != "xyes"], [ ZFS_CHECK_SYMBOL_EXPORT([$2], [$3], [ eval "$cachevar=yes" ], [ eval "$cachevar=no" ]) ], [ eval "$cachevar=yes" ]) ]) ]) eval "cacheval=\$$cachevar" AS_IF([test "x$cacheval" = "xyes"], [$4], [$5]) ]) dnl # dnl # ZFS_LINUX_COMPILE_IFELSE dnl # AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [ ZFS_LINUX_TEST_REMOVE([conftest]) m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1], [conftest])]) m4_ifvaln([$5], [ZFS_LINUX_CONFTEST_H([$5], [conftest])], [ZFS_LINUX_CONFTEST_H([], [conftest])]) ZFS_LINUX_CONFTEST_MAKEFILE([conftest], [no], [m4_ifvaln([$5], [-I$PWD/build/conftest], [])]) ZFS_LINUX_COMPILE([build/conftest], [$2], [$3], [$4], [], []) ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE dnl # dnl # $1 - global dnl # $2 - source dnl # $3 - run on success (valid .ko generated) dnl # $4 - run on failure (unable to compile) dnl # dnl # When configuring as builtin (--enable-linux-builtin) for kernels dnl # without loadable module support (CONFIG_MODULES=n) only the object dnl # file is created. See ZFS_LINUX_TEST_COMPILE_ALL for details. dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [ AS_IF([test "x$enable_linux_builtin" = "xyes"], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.o], [$3], [$4]) ], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.ko], [$3], [$4]) ]) ]) dnl # dnl # ZFS_CHECK_SYMBOL_EXPORT dnl # dnl # Check if a symbol is exported on not by consulting the symbols dnl # file, or optionally the source code. dnl # AC_DEFUN([ZFS_CHECK_SYMBOL_EXPORT], [ grep -q -E '[[[:space:]]]$1[[[:space:]]]' \ $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null rc=$? 
if test $rc -ne 0; then export=0 for file in $2; do grep -q -E "EXPORT_SYMBOL.*($1)" \ "$LINUX/$file" 2>/dev/null rc=$? if test $rc -eq 0; then export=1 break; fi done if test $export -eq 0; then : $4 else : $3 fi else : $3 fi ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL dnl # dnl # Like ZFS_LINUX_TRY_COMPILER except ZFS_CHECK_SYMBOL_EXPORT is called dnl # to verify symbol exports, unless --enable-linux-builtin was provided dnl # to configure. dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [ ZFS_LINUX_TRY_COMPILE([$1], [$2], [rc=0], [rc=1]) if test $rc -ne 0; then : $6 else if test "x$enable_linux_builtin" != xyes; then ZFS_CHECK_SYMBOL_EXPORT([$3], [$4], [rc=0], [rc=1]) fi if test $rc -ne 0; then : $6 else : $5 fi fi ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE_HEADER dnl # like ZFS_LINUX_TRY_COMPILE, except the contents conftest.h are dnl # provided via the fifth parameter dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [ AS_IF([test "x$enable_linux_builtin" = "xyes"], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.o], [$3], [$4], [$5]) ], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.ko], [$3], [$4], [$5]) ]) ]) dnl # dnl # AS_VERSION_COMPARE_LE dnl # like AS_VERSION_COMPARE_LE, but runs $3 if (and only if) $1 <= $2 dnl # AS_VERSION_COMPARE_LE (version-1, version-2, [action-if-less-or-equal], [action-if-greater]) dnl # AC_DEFUN([AS_VERSION_COMPARE_LE], [ AS_VERSION_COMPARE([$1], [$2], [$3], [$3], [$4]) ]) dnl # dnl # ZFS_LINUX_REQUIRE_API dnl # like ZFS_LINUX_TEST_ERROR, except only fails if the kernel is dnl # at least some specified version. dnl # AC_DEFUN([ZFS_LINUX_REQUIRE_API], [ AS_VERSION_COMPARE_LE([$2], [$kernsrcver], [ AC_MSG_ERROR([ *** None of the expected "$1" interfaces were detected. This *** interface is expected for kernels version "$2" and above. 
*** This may be because your kernel version is newer than what is *** supported, or you are using a patched custom kernel with *** incompatible modifications. Newer kernels may have incompatible *** APIs. *** *** ZFS Version: $ZFS_META_ALIAS *** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX ]) ], [ AC_MSG_RESULT(no) ]) ]) diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index 40c25e464c5d..a682bfd33c38 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -1,426 +1,481 @@ // SPDX-License-Identifier: CDDL-1.0 /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2011, Lawrence Livermore National Security, LLC. * Copyright (c) 2023, Datto Inc. All rights reserved. */ #include #include #include #include #include #include +#include static struct inode * zpl_inode_alloc(struct super_block *sb) { struct inode *ip; VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0); inode_set_iversion(ip, 1); return (ip); } static void zpl_inode_destroy(struct inode *ip) { ASSERT(atomic_read(&ip->i_count) == 0); zfs_inode_destroy(ip); } /* * Called from __mark_inode_dirty() to reflect that something in the * inode has changed. 
We use it to ensure the znode system attributes * are always strictly update to date with respect to the inode. */ static void zpl_dirty_inode(struct inode *ip, int flags) { fstrans_cookie_t cookie; cookie = spl_fstrans_mark(); zfs_dirty_inode(ip, flags); spl_fstrans_unmark(cookie); } /* * When ->drop_inode() is called its return value indicates if the * inode should be evicted from the inode cache. If the inode is * unhashed and has no links the default policy is to evict it * immediately. * * The ->evict_inode() callback must minimally truncate the inode pages, * and call clear_inode(). For 2.6.35 and later kernels this will * simply update the inode state, with the sync occurring before the * truncate in evict(). For earlier kernels clear_inode() maps to * end_writeback() which is responsible for completing all outstanding * write back. In either case, once this is done it is safe to cleanup * any remaining inode specific data via zfs_inactive(). * remaining filesystem specific data. */ static void zpl_evict_inode(struct inode *ip) { fstrans_cookie_t cookie; cookie = spl_fstrans_mark(); truncate_setsize(ip, 0); clear_inode(ip); zfs_inactive(ip); spl_fstrans_unmark(cookie); } static void zpl_put_super(struct super_block *sb) { fstrans_cookie_t cookie; int error; cookie = spl_fstrans_mark(); error = -zfs_umount(sb); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); } +/* + * zfs_sync() is the underlying implementation for the sync(2) and syncfs(2) + * syscalls, via sb->s_op->sync_fs(). + * + * Before kernel 5.17 (torvalds/linux@5679897eb104), syncfs() -> + * sync_filesystem() would ignore the return from sync_fs(), instead only + * considering the error from syncing the underlying block device (sb->s_dev). + * Since OpenZFS doesn't _have_ an underlying block device, there's no way for + * us to report a sync error directly. 
+ * + * However, in 5.8 (torvalds/linux@735e4ae5ba28) the superblock gained an extra + * error store `s_wb_err`, to carry errors seen on page writeback since the + * last call to syncfs(). If sync_filesystem() does not return an error, any + * existing writeback error on the superblock will be used instead (and cleared + * either way). We don't use this (page writeback is a different thing for us), + * so for 5.8-5.16 we can use that instead to get syncfs() to return the error. + * + * Before 5.8, we have no other good options - no matter what happens, the + * userspace program will be told the call has succeeded, and so we must make + * it so. Therefore, when we are asked to wait for sync to complete (wait == + * 1), if zfs_sync() has returned an error we have no choice but to block, + * regardless of the reason. + * + * The 5.17 change was backported to the 5.10, 5.15 and 5.16 series, and likely + * to some vendor kernels. Meanwhile, s_wb_err is still in use in 6.15 (the + * mainline Linux series at time of writing), and has likely been backported to + * vendor kernels before 5.8. We don't really want to use a workaround when we + * don't have to, but we can't really detect whether or not sync_filesystem() + * will return our errors (without a difficult runtime test anyway). So, we use + * a static version check: any kernel reporting its version as 5.17+ will use a + * direct error return; otherwise, we'll use s_wb_err if it was detected at + * configure (5.8-5.16 + vendor backports) or, if it's unavailable, we will + * block to ensure the correct semantics. + * + * See https://github.com/openzfs/zfs/issues/17416 for further discussion.
+ */ static int zpl_sync_fs(struct super_block *sb, int wait) { fstrans_cookie_t cookie; cred_t *cr = CRED(); int error; crhold(cr); cookie = spl_fstrans_mark(); error = -zfs_sync(sb, wait, cr); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0) +#ifdef HAVE_SUPER_BLOCK_S_WB_ERR + /* Record the error in the superblock's s_wb_err error store. */ if (error && wait) + errseq_set(&sb->s_wb_err, error); +#else + /* No s_wb_err: if the filesystem can be entered, block in txg_wait_synced() and then report success. */ if (error && wait) { + zfsvfs_t *zfsvfs = sb->s_fs_info; + ASSERT3P(zfsvfs, !=, NULL); + if (zfs_enter(zfsvfs, FTAG) == 0) { + txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); + zfs_exit(zfsvfs, FTAG); + error = 0; + } + } +#endif +#endif /* < 5.17.0 */ + spl_fstrans_unmark(cookie); crfree(cr); - ASSERT3S(error, <=, 0); + ASSERT3S(error, <=, 0); return (error); } /* * Fill statp for the dentry's inode via zfs_statvfs() and return the * negated error code. For 32-bit API callers the block size and the * block/file counts are rescaled afterwards, as described below. */ static int zpl_statfs(struct dentry *dentry, struct kstatfs *statp) { fstrans_cookie_t cookie; int error; cookie = spl_fstrans_mark(); error = -zfs_statvfs(dentry->d_inode, statp); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); /* * If required by a 32-bit system call, dynamically scale the * block size up to 16MiB and decrease the block counts. This * allows for a maximum size of 64EiB to be reported. The file * counts must be artificially capped at 2^32-1.
*/ if (unlikely(zpl_is_32bit_api())) { /* Double the block size and halve the counts until f_blocks fits in 32 bits or f_bsize reaches SPA_MAXBLOCKSIZE. */ while (statp->f_blocks > UINT32_MAX && statp->f_bsize < SPA_MAXBLOCKSIZE) { statp->f_frsize <<= 1; statp->f_bsize <<= 1; statp->f_blocks >>= 1; statp->f_bfree >>= 1; statp->f_bavail >>= 1; } /* Clamp the free count so that f_files (free + used) does not exceed UINT32_MAX. */ uint64_t usedobjs = statp->f_files - statp->f_ffree; statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs); statp->f_files = statp->f_ffree + usedobjs; } return (error); } /* * Remount handler: wraps the mount data in a zfs_mnt_t (with no dataset * name), passes it with the flags to zfs_remount() and returns the * negated error code. */ static int zpl_remount_fs(struct super_block *sb, int *flags, char *data) { zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data }; fstrans_cookie_t cookie; int error; cookie = spl_fstrans_mark(); error = -zfs_remount(sb, flags, &zm); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); return (error); } /* * Write the name of the dataset behind zfsvfs->z_os into seq, escaping * spaces. Returns the zpl_enter() error if the filesystem cannot be * entered, otherwise 0. */ static int __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs) { int error; if ((error = zpl_enter(zfsvfs, FTAG)) != 0) return (error); char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dmu_objset_name(zfsvfs->z_os, fsname); for (int i = 0; fsname[i] != 0; i++) { /* * Spaces in the dataset name must be converted to their * octal escape sequence for getmntent(3) to correctly * parse the fsname portion of /proc/self/mounts. */ if (fsname[i] == ' ') { seq_puts(seq, "\\040"); } else { seq_putc(seq, fsname[i]); } } kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); zpl_exit(zfsvfs, FTAG); return (0); } /* * show_devname entry point: looks up the zfsvfs through the root * dentry's super block and defers to __zpl_show_devname(). */ static int zpl_show_devname(struct seq_file *seq, struct dentry *root) { return (__zpl_show_devname(seq, root->d_sb->s_fs_info)); } /* * Write the xattr, ACL type (only when CONFIG_FS_POSIX_ACL is defined) * and case sensitivity settings of zfsvfs into seq as comma-prefixed * option strings. Always returns 0. */ static int __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs) { seq_printf(seq, ",%s", zfsvfs->z_flags & ZSB_XATTR ?
"xattr" : "noxattr"); #ifdef CONFIG_FS_POSIX_ACL switch (zfsvfs->z_acl_type) { case ZFS_ACLTYPE_POSIX: seq_puts(seq, ",posixacl"); break; default: seq_puts(seq, ",noacl"); break; } #endif /* CONFIG_FS_POSIX_ACL */ switch (zfsvfs->z_case) { case ZFS_CASE_SENSITIVE: seq_puts(seq, ",casesensitive"); break; case ZFS_CASE_INSENSITIVE: seq_puts(seq, ",caseinsensitive"); break; default: seq_puts(seq, ",casemixed"); break; } return (0); } /* * show_options entry point: looks up the zfsvfs through the root * dentry's super block and defers to __zpl_show_options(). */ static int zpl_show_options(struct seq_file *seq, struct dentry *root) { return (__zpl_show_options(seq, root->d_sb->s_fs_info)); } /* * Populate a super block by passing it, the zfs_mnt_t in data and the * silent flag to zfs_domount(). Returns the negated error code. */ static int zpl_fill_super(struct super_block *sb, void *data, int silent) { zfs_mnt_t *zm = (zfs_mnt_t *)data; fstrans_cookie_t cookie; int error; cookie = spl_fstrans_mark(); error = -zfs_domount(sb, zm, silent); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); return (error); } /* * Comparison callback handed to sget() by zpl_mount_impl(); data is the * objset being mounted. Returns non-zero when the super block already * has a zfsvfs whose z_os is that objset. */ static int zpl_test_super(struct super_block *s, void *data) { zfsvfs_t *zfsvfs = s->s_fs_info; objset_t *os = data; /* * If the os doesn't match the z_os in the super_block, assume it is * not a match. Matching would imply a multimount of a dataset. It is * possible that during a multimount, there is a simultaneous operation * that changes the z_os, e.g., rollback, where the match will be * missed, but in that case the user will get an EBUSY. */ return (zfsvfs != NULL && os == zfsvfs->z_os); } /* * Find or create the super block for the dataset named by * zm->mnt_osname. Returns the super block on success, or an ERR_PTR() * value if the objset cannot be held, sget() fails, the recheck or * zpl_fill_super() fails, or an existing super block differs in SB_RDONLY. */ static struct super_block * zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) { struct super_block *s; objset_t *os; boolean_t issnap = B_FALSE; int err; err = dmu_objset_hold(zm->mnt_osname, FTAG, &os); if (err) return (ERR_PTR(-err)); /* * The dsl pool lock must be released prior to calling sget(). * It is possible sget() may block on the lock in grab_super() * while deactivate_super() holds that same lock and waits for * a txg sync. If the dsl_pool lock is held over sget() * this can prevent the pool sync and cause a deadlock.
*/ dsl_dataset_long_hold(dmu_objset_ds(os), FTAG); dsl_pool_rele(dmu_objset_pool(os), FTAG); s = sget(fs_type, zpl_test_super, set_anon_super, flags, os); /* * Recheck with the lock held to prevent mounting the wrong dataset * since z_os can be stale when the teardown lock is held. * * We can't do this in zpl_test_super since it's under spinlock and * also s_umount lock is not held there so it would race with * zfs_umount and zfsvfs can be freed. */ if (!IS_ERR(s) && s->s_fs_info != NULL) { zfsvfs_t *zfsvfs = s->s_fs_info; if (zpl_enter(zfsvfs, FTAG) == 0) { if (os != zfsvfs->z_os) err = -SET_ERROR(EBUSY); issnap = zfsvfs->z_issnap; zpl_exit(zfsvfs, FTAG); } else { err = -SET_ERROR(EBUSY); } } dsl_dataset_long_rele(dmu_objset_ds(os), FTAG); dsl_dataset_rele(dmu_objset_ds(os), FTAG); if (IS_ERR(s)) return (ERR_CAST(s)); if (err) { deactivate_locked_super(s); return (ERR_PTR(err)); } /* A super block without a root has not been filled in yet. */ if (s->s_root == NULL) { err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0); if (err) { deactivate_locked_super(s); return (ERR_PTR(err)); } s->s_flags |= SB_ACTIVE; } else if (!issnap && ((flags ^ s->s_flags) & SB_RDONLY)) { /* * Skip ro check for snap since snap is always ro regardless * of whether the ro flag is passed by mount or not. */ deactivate_locked_super(s); return (ERR_PTR(-EBUSY)); } return (s); } /* * mount entry point: wraps osname and data in a zfs_mnt_t, obtains the * super block from zpl_mount_impl() and returns dget() of its root * dentry, or the error pointer from zpl_mount_impl(). */ static struct dentry * zpl_mount(struct file_system_type *fs_type, int flags, const char *osname, void *data) { zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data }; struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm); if (IS_ERR(sb)) return (ERR_CAST(sb)); return (dget(sb->s_root)); } /* * kill_sb entry point: calls zfs_preumount() and then * kill_anon_super() on the super block. */ static void zpl_kill_sb(struct super_block *sb) { zfs_preumount(sb); kill_anon_super(sb); } /* * Call zfs_prune() on the super block passed in arg, forwarding * nr_to_scan. Nothing is done unless s_umount can be taken with * down_read_trylock() and the super block passes the liveness checks * below. The return value and object count from zfs_prune() are * discarded. */ void zpl_prune_sb(uint64_t nr_to_scan, void *arg) { struct super_block *sb = (struct super_block *)arg; int objects = 0; /* * Ensure the superblock is not in the process of being torn down.
*/ #ifdef HAVE_SB_DYING if (down_read_trylock(&sb->s_umount)) { if (!(sb->s_flags & SB_DYING) && sb->s_root && (sb->s_flags & SB_BORN)) { (void) zfs_prune(sb, nr_to_scan, &objects); } up_read(&sb->s_umount); } #else /* Without SB_DYING, check that the super block is still hashed on s_instances instead. */ if (down_read_trylock(&sb->s_umount)) { if (!hlist_unhashed(&sb->s_instances) && sb->s_root && (sb->s_flags & SB_BORN)) { (void) zfs_prune(sb, nr_to_scan, &objects); } up_read(&sb->s_umount); } #endif } /* * Super block operations table wiring the zpl_* handlers in this file. * write_inode and show_stats are explicitly left NULL. */ const struct super_operations zpl_super_operations = { .alloc_inode = zpl_inode_alloc, .destroy_inode = zpl_inode_destroy, .dirty_inode = zpl_dirty_inode, .write_inode = NULL, .evict_inode = zpl_evict_inode, .put_super = zpl_put_super, .sync_fs = zpl_sync_fs, .statfs = zpl_statfs, .remount_fs = zpl_remount_fs, .show_devname = zpl_show_devname, .show_options = zpl_show_options, .show_stats = NULL, }; /* * File system type registered under the name ZFS_DRIVER. FS_ALLOW_IDMAP * is added to fs_flags only when HAVE_IDMAP_MNT_API is defined. */ struct file_system_type zpl_fs_type = { .owner = THIS_MODULE, .name = ZFS_DRIVER, #if defined(HAVE_IDMAP_MNT_API) .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, #else .fs_flags = FS_USERNS_MOUNT, #endif .mount = zpl_mount, .kill_sb = zpl_kill_sb, };