diff --git a/config/kernel-pin-user-pages.m4 b/config/kernel-pin-user-pages.m4 new file mode 100644 index 000000000000..fe7aff375208 --- /dev/null +++ b/config/kernel-pin-user-pages.m4 @@ -0,0 +1,33 @@ +dnl # +dnl # Check for pin_user_pages_unlocked(). +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_PIN_USER_PAGES], [ + ZFS_LINUX_TEST_SRC([pin_user_pages_unlocked], [ + #include + ],[ + unsigned long start = 0; + unsigned long nr_pages = 1; + struct page **pages = NULL; + unsigned int gup_flags = 0; + long ret __attribute__ ((unused)); + + ret = pin_user_pages_unlocked(start, nr_pages, pages, + gup_flags); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_PIN_USER_PAGES], [ + + dnl # + dnl # Kernal 5.8 introduced the pin_user_pages* interfaces which should + dnl # be used for Direct I/O requests. + dnl # + AC_MSG_CHECKING([whether pin_user_pages_unlocked() is available]) + ZFS_LINUX_TEST_RESULT([pin_user_pages_unlocked], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PIN_USER_PAGES_UNLOCKED, 1, + [pin_user_pages_unlocked() is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-vfs-iov_iter.m4 b/config/kernel-vfs-iov_iter.m4 index 29e19acddbb1..a223343030db 100644 --- a/config/kernel-vfs-iov_iter.m4 +++ b/config/kernel-vfs-iov_iter.m4 @@ -1,98 +1,93 @@ dnl # dnl # Check for available iov_iter functionality. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [ ZFS_LINUX_TEST_SRC([fault_in_iov_iter_readable], [ #include #include ],[ struct iov_iter iter = { 0 }; size_t size = 512; int error __attribute__ ((unused)); error = fault_in_iov_iter_readable(&iter, size); ]) - ZFS_LINUX_TEST_SRC([iov_iter_get_pages2], [ + ZFS_LINUX_TEST_SRC([iov_iter_type], [ + #include #include ],[ struct iov_iter iter = { 0 }; - struct page **pages = NULL; - size_t maxsize = 4096; - unsigned maxpages = 1; - size_t start; - size_t ret __attribute__ ((unused)); - - ret = iov_iter_get_pages2(&iter, pages, maxsize, maxpages, - &start); + __attribute__((unused)) enum iter_type i = iov_iter_type(&iter); ]) - ZFS_LINUX_TEST_SRC([iov_iter_type], [ - #include + ZFS_LINUX_TEST_SRC([iter_is_ubuf], [ #include ],[ struct iov_iter iter = { 0 }; - __attribute__((unused)) enum iter_type i = iov_iter_type(&iter); + bool ret __attribute__((unused)); + + ret = iter_is_ubuf(&iter); ]) ZFS_LINUX_TEST_SRC([iter_iov], [ #include #include ],[ struct iov_iter iter = { 0 }; __attribute__((unused)) const struct iovec *iov = iter_iov(&iter); ]) ]) AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [ AC_MSG_CHECKING([whether fault_in_iov_iter_readable() is available]) ZFS_LINUX_TEST_RESULT([fault_in_iov_iter_readable], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_FAULT_IN_IOV_ITER_READABLE, 1, [fault_in_iov_iter_readable() is available]) ],[ AC_MSG_RESULT(no) ]) - dnl # - dnl # Kernel 6.0 changed iov_iter_get_pages() to iov_iter_page_pages2(). - dnl # - AC_MSG_CHECKING([whether iov_iter_get_pages2() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_get_pages2], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_GET_PAGES2, 1, - [iov_iter_get_pages2() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - dnl # dnl # This checks for iov_iter_type() in linux/uio.h. It is not dnl # required, however, and the module will compiled without it dnl # using direct access of the member attribute dnl # AC_MSG_CHECKING([whether iov_iter_type() is available]) ZFS_LINUX_TEST_RESULT([iov_iter_type], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_IOV_ITER_TYPE, 1, [iov_iter_type() is available]) ],[ AC_MSG_RESULT(no) ]) + dnl # + dnl # Kernel 6.0 introduced the ITER_UBUF iov_iter type. iter_is_ubuf() + dnl # was also added to determine if the iov_iter is an ITER_UBUF. + dnl # + AC_MSG_CHECKING([whether iter_is_ubuf() is available]) + ZFS_LINUX_TEST_RESULT([iter_is_ubuf], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_ITER_IS_UBUF, 1, [iter_is_ubuf() is available]) + ],[ + AC_MSG_RESULT(no) + ]) + dnl # dnl # Kernel 6.5 introduces the iter_iov() function that returns the dnl # __iov member of an iov_iter*. The iov member was renamed to this dnl # __iov member, and is intended to be accessed via the helper dnl # function now. dnl # AC_MSG_CHECKING([whether iter_iov() is available]) ZFS_LINUX_TEST_RESULT([iter_iov], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_ITER_IOV, 1, [iter_iov() is available]) ],[ AC_MSG_RESULT(no) ]) ]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 49ec6266e87a..ae66633907bf 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -1,1016 +1,1018 @@ dnl # dnl # Default ZFS kernel configuration dnl # AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ AM_COND_IF([BUILD_LINUX], [ dnl # Setup the kernel build environment. ZFS_AC_KERNEL ZFS_AC_QAT dnl # Sanity checks for module building and CONFIG_* defines ZFS_AC_KERNEL_CONFIG_DEFINED ZFS_AC_MODULE_SYMVERS dnl # Sequential ZFS_LINUX_TRY_COMPILE tests ZFS_AC_KERNEL_FPU_HEADER ZFS_AC_KERNEL_OBJTOOL_HEADER ZFS_AC_KERNEL_MISC_MINOR ZFS_AC_KERNEL_DECLARE_EVENT_CLASS dnl # Parallel ZFS_LINUX_TEST_SRC / ZFS_LINUX_TEST_RESULT tests ZFS_AC_KERNEL_TEST_SRC ZFS_AC_KERNEL_TEST_RESULT AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" ]) AC_SUBST(KERNEL_MAKE) ]) ]) dnl # dnl # Generate and compile all of the kernel API test cases to determine dnl # which interfaces are available. By invoking the kernel build system dnl # only once the compilation can be done in parallel significantly dnl # speeding up the process. dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_TYPES ZFS_AC_KERNEL_SRC_OBJTOOL ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE ZFS_AC_KERNEL_SRC_PDE_DATA ZFS_AC_KERNEL_SRC_GENERIC_FADVISE ZFS_AC_KERNEL_SRC_SCHED ZFS_AC_KERNEL_SRC_USLEEP_RANGE ZFS_AC_KERNEL_SRC_VMALLOC_PAGE_KERNEL ZFS_AC_KERNEL_SRC_INODE_TIMES ZFS_AC_KERNEL_SRC_PROC_OPERATIONS ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_SRC_BIO ZFS_AC_KERNEL_SRC_BLKDEV ZFS_AC_KERNEL_SRC_BLK_QUEUE ZFS_AC_KERNEL_SRC_GENHD_FLAGS ZFS_AC_KERNEL_SRC_REVALIDATE_DISK ZFS_AC_KERNEL_SRC_GET_DISK_RO ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_SRC_XATTR ZFS_AC_KERNEL_SRC_ACL ZFS_AC_KERNEL_SRC_INODE_SETATTR ZFS_AC_KERNEL_SRC_INODE_GETATTR ZFS_AC_KERNEL_SRC_SHOW_OPTIONS ZFS_AC_KERNEL_SRC_SHRINKER ZFS_AC_KERNEL_SRC_MKDIR ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS ZFS_AC_KERNEL_SRC_CREATE ZFS_AC_KERNEL_SRC_PERMISSION ZFS_AC_KERNEL_SRC_TMPFILE ZFS_AC_KERNEL_SRC_AUTOMOUNT ZFS_AC_KERNEL_SRC_COMMIT_METADATA ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT ZFS_AC_KERNEL_SRC_SET_NLINK ZFS_AC_KERNEL_SRC_SGET ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO ZFS_AC_KERNEL_SRC_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_SRC_VFS_READPAGES ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_SRC_VFS_IOV_ITER ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_SRC_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT ZFS_AC_KERNEL_SRC_FPU ZFS_AC_KERNEL_SRC_FMODE_T ZFS_AC_KERNEL_SRC_KUIDGID_T ZFS_AC_KERNEL_SRC_KUID_HELPERS ZFS_AC_KERNEL_SRC_RENAME ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES ZFS_AC_KERNEL_SRC_PERCPU ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR ZFS_AC_KERNEL_SRC_MKNOD ZFS_AC_KERNEL_SRC_SYMLINK ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS ZFS_AC_KERNEL_SRC_SIGINFO ZFS_AC_KERNEL_SRC_SYSFS ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_SRC_STRLCPY ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_SRC_ADD_DISK ZFS_AC_KERNEL_SRC_KTHREAD ZFS_AC_KERNEL_SRC_ZERO_PAGE ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_SRC_IDMAP_MNT_API ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS ZFS_AC_KERNEL_SRC_IATTR_VFSID ZFS_AC_KERNEL_SRC_WRITEPAGE_T ZFS_AC_KERNEL_SRC_RECLAIMED ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_SZ ZFS_AC_KERNEL_SRC_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ ZFS_AC_KERNEL_SRC_SYNC_BDEV ZFS_AC_KERNEL_SRC_MM_PAGE_FLAGS ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING ZFS_AC_KERNEL_SRC_FILE + ZFS_AC_KERNEL_SRC_PIN_USER_PAGES case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE ;; riscv*) ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE ;; esac AC_MSG_CHECKING([for available kernel interfaces]) ZFS_LINUX_TEST_COMPILE_ALL([kabi]) AC_MSG_RESULT([done]) ]) dnl # dnl # Check results of kernel interface tests. dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_TYPES ZFS_AC_KERNEL_ACCESS_OK_TYPE ZFS_AC_KERNEL_OBJTOOL ZFS_AC_KERNEL_PDE_DATA ZFS_AC_KERNEL_GENERIC_FADVISE ZFS_AC_KERNEL_SCHED ZFS_AC_KERNEL_USLEEP_RANGE ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL ZFS_AC_KERNEL_INODE_TIMES ZFS_AC_KERNEL_PROC_OPERATIONS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BIO ZFS_AC_KERNEL_BLKDEV ZFS_AC_KERNEL_BLK_QUEUE ZFS_AC_KERNEL_GENHD_FLAGS ZFS_AC_KERNEL_REVALIDATE_DISK ZFS_AC_KERNEL_GET_DISK_RO ZFS_AC_KERNEL_DISCARD_GRANULARITY ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_XATTR ZFS_AC_KERNEL_ACL ZFS_AC_KERNEL_INODE_SETATTR ZFS_AC_KERNEL_INODE_GETATTR ZFS_AC_KERNEL_SHOW_OPTIONS ZFS_AC_KERNEL_SHRINKER ZFS_AC_KERNEL_MKDIR ZFS_AC_KERNEL_LOOKUP_FLAGS ZFS_AC_KERNEL_CREATE ZFS_AC_KERNEL_PERMISSION ZFS_AC_KERNEL_TMPFILE ZFS_AC_KERNEL_AUTOMOUNT ZFS_AC_KERNEL_COMMIT_METADATA ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT ZFS_AC_KERNEL_SET_NLINK ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_VFS_READ_FOLIO ZFS_AC_KERNEL_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_VFS_READPAGES ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_VFS_IOV_ITER ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_MAKE_REQUEST_FN ZFS_AC_KERNEL_GENERIC_IO_ACCT ZFS_AC_KERNEL_FPU ZFS_AC_KERNEL_FMODE_T ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_RENAME ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_TOTALHIGH_PAGES ZFS_AC_KERNEL_PERCPU ZFS_AC_KERNEL_GENERIC_FILLATTR ZFS_AC_KERNEL_MKNOD ZFS_AC_KERNEL_SYMLINK ZFS_AC_KERNEL_BIO_MAX_SEGS ZFS_AC_KERNEL_SIGINFO ZFS_AC_KERNEL_SYSFS ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_STRLCPY ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_ADD_DISK ZFS_AC_KERNEL_KTHREAD ZFS_AC_KERNEL_ZERO_PAGE ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_IDMAP_MNT_API ZFS_AC_KERNEL_IDMAP_NO_USERNS ZFS_AC_KERNEL_IATTR_VFSID ZFS_AC_KERNEL_WRITEPAGE_T ZFS_AC_KERNEL_RECLAIMED ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_REGISTER_SYSCTL_SZ ZFS_AC_KERNEL_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_COPY_SPLICE_READ ZFS_AC_KERNEL_SYNC_BDEV ZFS_AC_KERNEL_MM_PAGE_FLAGS ZFS_AC_KERNEL_MM_PAGE_SIZE ZFS_AC_KERNEL_MM_PAGE_MAPPING ZFS_AC_KERNEL_1ARG_ASSIGN_STR ZFS_AC_KERNEL_FILE + ZFS_AC_KERNEL_PIN_USER_PAGES case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE ;; riscv*) ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE ;; esac ]) dnl # dnl # Detect name used for Module.symvers file in kernel dnl # AC_DEFUN([ZFS_AC_MODULE_SYMVERS], [ modpost=$LINUX/scripts/Makefile.modpost AC_MSG_CHECKING([kernel file name for module symbols]) AS_IF([test "x$enable_linux_builtin" != xyes -a -f "$modpost"], [ AS_IF([grep -q Modules.symvers $modpost], [ LINUX_SYMBOLS=Modules.symvers ], [ LINUX_SYMBOLS=Module.symvers ]) AS_IF([test ! -f "$LINUX_OBJ/$LINUX_SYMBOLS"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution *** is installed. If you are building with a custom kernel, make sure *** the kernel is configured, built, and the '--with-linux=PATH' *** configure option refers to the location of the kernel source. ]) ]) ], [ LINUX_SYMBOLS=NONE ]) AC_MSG_RESULT($LINUX_SYMBOLS) AC_SUBST(LINUX_SYMBOLS) ]) dnl # dnl # Detect the kernel to be built against dnl # dnl # Most modern Linux distributions have separate locations for bare dnl # source (source) and prebuilt (build) files. Additionally, there are dnl # `source` and `build` symlinks in `/lib/modules/$(KERNEL_VERSION)` dnl # pointing to them. The directory search order is now: dnl # dnl # - `configure` command line values if both `--with-linux` and dnl # `--with-linux-obj` were defined dnl # dnl # - If only `--with-linux` was defined, `--with-linux-obj` is assumed dnl # to have the same value as `--with-linux` dnl # dnl # - If neither `--with-linux` nor `--with-linux-obj` were defined dnl # autodetection is used: dnl # dnl # - `/lib/modules/$(uname -r)/{source,build}` respectively, if exist. dnl # dnl # - If only `/lib/modules/$(uname -r)/build` exists, it is assumed dnl # to be both source and build directory. dnl # dnl # - The first directory in `/lib/modules` with the highest version dnl # number according to `sort -V` which contains both `source` and dnl # `build` symlinks/directories. If module directory contains only dnl # `build` component, it is assumed to be both source and build dnl # directory. dnl # dnl # - Last resort: the first directory matching `/usr/src/kernels/*` dnl # and `/usr/src/linux-*` with the highest version number according dnl # to `sort -V` is assumed to be both source and build directory. dnl # AC_DEFUN([ZFS_AC_KERNEL], [ AC_ARG_WITH([linux], AS_HELP_STRING([--with-linux=PATH], [Path to kernel source]), [kernelsrc="$withval"]) AC_ARG_WITH(linux-obj, AS_HELP_STRING([--with-linux-obj=PATH], [Path to kernel build objects]), [kernelbuild="$withval"]) AC_MSG_CHECKING([kernel source and build directories]) AS_IF([test -n "$kernelsrc" && test -z "$kernelbuild"], [ kernelbuild="$kernelsrc" ], [test -z "$kernelsrc"], [ AS_IF([test -e "/lib/modules/$(uname -r)/source" && \ test -e "/lib/modules/$(uname -r)/build"], [ src="/lib/modules/$(uname -r)/source" build="/lib/modules/$(uname -r)/build" ], [test -e "/lib/modules/$(uname -r)/build"], [ build="/lib/modules/$(uname -r)/build" src="$build" ], [ src= for d in $(ls -1d /lib/modules/* 2>/dev/null | sort -Vr); do if test -e "$d/source" && test -e "$d/build"; then src="$d/source" build="$d/build" break fi if test -e "$d/build"; then src="$d/build" build="$d/build" break fi done # the least reliable method if test -z "$src"; then src=$(ls -1d /usr/src/kernels/* /usr/src/linux-* \ 2>/dev/null | grep -v obj | sort -Vr | head -1) build="$src" fi ]) AS_IF([test -n "$src" && test -e "$src"], [ kernelsrc=$(readlink -e "$src") ], [ kernelsrc="[Not found]" ]) AS_IF([test -n "$build" && test -e "$build"], [ kernelbuild=$(readlink -e "$build") ], [ kernelbuild="[Not found]" ]) ], [ AS_IF([test "$kernelsrc" = "NONE"], [ kernsrcver=NONE ]) withlinux=yes ]) AC_MSG_RESULT([done]) AC_MSG_CHECKING([kernel source directory]) AC_MSG_RESULT([$kernelsrc]) AC_MSG_CHECKING([kernel build directory]) AC_MSG_RESULT([$kernelbuild]) AS_IF([test ! -d "$kernelsrc" || test ! -d "$kernelbuild"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution *** is installed and then try again. If that fails, you can specify the *** location of the kernel source and build with the '--with-linux=PATH' and *** '--with-linux-obj=PATH' options respectively.]) ]) AC_MSG_CHECKING([kernel source version]) utsrelease1=$kernelbuild/include/linux/version.h utsrelease2=$kernelbuild/include/linux/utsrelease.h utsrelease3=$kernelbuild/include/generated/utsrelease.h AS_IF([test -r $utsrelease1 && grep -qF UTS_RELEASE $utsrelease1], [ utsrelease=$utsrelease1 ], [test -r $utsrelease2 && grep -qF UTS_RELEASE $utsrelease2], [ utsrelease=$utsrelease2 ], [test -r $utsrelease3 && grep -qF UTS_RELEASE $utsrelease3], [ utsrelease=$utsrelease3 ]) AS_IF([test -n "$utsrelease"], [ kernsrcver=$($AWK '/UTS_RELEASE/ { gsub(/"/, "", $[3]); print $[3] }' $utsrelease) AS_IF([test -z "$kernsrcver"], [ AC_MSG_RESULT([Not found]) AC_MSG_ERROR([ *** Cannot determine kernel version. ]) ]) ], [ AC_MSG_RESULT([Not found]) if test "x$enable_linux_builtin" != xyes; then AC_MSG_ERROR([ *** Cannot find UTS_RELEASE definition. ]) else AC_MSG_ERROR([ *** Cannot find UTS_RELEASE definition. *** Please run 'make prepare' inside the kernel source tree.]) fi ]) AC_MSG_RESULT([$kernsrcver]) AX_COMPARE_VERSION([$kernsrcver], [ge], [$ZFS_META_KVER_MIN], [], [ AC_MSG_ERROR([ *** Cannot build against kernel version $kernsrcver. *** The minimum supported kernel version is $ZFS_META_KVER_MIN. ]) ]) AC_ARG_ENABLE([linux-experimental], AS_HELP_STRING([--enable-linux-experimental], [Allow building against some unsupported kernel versions])) AX_COMPARE_VERSION([$kernsrcver], [ge], [$ZFS_META_KVER_MAX], [ AX_COMPARE_VERSION([$kernsrcver], [eq2], [$ZFS_META_KVER_MAX], [ kern_max_version_ok=yes ], [ kern_max_version_ok=no ]) ], [ kern_max_version_ok=yes ]) AS_IF([test "x$kern_max_version_ok" != "xyes"], [ AS_IF([test "x$enable_linux_experimental" == "xyes"], [ AC_DEFINE(HAVE_LINUX_EXPERIMENTAL, 1, [building against unsupported kernel version]) ], [ AC_MSG_ERROR([ *** Cannot build against kernel version $kernsrcver. *** The maximum supported kernel version is $ZFS_META_KVER_MAX. ]) ]) ]) LINUX=${kernelsrc} LINUX_OBJ=${kernelbuild} LINUX_VERSION=${kernsrcver} AC_SUBST(LINUX) AC_SUBST(LINUX_OBJ) AC_SUBST(LINUX_VERSION) ]) AC_DEFUN([ZFS_AC_KERNEL_VERSION_WARNING], [ AS_IF([test "x$enable_linux_experimental" = "xyes" && \ test "x$kern_max_version_ok" != "xyes"], [ AC_MSG_WARN([ You are building OpenZFS against Linux version $kernsrcver. This combination is considered EXPERIMENTAL by the OpenZFS project. Even if it appears to build and run correctly, there may be bugs that can cause SERIOUS DATA LOSS. YOU HAVE BEEN WARNED! If you choose to continue, we'd appreciate if you could report your results on the OpenZFS issue tracker at: https://github.com/openzfs/zfs/issues/new Your feedback will help us prepare a new OpenZFS release that supports this version of Linux. ]) ]) ]) dnl # dnl # Detect the QAT module to be built against, QAT provides hardware dnl # acceleration for data compression: dnl # dnl # https://01.org/intel-quickassist-technology dnl # dnl # 1) Download and install QAT driver from the above link dnl # 2) Start QAT driver in your system: dnl # service qat_service start dnl # 3) Enable QAT in ZFS, e.g.: dnl # ./configure --with-qat=/QAT1.6 dnl # make dnl # 4) Set GZIP compression in ZFS dataset: dnl # zfs set compression = gzip dnl # dnl # Then the data written to this ZFS pool is compressed by QAT accelerator dnl # automatically, and de-compressed by QAT when read from the pool. dnl # dnl # 1) Get QAT hardware statistics with: dnl # cat /proc/icp_dh895xcc_dev/qat dnl # 2) To disable QAT: dnl # insmod zfs.ko zfs_qat_disable=1 dnl # AC_DEFUN([ZFS_AC_QAT], [ AC_ARG_WITH([qat], AS_HELP_STRING([--with-qat=PATH], [Path to qat source]), AS_IF([test "$withval" = "yes"], AC_MSG_ERROR([--with-qat=PATH requires a PATH]), [qatsrc="$withval"])) AC_ARG_WITH([qat-obj], AS_HELP_STRING([--with-qat-obj=PATH], [Path to qat build objects]), [qatbuild="$withval"]) AS_IF([test ! -z "${qatsrc}"], [ AC_MSG_CHECKING([qat source directory]) AC_MSG_RESULT([$qatsrc]) QAT_SRC="${qatsrc}/quickassist" AS_IF([ test ! -e "$QAT_SRC/include/cpa.h"], [ AC_MSG_ERROR([ *** Please make sure the qat driver package is installed *** and specify the location of the qat source with the *** '--with-qat=PATH' option then try again. Failed to *** find cpa.h in: ${QAT_SRC}/include]) ]) ]) AS_IF([test ! -z "${qatsrc}"], [ AC_MSG_CHECKING([qat build directory]) AS_IF([test -z "$qatbuild"], [ qatbuild="${qatsrc}/build" ]) AC_MSG_RESULT([$qatbuild]) QAT_OBJ=${qatbuild} AS_IF([ ! test -e "$QAT_OBJ/icp_qa_al.ko" && ! test -e "$QAT_OBJ/qat_api.ko"], [ AC_MSG_ERROR([ *** Please make sure the qat driver is installed then try again. *** Failed to find icp_qa_al.ko or qat_api.ko in: $QAT_OBJ]) ]) AC_SUBST(QAT_SRC) AC_SUBST(QAT_OBJ) AC_DEFINE(HAVE_QAT, 1, [qat is enabled and existed]) ]) dnl # dnl # Detect the name used for the QAT Module.symvers file. dnl # AS_IF([test ! -z "${qatsrc}"], [ AC_MSG_CHECKING([qat file for module symbols]) QAT_SYMBOLS=$QAT_SRC/lookaside/access_layer/src/Module.symvers AS_IF([test -r $QAT_SYMBOLS], [ AC_MSG_RESULT([$QAT_SYMBOLS]) AC_SUBST(QAT_SYMBOLS) ],[ AC_MSG_ERROR([ *** Please make sure the qat driver is installed then try again. *** Failed to find Module.symvers in: $QAT_SYMBOLS ]) ]) ]) ]) dnl # dnl # ZFS_LINUX_CONFTEST_H dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_H], [ test -d build/$2 || mkdir -p build/$2 cat - <<_ACEOF >build/$2/$2.h $1 _ACEOF ]) dnl # dnl # ZFS_LINUX_CONFTEST_C dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_C], [ test -d build/$2 || mkdir -p build/$2 cat confdefs.h - <<_ACEOF >build/$2/$2.c $1 _ACEOF ]) dnl # dnl # ZFS_LINUX_CONFTEST_MAKEFILE dnl # dnl # $1 - test case name dnl # $2 - add to top-level Makefile dnl # $3 - additional build flags dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_MAKEFILE], [ test -d build || mkdir -p build test -d build/$1 || mkdir -p build/$1 file=build/$1/Makefile dnl # Example command line to manually build source. cat - <<_ACEOF >$file # Example command line to manually build source # make modules -C $LINUX_OBJ $ARCH_UM M=$PWD/build/$1 ccflags-y := -Werror $FRAME_LARGER_THAN _ACEOF dnl # Additional custom CFLAGS as requested. m4_ifval($3, [echo "ccflags-y += $3" >>$file], []) dnl # Test case source echo "obj-m := $1.o" >>$file AS_IF([test "x$2" = "xyes"], [echo "obj-m += $1/" >>build/Makefile], []) ]) dnl # dnl # ZFS_LINUX_TEST_PROGRAM(C)([PROLOGUE], [BODY]) dnl # m4_define([ZFS_LINUX_TEST_PROGRAM], [ #include $1 int main (void) { $2 ; return 0; } MODULE_DESCRIPTION("conftest"); MODULE_AUTHOR(ZFS_META_AUTHOR); MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE); MODULE_LICENSE($3); ]) dnl # dnl # ZFS_LINUX_TEST_REMOVE dnl # dnl # Removes the specified test source and results. dnl # AC_DEFUN([ZFS_LINUX_TEST_REMOVE], [ test -d build/$1 && rm -Rf build/$1 test -f build/Makefile && sed '/$1/d' build/Makefile ]) dnl # dnl # ZFS_LINUX_COMPILE dnl # dnl # $1 - build dir dnl # $2 - test command dnl # $3 - pass command dnl # $4 - fail command dnl # $5 - set KBUILD_MODPOST_NOFINAL='yes' dnl # $6 - set KBUILD_MODPOST_WARN='yes' dnl # dnl # Used internally by ZFS_LINUX_TEST_{COMPILE,MODPOST} dnl # AC_DEFUN([ZFS_LINUX_COMPILE], [ AC_ARG_VAR([KERNEL_CC], [C compiler for building kernel modules]) AC_ARG_VAR([KERNEL_LD], [Linker for building kernel modules]) AC_ARG_VAR([KERNEL_LLVM], [Binary option to build kernel modules with LLVM/CLANG toolchain]) AC_TRY_COMMAND([ KBUILD_MODPOST_NOFINAL="$5" KBUILD_MODPOST_WARN="$6" make modules -k -j$TEST_JOBS ${KERNEL_CC:+CC=$KERNEL_CC} ${KERNEL_LD:+LD=$KERNEL_LD} ${KERNEL_LLVM:+LLVM=$KERNEL_LLVM} CONFIG_MODULES=y CFLAGS_MODULE=-DCONFIG_MODULES -C $LINUX_OBJ $ARCH_UM M=$PWD/$1 >$1/build.log 2>&1]) AS_IF([AC_TRY_COMMAND([$2])], [$3], [$4]) ]) dnl # dnl # ZFS_LINUX_TEST_COMPILE dnl # dnl # Perform a full compile excluding the final modpost phase. dnl # AC_DEFUN([ZFS_LINUX_TEST_COMPILE], [ ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ mv $2/Makefile $2/Makefile.compile.$1 mv $2/build.log $2/build.log.$1 ],[ AC_MSG_ERROR([ *** Unable to compile test source to determine kernel interfaces.]) ], [yes], []) ]) dnl # dnl # ZFS_LINUX_TEST_MODPOST dnl # dnl # Perform a full compile including the modpost phase. This may dnl # be an incremental build if the objects have already been built. dnl # AC_DEFUN([ZFS_LINUX_TEST_MODPOST], [ ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [ mv $2/Makefile $2/Makefile.modpost.$1 cat $2/build.log >>build/build.log.$1 ],[ AC_MSG_ERROR([ *** Unable to modpost test source to determine kernel interfaces.]) ], [], [yes]) ]) dnl # dnl # Perform the compilation of the test cases in two phases. dnl # dnl # Phase 1) attempt to build the object files for all of the tests dnl # defined by the ZFS_LINUX_TEST_SRC macro. But do not dnl # perform the final modpost stage. dnl # dnl # Phase 2) disable all tests which failed the initial compilation, dnl # then invoke the final modpost step for the remaining tests. dnl # dnl # This allows us efficiently build the test cases in parallel while dnl # remaining resilient to build failures which are expected when dnl # detecting the available kernel interfaces. dnl # dnl # The maximum allowed parallelism can be controlled by setting the dnl # TEST_JOBS environment variable. Otherwise, it default to $(nproc). dnl # AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [ dnl # Phase 1 - Compilation only, final linking is skipped. ZFS_LINUX_TEST_COMPILE([$1], [build]) dnl # dnl # Phase 2 - When building external modules disable test cases dnl # which failed to compile and invoke modpost to verify the dnl # final linking. dnl # dnl # Test names suffixed with '_license' call modpost independently dnl # to ensure that a single incompatibility does not result in the dnl # modpost phase exiting early. This check is not performed on dnl # every symbol since the majority are compatible and doing so dnl # would significantly slow down this phase. dnl # dnl # When configuring for builtin (--enable-linux-builtin) dnl # fake the linking step artificially create the expected .ko dnl # files for tests which did compile. This is required for dnl # kernels which do not have loadable module support or have dnl # not yet been built. dnl # AS_IF([test "x$enable_linux_builtin" = "xno"], [ for dir in $(awk '/^obj-m/ { print [$]3 }' \ build/Makefile.compile.$1); do name=${dir%/} AS_IF([test -f build/$name/$name.o], [ AS_IF([test "${name##*_}" = "license"], [ ZFS_LINUX_TEST_MODPOST([$1], [build/$name]) echo "obj-n += $dir" >>build/Makefile ], [ echo "obj-m += $dir" >>build/Makefile ]) ], [ echo "obj-n += $dir" >>build/Makefile ]) done ZFS_LINUX_TEST_MODPOST([$1], [build]) ], [ for dir in $(awk '/^obj-m/ { print [$]3 }' \ build/Makefile.compile.$1); do name=${dir%/} AS_IF([test -f build/$name/$name.o], [ touch build/$name/$name.ko ]) done ]) ]) dnl # dnl # ZFS_LINUX_TEST_SRC dnl # dnl # $1 - name dnl # $2 - global dnl # $3 - source dnl # $4 - extra cflags dnl # $5 - check license-compatibility dnl # dnl # Check if the test source is buildable at all and then if it is dnl # license compatible. dnl # dnl # N.B because all of the test cases are compiled in parallel they dnl # must never depend on the results of previous tests. Each test dnl # needs to be entirely independent. dnl # AC_DEFUN([ZFS_LINUX_TEST_SRC], [ ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]], [["Dual BSD/GPL"]])], [$1]) ZFS_LINUX_CONFTEST_MAKEFILE([$1], [yes], [$4]) AS_IF([ test -n "$5" ], [ ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM( [[$2]], [[$3]], [[$5]])], [$1_license]) ZFS_LINUX_CONFTEST_MAKEFILE([$1_license], [yes], [$4]) ]) ]) dnl # dnl # ZFS_LINUX_TEST_RESULT dnl # dnl # $1 - name of a test source (ZFS_LINUX_TEST_SRC) dnl # $2 - run on success (valid .ko generated) dnl # $3 - run on failure (unable to compile) dnl # AC_DEFUN([ZFS_LINUX_TEST_RESULT], [ AS_IF([test -d build/$1], [ AS_IF([test -f build/$1/$1.ko], [$2], [$3]) ], [ AC_MSG_ERROR([ *** No matching source for the "$1" test, check that *** both the test source and result macros refer to the same name. ]) ]) ]) dnl # dnl # ZFS_LINUX_TEST_ERROR dnl # dnl # Generic error message which can be used when none of the expected dnl # kernel interfaces were detected. dnl # AC_DEFUN([ZFS_LINUX_TEST_ERROR], [ AC_MSG_ERROR([ *** None of the expected "$1" interfaces were detected. *** This may be because your kernel version is newer than what is *** supported, or you are using a patched custom kernel with *** incompatible modifications. *** *** ZFS Version: $ZFS_META_ALIAS *** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX ]) ]) dnl # dnl # ZFS_LINUX_TEST_RESULT_SYMBOL dnl # dnl # Like ZFS_LINUX_TEST_RESULT except ZFS_CHECK_SYMBOL_EXPORT is called to dnl # verify symbol exports, unless --enable-linux-builtin was provided to dnl # configure. dnl # AC_DEFUN([ZFS_LINUX_TEST_RESULT_SYMBOL], [ AS_IF([ ! test -f build/$1/$1.ko], [ $5 ], [ AS_IF([test "x$enable_linux_builtin" != "xyes"], [ ZFS_CHECK_SYMBOL_EXPORT([$2], [$3], [$4], [$5]) ], [ $4 ]) ]) ]) dnl # dnl # ZFS_LINUX_COMPILE_IFELSE dnl # AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [ ZFS_LINUX_TEST_REMOVE([conftest]) m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1], [conftest])]) m4_ifvaln([$5], [ZFS_LINUX_CONFTEST_H([$5], [conftest])], [ZFS_LINUX_CONFTEST_H([], [conftest])]) ZFS_LINUX_CONFTEST_MAKEFILE([conftest], [no], [m4_ifvaln([$5], [-I$PWD/build/conftest], [])]) ZFS_LINUX_COMPILE([build/conftest], [$2], [$3], [$4], [], []) ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE dnl # dnl # $1 - global dnl # $2 - source dnl # $3 - run on success (valid .ko generated) dnl # $4 - run on failure (unable to compile) dnl # dnl # When configuring as builtin (--enable-linux-builtin) for kernels dnl # without loadable module support (CONFIG_MODULES=n) only the object dnl # file is created. See ZFS_LINUX_TEST_COMPILE_ALL for details. dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [ AS_IF([test "x$enable_linux_builtin" = "xyes"], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.o], [$3], [$4]) ], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.ko], [$3], [$4]) ]) ]) dnl # dnl # ZFS_CHECK_SYMBOL_EXPORT dnl # dnl # Check if a symbol is exported on not by consulting the symbols dnl # file, or optionally the source code. dnl # AC_DEFUN([ZFS_CHECK_SYMBOL_EXPORT], [ grep -q -E '[[[:space:]]]$1[[[:space:]]]' \ $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null rc=$? if test $rc -ne 0; then export=0 for file in $2; do grep -q -E "EXPORT_SYMBOL.*($1)" \ "$LINUX/$file" 2>/dev/null rc=$? if test $rc -eq 0; then export=1 break; fi done if test $export -eq 0; then : $4 else : $3 fi else : $3 fi ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL dnl # dnl # Like ZFS_LINUX_TRY_COMPILER except ZFS_CHECK_SYMBOL_EXPORT is called dnl # to verify symbol exports, unless --enable-linux-builtin was provided dnl # to configure. dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [ ZFS_LINUX_TRY_COMPILE([$1], [$2], [rc=0], [rc=1]) if test $rc -ne 0; then : $6 else if test "x$enable_linux_builtin" != xyes; then ZFS_CHECK_SYMBOL_EXPORT([$3], [$4], [rc=0], [rc=1]) fi if test $rc -ne 0; then : $6 else : $5 fi fi ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE_HEADER dnl # like ZFS_LINUX_TRY_COMPILE, except the contents conftest.h are dnl # provided via the fifth parameter dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [ AS_IF([test "x$enable_linux_builtin" = "xyes"], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.o], [$3], [$4], [$5]) ], [ ZFS_LINUX_COMPILE_IFELSE( [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])], [test -f build/conftest/conftest.ko], [$3], [$4], [$5]) ]) ]) dnl # dnl # AS_VERSION_COMPARE_LE dnl # like AS_VERSION_COMPARE_LE, but runs $3 if (and only if) $1 <= $2 dnl # AS_VERSION_COMPARE_LE (version-1, version-2, [action-if-less-or-equal], [action-if-greater]) dnl # AC_DEFUN([AS_VERSION_COMPARE_LE], [ AS_VERSION_COMPARE([$1], [$2], [$3], [$3], [$4]) ]) dnl # dnl # ZFS_LINUX_REQUIRE_API dnl # like ZFS_LINUX_TEST_ERROR, except only fails if the kernel is dnl # at least some specified version. dnl # AC_DEFUN([ZFS_LINUX_REQUIRE_API], [ AS_VERSION_COMPARE_LE([$2], [$kernsrcver], [ AC_MSG_ERROR([ *** None of the expected "$1" interfaces were detected. This *** interface is expected for kernels version "$2" and above. *** This may be because your kernel version is newer than what is *** supported, or you are using a patched custom kernel with *** incompatible modifications. Newer kernels may have incompatible *** APIs. *** *** ZFS Version: $ZFS_META_ALIAS *** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX ]) ], [ AC_MSG_RESULT(no) ]) ]) diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c index ed11f8b63fbf..db85b626f12a 100644 --- a/module/os/linux/zfs/zfs_uio.c +++ b/module/os/linux/zfs/zfs_uio.c @@ -1,580 +1,657 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * University Copyright- Copyright (c) 1982, 1986, 1988 * The Regents of the University of California * All Rights Reserved * * University Acknowledgment- Portions of this document are derived from * software developed by the University of California, Berkeley, and its * contributors. */ /* * Copyright (c) 2015 by Chunwei Chen. All rights reserved. */ #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Move "n" bytes at byte address "p"; "rw" indicates the direction * of the move, and the I/O parameters are provided in "uio", which is * update to reflect the data which was moved. Returns 0 on success or * a non-zero errno on failure. */ static int zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { const struct iovec *iov = uio->uio_iov; size_t skip = uio->uio_skip; ulong_t cnt; ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE); while (n && uio->uio_resid) { cnt = MIN(iov->iov_len - skip, n); if (rw == UIO_READ) memcpy(iov->iov_base + skip, p, cnt); else memcpy(p, iov->iov_base + skip, cnt); skip += cnt; if (skip == iov->iov_len) { skip = 0; uio->uio_iov = (++iov); uio->uio_iovcnt--; } uio->uio_skip = skip; uio->uio_resid -= cnt; uio->uio_loffset += cnt; p = (caddr_t)p + cnt; n -= cnt; } return (0); } static int zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { const struct bio_vec *bv = uio->uio_bvec; size_t skip = uio->uio_skip; ulong_t cnt; while (n && uio->uio_resid) { void *paddr; cnt = MIN(bv->bv_len - skip, n); paddr = zfs_kmap_local(bv->bv_page); if (rw == UIO_READ) { /* Copy from buffer 'p' to the bvec data */ memcpy(paddr + bv->bv_offset + skip, p, cnt); } else { /* Copy from bvec data to buffer 'p' */ memcpy(p, paddr + bv->bv_offset + skip, cnt); } zfs_kunmap_local(paddr); skip += cnt; if (skip == bv->bv_len) { skip = 0; uio->uio_bvec = (++bv); uio->uio_iovcnt--; } uio->uio_skip = skip; uio->uio_resid -= cnt; uio->uio_loffset += cnt; p = (caddr_t)p + cnt; n -= cnt; } return (0); } static void zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw, struct bio_vec *bv) { void *paddr; paddr = zfs_kmap_local(bv->bv_page); if (rw == UIO_READ) { /* Copy from buffer 'p' to the bvec data */ memcpy(paddr + bv->bv_offset + skip, p, cnt); } else { /* Copy from bvec data to buffer 'p' */ memcpy(p, paddr + bv->bv_offset + skip, cnt); } zfs_kunmap_local(paddr); } /* * Copy 'n' bytes of data between the buffer p[] and the data represented * by the request in the uio. */ static int zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { struct request *rq = uio->rq; struct bio_vec bv; struct req_iterator iter; size_t this_seg_start; /* logical offset */ size_t this_seg_end; /* logical offset */ size_t skip_in_seg; size_t copy_from_seg; size_t orig_loffset; int copied = 0; /* * Get the original logical offset of this entire request (because * uio->uio_loffset will be modified over time). */ orig_loffset = io_offset(NULL, rq); this_seg_start = orig_loffset; rq_for_each_segment(bv, rq, iter) { /* * Lookup what the logical offset of the last byte of this * segment is. */ this_seg_end = this_seg_start + bv.bv_len - 1; /* * We only need to operate on segments that have data we're * copying. */ if (uio->uio_loffset >= this_seg_start && uio->uio_loffset <= this_seg_end) { /* * Some, or all, of the data in this segment needs to be * copied. */ /* * We may be not be copying from the first byte in the * segment. Figure out how many bytes to skip copying * from the beginning of this segment. */ skip_in_seg = uio->uio_loffset - this_seg_start; /* * Calculate the total number of bytes from this * segment that we will be copying. */ copy_from_seg = MIN(bv.bv_len - skip_in_seg, n); /* Copy the bytes */ zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv); p = ((char *)p) + copy_from_seg; n -= copy_from_seg; uio->uio_resid -= copy_from_seg; uio->uio_loffset += copy_from_seg; copied = 1; /* We copied some data */ } this_seg_start = this_seg_end + 1; } if (!copied) { /* Didn't copy anything */ uio->uio_resid = 0; } return (0); } static int zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { if (uio->rq != NULL) return (zfs_uiomove_bvec_rq(p, n, rw, uio)); return (zfs_uiomove_bvec_impl(p, n, rw, uio)); } static int zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, boolean_t revert) { size_t cnt = MIN(n, uio->uio_resid); if (uio->uio_skip) iov_iter_advance(uio->uio_iter, uio->uio_skip); if (rw == UIO_READ) cnt = copy_to_iter(p, cnt, uio->uio_iter); else cnt = copy_from_iter(p, cnt, uio->uio_iter); /* * When operating on a full pipe no bytes are processed. * In which case return EFAULT which is converted to EAGAIN * by the kernel's generic_file_splice_read() function. */ if (cnt == 0) return (EFAULT); /* * Revert advancing the uio_iter. This is set by zfs_uiocopy() * to avoid consuming the uio and its iov_iter structure. */ if (revert) iov_iter_revert(uio->uio_iter, cnt); uio->uio_resid -= cnt; uio->uio_loffset += cnt; return (0); } int zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { if (uio->uio_segflg == UIO_BVEC) return (zfs_uiomove_bvec(p, n, rw, uio)); else if (uio->uio_segflg == UIO_ITER) return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE)); else return (zfs_uiomove_iov(p, n, rw, uio)); } EXPORT_SYMBOL(zfs_uiomove); /* * Fault in the pages of the first n bytes specified by the uio structure. * 1 byte in each page is touched and the uio struct is unmodified. Any * error will terminate the process as this is only a best attempt to get * the pages resident. */ int zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio) { if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC || (uio->uio_extflg & UIO_DIRECT)) { /* * There's never a need to fault in kernel pages or Direct I/O * write pages. Direct I/O write pages have been pinned in so * there is never a time for these pages a fault will occur. */ return (0); } else { ASSERT3S(uio->uio_segflg, ==, UIO_ITER); /* * At least a Linux 4.18 kernel, iov_iter_fault_in_readable() * can be relied on to fault in user pages when referenced. */ if (iov_iter_fault_in_readable(uio->uio_iter, n)) return (EFAULT); } return (0); } EXPORT_SYMBOL(zfs_uio_prefaultpages); /* * The same as zfs_uiomove() but doesn't modify uio structure. * return in cbytes how many bytes were copied. */ int zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes) { zfs_uio_t uio_copy; int ret; memcpy(&uio_copy, uio, sizeof (zfs_uio_t)); if (uio->uio_segflg == UIO_BVEC) ret = zfs_uiomove_bvec(p, n, rw, &uio_copy); else if (uio->uio_segflg == UIO_ITER) ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE); else ret = zfs_uiomove_iov(p, n, rw, &uio_copy); *cbytes = uio->uio_resid - uio_copy.uio_resid; return (ret); } EXPORT_SYMBOL(zfs_uiocopy); /* * Drop the next n chars out of *uio. */ void zfs_uioskip(zfs_uio_t *uio, size_t n) { if (n > uio->uio_resid) return; /* * When using a uio with a struct request, we simply * use uio_loffset as a pointer to the next logical byte to * copy in the request. We don't have to do any fancy * accounting with uio_bvec/uio_iovcnt since we don't use * them. */ if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) { uio->uio_skip += n; while (uio->uio_iovcnt && uio->uio_skip >= uio->uio_bvec->bv_len) { uio->uio_skip -= uio->uio_bvec->bv_len; uio->uio_bvec++; uio->uio_iovcnt--; } } else if (uio->uio_segflg == UIO_ITER) { iov_iter_advance(uio->uio_iter, n); } else { ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE); uio->uio_skip += n; while (uio->uio_iovcnt && uio->uio_skip >= uio->uio_iov->iov_len) { uio->uio_skip -= uio->uio_iov->iov_len; uio->uio_iov++; uio->uio_iovcnt--; } } uio->uio_loffset += n; uio->uio_resid -= n; } EXPORT_SYMBOL(zfs_uioskip); /* * Check if the uio is page-aligned in memory. */ boolean_t zfs_uio_page_aligned(zfs_uio_t *uio) { boolean_t aligned = B_TRUE; if (uio->uio_segflg == UIO_SYSSPACE) { const struct iovec *iov = uio->uio_iov; size_t skip = uio->uio_skip; for (int i = uio->uio_iovcnt; i > 0; iov++, i--) { uintptr_t addr = (uintptr_t)(iov->iov_base + skip); size_t size = iov->iov_len - skip; if ((addr & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { aligned = B_FALSE; break; } skip = 0; } } else if (uio->uio_segflg == UIO_ITER) { unsigned long alignment = iov_iter_alignment(uio->uio_iter); aligned = IS_P2ALIGNED(alignment, PAGE_SIZE); } else { /* Currently not supported */ aligned = B_FALSE; } return (aligned); } #if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64) #define ZFS_MARKEED_PAGE 0x0 #define IS_ZFS_MARKED_PAGE(_p) 0 #define zfs_mark_page(_p) #define zfs_unmark_page(_p) #define IS_ZERO_PAGE(_p) 0 #else /* * Mark pages to know if they were allocated to replace ZERO_PAGE() for * Direct I/O writes. */ #define ZFS_MARKED_PAGE 0x5a465350414745 /* ASCII: ZFSPAGE */ #define IS_ZFS_MARKED_PAGE(_p) \ (page_private(_p) == (unsigned long)ZFS_MARKED_PAGE) #define IS_ZERO_PAGE(_p) ((_p) == ZERO_PAGE(0)) static inline void zfs_mark_page(struct page *page) { ASSERT3P(page, !=, NULL); get_page(page); SetPagePrivate(page); set_page_private(page, ZFS_MARKED_PAGE); } static inline void zfs_unmark_page(struct page *page) { ASSERT3P(page, !=, NULL); set_page_private(page, 0UL); ClearPagePrivate(page); put_page(page); } #endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */ +#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED) static void zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio) { ASSERT3P(uio->uio_dio.pages, !=, NULL); for (long i = 0; i < uio->uio_dio.npages; i++) { struct page *p = uio->uio_dio.pages[i]; lock_page(p); if (IS_ZERO_PAGE(p)) { /* * If the user page points the kernels ZERO_PAGE() a * new zero filled page will just be allocated so the * contents of the page can not be changed by the user * while a Direct I/O write is taking place. */ gfp_t gfp_zero_page = __GFP_NOWARN | GFP_NOIO | __GFP_ZERO | GFP_KERNEL; ASSERT0(IS_ZFS_MARKED_PAGE(p)); unlock_page(p); put_page(p); uio->uio_dio.pages[i] = __page_cache_alloc(gfp_zero_page); zfs_mark_page(uio->uio_dio.pages[i]); } else { unlock_page(p); } } } +#endif void zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) { ASSERT(uio->uio_extflg & UIO_DIRECT); ASSERT3P(uio->uio_dio.pages, !=, NULL); +#if defined(HAVE_PIN_USER_PAGES_UNLOCKED) + unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages); +#else for (long i = 0; i < uio->uio_dio.npages; i++) { struct page *p = uio->uio_dio.pages[i]; if (IS_ZFS_MARKED_PAGE(p)) { zfs_unmark_page(p); __free_page(p); continue; } put_page(p); } - +#endif vmem_free(uio->uio_dio.pages, uio->uio_dio.npages * sizeof (struct page *)); } +#if defined(HAVE_PIN_USER_PAGES_UNLOCKED) static int -zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) +zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) { + long res; size_t skip = uio->uio_skip; + size_t len = uio->uio_resid - skip; + unsigned int gup_flags = 0; + unsigned long addr; + unsigned long nr_pages; + + /* + * Kernel 6.2 introduced the FOLL_PCI_P2PDMA flag. This flag could + * possibly be used here in the future to allow for P2P operations with + * user pages. + */ + if (rw == UIO_READ) + gup_flags = FOLL_WRITE; + + if (len == 0) + return (0); + +#if defined(HAVE_ITER_IS_UBUF) + if (iter_is_ubuf(uio->uio_iter)) { + nr_pages = DIV_ROUND_UP(len, PAGE_SIZE); + addr = (unsigned long)uio->uio_iter->ubuf + skip; + res = pin_user_pages_unlocked(addr, nr_pages, + &uio->uio_dio.pages[uio->uio_dio.npages], gup_flags); + if (res < 0) { + return (SET_ERROR(-res)); + } else if (len != (res * PAGE_SIZE)) { + uio->uio_dio.npages += res; + return (SET_ERROR(EFAULT)); + } + uio->uio_dio.npages += res; + return (0); + } +#endif + const struct iovec *iovp = zfs_uio_iter_iov(uio->uio_iter); + for (int i = 0; i < uio->uio_iovcnt; i++) { + size_t amt = iovp->iov_len - skip; + if (amt == 0) { + iovp++; + skip = 0; + continue; + } + + addr = (unsigned long)iovp->iov_base + skip; + nr_pages = DIV_ROUND_UP(amt, PAGE_SIZE); + res = pin_user_pages_unlocked(addr, nr_pages, + &uio->uio_dio.pages[uio->uio_dio.npages], gup_flags); + if (res < 0) { + return (SET_ERROR(-res)); + } else if (amt != (res * PAGE_SIZE)) { + uio->uio_dio.npages += res; + return (SET_ERROR(EFAULT)); + } + + len -= amt; + uio->uio_dio.npages += res; + skip = 0; + iovp++; + }; + + ASSERT0(len); + + return (0); +} + +#else +static int +zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + size_t start; size_t wanted = uio->uio_resid - uio->uio_skip; ssize_t rollback = 0; ssize_t cnt; unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE); while (wanted) { -#if defined(HAVE_IOV_ITER_GET_PAGES2) - cnt = iov_iter_get_pages2(uio->uio_iter, - &uio->uio_dio.pages[uio->uio_dio.npages], - wanted, maxpages, &skip); -#else cnt = iov_iter_get_pages(uio->uio_iter, &uio->uio_dio.pages[uio->uio_dio.npages], - wanted, maxpages, &skip); -#endif + wanted, maxpages, &start); if (cnt < 0) { iov_iter_revert(uio->uio_iter, rollback); return (SET_ERROR(-cnt)); } + /* + * All Direct I/O operations must be page aligned. + */ + ASSERT(IS_P2ALIGNED(start, PAGE_SIZE)); uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE); rollback += cnt; wanted -= cnt; - skip = 0; -#if !defined(HAVE_IOV_ITER_GET_PAGES2) - /* - * iov_iter_get_pages2() advances the iov_iter on success. - */ iov_iter_advance(uio->uio_iter, cnt); -#endif } ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip); iov_iter_revert(uio->uio_iter, rollback); return (0); } +#endif /* HAVE_PIN_USER_PAGES_UNLOCKED */ /* * This function pins user pages. In the event that the user pages were not * successfully pinned an error value is returned. * * On success, 0 is returned. */ int zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw) { int error = 0; long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE); size_t size = npages * sizeof (struct page *); if (uio->uio_segflg == UIO_ITER) { uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP); +#if defined(HAVE_PIN_USER_PAGES_UNLOCKED) + error = zfs_uio_pin_user_pages(uio, rw); +#else error = zfs_uio_get_dio_pages_iov_iter(uio, rw); +#endif } else { return (SET_ERROR(EOPNOTSUPP)); } ASSERT3S(uio->uio_dio.npages, >=, 0); if (error) { +#if defined(HAVE_PIN_USER_PAGES_UNLOCKED) + unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages); +#else for (long i = 0; i < uio->uio_dio.npages; i++) put_page(uio->uio_dio.pages[i]); +#endif vmem_free(uio->uio_dio.pages, size); return (error); } else { ASSERT3S(uio->uio_dio.npages, ==, npages); } - if (rw == UIO_WRITE) { +#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED) + if (rw == UIO_WRITE) zfs_uio_dio_check_for_zero_page(uio); - } +#endif uio->uio_extflg |= UIO_DIRECT; return (0); } #endif /* _KERNEL */