diff --git a/config/kernel-tmpfile.m4 b/config/kernel-tmpfile.m4 new file mode 100644 index 000000000000..5aad90450e8b --- /dev/null +++ b/config/kernel-tmpfile.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # 3.11 API change +dnl # Add support for i_op->tmpfile +dnl # +AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ + AC_MSG_CHECKING([whether i_op->tmpfile() exists]) + ZFS_LINUX_TRY_COMPILE([ + #include + int tmpfile(struct inode *inode, struct dentry *dentry, + umode_t mode) { return 0; } + static struct inode_operations + iops __attribute__ ((unused)) = { + .tmpfile = tmpfile, + }; + ],[ + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TMPFILE, 1, + [i_op->tmpfile() exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 44a20f2135c6..15e2ef351cb2 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -1,714 +1,715 @@ dnl # dnl # Default ZFS kernel configuration dnl # AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL ZFS_AC_SPL ZFS_AC_TEST_MODULE ZFS_AC_KERNEL_CONFIG ZFS_AC_KERNEL_DECLARE_EVENT_CLASS ZFS_AC_KERNEL_CURRENT_BIO_TAIL ZFS_AC_KERNEL_SUPER_USER_NS ZFS_AC_KERNEL_SUBMIT_BIO ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID ZFS_AC_KERNEL_TYPE_FMODE_T ZFS_AC_KERNEL_3ARG_BLKDEV_GET ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE ZFS_AC_KERNEL_LOOKUP_BDEV ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE ZFS_AC_KERNEL_BIO_BVEC_ITER ZFS_AC_KERNEL_BIO_FAILFAST_DTD ZFS_AC_KERNEL_REQ_FAILFAST_MASK ZFS_AC_KERNEL_REQ_OP_DISCARD ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE ZFS_AC_KERNEL_REQ_OP_FLUSH ZFS_AC_KERNEL_BIO_BI_OPF ZFS_AC_KERNEL_BIO_END_IO_T_ARGS ZFS_AC_KERNEL_BIO_RW_BARRIER ZFS_AC_KERNEL_BIO_RW_DISCARD ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BIO_RW_UNPLUG ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BLK_PLUG ZFS_AC_KERNEL_GET_DISK_RO ZFS_AC_KERNEL_GET_GENDISK ZFS_AC_KERNEL_DISCARD_GRANULARITY ZFS_AC_KERNEL_CONST_XATTR_HANDLER ZFS_AC_KERNEL_XATTR_HANDLER_NAME ZFS_AC_KERNEL_XATTR_HANDLER_GET ZFS_AC_KERNEL_XATTR_HANDLER_SET ZFS_AC_KERNEL_XATTR_HANDLER_LIST ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS ZFS_AC_KERNEL_POSIX_ACL_RELEASE ZFS_AC_KERNEL_POSIX_ACL_CHMOD ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION ZFS_AC_KERNEL_INODE_OPERATIONS_PERMISSION_WITH_NAMEIDATA ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL ZFS_AC_KERNEL_INODE_OPERATIONS_CHECK_ACL_WITH_FLAGS ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL ZFS_AC_KERNE_GET_ACL_HANDLE_CACHE ZFS_AC_KERNEL_SHOW_OPTIONS ZFS_AC_KERNEL_FILE_INODE ZFS_AC_KERNEL_FILE_DENTRY ZFS_AC_KERNEL_FSYNC ZFS_AC_KERNEL_EVICT_INODE ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS ZFS_AC_KERNEL_NR_CACHED_OBJECTS ZFS_AC_KERNEL_FREE_CACHED_OBJECTS ZFS_AC_KERNEL_FALLOCATE ZFS_AC_KERNEL_MKDIR_UMODE_T ZFS_AC_KERNEL_LOOKUP_NAMEIDATA ZFS_AC_KERNEL_CREATE_NAMEIDATA ZFS_AC_KERNEL_GET_LINK ZFS_AC_KERNEL_PUT_LINK + ZFS_AC_KERNEL_TMPFILE ZFS_AC_KERNEL_TRUNCATE_RANGE ZFS_AC_KERNEL_AUTOMOUNT ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE ZFS_AC_KERNEL_COMMIT_METADATA ZFS_AC_KERNEL_CLEAR_INODE ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED ZFS_AC_KERNEL_D_MAKE_ROOT ZFS_AC_KERNEL_D_OBTAIN_ALIAS ZFS_AC_KERNEL_D_PRUNE_ALIASES ZFS_AC_KERNEL_D_SET_D_OP ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_6ARGS_SECURITY_INODE_INIT_SECURITY ZFS_AC_KERNEL_CALLBACK_SECURITY_INODE_INIT_SECURITY ZFS_AC_KERNEL_MOUNT_NODEV ZFS_AC_KERNEL_SHRINK ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID ZFS_AC_KERNEL_S_INSTANCES_LIST_HEAD ZFS_AC_KERNEL_S_D_OP ZFS_AC_KERNEL_BDI_SETUP_AND_REGISTER ZFS_AC_KERNEL_SET_NLINK ZFS_AC_KERNEL_ELEVATOR_CHANGE ZFS_AC_KERNEL_5ARG_SGET ZFS_AC_KERNEL_LSEEK_EXECUTE ZFS_AC_KERNEL_VFS_ITERATE ZFS_AC_KERNEL_VFS_RW_ITERATE ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_MAKE_REQUEST_FN ZFS_AC_KERNEL_GENERIC_IO_ACCT ZFS_AC_KERNEL_FPU ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST ZFS_AC_KERNEL_RENAME_WANTS_FLAGS ZFS_AC_KERNEL_HAVE_GENERIC_SETXATTR AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ" ]) AC_SUBST(KERNELMAKE_PARAMS) dnl # -Wall -fno-strict-aliasing -Wstrict-prototypes and other dnl # compiler options are added by the kernel build system. KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_UNUSED_BUT_SET_VARIABLE" KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_BOOL_COMPARE" KERNELCPPFLAGS="$KERNELCPPFLAGS -DHAVE_SPL -D_KERNEL" KERNELCPPFLAGS="$KERNELCPPFLAGS -DTEXT_DOMAIN=\\\"zfs-linux-kernel\\\"" AC_SUBST(KERNELCPPFLAGS) ]) dnl # dnl # Detect name used for Module.symvers file in kernel dnl # AC_DEFUN([ZFS_AC_MODULE_SYMVERS], [ modpost=$LINUX/scripts/Makefile.modpost AC_MSG_CHECKING([kernel file name for module symbols]) AS_IF([test "x$enable_linux_builtin" != xyes -a -f "$modpost"], [ AS_IF([grep -q Modules.symvers $modpost], [ LINUX_SYMBOLS=Modules.symvers ], [ LINUX_SYMBOLS=Module.symvers ]) AS_IF([test ! -f "$LINUX_OBJ/$LINUX_SYMBOLS"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution *** is installed. If you are building with a custom kernel, make sure the *** kernel is configured, built, and the '--with-linux=PATH' configure *** option refers to the location of the kernel source.]) ]) ], [ LINUX_SYMBOLS=NONE ]) AC_MSG_RESULT($LINUX_SYMBOLS) AC_SUBST(LINUX_SYMBOLS) ]) dnl # dnl # Detect the kernel to be built against dnl # AC_DEFUN([ZFS_AC_KERNEL], [ AC_ARG_WITH([linux], AS_HELP_STRING([--with-linux=PATH], [Path to kernel source]), [kernelsrc="$withval"]) AC_ARG_WITH(linux-obj, AS_HELP_STRING([--with-linux-obj=PATH], [Path to kernel build objects]), [kernelbuild="$withval"]) AC_MSG_CHECKING([kernel source directory]) AS_IF([test -z "$kernelsrc"], [ AS_IF([test -e "/lib/modules/$(uname -r)/source"], [ headersdir="/lib/modules/$(uname -r)/source" sourcelink=$(readlink -f "$headersdir") ], [test -e "/lib/modules/$(uname -r)/build"], [ headersdir="/lib/modules/$(uname -r)/build" sourcelink=$(readlink -f "$headersdir") ], [ sourcelink=$(ls -1d /usr/src/kernels/* \ /usr/src/linux-* \ 2>/dev/null | grep -v obj | tail -1) ]) AS_IF([test -n "$sourcelink" && test -e ${sourcelink}], [ kernelsrc=`readlink -f ${sourcelink}` ], [ kernelsrc="[Not found]" ]) ], [ AS_IF([test "$kernelsrc" = "NONE"], [ kernsrcver=NONE ]) ]) AC_MSG_RESULT([$kernelsrc]) AS_IF([test ! -d "$kernelsrc"], [ AC_MSG_ERROR([ *** Please make sure the kernel devel package for your distribution *** is installed and then try again. If that fails, you can specify the *** location of the kernel source with the '--with-linux=PATH' option.]) ]) AC_MSG_CHECKING([kernel build directory]) AS_IF([test -z "$kernelbuild"], [ AS_IF([test -e "/lib/modules/$(uname -r)/build"], [ kernelbuild=`readlink -f /lib/modules/$(uname -r)/build` ], [test -d ${kernelsrc}-obj/${target_cpu}/${target_cpu}], [ kernelbuild=${kernelsrc}-obj/${target_cpu}/${target_cpu} ], [test -d ${kernelsrc}-obj/${target_cpu}/default], [ kernelbuild=${kernelsrc}-obj/${target_cpu}/default ], [test -d `dirname ${kernelsrc}`/build-${target_cpu}], [ kernelbuild=`dirname ${kernelsrc}`/build-${target_cpu} ], [ kernelbuild=${kernelsrc} ]) ]) AC_MSG_RESULT([$kernelbuild]) AC_MSG_CHECKING([kernel source version]) utsrelease1=$kernelbuild/include/linux/version.h utsrelease2=$kernelbuild/include/linux/utsrelease.h utsrelease3=$kernelbuild/include/generated/utsrelease.h AS_IF([test -r $utsrelease1 && fgrep -q UTS_RELEASE $utsrelease1], [ utsrelease=linux/version.h ], [test -r $utsrelease2 && fgrep -q UTS_RELEASE $utsrelease2], [ utsrelease=linux/utsrelease.h ], [test -r $utsrelease3 && fgrep -q UTS_RELEASE $utsrelease3], [ utsrelease=generated/utsrelease.h ]) AS_IF([test "$utsrelease"], [ kernsrcver=`(echo "#include <$utsrelease>"; echo "kernsrcver=UTS_RELEASE") | cpp -I $kernelbuild/include | grep "^kernsrcver=" | cut -d \" -f 2` AS_IF([test -z "$kernsrcver"], [ AC_MSG_RESULT([Not found]) AC_MSG_ERROR([*** Cannot determine kernel version.]) ]) ], [ AC_MSG_RESULT([Not found]) if test "x$enable_linux_builtin" != xyes; then AC_MSG_ERROR([*** Cannot find UTS_RELEASE definition.]) else AC_MSG_ERROR([ *** Cannot find UTS_RELEASE definition. *** Please run 'make prepare' inside the kernel source tree.]) fi ]) AC_MSG_RESULT([$kernsrcver]) LINUX=${kernelsrc} LINUX_OBJ=${kernelbuild} LINUX_VERSION=${kernsrcver} AC_SUBST(LINUX) AC_SUBST(LINUX_OBJ) AC_SUBST(LINUX_VERSION) ZFS_AC_MODULE_SYMVERS ]) dnl # dnl # Detect the SPL module to be built against dnl # AC_DEFUN([ZFS_AC_SPL], [ AC_ARG_WITH([spl], AS_HELP_STRING([--with-spl=PATH], [Path to spl source]), AS_IF([test "$withval" = "yes"], AC_MSG_ERROR([--with-spl=PATH requires a PATH]), [splsrc="$withval"])) AC_ARG_WITH([spl-obj], AS_HELP_STRING([--with-spl-obj=PATH], [Path to spl build objects]), [splbuild="$withval"]) AC_ARG_WITH([spl-timeout], AS_HELP_STRING([--with-spl-timeout=SECS], [Wait SECS for SPL header and symver file @<:@default=0@:>@]), [timeout="$withval"], [timeout=0]) dnl # dnl # The existence of spl.release.in is used to identify a valid dnl # source directory. In order of preference: dnl # splsrc0="/var/lib/dkms/spl/${VERSION}/build" splsrc1="/usr/local/src/spl-${VERSION}/${LINUX_VERSION}" splsrc2="/usr/local/src/spl-${VERSION}" splsrc3="/usr/src/spl-${VERSION}/${LINUX_VERSION}" splsrc4="/usr/src/spl-${VERSION}" splsrc5="../spl/" splsrc6="$LINUX" AC_MSG_CHECKING([spl source directory]) AS_IF([test -z "${splsrc}"], [ [all_spl_sources=" ${splsrc0} ${splsrc1} ${splsrc2} ${splsrc3} ${splsrc4} ${splsrc5} ${splsrc6}"], AS_IF([ test -e "${splsrc0}/spl.release.in"], [ splsrc=${splsrc0} ], [ test -e "${splsrc1}/spl.release.in"], [ splsrc=${splsrc1} ], [ test -e "${splsrc2}/spl.release.in"], [ splsrc=${splsrc2} ], [ test -e "${splsrc3}/spl.release.in"], [ splsrc=$(readlink -f "${splsrc3}") ], [ test -e "${splsrc4}/spl.release.in" ], [ splsrc=${splsrc4} ], [ test -e "${splsrc5}/spl.release.in"], [ splsrc=$(readlink -f "${splsrc5}") ], [ test -e "${splsrc6}/spl.release.in" ], [ splsrc=${splsrc6} ], [ splsrc="[Not found]" ]) ], [ [all_spl_sources="$withval"], AS_IF([test "$splsrc" = "NONE"], [ splbuild=NONE splsrcver=NONE ]) ]) AC_MSG_RESULT([$splsrc]) AS_IF([ test ! -e "$splsrc/spl.release.in"], [ AC_MSG_ERROR([ *** Please make sure the kmod spl devel package for your distribution *** is installed then try again. If that fails you can specify the *** location of the spl source with the '--with-spl=PATH' option. *** The spl version must match the version of ZFS you are building, *** ${VERSION}. Failed to find spl.release.in in the following: $all_spl_sources]) ]) dnl # dnl # The existence of the spl_config.h is used to identify a valid dnl # spl object directory. In many cases the object and source dnl # directory are the same, however the objects may also reside dnl # is a subdirectory named after the kernel version. dnl # dnl # This file is supposed to be available after DKMS finishes dnl # building the SPL kernel module for the target kernel. The dnl # '--with-spl-timeout' option can be passed to pause here, dnl # waiting for the file to appear from a concurrently building dnl # SPL package. dnl # AC_MSG_CHECKING([spl build directory]) all_spl_config_locs="${splsrc}/${LINUX_VERSION} ${splsrc}" while true; do AS_IF([test -z "$splbuild"], [ AS_IF([ test -e "${splsrc}/${LINUX_VERSION}/spl_config.h" ], [ splbuild="${splsrc}/${LINUX_VERSION}" ], [ test -e "${splsrc}/spl_config.h" ], [ splbuild="${splsrc}" ], [ find -L "${splsrc}" -name spl_config.h 2> /dev/null | grep -wq spl_config.h ], [ splbuild=$(find -L "${splsrc}" -name spl_config.h | sed 's,/spl_config.h,,') ], [ splbuild="[Not found]" ]) ]) AS_IF([test -e "$splbuild/spl_config.h" -o $timeout -le 0], [ break; ], [ sleep 1 timeout=$((timeout-1)) ]) done AC_MSG_RESULT([$splbuild]) AS_IF([ ! test -e "$splbuild/spl_config.h"], [ AC_MSG_ERROR([ *** Please make sure the kmod spl devel package for your *** distribution is installed then try again. If that fails you *** can specify the location of the spl objects with the *** '--with-spl-obj=PATH' option. Failed to find spl_config.h in *** any of the following: $all_spl_config_locs]) ]) AC_MSG_CHECKING([spl source version]) AS_IF([test -r $splbuild/spl_config.h && fgrep -q SPL_META_VERSION $splbuild/spl_config.h], [ splsrcver=`(echo "#include "; echo "splsrcver=SPL_META_VERSION-SPL_META_RELEASE") | cpp -I $splbuild | grep "^splsrcver=" | tr -d \" | cut -d= -f2` ]) AS_IF([test -z "$splsrcver"], [ AC_MSG_RESULT([Not found]) AC_MSG_ERROR([ *** Cannot determine the version of the spl source. *** Please prepare the spl source before running this script]) ]) AC_MSG_RESULT([$splsrcver]) SPL=${splsrc} SPL_OBJ=${splbuild} SPL_VERSION=${splsrcver} AC_SUBST(SPL) AC_SUBST(SPL_OBJ) AC_SUBST(SPL_VERSION) dnl # dnl # Detect the name used for the SPL Module.symvers file. If one dnl # does not exist this is likely because the SPL has been configured dnl # but not built. The '--with-spl-timeout' option can be passed dnl # to pause here, waiting for the file to appear from a concurrently dnl # building SPL package. If the file does not appear in time, a good dnl # guess is made as to what this file will be named based on what it dnl # is named in the kernel build products. This file will first be dnl # used at link time so if the guess is wrong the build will fail dnl # then. This unfortunately means the ZFS package does not contain a dnl # reliable mechanism to detect symbols exported by the SPL at dnl # configure time. dnl # AC_MSG_CHECKING([spl file name for module symbols]) SPL_SYMBOLS=NONE while true; do AS_IF([test -r $SPL_OBJ/Module.symvers], [ SPL_SYMBOLS=Module.symvers ], [test -r $SPL_OBJ/Modules.symvers], [ SPL_SYMBOLS=Modules.symvers ], [test -r $SPL_OBJ/module/Module.symvers], [ SPL_SYMBOLS=Module.symvers ], [test -r $SPL_OBJ/module/Modules.symvers], [ SPL_SYMBOLS=Modules.symvers ]) AS_IF([test $SPL_SYMBOLS != NONE -o $timeout -le 0], [ break; ], [ sleep 1 timeout=$((timeout-1)) ]) done AS_IF([test "$SPL_SYMBOLS" = NONE], [ SPL_SYMBOLS=$LINUX_SYMBOLS ]) AC_MSG_RESULT([$SPL_SYMBOLS]) AC_SUBST(SPL_SYMBOLS) ]) dnl # dnl # Basic toolchain sanity check. dnl # AC_DEFUN([ZFS_AC_TEST_MODULE], [ AC_MSG_CHECKING([whether modules can be built]) ZFS_LINUX_TRY_COMPILE([],[],[ AC_MSG_RESULT([yes]) ],[ AC_MSG_RESULT([no]) if test "x$enable_linux_builtin" != xyes; then AC_MSG_ERROR([*** Unable to build an empty module.]) else AC_MSG_ERROR([ *** Unable to build an empty module. *** Please run 'make scripts' inside the kernel source tree.]) fi ]) ]) dnl # dnl # Certain kernel build options are not supported. These must be dnl # detected at configure time and cause a build failure. Otherwise dnl # modules may be successfully built that behave incorrectly. dnl # AC_DEFUN([ZFS_AC_KERNEL_CONFIG], [ AS_IF([test "x$cross_compiling" != xyes], [ AC_RUN_IFELSE([ AC_LANG_PROGRAM([ #include "$LINUX/include/linux/license.h" ], [ return !license_is_gpl_compatible("$ZFS_META_LICENSE"); ]) ], [ AC_DEFINE([ZFS_IS_GPL_COMPATIBLE], [1], [Define to 1 if GPL-only symbols can be used]) ], [ ]) ]) ZFS_AC_KERNEL_CONFIG_THREAD_SIZE ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC ]) dnl # dnl # Check configured THREAD_SIZE dnl # dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64 dnl # the default thread stack size was increased to 16K from 8K. Therefore, dnl # on newer kernels and some architectures stack usage optimizations can be dnl # conditionally applied to improve performance without negatively impacting dnl # stability. dnl # AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [ AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks]) ZFS_LINUX_TRY_COMPILE([ #include ],[ #if (THREAD_SIZE < 16384) #error "THREAD_SIZE is less than 16K" #endif ],[ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks]) ],[ AC_MSG_RESULT([no]) ]) ]) dnl # dnl # Check CONFIG_DEBUG_LOCK_ALLOC dnl # dnl # This is typically only set for debug kernels because it comes with dnl # a performance penalty. However, when it is set it maps the non-GPL dnl # symbol mutex_lock() to the GPL-only mutex_lock_nested() symbol. dnl # This will cause a failure at link time which we'd rather know about dnl # at compile time. dnl # dnl # Since we plan to pursue making mutex_lock_nested() a non-GPL symbol dnl # with the upstream community we add a check to detect this case. dnl # AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [ ZFS_LINUX_CONFIG([DEBUG_LOCK_ALLOC], [ AC_MSG_CHECKING([whether mutex_lock() is GPL-only]) tmp_flags="$EXTRA_KCFLAGS" ZFS_LINUX_TRY_COMPILE([ #include #include MODULE_LICENSE("$ZFS_META_LICENSE"); ],[ struct mutex lock; mutex_init(&lock); mutex_lock(&lock); mutex_unlock(&lock); ],[ AC_MSG_RESULT(no) ],[ AC_MSG_RESULT(yes) AC_MSG_ERROR([ *** Kernel built with CONFIG_DEBUG_LOCK_ALLOC which is incompatible *** with the CDDL license and will prevent the module linking stage *** from succeeding. You must rebuild your kernel without this *** option enabled.]) ]) EXTRA_KCFLAGS="$tmp_flags" ], []) ]) dnl # dnl # ZFS_LINUX_CONFTEST_H dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_H], [ cat - <<_ACEOF >conftest.h $1 _ACEOF ]) dnl # dnl # ZFS_LINUX_CONFTEST_C dnl # AC_DEFUN([ZFS_LINUX_CONFTEST_C], [ cat confdefs.h - <<_ACEOF >conftest.c $1 _ACEOF ]) dnl # dnl # ZFS_LANG_PROGRAM(C)([PROLOGUE], [BODY]) dnl # m4_define([ZFS_LANG_PROGRAM], [ $1 int main (void) { dnl Do *not* indent the following line: there may be CPP directives. dnl Don't move the `;' right after for the same reason. $2 ; return 0; } ]) dnl # dnl # ZFS_LINUX_COMPILE_IFELSE / like AC_COMPILE_IFELSE dnl # AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [ m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1])]) m4_ifvaln([$6], [ZFS_LINUX_CONFTEST_H([$6])], [ZFS_LINUX_CONFTEST_H([])]) rm -Rf build && mkdir -p build && touch build/conftest.mod.c echo "obj-m := conftest.o" >build/Makefile modpost_flag='' test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage AS_IF( [AC_TRY_COMMAND(cp conftest.c conftest.h build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])], [$4], [_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])] ) rm -Rf build ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE like AC_TRY_COMPILE dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [ZFS_LINUX_COMPILE_IFELSE( [AC_LANG_SOURCE([ZFS_LANG_PROGRAM([[$1]], [[$2]])])], [modules], [test -s build/conftest.o], [$3], [$4]) ]) dnl # dnl # ZFS_LINUX_CONFIG dnl # AC_DEFUN([ZFS_LINUX_CONFIG], [AC_MSG_CHECKING([whether kernel was built with CONFIG_$1]) ZFS_LINUX_TRY_COMPILE([ #include ],[ #ifndef CONFIG_$1 #error CONFIG_$1 not #defined #endif ],[ AC_MSG_RESULT([yes]) $2 ],[ AC_MSG_RESULT([no]) $3 ]) ]) dnl # dnl # ZFS_CHECK_SYMBOL_EXPORT dnl # check symbol exported or not dnl # AC_DEFUN([ZFS_CHECK_SYMBOL_EXPORT], [ grep -q -E '[[[:space:]]]$1[[[:space:]]]' \ $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null rc=$? if test $rc -ne 0; then export=0 for file in $2; do grep -q -E "EXPORT_SYMBOL.*($1)" \ "$LINUX/$file" 2>/dev/null rc=$? if test $rc -eq 0; then export=1 break; fi done if test $export -eq 0; then : $4 else : $3 fi else : $3 fi ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL dnl # like ZFS_LINUX_TRY_COMPILE, except ZFS_CHECK_SYMBOL_EXPORT dnl # is called if not compiling for builtin dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [ ZFS_LINUX_TRY_COMPILE([$1], [$2], [rc=0], [rc=1]) if test $rc -ne 0; then : $6 else if test "x$enable_linux_builtin" != xyes; then ZFS_CHECK_SYMBOL_EXPORT([$3], [$4], [rc=0], [rc=1]) fi if test $rc -ne 0; then : $6 else : $5 fi fi ]) dnl # dnl # ZFS_LINUX_TRY_COMPILE_HEADER dnl # like ZFS_LINUX_TRY_COMPILE, except the contents conftest.h are dnl # provided via the fifth parameter dnl # AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [ZFS_LINUX_COMPILE_IFELSE( [AC_LANG_SOURCE([ZFS_LANG_PROGRAM([[$1]], [[$2]])])], [modules], [test -s build/conftest.o], [$3], [$4], [$5]) ]) diff --git a/configure.ac b/configure.ac index 39bd54c17bcd..dffd63cf28b0 100644 --- a/configure.ac +++ b/configure.ac @@ -1,310 +1,311 @@ /* * This file is part of the ZFS Linux port. * * Copyright (c) 2009 Lawrence Livermore National Security, LLC. * Produced at Lawrence Livermore National Laboratory * Written by: * Brian Behlendorf , * Herb Wartens , * Jim Garlick * LLNL-CODE-403049 * * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ AC_INIT(m4_esyscmd(grep Name META | cut -d ':' -f 2 | tr -d ' \n'), m4_esyscmd(grep Version META | cut -d ':' -f 2 | tr -d ' \n')) AC_LANG(C) ZFS_AC_META AC_CONFIG_AUX_DIR([config]) AC_CONFIG_MACRO_DIR([config]) AC_CANONICAL_SYSTEM AM_MAINTAINER_MODE m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AM_INIT_AUTOMAKE([subdir-objects]) AC_CONFIG_HEADERS([zfs_config.h], [ (mv zfs_config.h zfs_config.h.tmp && awk -f ${ac_srcdir}/config/config.awk zfs_config.h.tmp >zfs_config.h && rm zfs_config.h.tmp) || exit 1]) AC_PROG_INSTALL AC_PROG_CC AC_PROG_LIBTOOL AM_PROG_AS AM_PROG_CC_C_O ZFS_AC_LICENSE ZFS_AC_PACKAGE ZFS_AC_CONFIG ZFS_AC_DEBUG ZFS_AC_DEBUG_DMU_TX AC_CONFIG_FILES([ Makefile udev/Makefile udev/rules.d/Makefile etc/Makefile etc/init.d/Makefile etc/zfs/Makefile etc/systemd/Makefile etc/systemd/system/Makefile etc/modules-load.d/Makefile man/Makefile man/man1/Makefile man/man5/Makefile man/man8/Makefile lib/Makefile lib/libspl/Makefile lib/libspl/asm-generic/Makefile lib/libspl/asm-i386/Makefile lib/libspl/asm-x86_64/Makefile lib/libspl/include/Makefile lib/libspl/include/ia32/Makefile lib/libspl/include/ia32/sys/Makefile lib/libspl/include/rpc/Makefile lib/libspl/include/sys/Makefile lib/libspl/include/sys/dktp/Makefile lib/libspl/include/util/Makefile lib/libavl/Makefile lib/libefi/Makefile lib/libicp/Makefile lib/libnvpair/Makefile lib/libunicode/Makefile lib/libuutil/Makefile lib/libzpool/Makefile lib/libzfs/libzfs.pc lib/libzfs/libzfs_core.pc lib/libzfs/Makefile lib/libzfs_core/Makefile lib/libshare/Makefile cmd/Makefile cmd/zdb/Makefile cmd/zhack/Makefile cmd/zfs/Makefile cmd/zinject/Makefile cmd/zpool/Makefile cmd/zstreamdump/Makefile cmd/ztest/Makefile cmd/zpios/Makefile cmd/mount_zfs/Makefile cmd/fsck_zfs/Makefile cmd/zvol_id/Makefile cmd/vdev_id/Makefile cmd/arcstat/Makefile cmd/dbufstat/Makefile cmd/arc_summary/Makefile cmd/zed/Makefile cmd/raidz_test/Makefile contrib/Makefile contrib/bash_completion.d/Makefile contrib/dracut/Makefile contrib/dracut/02zfsexpandknowledge/Makefile contrib/dracut/90zfs/Makefile contrib/initramfs/Makefile module/Makefile module/avl/Makefile module/nvpair/Makefile module/unicode/Makefile module/zcommon/Makefile module/zfs/Makefile module/zpios/Makefile module/icp/Makefile include/Makefile include/linux/Makefile include/sys/Makefile include/sys/fs/Makefile include/sys/fm/Makefile include/sys/fm/fs/Makefile include/sys/crypto/Makefile include/sys/sysevent/Makefile scripts/Makefile scripts/zpios-profile/Makefile scripts/zpios-test/Makefile scripts/zpool-config/Makefile scripts/common.sh tests/Makefile tests/test-runner/Makefile tests/test-runner/cmd/Makefile tests/test-runner/include/Makefile tests/test-runner/man/Makefile tests/runfiles/Makefile tests/zfs-tests/Makefile tests/zfs-tests/cmd/Makefile tests/zfs-tests/cmd/chg_usr_exec/Makefile tests/zfs-tests/cmd/devname2devid/Makefile tests/zfs-tests/cmd/dir_rd_update/Makefile tests/zfs-tests/cmd/file_check/Makefile tests/zfs-tests/cmd/file_trunc/Makefile tests/zfs-tests/cmd/file_write/Makefile tests/zfs-tests/cmd/largest_file/Makefile tests/zfs-tests/cmd/mkbusy/Makefile tests/zfs-tests/cmd/mkfile/Makefile tests/zfs-tests/cmd/mkfiles/Makefile tests/zfs-tests/cmd/mktree/Makefile tests/zfs-tests/cmd/mmap_exec/Makefile tests/zfs-tests/cmd/mmapwrite/Makefile tests/zfs-tests/cmd/randfree_file/Makefile tests/zfs-tests/cmd/readmmap/Makefile tests/zfs-tests/cmd/rename_dir/Makefile tests/zfs-tests/cmd/rm_lnkcnt_zero_file/Makefile tests/zfs-tests/cmd/threadsappend/Makefile tests/zfs-tests/cmd/xattrtest/Makefile tests/zfs-tests/include/Makefile tests/zfs-tests/include/commands.cfg tests/zfs-tests/include/default.cfg tests/zfs-tests/tests/Makefile tests/zfs-tests/tests/functional/Makefile tests/zfs-tests/tests/functional/acl/Makefile tests/zfs-tests/tests/functional/acl/posix/Makefile tests/zfs-tests/tests/functional/atime/Makefile tests/zfs-tests/tests/functional/bootfs/Makefile tests/zfs-tests/tests/functional/cache/Makefile tests/zfs-tests/tests/functional/cachefile/Makefile tests/zfs-tests/tests/functional/casenorm/Makefile tests/zfs-tests/tests/functional/checksum/Makefile tests/zfs-tests/tests/functional/clean_mirror/Makefile tests/zfs-tests/tests/functional/cli_root/Makefile tests/zfs-tests/tests/functional/cli_root/zdb/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_copies/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_get/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_inherit/Makefile tests/zfs-tests/tests/functional/cli_root/zfs/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_property/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_reservation/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_rollback/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_clear/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_destroy/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_detach/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_export/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_get/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_history/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile tests/zfs-tests/tests/functional/cli_root/zpool/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_offline/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_online/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_remove/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_replace/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_set/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/Makefile tests/zfs-tests/tests/functional/cli_user/Makefile tests/zfs-tests/tests/functional/cli_user/misc/Makefile tests/zfs-tests/tests/functional/cli_user/zfs_list/Makefile tests/zfs-tests/tests/functional/cli_user/zpool_iostat/Makefile tests/zfs-tests/tests/functional/cli_user/zpool_list/Makefile tests/zfs-tests/tests/functional/compression/Makefile tests/zfs-tests/tests/functional/ctime/Makefile tests/zfs-tests/tests/functional/delegate/Makefile tests/zfs-tests/tests/functional/devices/Makefile tests/zfs-tests/tests/functional/exec/Makefile tests/zfs-tests/tests/functional/features/async_destroy/Makefile tests/zfs-tests/tests/functional/features/large_dnode/Makefile tests/zfs-tests/tests/functional/features/Makefile tests/zfs-tests/tests/functional/grow_pool/Makefile tests/zfs-tests/tests/functional/grow_replicas/Makefile tests/zfs-tests/tests/functional/history/Makefile tests/zfs-tests/tests/functional/inheritance/Makefile tests/zfs-tests/tests/functional/inuse/Makefile tests/zfs-tests/tests/functional/large_files/Makefile tests/zfs-tests/tests/functional/largest_pool/Makefile tests/zfs-tests/tests/functional/link_count/Makefile tests/zfs-tests/tests/functional/migration/Makefile tests/zfs-tests/tests/functional/mmap/Makefile tests/zfs-tests/tests/functional/mount/Makefile tests/zfs-tests/tests/functional/mv_files/Makefile tests/zfs-tests/tests/functional/nestedfs/Makefile tests/zfs-tests/tests/functional/no_space/Makefile tests/zfs-tests/tests/functional/nopwrite/Makefile tests/zfs-tests/tests/functional/online_offline/Makefile tests/zfs-tests/tests/functional/pool_names/Makefile tests/zfs-tests/tests/functional/poolversion/Makefile tests/zfs-tests/tests/functional/privilege/Makefile tests/zfs-tests/tests/functional/quota/Makefile tests/zfs-tests/tests/functional/raidz/Makefile tests/zfs-tests/tests/functional/redundancy/Makefile tests/zfs-tests/tests/functional/refquota/Makefile tests/zfs-tests/tests/functional/refreserv/Makefile tests/zfs-tests/tests/functional/rename_dirs/Makefile tests/zfs-tests/tests/functional/replacement/Makefile tests/zfs-tests/tests/functional/reservation/Makefile tests/zfs-tests/tests/functional/rootpool/Makefile tests/zfs-tests/tests/functional/rsend/Makefile tests/zfs-tests/tests/functional/scrub_mirror/Makefile tests/zfs-tests/tests/functional/slog/Makefile tests/zfs-tests/tests/functional/snapshot/Makefile tests/zfs-tests/tests/functional/snapused/Makefile tests/zfs-tests/tests/functional/sparse/Makefile tests/zfs-tests/tests/functional/threadsappend/Makefile + tests/zfs-tests/tests/functional/tmpfile/Makefile tests/zfs-tests/tests/functional/truncate/Makefile tests/zfs-tests/tests/functional/userquota/Makefile tests/zfs-tests/tests/functional/upgrade/Makefile tests/zfs-tests/tests/functional/vdev_zaps/Makefile tests/zfs-tests/tests/functional/write_dirs/Makefile tests/zfs-tests/tests/functional/xattr/Makefile tests/zfs-tests/tests/functional/zvol/Makefile tests/zfs-tests/tests/functional/zvol/zvol_cli/Makefile tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/Makefile tests/zfs-tests/tests/functional/zvol/zvol_misc/Makefile tests/zfs-tests/tests/functional/zvol/zvol_swap/Makefile tests/zfs-tests/tests/perf/Makefile tests/zfs-tests/tests/perf/fio/Makefile tests/zfs-tests/tests/perf/regression/Makefile tests/zfs-tests/tests/perf/scripts/Makefile tests/zfs-tests/tests/stress/Makefile rpm/Makefile rpm/redhat/Makefile rpm/redhat/zfs.spec rpm/redhat/zfs-kmod.spec rpm/redhat/zfs-dkms.spec rpm/generic/Makefile rpm/generic/zfs.spec rpm/generic/zfs-kmod.spec rpm/generic/zfs-dkms.spec zfs-script-config.sh zfs.release ]) AC_OUTPUT AS_IF([test "x$user_libdevmapper" != xyes && test "$ZFS_CONFIG" != kernel ], [ AC_MSG_WARN([Building without libdevmapper. Auto-replace, auto-online, \ and statechange-led.sh may not work correctly with device mapper vdevs.]) ]) diff --git a/include/sys/zfs_dir.h b/include/sys/zfs_dir.h index 8610fbe0861d..efda1236aea0 100644 --- a/include/sys/zfs_dir.h +++ b/include/sys/zfs_dir.h @@ -1,74 +1,75 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_FS_ZFS_DIR_H #define _SYS_FS_ZFS_DIR_H #include #include #include #ifdef __cplusplus extern "C" { #endif /* zfs_dirent_lock() flags */ #define ZNEW 0x0001 /* entry should not exist */ #define ZEXISTS 0x0002 /* entry should exist */ #define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */ #define ZXATTR 0x0008 /* we want the xattr dir */ #define ZRENAMING 0x0010 /* znode is being renamed */ #define ZCILOOK 0x0020 /* case-insensitive lookup requested */ #define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */ #define ZHAVELOCK 0x0080 /* z_name_lock is already held */ /* mknode flags */ #define IS_ROOT_NODE 0x01 /* create a root node */ #define IS_XATTR 0x02 /* create an extended attribute node */ +#define IS_TMPFILE 0x04 /* create a tmpfile */ extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **, int, int *, pathname_t *); extern void zfs_dirent_unlock(zfs_dirlock_t *); extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int); extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int, boolean_t *); extern int zfs_dirlook(znode_t *, char *, struct inode **, int, int *, pathname_t *); extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *, uint_t, znode_t **, zfs_acl_ids_t *); extern void zfs_rmnode(znode_t *); extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old); extern boolean_t zfs_dirempty(znode_t *); extern void zfs_unlinked_add(znode_t *, dmu_tx_t *); extern void zfs_unlinked_drain(zfs_sb_t *); extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr); extern int zfs_get_xattrdir(znode_t *, struct inode **, cred_t *, int); extern int zfs_make_xattrdir(znode_t *, vattr_t *, struct inode **, cred_t *); #ifdef __cplusplus } #endif #endif /* _SYS_FS_ZFS_DIR_H */ diff --git a/include/sys/zfs_vnops.h b/include/sys/zfs_vnops.h index 157e7f9d5ec6..c86fec18df5d 100644 --- a/include/sys/zfs_vnops.h +++ b/include/sys/zfs_vnops.h @@ -1,88 +1,90 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_FS_ZFS_VNOPS_H #define _SYS_FS_ZFS_VNOPS_H #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif extern int zfs_open(struct inode *ip, int mode, int flag, cred_t *cr); extern int zfs_close(struct inode *ip, int flag, cred_t *cr); extern int zfs_holey(struct inode *ip, int cmd, loff_t *off); extern int zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr); extern int zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr); extern int zfs_access(struct inode *ip, int mode, int flag, cred_t *cr); extern int zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags, cred_t *cr, int *direntflags, pathname_t *realpnp); extern int zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl, int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp); +extern int zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, + int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp); extern int zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags); extern int zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp, cred_t *cr, int flags, vsecattr_t *vsecp); extern int zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, int flags); extern int zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr); extern int zfs_fsync(struct inode *ip, int syncflag, cred_t *cr); extern int zfs_getattr(struct inode *ip, vattr_t *vap, int flag, cred_t *cr); extern int zfs_getattr_fast(struct inode *ip, struct kstat *sp); extern int zfs_setattr(struct inode *ip, vattr_t *vap, int flag, cred_t *cr); extern int zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, cred_t *cr, int flags); extern int zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link, struct inode **ipp, cred_t *cr, int flags); extern int zfs_follow_link(struct dentry *dentry, struct nameidata *nd); extern int zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr); extern int zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr, int flags); extern void zfs_inactive(struct inode *ip); extern int zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag, offset_t offset, cred_t *cr); extern int zfs_fid(struct inode *ip, fid_t *fidp); extern int zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr); extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr); extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages); extern int zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc); extern int zfs_dirty_inode(struct inode *ip, int flags); extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len, unsigned long vm_flags); extern void zfs_iput_async(struct inode *ip); #ifdef __cplusplus } #endif #endif /* _SYS_FS_ZFS_VNOPS_H */ diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 5b2099efb4d3..6b5cda95820c 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -1,4775 +1,4919 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2015 by Chunwei Chen. All rights reserved. */ /* Portions Copyright 2007 Jeremy Teo */ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fs/fs_subr.h" #include #include #include #include #include #include #include #include #include #include #include /* * Programming rules. * * Each vnode op performs some logical unit of work. To do this, the ZPL must * properly lock its in-core state, create a DMU transaction, do the work, * record this work in the intent log (ZIL), commit the DMU transaction, * and wait for the intent log to commit if it is a synchronous operation. * Moreover, the vnode ops must work in both normal and log replay context. * The ordering of events is important to avoid deadlocks and references * to freed memory. The example below illustrates the following Big Rules: * * (1) A check must be made in each zfs thread for a mounted file system. * This is done avoiding races using ZFS_ENTER(zsb). * A ZFS_EXIT(zsb) is needed before all returns. Any znodes * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros * can return EIO from the calling function. * * (2) iput() should always be the last thing except for zil_commit() * (if necessary) and ZFS_EXIT(). This is for 3 reasons: * First, if it's the last reference, the vnode/znode * can be freed, so the zp may point to freed memory. Second, the last * reference will call zfs_zinactive(), which may induce a lot of work -- * pushing cached pages (which acquires range locks) and syncing out * cached atime changes. Third, zfs_zinactive() may require a new tx, * which could deadlock the system if you were already holding one. * If you must call iput() within a tx then use zfs_iput_async(). * * (3) All range locks must be grabbed before calling dmu_tx_assign(), * as they can span dmu_tx_assign() calls. * * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to * dmu_tx_assign(). This is critical because we don't want to block * while holding locks. * * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This * reduces lock contention and CPU usage when we must wait (note that if * throughput is constrained by the storage, nearly every transaction * must wait). * * Note, in particular, that if a lock is sometimes acquired before * the tx assigns, and sometimes after (e.g. z_lock), then failing * to use a non-blocking assign can deadlock the system. The scenario: * * Thread A has grabbed a lock before calling dmu_tx_assign(). * Thread B is in an already-assigned tx, and blocks for this lock. * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() * forever, because the previous txg can't quiesce until B's tx commits. * * If dmu_tx_assign() returns ERESTART and zsb->z_assign is TXG_NOWAIT, * then drop all locks, call dmu_tx_wait(), and try again. On subsequent * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, * to indicate that this operation has already called dmu_tx_wait(). * This will ensure that we don't retry forever, waiting a short bit * each time. * * (5) If the operation succeeded, generate the intent log entry for it * before dropping locks. This ensures that the ordering of events * in the intent log matches the order in which they actually occurred. * During ZIL replay the zfs_log_* functions will update the sequence * number to indicate the zil transaction has replayed. * * (6) At the end of each vnode op, the DMU tx must always commit, * regardless of whether there were any errors. * * (7) After dropping all locks, invoke zil_commit(zilog, foid) * to ensure that synchronous semantics are provided when necessary. * * In general, this is how things should be ordered in each vnode op: * * ZFS_ENTER(zsb); // exit if unmounted * top: * zfs_dirent_lock(&dl, ...) // lock directory entry (may igrab()) * rw_enter(...); // grab any other locks you need * tx = dmu_tx_create(...); // get DMU tx * dmu_tx_hold_*(); // hold each object you might modify * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); * if (error) { * rw_exit(...); // drop locks * zfs_dirent_unlock(dl); // unlock directory entry * iput(...); // release held vnodes * if (error == ERESTART) { * waited = B_TRUE; * dmu_tx_wait(tx); * dmu_tx_abort(tx); * goto top; * } * dmu_tx_abort(tx); // abort DMU tx * ZFS_EXIT(zsb); // finished in zfs * return (error); // really out of space * } * error = do_real_work(); // do whatever this VOP does * if (error == 0) * zfs_log_*(...); // on success, make ZIL entry * dmu_tx_commit(tx); // commit DMU tx -- error or not * rw_exit(...); // drop locks * zfs_dirent_unlock(dl); // unlock directory entry * iput(...); // release held vnodes * zil_commit(zilog, foid); // synchronous when necessary * ZFS_EXIT(zsb); // finished in zfs * return (error); // done, report error */ /* * Virus scanning is unsupported. It would be possible to add a hook * here to performance the required virus scan. This could be done * entirely in the kernel or potentially as an update to invoke a * scanning utility. */ static int zfs_vscan(struct inode *ip, cred_t *cr, int async) { return (0); } /* ARGSUSED */ int zfs_open(struct inode *ip, int mode, int flag, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); /* Honor ZFS_APPENDONLY file attribute */ if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) && ((flag & O_APPEND) == 0)) { ZFS_EXIT(zsb); return (SET_ERROR(EPERM)); } /* Virus scan eligible files on open */ if (!zfs_has_ctldir(zp) && zsb->z_vscan && S_ISREG(ip->i_mode) && !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { if (zfs_vscan(ip, cr, 0) != 0) { ZFS_EXIT(zsb); return (SET_ERROR(EACCES)); } } /* Keep a count of the synchronous opens in the znode */ if (flag & O_SYNC) atomic_inc_32(&zp->z_sync_cnt); ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_open); /* ARGSUSED */ int zfs_close(struct inode *ip, int flag, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); /* Decrement the synchronous opens in the znode */ if (flag & O_SYNC) atomic_dec_32(&zp->z_sync_cnt); if (!zfs_has_ctldir(zp) && zsb->z_vscan && S_ISREG(ip->i_mode) && !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) VERIFY(zfs_vscan(ip, cr, 1) == 0); ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_close); #if defined(SEEK_HOLE) && defined(SEEK_DATA) /* * Lseek support for finding holes (cmd == SEEK_HOLE) and * data (cmd == SEEK_DATA). "off" is an in/out parameter. */ static int zfs_holey_common(struct inode *ip, int cmd, loff_t *off) { znode_t *zp = ITOZ(ip); uint64_t noff = (uint64_t)*off; /* new offset */ uint64_t file_sz; int error; boolean_t hole; file_sz = zp->z_size; if (noff >= file_sz) { return (SET_ERROR(ENXIO)); } if (cmd == SEEK_HOLE) hole = B_TRUE; else hole = B_FALSE; error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); if (error == ESRCH) return (SET_ERROR(ENXIO)); /* * We could find a hole that begins after the logical end-of-file, * because dmu_offset_next() only works on whole blocks. If the * EOF falls mid-block, then indicate that the "virtual hole" * at the end of the file begins at the logical EOF, rather than * at the end of the last block. */ if (noff > file_sz) { ASSERT(hole); noff = file_sz; } if (noff < *off) return (error); *off = noff; return (error); } int zfs_holey(struct inode *ip, int cmd, loff_t *off) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); int error; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); error = zfs_holey_common(ip, cmd, off); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_holey); #endif /* SEEK_HOLE && SEEK_DATA */ #if defined(_KERNEL) /* * When a file is memory mapped, we must keep the IO data synchronized * between the DMU cache and the memory mapped pages. What this means: * * On Write: If we find a memory mapped page, we write to *both* * the page and the dmu buffer. */ static void update_pages(struct inode *ip, int64_t start, int len, objset_t *os, uint64_t oid) { struct address_space *mp = ip->i_mapping; struct page *pp; uint64_t nbytes; int64_t off; void *pb; off = start & (PAGE_SIZE-1); for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) { nbytes = MIN(PAGE_SIZE - off, len); pp = find_lock_page(mp, start >> PAGE_SHIFT); if (pp) { if (mapping_writably_mapped(mp)) flush_dcache_page(pp); pb = kmap(pp); (void) dmu_read(os, oid, start+off, nbytes, pb+off, DMU_READ_PREFETCH); kunmap(pp); if (mapping_writably_mapped(mp)) flush_dcache_page(pp); mark_page_accessed(pp); SetPageUptodate(pp); ClearPageError(pp); unlock_page(pp); put_page(pp); } len -= nbytes; off = 0; } } /* * When a file is memory mapped, we must keep the IO data synchronized * between the DMU cache and the memory mapped pages. What this means: * * On Read: We "read" preferentially from memory mapped pages, * else we default from the dmu buffer. * * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when * the file is memory mapped. */ static int mappedread(struct inode *ip, int nbytes, uio_t *uio) { struct address_space *mp = ip->i_mapping; struct page *pp; znode_t *zp = ITOZ(ip); int64_t start, off; uint64_t bytes; int len = nbytes; int error = 0; void *pb; start = uio->uio_loffset; off = start & (PAGE_SIZE-1); for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) { bytes = MIN(PAGE_SIZE - off, len); pp = find_lock_page(mp, start >> PAGE_SHIFT); if (pp) { ASSERT(PageUptodate(pp)); pb = kmap(pp); error = uiomove(pb + off, bytes, UIO_READ, uio); kunmap(pp); if (mapping_writably_mapped(mp)) flush_dcache_page(pp); mark_page_accessed(pp); unlock_page(pp); put_page(pp); } else { error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, bytes); } len -= bytes; off = 0; if (error) break; } return (error); } #endif /* _KERNEL */ unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */ unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT; /* * Read bytes from specified file into supplied buffer. * * IN: ip - inode of file to be read from. * uio - structure supplying read location, range info, * and return buffer. * ioflag - FSYNC flags; used to provide FRSYNC semantics. * O_DIRECT flag; used to bypass page cache. * cr - credentials of caller. * * OUT: uio - updated offset and range, buffer filled. * * RETURN: 0 on success, error code on failure. * * Side Effects: * inode - atime updated if byte count > 0 */ /* ARGSUSED */ int zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); ssize_t n, nbytes; int error = 0; rl_t *rl; #ifdef HAVE_UIO_ZEROCOPY xuio_t *xuio = NULL; #endif /* HAVE_UIO_ZEROCOPY */ ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); if (zp->z_pflags & ZFS_AV_QUARANTINED) { ZFS_EXIT(zsb); return (SET_ERROR(EACCES)); } /* * Validate file offset */ if (uio->uio_loffset < (offset_t)0) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } /* * Fasttrack empty reads */ if (uio->uio_resid == 0) { ZFS_EXIT(zsb); return (0); } /* * If we're in FRSYNC mode, sync out this znode before reading it. */ if (ioflag & FRSYNC || zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zsb->z_log, zp->z_id); /* * Lock the range against changes. */ rl = zfs_range_lock(&zp->z_range_lock, uio->uio_loffset, uio->uio_resid, RL_READER); /* * If we are reading past end-of-file we can skip * to the end; but we might still need to set atime. */ if (uio->uio_loffset >= zp->z_size) { error = 0; goto out; } ASSERT(uio->uio_loffset < zp->z_size); n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); #ifdef HAVE_UIO_ZEROCOPY if ((uio->uio_extflg == UIO_XUIO) && (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { int nblk; int blksz = zp->z_blksz; uint64_t offset = uio->uio_loffset; xuio = (xuio_t *)uio; if ((ISP2(blksz))) { nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, blksz)) / blksz; } else { ASSERT(offset + n <= blksz); nblk = 1; } (void) dmu_xuio_init(xuio, nblk); if (vn_has_cached_data(ip)) { /* * For simplicity, we always allocate a full buffer * even if we only expect to read a portion of a block. */ while (--nblk >= 0) { (void) dmu_xuio_add(xuio, dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), blksz), 0, blksz); } } } #endif /* HAVE_UIO_ZEROCOPY */ while (n > 0) { nbytes = MIN(n, zfs_read_chunk_size - P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); if (zp->z_is_mapped && !(ioflag & O_DIRECT)) { error = mappedread(ip, nbytes, uio); } else { error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, nbytes); } if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) error = SET_ERROR(EIO); break; } n -= nbytes; } out: zfs_range_unlock(rl); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_read); /* * Write the bytes to a file. * * IN: ip - inode of file to be written to. * uio - structure supplying write location, range info, * and data buffer. * ioflag - FAPPEND flag set if in append mode. * O_DIRECT flag; used to bypass page cache. * cr - credentials of caller. * * OUT: uio - updated offset and range. * * RETURN: 0 if success * error code if failure * * Timestamps: * ip - ctime|mtime updated if byte count > 0 */ /* ARGSUSED */ int zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) { znode_t *zp = ITOZ(ip); rlim64_t limit = uio->uio_limit; ssize_t start_resid = uio->uio_resid; ssize_t tx_bytes; uint64_t end_size; dmu_tx_t *tx; zfs_sb_t *zsb = ZTOZSB(zp); zilog_t *zilog; offset_t woff; ssize_t n, nbytes; rl_t *rl; int max_blksz = zsb->z_max_blksz; int error = 0; arc_buf_t *abuf; const iovec_t *aiov = NULL; xuio_t *xuio = NULL; int write_eof; int count = 0; sa_bulk_attr_t bulk[4]; uint64_t mtime[2], ctime[2]; uint32_t uid; #ifdef HAVE_UIO_ZEROCOPY int i_iov = 0; const iovec_t *iovp = uio->uio_iov; ASSERTV(int iovcnt = uio->uio_iovcnt); #endif /* * Fasttrack empty write */ n = start_resid; if (n == 0) return (0); if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) limit = MAXOFFSET_T; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); /* * Callers might not be able to detect properly that we are read-only, * so check it explicitly here. */ if (zfs_is_readonly(zsb)) { ZFS_EXIT(zsb); return (SET_ERROR(EROFS)); } /* * If immutable or not appending then return EPERM */ if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && (uio->uio_loffset < zp->z_size))) { ZFS_EXIT(zsb); return (SET_ERROR(EPERM)); } zilog = zsb->z_log; /* * Validate file offset */ woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; if (woff < 0) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } /* * Pre-fault the pages to ensure slow (eg NFS) pages * don't hold up txg. * Skip this if uio contains loaned arc_buf. */ #ifdef HAVE_UIO_ZEROCOPY if ((uio->uio_extflg == UIO_XUIO) && (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) xuio = (xuio_t *)uio; else #endif uio_prefaultpages(MIN(n, max_blksz), uio); /* * If in append mode, set the io offset pointer to eof. */ if (ioflag & FAPPEND) { /* * Obtain an appending range lock to guarantee file append * semantics. We reset the write offset once we have the lock. */ rl = zfs_range_lock(&zp->z_range_lock, 0, n, RL_APPEND); woff = rl->r_off; if (rl->r_len == UINT64_MAX) { /* * We overlocked the file because this write will cause * the file block size to increase. * Note that zp_size cannot change with this lock held. */ woff = zp->z_size; } uio->uio_loffset = woff; } else { /* * Note that if the file block size will change as a result of * this write, then this range lock will lock the entire file * so that we can re-write the block safely. */ rl = zfs_range_lock(&zp->z_range_lock, woff, n, RL_WRITER); } if (woff >= limit) { zfs_range_unlock(rl); ZFS_EXIT(zsb); return (SET_ERROR(EFBIG)); } if ((woff + n) > limit || woff > (limit - n)) n = limit - woff; /* Will this write extend the file length? */ write_eof = (woff + n > zp->z_size); end_size = MAX(zp->z_size, woff + n); /* * Write the file in reasonable size chunks. Each chunk is written * in a separate transaction; this keeps the intent log records small * and allows us to do more fine-grained space accounting. */ while (n > 0) { abuf = NULL; woff = uio->uio_loffset; if (zfs_owner_overquota(zsb, zp, B_FALSE) || zfs_owner_overquota(zsb, zp, B_TRUE)) { if (abuf != NULL) dmu_return_arcbuf(abuf); error = SET_ERROR(EDQUOT); break; } if (xuio && abuf == NULL) { #ifdef HAVE_UIO_ZEROCOPY ASSERT(i_iov < iovcnt); ASSERT3U(uio->uio_segflg, !=, UIO_BVEC); aiov = &iovp[i_iov]; abuf = dmu_xuio_arcbuf(xuio, i_iov); dmu_xuio_clear(xuio, i_iov); ASSERT((aiov->iov_base == abuf->b_data) || ((char *)aiov->iov_base - (char *)abuf->b_data + aiov->iov_len == arc_buf_size(abuf))); i_iov++; #endif } else if (abuf == NULL && n >= max_blksz && woff >= zp->z_size && P2PHASE(woff, max_blksz) == 0 && zp->z_blksz == max_blksz) { /* * This write covers a full block. "Borrow" a buffer * from the dmu so that we can fill it before we enter * a transaction. This avoids the possibility of * holding up the transaction if the data copy hangs * up on a pagefault (e.g., from an NFS server mapping). */ size_t cbytes; abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), max_blksz); ASSERT(abuf != NULL); ASSERT(arc_buf_size(abuf) == max_blksz); if ((error = uiocopy(abuf->b_data, max_blksz, UIO_WRITE, uio, &cbytes))) { dmu_return_arcbuf(abuf); break; } ASSERT(cbytes == max_blksz); } /* * Start a transaction. */ tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); if (abuf != NULL) dmu_return_arcbuf(abuf); break; } /* * If zfs_range_lock() over-locked we grow the blocksize * and then reduce the lock range. This will only happen * on the first iteration since zfs_range_reduce() will * shrink down r_len to the appropriate size. */ if (rl->r_len == UINT64_MAX) { uint64_t new_blksz; if (zp->z_blksz > max_blksz) { /* * File's blocksize is already larger than the * "recordsize" property. Only let it grow to * the next power of 2. */ ASSERT(!ISP2(zp->z_blksz)); new_blksz = MIN(end_size, 1 << highbit64(zp->z_blksz)); } else { new_blksz = MIN(end_size, max_blksz); } zfs_grow_blocksize(zp, new_blksz, tx); zfs_range_reduce(rl, woff, n); } /* * XXX - should we really limit each write to z_max_blksz? * Perhaps we should use SPA_MAXBLOCKSIZE chunks? */ nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); if (abuf == NULL) { tx_bytes = uio->uio_resid; error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, nbytes, tx); tx_bytes -= uio->uio_resid; } else { tx_bytes = nbytes; ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); /* * If this is not a full block write, but we are * extending the file past EOF and this data starts * block-aligned, use assign_arcbuf(). Otherwise, * write via dmu_write(). */ if (tx_bytes < max_blksz && (!write_eof || aiov->iov_base != abuf->b_data)) { ASSERT(xuio); dmu_write(zsb->z_os, zp->z_id, woff, aiov->iov_len, aiov->iov_base, tx); dmu_return_arcbuf(abuf); xuio_stat_wbuf_copied(); } else { ASSERT(xuio || tx_bytes == max_blksz); dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); } if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) update_pages(ip, woff, tx_bytes, zsb->z_os, zp->z_id); /* * If we made no progress, we're done. If we made even * partial progress, update the znode and ZIL accordingly. */ if (tx_bytes == 0) { (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb), (void *)&zp->z_size, sizeof (uint64_t), tx); dmu_tx_commit(tx); ASSERT(error != 0); break; } /* * Clear Set-UID/Set-GID bits on successful write if not * privileged and at least one of the excute bits is set. * * It would be nice to to this after all writes have * been done, but that would still expose the ISUID/ISGID * to another app after the partial write is committed. * * Note: we don't call zfs_fuid_map_id() here because * user 0 is not an ephemeral uid. */ mutex_enter(&zp->z_acl_lock); uid = KUID_TO_SUID(ip->i_uid); if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 && (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && secpolicy_vnode_setid_retain(cr, ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) { uint64_t newmode; zp->z_mode &= ~(S_ISUID | S_ISGID); ip->i_mode = newmode = zp->z_mode; (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zsb), (void *)&newmode, sizeof (uint64_t), tx); } mutex_exit(&zp->z_acl_lock); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); /* * Update the file size (zp_size) if it has changed; * account for possible concurrent updates. */ while ((end_size = zp->z_size) < uio->uio_loffset) { (void) atomic_cas_64(&zp->z_size, end_size, uio->uio_loffset); ASSERT(error == 0); } /* * If we are replaying and eof is non zero then force * the file size to the specified eof. Note, there's no * concurrency during replay. */ if (zsb->z_replay && zsb->z_replay_eof != 0) zp->z_size = zsb->z_replay_eof; error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag, NULL, NULL); dmu_tx_commit(tx); if (error != 0) break; ASSERT(tx_bytes == nbytes); n -= nbytes; if (!xuio && n > 0) uio_prefaultpages(MIN(n, max_blksz), uio); } zfs_inode_update(zp); zfs_range_unlock(rl); /* * If we're in replay mode, or we made no progress, return error. * Otherwise, it's at least a partial write, so it's successful. */ if (zsb->z_replay || uio->uio_resid == start_resid) { ZFS_EXIT(zsb); return (error); } if (ioflag & (FSYNC | FDSYNC) || zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, zp->z_id); ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_write); /* * Drop a reference on the passed inode asynchronously. This ensures * that the caller will never drop the last reference on an inode in * the current context. Doing so while holding open a tx could result * in a deadlock if iput_final() re-enters the filesystem code. */ void zfs_iput_async(struct inode *ip) { objset_t *os = ITOZSB(ip)->z_os; ASSERT(atomic_read(&ip->i_count) > 0); ASSERT(os != NULL); if (atomic_read(&ip->i_count) == 1) VERIFY(taskq_dispatch(dsl_pool_iput_taskq(dmu_objset_pool(os)), (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID); else iput(ip); } void zfs_get_done(zgd_t *zgd, int error) { znode_t *zp = zgd->zgd_private; if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); zfs_range_unlock(zgd->zgd_rl); /* * Release the vnode asynchronously as we currently have the * txg stopped from syncing. */ zfs_iput_async(ZTOI(zp)); if (error == 0 && zgd->zgd_bp) zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); kmem_free(zgd, sizeof (zgd_t)); } #ifdef DEBUG static int zil_fault_io = 0; #endif /* * Get data to generate a TX_WRITE intent log record. */ int zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) { zfs_sb_t *zsb = arg; objset_t *os = zsb->z_os; znode_t *zp; uint64_t object = lr->lr_foid; uint64_t offset = lr->lr_offset; uint64_t size = lr->lr_length; blkptr_t *bp = &lr->lr_blkptr; dmu_buf_t *db; zgd_t *zgd; int error = 0; ASSERT(zio != NULL); ASSERT(size != 0); /* * Nothing to do if the file has been removed */ if (zfs_zget(zsb, object, &zp) != 0) return (SET_ERROR(ENOENT)); if (zp->z_unlinked) { /* * Release the vnode asynchronously as we currently have the * txg stopped from syncing. */ zfs_iput_async(ZTOI(zp)); return (SET_ERROR(ENOENT)); } zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_zilog = zsb->z_log; zgd->zgd_private = zp; /* * Write records come in two flavors: immediate and indirect. * For small writes it's cheaper to store the data with the * log record (immediate); for large writes it's cheaper to * sync the data and get a pointer to it (indirect) so that * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ zgd->zgd_rl = zfs_range_lock(&zp->z_range_lock, offset, size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { error = SET_ERROR(ENOENT); } else { error = dmu_read(os, object, offset, size, buf, DMU_READ_NO_PREFETCH); } ASSERT(error == 0 || error == ENOENT); } else { /* indirect write */ /* * Have to lock the whole block to ensure when it's * written out and it's checksum is being calculated * that no one can change the data. We need to re-check * blocksize after we get the lock in case it's changed! */ for (;;) { uint64_t blkoff; size = zp->z_blksz; blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; offset -= blkoff; zgd->zgd_rl = zfs_range_lock(&zp->z_range_lock, offset, size, RL_READER); if (zp->z_blksz == size) break; offset += blkoff; zfs_range_unlock(zgd->zgd_rl); } /* test for truncation needs to be done while range locked */ if (lr->lr_offset >= zp->z_size) error = SET_ERROR(ENOENT); #ifdef DEBUG if (zil_fault_io) { error = SET_ERROR(EIO); zil_fault_io = 0; } #endif if (error == 0) error = dmu_buf_hold(os, object, offset, zgd, &db, DMU_READ_NO_PREFETCH); if (error == 0) { blkptr_t *obp = dmu_buf_get_blkptr(db); if (obp) { ASSERT(BP_IS_HOLE(bp)); *bp = *obp; } zgd->zgd_db = db; zgd->zgd_bp = bp; ASSERT(db->db_offset == offset); ASSERT(db->db_size == size); error = dmu_sync(zio, lr->lr_common.lrc_txg, zfs_get_done, zgd); ASSERT(error || lr->lr_length <= zp->z_blksz); /* * On success, we need to wait for the write I/O * initiated by dmu_sync() to complete before we can * release this dbuf. We will finish everything up * in the zfs_get_done() callback. */ if (error == 0) return (0); if (error == EALREADY) { lr->lr_common.lrc_txtype = TX_WRITE2; error = 0; } } } zfs_get_done(zgd, error); return (error); } /*ARGSUSED*/ int zfs_access(struct inode *ip, int mode, int flag, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); int error; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); if (flag & V_ACE_MASK) error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); else error = zfs_zaccess_rwx(zp, mode, flag, cr); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_access); /* * Lookup an entry in a directory, or an extended attribute directory. * If it exists, return a held inode reference for it. * * IN: dip - inode of directory to search. * nm - name of entry to lookup. * flags - LOOKUP_XATTR set if looking for an attribute. * cr - credentials of caller. * direntflags - directory lookup flags * realpnp - returned pathname. * * OUT: ipp - inode of located entry, NULL if not found. * * RETURN: 0 on success, error code on failure. * * Timestamps: * NA */ /* ARGSUSED */ int zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) { znode_t *zdp = ITOZ(dip); zfs_sb_t *zsb = ITOZSB(dip); int error = 0; /* fast path */ if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { if (!S_ISDIR(dip->i_mode)) { return (SET_ERROR(ENOTDIR)); } else if (zdp->z_sa_hdl == NULL) { return (SET_ERROR(EIO)); } if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { error = zfs_fastaccesschk_execute(zdp, cr); if (!error) { *ipp = dip; igrab(*ipp); return (0); } return (error); #ifdef HAVE_DNLC } else { vnode_t *tvp = dnlc_lookup(dvp, nm); if (tvp) { error = zfs_fastaccesschk_execute(zdp, cr); if (error) { iput(tvp); return (error); } if (tvp == DNLC_NO_VNODE) { iput(tvp); return (SET_ERROR(ENOENT)); } else { *vpp = tvp; return (specvp_check(vpp, cr)); } } #endif /* HAVE_DNLC */ } } ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zdp); *ipp = NULL; if (flags & LOOKUP_XATTR) { /* * We don't allow recursive attributes.. * Maybe someday we will. */ if (zdp->z_pflags & ZFS_XATTR) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } if ((error = zfs_get_xattrdir(zdp, ipp, cr, flags))) { ZFS_EXIT(zsb); return (error); } /* * Do we have permission to get into attribute directory? */ if ((error = zfs_zaccess(ITOZ(*ipp), ACE_EXECUTE, 0, B_FALSE, cr))) { iput(*ipp); *ipp = NULL; } ZFS_EXIT(zsb); return (error); } if (!S_ISDIR(dip->i_mode)) { ZFS_EXIT(zsb); return (SET_ERROR(ENOTDIR)); } /* * Check accessibility of directory. */ if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) { ZFS_EXIT(zsb); return (error); } if (zsb->z_utf8 && u8_validate(nm, strlen(nm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zsb); return (SET_ERROR(EILSEQ)); } error = zfs_dirlook(zdp, nm, ipp, flags, direntflags, realpnp); if ((error == 0) && (*ipp)) zfs_inode_update(ITOZ(*ipp)); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_lookup); /* * Attempt to create a new entry in a directory. If the entry * already exists, truncate the file if permissible, else return * an error. Return the ip of the created or trunc'd file. * * IN: dip - inode of directory to put new file entry in. * name - name of new file entry. * vap - attributes of new file. * excl - flag indicating exclusive or non-exclusive mode. * mode - mode to open file with. * cr - credentials of caller. * flag - large file flag [UNUSED]. * vsecp - ACL to be set * * OUT: ipp - inode of created or trunc'd entry. * * RETURN: 0 on success, error code on failure. * * Timestamps: * dip - ctime|mtime updated if new entry created * ip - ctime|mtime always, atime if new */ /* ARGSUSED */ int zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl, int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp) { znode_t *zp, *dzp = ITOZ(dip); zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; objset_t *os; zfs_dirlock_t *dl; dmu_tx_t *tx; int error; uid_t uid; gid_t gid; zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; boolean_t have_acl = B_FALSE; boolean_t waited = B_FALSE; /* * If we have an ephemeral id, ACL, or XVATTR then * make sure file system is at proper version */ gid = crgetgid(cr); uid = crgetuid(cr); if (zsb->z_use_fuids == B_FALSE && (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) return (SET_ERROR(EINVAL)); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); os = zsb->z_os; zilog = zsb->z_log; if (zsb->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zsb); return (SET_ERROR(EILSEQ)); } if (vap->va_mask & ATTR_XVATTR) { if ((error = secpolicy_xvattr((xvattr_t *)vap, crgetuid(cr), cr, vap->va_mode)) != 0) { ZFS_EXIT(zsb); return (error); } } top: *ipp = NULL; if (*name == '\0') { /* * Null component name refers to the directory itself. */ igrab(dip); zp = dzp; dl = NULL; error = 0; } else { /* possible igrab(zp) */ int zflg = 0; if (flag & FIGNORECASE) zflg |= ZCILOOK; error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); if (error) { if (have_acl) zfs_acl_ids_free(&acl_ids); if (strcmp(name, "..") == 0) error = SET_ERROR(EISDIR); ZFS_EXIT(zsb); return (error); } } if (zp == NULL) { uint64_t txtype; /* * Create a new file object and update the directory * to reference it. */ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { if (have_acl) zfs_acl_ids_free(&acl_ids); goto out; } /* * We only support the creation of regular files in * extended attribute directories. */ if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) { if (have_acl) zfs_acl_ids_free(&acl_ids); error = SET_ERROR(EINVAL); goto out; } if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, &acl_ids)) != 0) goto out; have_acl = B_TRUE; if (zfs_acl_ids_overquota(zsb, &acl_ids)) { zfs_acl_ids_free(&acl_ids); error = SET_ERROR(EDQUOT); goto out; } tx = dmu_tx_create(os); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE); fuid_dirtied = zsb->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zsb, tx); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); if (error == ERESTART) { waited = B_TRUE; dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); ZFS_EXIT(zsb); return (error); } zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) zfs_fuid_sync(zsb, tx); (void) zfs_link_create(dl, zp, tx, ZNEW); txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); if (flag & FIGNORECASE) txtype |= TX_CI; zfs_log_create(zilog, tx, txtype, dzp, zp, name, vsecp, acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); } else { int aflags = (flag & FAPPEND) ? V_APPEND : 0; if (have_acl) zfs_acl_ids_free(&acl_ids); have_acl = B_FALSE; /* * A directory entry already exists for this name. */ /* * Can't truncate an existing file if in exclusive mode. */ if (excl) { error = SET_ERROR(EEXIST); goto out; } /* * Can't open a directory for writing. */ if (S_ISDIR(ZTOI(zp)->i_mode)) { error = SET_ERROR(EISDIR); goto out; } /* * Verify requested access to file. */ if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { goto out; } mutex_enter(&dzp->z_lock); dzp->z_seq++; mutex_exit(&dzp->z_lock); /* * Truncate regular files if requested. */ if (S_ISREG(ZTOI(zp)->i_mode) && (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) { /* we can't hold any locks when calling zfs_freesp() */ if (dl) { zfs_dirent_unlock(dl); dl = NULL; } error = zfs_freesp(zp, 0, 0, mode, TRUE); } } out: if (dl) zfs_dirent_unlock(dl); if (error) { if (zp) iput(ZTOI(zp)); } else { zfs_inode_update(dzp); zfs_inode_update(zp); *ipp = ZTOI(zp); } if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_create); +/* ARGSUSED */ +int +zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, + int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp) +{ + znode_t *zp = NULL, *dzp = ITOZ(dip); + zfs_sb_t *zsb = ITOZSB(dip); + objset_t *os; + dmu_tx_t *tx; + int error; + uid_t uid; + gid_t gid; + zfs_acl_ids_t acl_ids; + boolean_t fuid_dirtied; + boolean_t have_acl = B_FALSE; + boolean_t waited = B_FALSE; + + /* + * If we have an ephemeral id, ACL, or XVATTR then + * make sure file system is at proper version + */ + + gid = crgetgid(cr); + uid = crgetuid(cr); + + if (zsb->z_use_fuids == B_FALSE && + (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) + return (SET_ERROR(EINVAL)); + + ZFS_ENTER(zsb); + ZFS_VERIFY_ZP(dzp); + os = zsb->z_os; + + if (vap->va_mask & ATTR_XVATTR) { + if ((error = secpolicy_xvattr((xvattr_t *)vap, + crgetuid(cr), cr, vap->va_mode)) != 0) { + ZFS_EXIT(zsb); + return (error); + } + } + +top: + *ipp = NULL; + + /* + * Create a new file object and update the directory + * to reference it. + */ + if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { + if (have_acl) + zfs_acl_ids_free(&acl_ids); + goto out; + } + + if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, + cr, vsecp, &acl_ids)) != 0) + goto out; + have_acl = B_TRUE; + + if (zfs_acl_ids_overquota(zsb, &acl_ids)) { + zfs_acl_ids_free(&acl_ids); + error = SET_ERROR(EDQUOT); + goto out; + } + + tx = dmu_tx_create(os); + + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); + + fuid_dirtied = zsb->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zsb, tx); + if (!zsb->z_use_sa && + acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, acl_ids.z_aclp->z_acl_bytes); + } + error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); + if (error) { + if (error == ERESTART) { + waited = B_TRUE; + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + zfs_acl_ids_free(&acl_ids); + dmu_tx_abort(tx); + ZFS_EXIT(zsb); + return (error); + } + zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids); + + if (fuid_dirtied) + zfs_fuid_sync(zsb, tx); + + /* Add to unlinked set */ + zp->z_unlinked = 1; + zfs_unlinked_add(zp, tx); + zfs_acl_ids_free(&acl_ids); + dmu_tx_commit(tx); +out: + + if (error) { + if (zp) + iput(ZTOI(zp)); + } else { + zfs_inode_update(dzp); + zfs_inode_update(zp); + *ipp = ZTOI(zp); + } + + ZFS_EXIT(zsb); + return (error); +} + /* * Remove an entry from a directory. * * IN: dip - inode of directory to remove entry from. * name - name of entry to remove. * cr - credentials of caller. * * RETURN: 0 if success * error code if failure * * Timestamps: * dip - ctime|mtime * ip - ctime (if nlink > 0) */ uint64_t null_xattr = 0; /*ARGSUSED*/ int zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) { znode_t *zp, *dzp = ITOZ(dip); znode_t *xzp; struct inode *ip; zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; uint64_t acl_obj, xattr_obj; uint64_t xattr_obj_unlinked = 0; uint64_t obj = 0; uint64_t links; zfs_dirlock_t *dl; dmu_tx_t *tx; boolean_t may_delete_now, delete_now = FALSE; boolean_t unlinked, toobig = FALSE; uint64_t txtype; pathname_t *realnmp = NULL; pathname_t realnm; int error; int zflg = ZEXISTS; boolean_t waited = B_FALSE; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); zilog = zsb->z_log; if (flags & FIGNORECASE) { zflg |= ZCILOOK; pn_alloc(&realnm); realnmp = &realnm; } top: xattr_obj = 0; xzp = NULL; /* * Attempt to lock directory; fail if entry doesn't exist. */ if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, realnmp))) { if (realnmp) pn_free(realnmp); ZFS_EXIT(zsb); return (error); } ip = ZTOI(zp); if ((error = zfs_zaccess_delete(dzp, zp, cr))) { goto out; } /* * Need to use rmdir for removing directories. */ if (S_ISDIR(ip->i_mode)) { error = SET_ERROR(EPERM); goto out; } #ifdef HAVE_DNLC if (realnmp) dnlc_remove(dvp, realnmp->pn_buf); else dnlc_remove(dvp, name); #endif /* HAVE_DNLC */ mutex_enter(&zp->z_lock); may_delete_now = atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped); mutex_exit(&zp->z_lock); /* * We may delete the znode now, or we may put it in the unlinked set; * it depends on whether we're the last link, and on whether there are * other holds on the inode. So we dmu_tx_hold() the right things to * allow for either case. */ obj = zp->z_id; tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, dzp); if (may_delete_now) { toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks; /* if the file is too big, only hold_free a token amount */ dmu_tx_hold_free(tx, zp->z_id, 0, (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); } /* are there any extended attributes? */ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { error = zfs_zget(zsb, xattr_obj, &xzp); ASSERT0(error); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); } mutex_enter(&zp->z_lock); if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); mutex_exit(&zp->z_lock); /* charge as an update -- would be nice not to charge at all */ dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); /* * Mark this transaction as typically resulting in a net free of space */ dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); iput(ip); if (xzp) iput(ZTOI(xzp)); if (error == ERESTART) { waited = B_TRUE; dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } if (realnmp) pn_free(realnmp); dmu_tx_abort(tx); ZFS_EXIT(zsb); return (error); } /* * Remove the directory entry. */ error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); if (error) { dmu_tx_commit(tx); goto out; } if (unlinked) { /* * Hold z_lock so that we can make sure that the ACL obj * hasn't changed. Could have been deleted due to * zfs_sa_upgrade(). */ mutex_enter(&zp->z_lock); (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); delete_now = may_delete_now && !toobig && atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == acl_obj; } if (delete_now) { if (xattr_obj_unlinked) { ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2); mutex_enter(&xzp->z_lock); xzp->z_unlinked = 1; clear_nlink(ZTOI(xzp)); links = 0; error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb), &links, sizeof (links), tx); ASSERT3U(error, ==, 0); mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); if (zp->z_is_sa) error = sa_remove(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), tx); else error = sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &null_xattr, sizeof (uint64_t), tx); ASSERT0(error); } /* * Add to the unlinked set because a new reference could be * taken concurrently resulting in a deferred destruction. */ zfs_unlinked_add(zp, tx); mutex_exit(&zp->z_lock); zfs_inode_update(zp); iput(ip); } else if (unlinked) { mutex_exit(&zp->z_lock); zfs_unlinked_add(zp, tx); } txtype = TX_REMOVE; if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_remove(zilog, tx, txtype, dzp, name, obj); dmu_tx_commit(tx); out: if (realnmp) pn_free(realnmp); zfs_dirent_unlock(dl); zfs_inode_update(dzp); if (!delete_now) { zfs_inode_update(zp); zfs_iput_async(ip); } if (xzp) { zfs_inode_update(xzp); zfs_iput_async(ZTOI(xzp)); } if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_remove); /* * Create a new directory and insert it into dip using the name * provided. Return a pointer to the inserted directory. * * IN: dip - inode of directory to add subdir to. * dirname - name of new directory. * vap - attributes of new directory. * cr - credentials of caller. * vsecp - ACL to be set * * OUT: ipp - inode of created directory. * * RETURN: 0 if success * error code if failure * * Timestamps: * dip - ctime|mtime updated * ipp - ctime|mtime|atime updated */ /*ARGSUSED*/ int zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp, cred_t *cr, int flags, vsecattr_t *vsecp) { znode_t *zp, *dzp = ITOZ(dip); zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; zfs_dirlock_t *dl; uint64_t txtype; dmu_tx_t *tx; int error; int zf = ZNEW; uid_t uid; gid_t gid = crgetgid(cr); zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; boolean_t waited = B_FALSE; ASSERT(S_ISDIR(vap->va_mode)); /* * If we have an ephemeral id, ACL, or XVATTR then * make sure file system is at proper version */ uid = crgetuid(cr); if (zsb->z_use_fuids == B_FALSE && (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) return (SET_ERROR(EINVAL)); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); zilog = zsb->z_log; if (dzp->z_pflags & ZFS_XATTR) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } if (zsb->z_utf8 && u8_validate(dirname, strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zsb); return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zf |= ZCILOOK; if (vap->va_mask & ATTR_XVATTR) { if ((error = secpolicy_xvattr((xvattr_t *)vap, crgetuid(cr), cr, vap->va_mode)) != 0) { ZFS_EXIT(zsb); return (error); } } if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, &acl_ids)) != 0) { ZFS_EXIT(zsb); return (error); } /* * First make sure the new directory doesn't exist. * * Existence is checked first to make sure we don't return * EACCES instead of EEXIST which can cause some applications * to fail. */ top: *ipp = NULL; if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, NULL, NULL))) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zsb); return (error); } if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zsb); return (error); } if (zfs_acl_ids_overquota(zsb, &acl_ids)) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zsb); return (SET_ERROR(EDQUOT)); } /* * Add a new entry to the directory. */ tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); fuid_dirtied = zsb->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zsb, tx); if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE); error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); if (error == ERESTART) { waited = B_TRUE; dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); ZFS_EXIT(zsb); return (error); } /* * Create new node. */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) zfs_fuid_sync(zsb, tx); /* * Now put new name in parent dir. */ (void) zfs_link_create(dl, zp, tx, ZNEW); *ipp = ZTOI(zp); txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); zfs_dirent_unlock(dl); if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); zfs_inode_update(dzp); zfs_inode_update(zp); ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_mkdir); /* * Remove a directory subdir entry. If the current working * directory is the same as the subdir to be removed, the * remove will fail. * * IN: dip - inode of directory to remove from. * name - name of directory to be removed. * cwd - inode of current working directory. * cr - credentials of caller. * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * dip - ctime|mtime updated */ /*ARGSUSED*/ int zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, int flags) { znode_t *dzp = ITOZ(dip); znode_t *zp; struct inode *ip; zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; zfs_dirlock_t *dl; dmu_tx_t *tx; int error; int zflg = ZEXISTS; boolean_t waited = B_FALSE; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); zilog = zsb->z_log; if (flags & FIGNORECASE) zflg |= ZCILOOK; top: zp = NULL; /* * Attempt to lock directory; fail if entry doesn't exist. */ if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL))) { ZFS_EXIT(zsb); return (error); } ip = ZTOI(zp); if ((error = zfs_zaccess_delete(dzp, zp, cr))) { goto out; } if (!S_ISDIR(ip->i_mode)) { error = SET_ERROR(ENOTDIR); goto out; } if (ip == cwd) { error = SET_ERROR(EINVAL); goto out; } /* * Grab a lock on the directory to make sure that noone is * trying to add (or lookup) entries while we are removing it. */ rw_enter(&zp->z_name_lock, RW_WRITER); /* * Grab a lock on the parent pointer to make sure we play well * with the treewalk and directory rename code. */ rw_enter(&zp->z_parent_lock, RW_WRITER); tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, dzp); dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); if (error) { rw_exit(&zp->z_parent_lock); rw_exit(&zp->z_name_lock); zfs_dirent_unlock(dl); iput(ip); if (error == ERESTART) { waited = B_TRUE; dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); ZFS_EXIT(zsb); return (error); } error = zfs_link_destroy(dl, zp, tx, zflg, NULL); if (error == 0) { uint64_t txtype = TX_RMDIR; if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); } dmu_tx_commit(tx); rw_exit(&zp->z_parent_lock); rw_exit(&zp->z_name_lock); out: zfs_dirent_unlock(dl); zfs_inode_update(dzp); zfs_inode_update(zp); iput(ip); if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_rmdir); /* * Read as many directory entries as will fit into the provided * dirent buffer from the given directory cursor position. * * IN: ip - inode of directory to read. * dirent - buffer for directory entries. * * OUT: dirent - filler buffer of directory entries. * * RETURN: 0 if success * error code if failure * * Timestamps: * ip - atime updated * * Note that the low 4 bits of the cookie returned by zap is always zero. * This allows us to use the low range for "special" directory entries: * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, * we use the offset 2 for the '.zfs' directory. */ /* ARGSUSED */ int zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); objset_t *os; zap_cursor_t zc; zap_attribute_t zap; int error; uint8_t prefetch; uint8_t type; int done = 0; uint64_t parent; uint64_t offset; /* must be unsigned; checks for < 1 */ ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zsb), &parent, sizeof (parent))) != 0) goto out; /* * Quit if directory has been removed (posix) */ if (zp->z_unlinked) goto out; error = 0; os = zsb->z_os; offset = ctx->pos; prefetch = zp->z_zn_prefetch; /* * Initialize the iterator cursor. */ if (offset <= 3) { /* * Start iteration from the beginning of the directory. */ zap_cursor_init(&zc, os, zp->z_id); } else { /* * The offset is a serialized cursor. */ zap_cursor_init_serialized(&zc, os, zp->z_id, offset); } /* * Transform to file-system independent format */ while (!done) { uint64_t objnum; /* * Special case `.', `..', and `.zfs'. */ if (offset == 0) { (void) strcpy(zap.za_name, "."); zap.za_normalization_conflict = 0; objnum = zp->z_id; type = DT_DIR; } else if (offset == 1) { (void) strcpy(zap.za_name, ".."); zap.za_normalization_conflict = 0; objnum = parent; type = DT_DIR; } else if (offset == 2 && zfs_show_ctldir(zp)) { (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); zap.za_normalization_conflict = 0; objnum = ZFSCTL_INO_ROOT; type = DT_DIR; } else { /* * Grab next entry. */ if ((error = zap_cursor_retrieve(&zc, &zap))) { if (error == ENOENT) break; else goto update; } /* * Allow multiple entries provided the first entry is * the object id. Non-zpl consumers may safely make * use of the additional space. * * XXX: This should be a feature flag for compatibility */ if (zap.za_integer_length != 8 || zap.za_num_integers == 0) { cmn_err(CE_WARN, "zap_readdir: bad directory " "entry, obj = %lld, offset = %lld, " "length = %d, num = %lld\n", (u_longlong_t)zp->z_id, (u_longlong_t)offset, zap.za_integer_length, (u_longlong_t)zap.za_num_integers); error = SET_ERROR(ENXIO); goto update; } objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); type = ZFS_DIRENT_TYPE(zap.za_first_integer); } done = !dir_emit(ctx, zap.za_name, strlen(zap.za_name), objnum, type); if (done) break; /* Prefetch znode */ if (prefetch) { dmu_prefetch(os, objnum, 0, 0, 0, ZIO_PRIORITY_SYNC_READ); } /* * Move to the next entry, fill in the previous offset. */ if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { zap_cursor_advance(&zc); offset = zap_cursor_serialize(&zc); } else { offset += 1; } ctx->pos = offset; } zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ update: zap_cursor_fini(&zc); if (error == ENOENT) error = 0; out: ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_readdir); ulong_t zfs_fsync_sync_cnt = 4; int zfs_fsync(struct inode *ip, int syncflag, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); if (zsb->z_os->os_sync != ZFS_SYNC_DISABLED) { ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); zil_commit(zsb->z_log, zp->z_id); ZFS_EXIT(zsb); } tsd_set(zfs_fsyncer_key, NULL); return (0); } EXPORT_SYMBOL(zfs_fsync); /* * Get the requested file attributes and place them in the provided * vattr structure. * * IN: ip - inode of file. * vap - va_mask identifies requested attributes. * If ATTR_XVATTR set, then optional attrs are requested * flags - ATTR_NOACLCHECK (CIFS server context) * cr - credentials of caller. * * OUT: vap - attribute values. * * RETURN: 0 (always succeeds) */ /* ARGSUSED */ int zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); int error = 0; uint64_t links; uint64_t atime[2], mtime[2], ctime[2]; xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ xoptattr_t *xoap = NULL; boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; sa_bulk_attr_t bulk[3]; int count = 0; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { ZFS_EXIT(zsb); return (error); } /* * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. * Also, if we are the owner don't bother, since owner should * always be allowed to read basic attributes of file. */ if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && (vap->va_uid != crgetuid(cr))) { if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, skipaclchk, cr))) { ZFS_EXIT(zsb); return (error); } } /* * Return all attributes. It's cheaper to provide the answer * than to determine whether we were asked the question. */ mutex_enter(&zp->z_lock); vap->va_type = vn_mode_to_vtype(zp->z_mode); vap->va_mode = zp->z_mode; vap->va_fsid = ZTOI(zp)->i_sb->s_dev; vap->va_nodeid = zp->z_id; if ((zp->z_id == zsb->z_root) && zfs_show_ctldir(zp)) links = ZTOI(zp)->i_nlink + 1; else links = ZTOI(zp)->i_nlink; vap->va_nlink = MIN(links, ZFS_LINK_MAX); vap->va_size = i_size_read(ip); vap->va_rdev = ip->i_rdev; vap->va_seq = ip->i_generation; /* * Add in any requested optional attributes and the create time. * Also set the corresponding bits in the returned attribute bitmap. */ if ((xoap = xva_getxoptattr(xvap)) != NULL && zsb->z_use_fuids) { if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { xoap->xoa_archive = ((zp->z_pflags & ZFS_ARCHIVE) != 0); XVA_SET_RTN(xvap, XAT_ARCHIVE); } if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { xoap->xoa_readonly = ((zp->z_pflags & ZFS_READONLY) != 0); XVA_SET_RTN(xvap, XAT_READONLY); } if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { xoap->xoa_system = ((zp->z_pflags & ZFS_SYSTEM) != 0); XVA_SET_RTN(xvap, XAT_SYSTEM); } if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { xoap->xoa_hidden = ((zp->z_pflags & ZFS_HIDDEN) != 0); XVA_SET_RTN(xvap, XAT_HIDDEN); } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { xoap->xoa_nounlink = ((zp->z_pflags & ZFS_NOUNLINK) != 0); XVA_SET_RTN(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { xoap->xoa_immutable = ((zp->z_pflags & ZFS_IMMUTABLE) != 0); XVA_SET_RTN(xvap, XAT_IMMUTABLE); } if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { xoap->xoa_appendonly = ((zp->z_pflags & ZFS_APPENDONLY) != 0); XVA_SET_RTN(xvap, XAT_APPENDONLY); } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { xoap->xoa_nodump = ((zp->z_pflags & ZFS_NODUMP) != 0); XVA_SET_RTN(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { xoap->xoa_opaque = ((zp->z_pflags & ZFS_OPAQUE) != 0); XVA_SET_RTN(xvap, XAT_OPAQUE); } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { xoap->xoa_av_quarantined = ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { xoap->xoa_av_modified = ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); XVA_SET_RTN(xvap, XAT_AV_MODIFIED); } if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && S_ISREG(ip->i_mode)) { zfs_sa_get_scanstamp(zp, xvap); } if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { uint64_t times[2]; (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zsb), times, sizeof (times)); ZFS_TIME_DECODE(&xoap->xoa_createtime, times); XVA_SET_RTN(xvap, XAT_CREATETIME); } if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); XVA_SET_RTN(xvap, XAT_REPARSE); } if (XVA_ISSET_REQ(xvap, XAT_GEN)) { xoap->xoa_generation = ip->i_generation; XVA_SET_RTN(xvap, XAT_GEN); } if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { xoap->xoa_offline = ((zp->z_pflags & ZFS_OFFLINE) != 0); XVA_SET_RTN(xvap, XAT_OFFLINE); } if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { xoap->xoa_sparse = ((zp->z_pflags & ZFS_SPARSE) != 0); XVA_SET_RTN(xvap, XAT_SPARSE); } } ZFS_TIME_DECODE(&vap->va_atime, atime); ZFS_TIME_DECODE(&vap->va_mtime, mtime); ZFS_TIME_DECODE(&vap->va_ctime, ctime); mutex_exit(&zp->z_lock); sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks); if (zp->z_blksz == 0) { /* * Block size hasn't been set; suggest maximal I/O transfers. */ vap->va_blksize = zsb->z_max_blksz; } ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_getattr); /* * Get the basic file attributes and place them in the provided kstat * structure. The inode is assumed to be the authoritative source * for most of the attributes. However, the znode currently has the * authoritative atime, blksize, and block count. * * IN: ip - inode of file. * * OUT: sp - kstat values. * * RETURN: 0 (always succeeds) */ /* ARGSUSED */ int zfs_getattr_fast(struct inode *ip, struct kstat *sp) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); uint32_t blksize; u_longlong_t nblocks; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); mutex_enter(&zp->z_lock); generic_fillattr(ip, sp); sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); sp->blksize = blksize; sp->blocks = nblocks; if (unlikely(zp->z_blksz == 0)) { /* * Block size hasn't been set; suggest maximal I/O transfers. */ sp->blksize = zsb->z_max_blksz; } mutex_exit(&zp->z_lock); /* * Required to prevent NFS client from detecting different inode * numbers of snapshot root dentry before and after snapshot mount. */ if (zsb->z_issnap) { if (ip->i_sb->s_root->d_inode == ip) sp->ino = ZFSCTL_INO_SNAPDIRS - dmu_objset_id(zsb->z_os); } ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_getattr_fast); /* * Set the file attributes to the values contained in the * vattr structure. * * IN: ip - inode of file to be modified. * vap - new attribute values. * If ATTR_XVATTR set, then optional attrs are being set * flags - ATTR_UTIME set if non-default time values provided. * - ATTR_NOACLCHECK (CIFS context only). * cr - credentials of caller. * * RETURN: 0 if success * error code if failure * * Timestamps: * ip - ctime updated, mtime updated if size changed. */ /* ARGSUSED */ int zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); zilog_t *zilog; dmu_tx_t *tx; vattr_t oldva; xvattr_t *tmpxvattr; uint_t mask = vap->va_mask; uint_t saved_mask = 0; int trim_mask = 0; uint64_t new_mode; uint64_t new_kuid = 0, new_kgid = 0, new_uid, new_gid; uint64_t xattr_obj; uint64_t mtime[2], ctime[2], atime[2]; znode_t *attrzp; int need_policy = FALSE; int err, err2; zfs_fuid_info_t *fuidp = NULL; xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ xoptattr_t *xoap; zfs_acl_t *aclp; boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; boolean_t fuid_dirtied = B_FALSE; sa_bulk_attr_t *bulk, *xattr_bulk; int count = 0, xattr_count = 0; if (mask == 0) return (0); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); zilog = zsb->z_log; /* * Make sure that if we have ephemeral uid/gid or xvattr specified * that file system is at proper version level */ if (zsb->z_use_fuids == B_FALSE && (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) || ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) || (mask & ATTR_XVATTR))) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) { ZFS_EXIT(zsb); return (SET_ERROR(EISDIR)); } if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } /* * If this is an xvattr_t, then get a pointer to the structure of * optional attributes. If this is NULL, then we have a vattr_t. */ xoap = xva_getxoptattr(xvap); tmpxvattr = kmem_alloc(sizeof (xvattr_t), KM_SLEEP); xva_init(tmpxvattr); bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * 7, KM_SLEEP); xattr_bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * 7, KM_SLEEP); /* * Immutable files can only alter immutable bit and atime */ if ((zp->z_pflags & ZFS_IMMUTABLE) && ((mask & (ATTR_SIZE|ATTR_UID|ATTR_GID|ATTR_MTIME|ATTR_MODE)) || ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { err = EPERM; goto out3; } if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) { err = EPERM; goto out3; } /* * Verify timestamps doesn't overflow 32 bits. * ZFS can handle large timestamps, but 32bit syscalls can't * handle times greater than 2039. This check should be removed * once large timestamps are fully supported. */ if (mask & (ATTR_ATIME | ATTR_MTIME)) { if (((mask & ATTR_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || ((mask & ATTR_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { err = EOVERFLOW; goto out3; } } top: attrzp = NULL; aclp = NULL; /* Can this be moved to before the top label? */ if (zfs_is_readonly(zsb)) { err = EROFS; goto out3; } /* * First validate permissions */ if (mask & ATTR_SIZE) { err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); if (err) goto out3; /* * XXX - Note, we are not providing any open * mode flags here (like FNDELAY), so we may * block if there are locks present... this * should be addressed in openat(). */ /* XXX - would it be OK to generate a log record here? */ err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); if (err) goto out3; } if (mask & (ATTR_ATIME|ATTR_MTIME) || ((mask & ATTR_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || XVA_ISSET_REQ(xvap, XAT_READONLY) || XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || XVA_ISSET_REQ(xvap, XAT_OFFLINE) || XVA_ISSET_REQ(xvap, XAT_SPARSE) || XVA_ISSET_REQ(xvap, XAT_CREATETIME) || XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, skipaclchk, cr); } if (mask & (ATTR_UID|ATTR_GID)) { int idmask = (mask & (ATTR_UID|ATTR_GID)); int take_owner; int take_group; /* * NOTE: even if a new mode is being set, * we may clear S_ISUID/S_ISGID bits. */ if (!(mask & ATTR_MODE)) vap->va_mode = zp->z_mode; /* * Take ownership or chgrp to group we are a member of */ take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr)); take_group = (mask & ATTR_GID) && zfs_groupmember(zsb, vap->va_gid, cr); /* * If both ATTR_UID and ATTR_GID are set then take_owner and * take_group must both be set in order to allow taking * ownership. * * Otherwise, send the check through secpolicy_vnode_setattr() * */ if (((idmask == (ATTR_UID|ATTR_GID)) && take_owner && take_group) || ((idmask == ATTR_UID) && take_owner) || ((idmask == ATTR_GID) && take_group)) { if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, skipaclchk, cr) == 0) { /* * Remove setuid/setgid for non-privileged users */ (void) secpolicy_setid_clear(vap, cr); trim_mask = (mask & (ATTR_UID|ATTR_GID)); } else { need_policy = TRUE; } } else { need_policy = TRUE; } } mutex_enter(&zp->z_lock); oldva.va_mode = zp->z_mode; zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); if (mask & ATTR_XVATTR) { /* * Update xvattr mask to include only those attributes * that are actually changing. * * the bits will be restored prior to actually setting * the attributes so the caller thinks they were set. */ if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { if (xoap->xoa_appendonly != ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_APPENDONLY); XVA_SET_REQ(tmpxvattr, XAT_APPENDONLY); } } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { if (xoap->xoa_nounlink != ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_NOUNLINK); XVA_SET_REQ(tmpxvattr, XAT_NOUNLINK); } } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { if (xoap->xoa_immutable != ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_IMMUTABLE); XVA_SET_REQ(tmpxvattr, XAT_IMMUTABLE); } } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { if (xoap->xoa_nodump != ((zp->z_pflags & ZFS_NODUMP) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_NODUMP); XVA_SET_REQ(tmpxvattr, XAT_NODUMP); } } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { if (xoap->xoa_av_modified != ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); XVA_SET_REQ(tmpxvattr, XAT_AV_MODIFIED); } } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { if ((!S_ISREG(ip->i_mode) && xoap->xoa_av_quarantined) || xoap->xoa_av_quarantined != ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { need_policy = TRUE; } else { XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); XVA_SET_REQ(tmpxvattr, XAT_AV_QUARANTINED); } } if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { mutex_exit(&zp->z_lock); err = EPERM; goto out3; } if (need_policy == FALSE && (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { need_policy = TRUE; } } mutex_exit(&zp->z_lock); if (mask & ATTR_MODE) { if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { err = secpolicy_setid_setsticky_clear(ip, vap, &oldva, cr); if (err) goto out3; trim_mask |= ATTR_MODE; } else { need_policy = TRUE; } } if (need_policy) { /* * If trim_mask is set then take ownership * has been granted or write_acl is present and user * has the ability to modify mode. In that case remove * UID|GID and or MODE from mask so that * secpolicy_vnode_setattr() doesn't revoke it. */ if (trim_mask) { saved_mask = vap->va_mask; vap->va_mask &= ~trim_mask; } err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags, (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); if (err) goto out3; if (trim_mask) vap->va_mask |= saved_mask; } /* * secpolicy_vnode_setattr, or take ownership may have * changed va_mask */ mask = vap->va_mask; if ((mask & (ATTR_UID | ATTR_GID))) { err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xattr_obj, sizeof (xattr_obj)); if (err == 0 && xattr_obj) { err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp); if (err) goto out2; } if (mask & ATTR_UID) { new_kuid = zfs_fuid_create(zsb, (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); if (new_kuid != KUID_TO_SUID(ZTOI(zp)->i_uid) && zfs_fuid_overquota(zsb, B_FALSE, new_kuid)) { if (attrzp) iput(ZTOI(attrzp)); err = EDQUOT; goto out2; } } if (mask & ATTR_GID) { new_kgid = zfs_fuid_create(zsb, (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp); if (new_kgid != KGID_TO_SGID(ZTOI(zp)->i_gid) && zfs_fuid_overquota(zsb, B_TRUE, new_kgid)) { if (attrzp) iput(ZTOI(attrzp)); err = EDQUOT; goto out2; } } } tx = dmu_tx_create(zsb->z_os); if (mask & ATTR_MODE) { uint64_t pmode = zp->z_mode; uint64_t acl_obj; new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); zfs_acl_chmod_setattr(zp, &aclp, new_mode); mutex_enter(&zp->z_lock); if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { /* * Are we upgrading ACL from old V0 format * to V1 format? */ if (zsb->z_version >= ZPL_VERSION_FUID && zfs_znode_acl_version(zp) == ZFS_ACL_VERSION_INITIAL) { dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); } else { dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes); } } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); } mutex_exit(&zp->z_lock); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); } else { if ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); else dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); } if (attrzp) { dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); } fuid_dirtied = zsb->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zsb, tx); zfs_sa_upgrade_txholds(tx, zp); err = dmu_tx_assign(tx, TXG_WAIT); if (err) goto out; count = 0; /* * Set each attribute requested. * We group settings according to the locks they need to acquire. * * Note: you cannot set ctime directly, although it will be * updated as a side-effect of calling this function. */ if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) mutex_enter(&zp->z_acl_lock); mutex_enter(&zp->z_lock); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); if (attrzp) { if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) mutex_enter(&attrzp->z_acl_lock); mutex_enter(&attrzp->z_lock); SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_FLAGS(zsb), NULL, &attrzp->z_pflags, sizeof (attrzp->z_pflags)); } if (mask & (ATTR_UID|ATTR_GID)) { if (mask & ATTR_UID) { ZTOI(zp)->i_uid = SUID_TO_KUID(new_kuid); new_uid = zfs_uid_read(ZTOI(zp)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &new_uid, sizeof (new_uid)); if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_UID(zsb), NULL, &new_uid, sizeof (new_uid)); ZTOI(attrzp)->i_uid = SUID_TO_KUID(new_uid); } } if (mask & ATTR_GID) { ZTOI(zp)->i_gid = SGID_TO_KGID(new_kgid); new_gid = zfs_gid_read(ZTOI(zp)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &new_gid, sizeof (new_gid)); if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_GID(zsb), NULL, &new_gid, sizeof (new_gid)); ZTOI(attrzp)->i_gid = SGID_TO_KGID(new_kgid); } } if (!(mask & ATTR_MODE)) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &new_mode, sizeof (new_mode)); new_mode = zp->z_mode; } err = zfs_acl_chown_setattr(zp); ASSERT(err == 0); if (attrzp) { err = zfs_acl_chown_setattr(attrzp); ASSERT(err == 0); } } if (mask & ATTR_MODE) { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &new_mode, sizeof (new_mode)); zp->z_mode = ZTOI(zp)->i_mode = new_mode; ASSERT3P(aclp, !=, NULL); err = zfs_aclset_common(zp, aclp, cr, tx); ASSERT0(err); if (zp->z_acl_cached) zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = aclp; aclp = NULL; } if ((mask & ATTR_ATIME) || zp->z_atime_dirty) { zp->z_atime_dirty = 0; ZFS_TIME_ENCODE(&ip->i_atime, atime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &atime, sizeof (atime)); } if (mask & ATTR_MTIME) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); ZTOI(zp)->i_mtime = timespec_trunc(vap->va_mtime, ZTOI(zp)->i_sb->s_time_gran); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, sizeof (mtime)); } if (mask & ATTR_CTIME) { ZFS_TIME_ENCODE(&vap->va_ctime, ctime); ZTOI(zp)->i_ctime = timespec_trunc(vap->va_ctime, ZTOI(zp)->i_sb->s_time_gran); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, sizeof (ctime)); } if (attrzp && mask) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, SA_ZPL_CTIME(zsb), NULL, &ctime, sizeof (ctime)); } /* * Do this after setting timestamps to prevent timestamp * update from toggling bit */ if (xoap && (mask & ATTR_XVATTR)) { /* * restore trimmed off masks * so that return masks can be set for caller. */ if (XVA_ISSET_REQ(tmpxvattr, XAT_APPENDONLY)) { XVA_SET_REQ(xvap, XAT_APPENDONLY); } if (XVA_ISSET_REQ(tmpxvattr, XAT_NOUNLINK)) { XVA_SET_REQ(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(tmpxvattr, XAT_IMMUTABLE)) { XVA_SET_REQ(xvap, XAT_IMMUTABLE); } if (XVA_ISSET_REQ(tmpxvattr, XAT_NODUMP)) { XVA_SET_REQ(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_MODIFIED)) { XVA_SET_REQ(xvap, XAT_AV_MODIFIED); } if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_QUARANTINED)) { XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ASSERT(S_ISREG(ip->i_mode)); zfs_xvattr_set(zp, xvap, tx); } if (fuid_dirtied) zfs_fuid_sync(zsb, tx); if (mask != 0) zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); mutex_exit(&zp->z_lock); if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) mutex_exit(&zp->z_acl_lock); if (attrzp) { if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) mutex_exit(&attrzp->z_acl_lock); mutex_exit(&attrzp->z_lock); } out: if (err == 0 && attrzp) { err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, xattr_count, tx); ASSERT(err2 == 0); } if (attrzp) iput(ZTOI(attrzp)); if (aclp) zfs_acl_free(aclp); if (fuidp) { zfs_fuid_info_free(fuidp); fuidp = NULL; } if (err) { dmu_tx_abort(tx); if (err == ERESTART) goto top; } else { err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); dmu_tx_commit(tx); zfs_inode_update(zp); } out2: if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); out3: kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * 7); kmem_free(bulk, sizeof (sa_bulk_attr_t) * 7); kmem_free(tmpxvattr, sizeof (xvattr_t)); ZFS_EXIT(zsb); return (err); } EXPORT_SYMBOL(zfs_setattr); typedef struct zfs_zlock { krwlock_t *zl_rwlock; /* lock we acquired */ znode_t *zl_znode; /* znode we held */ struct zfs_zlock *zl_next; /* next in list */ } zfs_zlock_t; /* * Drop locks and release vnodes that were held by zfs_rename_lock(). */ static void zfs_rename_unlock(zfs_zlock_t **zlpp) { zfs_zlock_t *zl; while ((zl = *zlpp) != NULL) { if (zl->zl_znode != NULL) iput(ZTOI(zl->zl_znode)); rw_exit(zl->zl_rwlock); *zlpp = zl->zl_next; kmem_free(zl, sizeof (*zl)); } } /* * Search back through the directory tree, using the ".." entries. * Lock each directory in the chain to prevent concurrent renames. * Fail any attempt to move a directory into one of its own descendants. * XXX - z_parent_lock can overlap with map or grow locks */ static int zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) { zfs_zlock_t *zl; znode_t *zp = tdzp; uint64_t rootid = ZTOZSB(zp)->z_root; uint64_t oidp = zp->z_id; krwlock_t *rwlp = &szp->z_parent_lock; krw_t rw = RW_WRITER; /* * First pass write-locks szp and compares to zp->z_id. * Later passes read-lock zp and compare to zp->z_parent. */ do { if (!rw_tryenter(rwlp, rw)) { /* * Another thread is renaming in this path. * Note that if we are a WRITER, we don't have any * parent_locks held yet. */ if (rw == RW_READER && zp->z_id > szp->z_id) { /* * Drop our locks and restart */ zfs_rename_unlock(&zl); *zlpp = NULL; zp = tdzp; oidp = zp->z_id; rwlp = &szp->z_parent_lock; rw = RW_WRITER; continue; } else { /* * Wait for other thread to drop its locks */ rw_enter(rwlp, rw); } } zl = kmem_alloc(sizeof (*zl), KM_SLEEP); zl->zl_rwlock = rwlp; zl->zl_znode = NULL; zl->zl_next = *zlpp; *zlpp = zl; if (oidp == szp->z_id) /* We're a descendant of szp */ return (SET_ERROR(EINVAL)); if (oidp == rootid) /* We've hit the top */ return (0); if (rw == RW_READER) { /* i.e. not the first pass */ int error = zfs_zget(ZTOZSB(zp), oidp, &zp); if (error) return (error); zl->zl_znode = zp; } (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(zp)), &oidp, sizeof (oidp)); rwlp = &zp->z_parent_lock; rw = RW_READER; } while (zp->z_id != sdzp->z_id); return (0); } /* * Move an entry from the provided source directory to the target * directory. Change the entry name as indicated. * * IN: sdip - Source directory containing the "old entry". * snm - Old entry name. * tdip - Target directory to contain the "new entry". * tnm - New entry name. * cr - credentials of caller. * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * sdip,tdip - ctime|mtime updated */ /*ARGSUSED*/ int zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, cred_t *cr, int flags) { znode_t *tdzp, *szp, *tzp; znode_t *sdzp = ITOZ(sdip); zfs_sb_t *zsb = ITOZSB(sdip); zilog_t *zilog; zfs_dirlock_t *sdl, *tdl; dmu_tx_t *tx; zfs_zlock_t *zl; int cmp, serr, terr; int error = 0; int zflg = 0; boolean_t waited = B_FALSE; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(sdzp); zilog = zsb->z_log; tdzp = ITOZ(tdip); ZFS_VERIFY_ZP(tdzp); /* * We check i_sb because snapshots and the ctldir must have different * super blocks. */ if (tdip->i_sb != sdip->i_sb || zfsctl_is_node(tdip)) { ZFS_EXIT(zsb); return (SET_ERROR(EXDEV)); } if (zsb->z_utf8 && u8_validate(tnm, strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zsb); return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zflg |= ZCILOOK; top: szp = NULL; tzp = NULL; zl = NULL; /* * This is to prevent the creation of links into attribute space * by renaming a linked file into/outof an attribute directory. * See the comment in zfs_link() for why this is considered bad. */ if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } /* * Lock source and target directory entries. To prevent deadlock, * a lock ordering must be defined. We lock the directory with * the smallest object id first, or if it's a tie, the one with * the lexically first name. */ if (sdzp->z_id < tdzp->z_id) { cmp = -1; } else if (sdzp->z_id > tdzp->z_id) { cmp = 1; } else { /* * First compare the two name arguments without * considering any case folding. */ int nofold = (zsb->z_norm & ~U8_TEXTPREP_TOUPPER); cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); ASSERT(error == 0 || !zsb->z_utf8); if (cmp == 0) { /* * POSIX: "If the old argument and the new argument * both refer to links to the same existing file, * the rename() function shall return successfully * and perform no other action." */ ZFS_EXIT(zsb); return (0); } /* * If the file system is case-folding, then we may * have some more checking to do. A case-folding file * system is either supporting mixed case sensitivity * access or is completely case-insensitive. Note * that the file system is always case preserving. * * In mixed sensitivity mode case sensitive behavior * is the default. FIGNORECASE must be used to * explicitly request case insensitive behavior. * * If the source and target names provided differ only * by case (e.g., a request to rename 'tim' to 'Tim'), * we will treat this as a special case in the * case-insensitive mode: as long as the source name * is an exact match, we will allow this to proceed as * a name-change request. */ if ((zsb->z_case == ZFS_CASE_INSENSITIVE || (zsb->z_case == ZFS_CASE_MIXED && flags & FIGNORECASE)) && u8_strcmp(snm, tnm, 0, zsb->z_norm, U8_UNICODE_LATEST, &error) == 0) { /* * case preserving rename request, require exact * name matches */ zflg |= ZCIEXACT; zflg &= ~ZCILOOK; } } /* * If the source and destination directories are the same, we should * grab the z_name_lock of that directory only once. */ if (sdzp == tdzp) { zflg |= ZHAVELOCK; rw_enter(&sdzp->z_name_lock, RW_READER); } if (cmp < 0) { serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, ZEXISTS | zflg, NULL, NULL); terr = zfs_dirent_lock(&tdl, tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); } else { terr = zfs_dirent_lock(&tdl, tdzp, tnm, &tzp, zflg, NULL, NULL); serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, NULL, NULL); } if (serr) { /* * Source entry invalid or not there. */ if (!terr) { zfs_dirent_unlock(tdl); if (tzp) iput(ZTOI(tzp)); } if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); if (strcmp(snm, "..") == 0) serr = EINVAL; ZFS_EXIT(zsb); return (serr); } if (terr) { zfs_dirent_unlock(sdl); iput(ZTOI(szp)); if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); if (strcmp(tnm, "..") == 0) terr = EINVAL; ZFS_EXIT(zsb); return (terr); } /* * Must have write access at the source to remove the old entry * and write access at the target to create the new entry. * Note that if target and source are the same, this can be * done in a single check. */ if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) goto out; if (S_ISDIR(ZTOI(szp)->i_mode)) { /* * Check to make sure rename is valid. * Can't do a move like this: /usr/a/b to /usr/a/b/c/d */ if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl))) goto out; } /* * Does target exist? */ if (tzp) { /* * Source and target must be the same type. */ if (S_ISDIR(ZTOI(szp)->i_mode)) { if (!S_ISDIR(ZTOI(tzp)->i_mode)) { error = SET_ERROR(ENOTDIR); goto out; } } else { if (S_ISDIR(ZTOI(tzp)->i_mode)) { error = SET_ERROR(EISDIR); goto out; } } /* * POSIX dictates that when the source and target * entries refer to the same file object, rename * must do nothing and exit without error. */ if (szp->z_id == tzp->z_id) { error = 0; goto out; } } tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); if (sdzp != tdzp) { dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, tdzp); } if (tzp) { dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, tzp); } zfs_sa_upgrade_txholds(tx, szp); dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); if (error) { if (zl != NULL) zfs_rename_unlock(&zl); zfs_dirent_unlock(sdl); zfs_dirent_unlock(tdl); if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); iput(ZTOI(szp)); if (tzp) iput(ZTOI(tzp)); if (error == ERESTART) { waited = B_TRUE; dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); ZFS_EXIT(zsb); return (error); } if (tzp) /* Attempt to remove the existing target */ error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); if (error == 0) { error = zfs_link_create(tdl, szp, tx, ZRENAMING); if (error == 0) { szp->z_pflags |= ZFS_AV_MODIFIED; error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zsb), (void *)&szp->z_pflags, sizeof (uint64_t), tx); ASSERT0(error); error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); if (error == 0) { zfs_log_rename(zilog, tx, TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); } else { /* * At this point, we have successfully created * the target name, but have failed to remove * the source name. Since the create was done * with the ZRENAMING flag, there are * complications; for one, the link count is * wrong. The easiest way to deal with this * is to remove the newly created target, and * return the original error. This must * succeed; fortunately, it is very unlikely to * fail, since we just created it. */ VERIFY3U(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL), ==, 0); } } } dmu_tx_commit(tx); out: if (zl != NULL) zfs_rename_unlock(&zl); zfs_dirent_unlock(sdl); zfs_dirent_unlock(tdl); zfs_inode_update(sdzp); if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); if (sdzp != tdzp) zfs_inode_update(tdzp); zfs_inode_update(szp); iput(ZTOI(szp)); if (tzp) { zfs_inode_update(tzp); iput(ZTOI(tzp)); } if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_rename); /* * Insert the indicated symbolic reference entry into the directory. * * IN: dip - Directory to contain new symbolic link. * link - Name for new symlink entry. * vap - Attributes of new entry. * target - Target path of new symlink. * * cr - credentials of caller. * flags - case flags * * RETURN: 0 on success, error code on failure. * * Timestamps: * dip - ctime|mtime updated */ /*ARGSUSED*/ int zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link, struct inode **ipp, cred_t *cr, int flags) { znode_t *zp, *dzp = ITOZ(dip); zfs_dirlock_t *dl; dmu_tx_t *tx; zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; uint64_t len = strlen(link); int error; int zflg = ZNEW; zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; uint64_t txtype = TX_SYMLINK; boolean_t waited = B_FALSE; ASSERT(S_ISLNK(vap->va_mode)); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); zilog = zsb->z_log; if (zsb->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zsb); return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zflg |= ZCILOOK; if (len > MAXPATHLEN) { ZFS_EXIT(zsb); return (SET_ERROR(ENAMETOOLONG)); } if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)) != 0) { ZFS_EXIT(zsb); return (error); } top: *ipp = NULL; /* * Attempt to lock directory; fail if entry already exists. */ error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); if (error) { zfs_acl_ids_free(&acl_ids); ZFS_EXIT(zsb); return (error); } if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zsb); return (error); } if (zfs_acl_ids_overquota(zsb, &acl_ids)) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zsb); return (SET_ERROR(EDQUOT)); } tx = dmu_tx_create(zsb->z_os); fuid_dirtied = zsb->z_fuid_dirty; dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE + len); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } if (fuid_dirtied) zfs_fuid_txhold(zsb, tx); error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); if (error == ERESTART) { waited = B_TRUE; dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); ZFS_EXIT(zsb); return (error); } /* * Create a new object for the symlink. * for version 4 ZPL datsets the symlink will be an SA attribute */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) zfs_fuid_sync(zsb, tx); mutex_enter(&zp->z_lock); if (zp->z_is_sa) error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zsb), link, len, tx); else zfs_sa_symlink(zp, link, len, tx); mutex_exit(&zp->z_lock); zp->z_size = len; (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb), &zp->z_size, sizeof (zp->z_size), tx); /* * Insert the new object into the directory. */ (void) zfs_link_create(dl, zp, tx, ZNEW); if (flags & FIGNORECASE) txtype |= TX_CI; zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); zfs_inode_update(dzp); zfs_inode_update(zp); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); zfs_dirent_unlock(dl); *ipp = ZTOI(zp); if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_symlink); /* * Return, in the buffer contained in the provided uio structure, * the symbolic path referred to by ip. * * IN: ip - inode of symbolic link * uio - structure to contain the link path. * cr - credentials of caller. * * RETURN: 0 if success * error code if failure * * Timestamps: * ip - atime updated */ /* ARGSUSED */ int zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); int error; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); mutex_enter(&zp->z_lock); if (zp->z_is_sa) error = sa_lookup_uio(zp->z_sa_hdl, SA_ZPL_SYMLINK(zsb), uio); else error = zfs_sa_readlink(zp, uio); mutex_exit(&zp->z_lock); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_readlink); /* * Insert a new entry into directory tdip referencing sip. * * IN: tdip - Directory to contain new entry. * sip - inode of new entry. * name - name of new entry. * cr - credentials of caller. * * RETURN: 0 if success * error code if failure * * Timestamps: * tdip - ctime|mtime updated * sip - ctime updated */ /* ARGSUSED */ int zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr, int flags) { znode_t *dzp = ITOZ(tdip); znode_t *tzp, *szp; zfs_sb_t *zsb = ITOZSB(tdip); zilog_t *zilog; zfs_dirlock_t *dl; dmu_tx_t *tx; int error; int zf = ZNEW; uint64_t parent; uid_t owner; boolean_t waited = B_FALSE; - + boolean_t is_tmpfile = 0; + uint64_t txg; +#ifdef HAVE_TMPFILE + is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE)); +#endif ASSERT(S_ISDIR(tdip->i_mode)); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); zilog = zsb->z_log; /* * POSIX dictates that we return EPERM here. * Better choices include ENOTSUP or EISDIR. */ if (S_ISDIR(sip->i_mode)) { ZFS_EXIT(zsb); return (SET_ERROR(EPERM)); } szp = ITOZ(sip); ZFS_VERIFY_ZP(szp); /* * We check i_sb because snapshots and the ctldir must have different * super blocks. */ if (sip->i_sb != tdip->i_sb || zfsctl_is_node(sip)) { ZFS_EXIT(zsb); return (SET_ERROR(EXDEV)); } /* Prevent links to .zfs/shares files */ if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zsb), &parent, sizeof (uint64_t))) != 0) { ZFS_EXIT(zsb); return (error); } if (parent == zsb->z_shares_dir) { ZFS_EXIT(zsb); return (SET_ERROR(EPERM)); } if (zsb->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zsb); return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zf |= ZCILOOK; /* * We do not support links between attributes and non-attributes * because of the potential security risk of creating links * into "normal" file space in order to circumvent restrictions * imposed in attribute space. */ if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } owner = zfs_fuid_map_id(zsb, KUID_TO_SUID(sip->i_uid), cr, ZFS_OWNER); if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) { ZFS_EXIT(zsb); return (SET_ERROR(EPERM)); } if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { ZFS_EXIT(zsb); return (error); } top: /* * Attempt to lock directory; fail if entry already exists. */ error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); if (error) { ZFS_EXIT(zsb); return (error); } tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); + if (is_tmpfile) + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); + zfs_sa_upgrade_txholds(tx, szp); zfs_sa_upgrade_txholds(tx, dzp); error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); if (error == ERESTART) { waited = B_TRUE; dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); ZFS_EXIT(zsb); return (error); } - + /* unmark z_unlinked so zfs_link_create will not reject */ + if (is_tmpfile) + szp->z_unlinked = 0; error = zfs_link_create(dl, szp, tx, 0); if (error == 0) { uint64_t txtype = TX_LINK; - if (flags & FIGNORECASE) - txtype |= TX_CI; - zfs_log_link(zilog, tx, txtype, dzp, szp, name); + /* + * tmpfile is created to be in z_unlinkedobj, so remove it. + * Also, we don't log in ZIL, be cause all previous file + * operation on the tmpfile are ignored by ZIL. Instead we + * always wait for txg to sync to make sure all previous + * operation are sync safe. + */ + if (is_tmpfile) { + VERIFY(zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, + szp->z_id, tx) == 0); + } else { + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_link(zilog, tx, txtype, dzp, szp, name); + } + } else if (is_tmpfile) { + /* restore z_unlinked since when linking failed */ + szp->z_unlinked = 1; } - + txg = dmu_tx_get_txg(tx); dmu_tx_commit(tx); zfs_dirent_unlock(dl); - if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (!is_tmpfile && zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); + if (is_tmpfile) + txg_wait_synced(dmu_objset_pool(zsb->z_os), txg); + zfs_inode_update(dzp); zfs_inode_update(szp); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_link); static void zfs_putpage_commit_cb(void *arg) { struct page *pp = arg; ClearPageError(pp); end_page_writeback(pp); } /* * Push a page out to disk, once the page is on stable storage the * registered commit callback will be run as notification of completion. * * IN: ip - page mapped for inode. * pp - page to push (page is locked) * wbc - writeback control data * * RETURN: 0 if success * error code if failure * * Timestamps: * ip - ctime|mtime updated */ /* ARGSUSED */ int zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); loff_t offset; loff_t pgoff; unsigned int pglen; rl_t *rl; dmu_tx_t *tx; caddr_t va; int err = 0; uint64_t mtime[2], ctime[2]; sa_bulk_attr_t bulk[3]; int cnt = 0; struct address_space *mapping; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); ASSERT(PageLocked(pp)); pgoff = page_offset(pp); /* Page byte-offset in file */ offset = i_size_read(ip); /* File length in bytes */ pglen = MIN(PAGE_SIZE, /* Page length in bytes */ P2ROUNDUP(offset, PAGE_SIZE)-pgoff); /* Page is beyond end of file */ if (pgoff >= offset) { unlock_page(pp); ZFS_EXIT(zsb); return (0); } /* Truncate page length to end of file */ if (pgoff + pglen > offset) pglen = offset - pgoff; #if 0 /* * FIXME: Allow mmap writes past its quota. The correct fix * is to register a page_mkwrite() handler to count the page * against its quota when it is about to be dirtied. */ if (zfs_owner_overquota(zsb, zp, B_FALSE) || zfs_owner_overquota(zsb, zp, B_TRUE)) { err = EDQUOT; } #endif /* * The ordering here is critical and must adhere to the following * rules in order to avoid deadlocking in either zfs_read() or * zfs_free_range() due to a lock inversion. * * 1) The page must be unlocked prior to acquiring the range lock. * This is critical because zfs_read() calls find_lock_page() * which may block on the page lock while holding the range lock. * * 2) Before setting or clearing write back on a page the range lock * must be held in order to prevent a lock inversion with the * zfs_free_range() function. * * This presents a problem because upon entering this function the * page lock is already held. To safely acquire the range lock the * page lock must be dropped. This creates a window where another * process could truncate, invalidate, dirty, or write out the page. * * Therefore, after successfully reacquiring the range and page locks * the current page state is checked. In the common case everything * will be as is expected and it can be written out. However, if * the page state has changed it must be handled accordingly. */ mapping = pp->mapping; redirty_page_for_writepage(wbc, pp); unlock_page(pp); rl = zfs_range_lock(&zp->z_range_lock, pgoff, pglen, RL_WRITER); lock_page(pp); /* Page mapping changed or it was no longer dirty, we're done */ if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) { unlock_page(pp); zfs_range_unlock(rl); ZFS_EXIT(zsb); return (0); } /* Another process started write block if required */ if (PageWriteback(pp)) { unlock_page(pp); zfs_range_unlock(rl); if (wbc->sync_mode != WB_SYNC_NONE) wait_on_page_writeback(pp); ZFS_EXIT(zsb); return (0); } /* Clear the dirty flag the required locks are held */ if (!clear_page_dirty_for_io(pp)) { unlock_page(pp); zfs_range_unlock(rl); ZFS_EXIT(zsb); return (0); } /* * Counterpart for redirty_page_for_writepage() above. This page * was in fact not skipped and should not be counted as if it were. */ wbc->pages_skipped--; set_page_writeback(pp); unlock_page(pp); tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_write(tx, zp->z_id, pgoff, pglen); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); err = dmu_tx_assign(tx, TXG_NOWAIT); if (err != 0) { if (err == ERESTART) dmu_tx_wait(tx); dmu_tx_abort(tx); __set_page_dirty_nobuffers(pp); ClearPageError(pp); end_page_writeback(pp); zfs_range_unlock(rl); ZFS_EXIT(zsb); return (err); } va = kmap(pp); ASSERT3U(pglen, <=, PAGE_SIZE); dmu_write(zsb->z_os, zp->z_id, pgoff, pglen, va, tx); kunmap(pp); SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); /* Preserve the mtime and ctime provided by the inode */ ZFS_TIME_ENCODE(&ip->i_mtime, mtime); ZFS_TIME_ENCODE(&ip->i_ctime, ctime); zp->z_atime_dirty = 0; zp->z_seq++; err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx); zfs_log_write(zsb->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0, zfs_putpage_commit_cb, pp); dmu_tx_commit(tx); zfs_range_unlock(rl); if (wbc->sync_mode != WB_SYNC_NONE) { /* * Note that this is rarely called under writepages(), because * writepages() normally handles the entire commit for * performance reasons. */ if (zsb->z_log != NULL) zil_commit(zsb->z_log, zp->z_id); } ZFS_EXIT(zsb); return (err); } /* * Update the system attributes when the inode has been dirtied. For the * moment we only update the mode, atime, mtime, and ctime. */ int zfs_dirty_inode(struct inode *ip, int flags) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); dmu_tx_t *tx; uint64_t mode, atime[2], mtime[2], ctime[2]; sa_bulk_attr_t bulk[4]; int error = 0; int cnt = 0; if (zfs_is_readonly(zsb) || dmu_objset_is_snapshot(zsb->z_os)) return (0); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); #ifdef I_DIRTY_TIME /* * This is the lazytime semantic indroduced in Linux 4.0 * This flag will only be called from update_time when lazytime is set. * (Note, I_DIRTY_SYNC will also set if not lazytime) * Fortunately mtime and ctime are managed within ZFS itself, so we * only need to dirty atime. */ if (flags == I_DIRTY_TIME) { zp->z_atime_dirty = 1; goto out; } #endif tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); goto out; } mutex_enter(&zp->z_lock); zp->z_atime_dirty = 0; SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); /* Preserve the mode, mtime and ctime provided by the inode */ ZFS_TIME_ENCODE(&ip->i_atime, atime); ZFS_TIME_ENCODE(&ip->i_mtime, mtime); ZFS_TIME_ENCODE(&ip->i_ctime, ctime); mode = ip->i_mode; zp->z_mode = mode; error = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx); mutex_exit(&zp->z_lock); dmu_tx_commit(tx); out: ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_dirty_inode); /*ARGSUSED*/ void zfs_inactive(struct inode *ip) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); uint64_t atime[2]; int error; int need_unlock = 0; /* Only read lock if we haven't already write locked, e.g. rollback */ if (!RW_WRITE_HELD(&zsb->z_teardown_inactive_lock)) { need_unlock = 1; rw_enter(&zsb->z_teardown_inactive_lock, RW_READER); } if (zp->z_sa_hdl == NULL) { if (need_unlock) rw_exit(&zsb->z_teardown_inactive_lock); return; } if (zp->z_atime_dirty && zp->z_unlinked == 0) { dmu_tx_t *tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { ZFS_TIME_ENCODE(&ip->i_atime, atime); mutex_enter(&zp->z_lock); (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), (void *)&atime, sizeof (atime), tx); zp->z_atime_dirty = 0; mutex_exit(&zp->z_lock); dmu_tx_commit(tx); } } zfs_zinactive(zp); if (need_unlock) rw_exit(&zsb->z_teardown_inactive_lock); } EXPORT_SYMBOL(zfs_inactive); /* * Bounds-check the seek operation. * * IN: ip - inode seeking within * ooff - old file offset * noffp - pointer to new file offset * ct - caller context * * RETURN: 0 if success * EINVAL if new offset invalid */ /* ARGSUSED */ int zfs_seek(struct inode *ip, offset_t ooff, offset_t *noffp) { if (S_ISDIR(ip->i_mode)) return (0); return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); } EXPORT_SYMBOL(zfs_seek); /* * Fill pages with data from the disk. */ static int zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); objset_t *os; struct page *cur_pp; u_offset_t io_off, total; size_t io_len; loff_t i_size; unsigned page_idx; int err; os = zsb->z_os; io_len = nr_pages << PAGE_SHIFT; i_size = i_size_read(ip); io_off = page_offset(pl[0]); if (io_off + io_len > i_size) io_len = i_size - io_off; /* * Iterate over list of pages and read each page individually. */ page_idx = 0; for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { caddr_t va; cur_pp = pl[page_idx++]; va = kmap(cur_pp); err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, DMU_READ_PREFETCH); kunmap(cur_pp); if (err) { /* convert checksum errors into IO errors */ if (err == ECKSUM) err = SET_ERROR(EIO); return (err); } } return (0); } /* * Uses zfs_fillpage to read data from the file and fill the pages. * * IN: ip - inode of file to get data from. * pl - list of pages to read * nr_pages - number of pages to read * * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - atime updated */ /* ARGSUSED */ int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); int err; if (pl == NULL) return (0); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); err = zfs_fillpage(ip, pl, nr_pages); ZFS_EXIT(zsb); return (err); } EXPORT_SYMBOL(zfs_getpage); /* * Check ZFS specific permissions to memory map a section of a file. * * IN: ip - inode of the file to mmap * off - file offset * addrp - start address in memory region * len - length of memory region * vm_flags- address flags * * RETURN: 0 if success * error code if failure */ /*ARGSUSED*/ int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len, unsigned long vm_flags) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); if ((vm_flags & VM_WRITE) && (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { ZFS_EXIT(zsb); return (SET_ERROR(EPERM)); } if ((vm_flags & (VM_READ | VM_EXEC)) && (zp->z_pflags & ZFS_AV_QUARANTINED)) { ZFS_EXIT(zsb); return (SET_ERROR(EACCES)); } if (off < 0 || len > MAXOFFSET_T - off) { ZFS_EXIT(zsb); return (SET_ERROR(ENXIO)); } ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_map); /* * convoff - converts the given data (start, whence) to the * given whence. */ int convoff(struct inode *ip, flock64_t *lckdat, int whence, offset_t offset) { vattr_t vap; int error; if ((lckdat->l_whence == 2) || (whence == 2)) { if ((error = zfs_getattr(ip, &vap, 0, CRED()) != 0)) return (error); } switch (lckdat->l_whence) { case 1: lckdat->l_start += offset; break; case 2: lckdat->l_start += vap.va_size; /* FALLTHRU */ case 0: break; default: return (SET_ERROR(EINVAL)); } if (lckdat->l_start < 0) return (SET_ERROR(EINVAL)); switch (whence) { case 1: lckdat->l_start -= offset; break; case 2: lckdat->l_start -= vap.va_size; /* FALLTHRU */ case 0: break; default: return (SET_ERROR(EINVAL)); } lckdat->l_whence = (short)whence; return (0); } /* * Free or allocate space in a file. Currently, this function only * supports the `F_FREESP' command. However, this command is somewhat * misnamed, as its functionality includes the ability to allocate as * well as free space. * * IN: ip - inode of file to free data in. * cmd - action to take (only F_FREESP supported). * bfp - section of file to free/alloc. * flag - current file open mode flags. * offset - current file offset. * cr - credentials of caller [UNUSED]. * * RETURN: 0 on success, error code on failure. * * Timestamps: * ip - ctime|mtime updated */ /* ARGSUSED */ int zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag, offset_t offset, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); uint64_t off, len; int error; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); if (cmd != F_FREESP) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } /* * Callers might not be able to detect properly that we are read-only, * so check it explicitly here. */ if (zfs_is_readonly(zsb)) { ZFS_EXIT(zsb); return (SET_ERROR(EROFS)); } if ((error = convoff(ip, bfp, 0, offset))) { ZFS_EXIT(zsb); return (error); } if (bfp->l_len < 0) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } /* * Permissions aren't checked on Solaris because on this OS * zfs_space() can only be called with an opened file handle. * On Linux we can get here through truncate_range() which * operates directly on inodes, so we need to check access rights. */ if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) { ZFS_EXIT(zsb); return (error); } off = bfp->l_start; len = bfp->l_len; /* 0 means from off to end of file */ error = zfs_freesp(zp, off, len, flag, TRUE); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_space); /*ARGSUSED*/ int zfs_fid(struct inode *ip, fid_t *fidp) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); uint32_t gen; uint64_t gen64; uint64_t object = zp->z_id; zfid_short_t *zfid; int size, i, error; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), &gen64, sizeof (uint64_t))) != 0) { ZFS_EXIT(zsb); return (error); } gen = (uint32_t)gen64; size = (zsb->z_parent != zsb) ? LONG_FID_LEN : SHORT_FID_LEN; if (fidp->fid_len < size) { fidp->fid_len = size; ZFS_EXIT(zsb); return (SET_ERROR(ENOSPC)); } zfid = (zfid_short_t *)fidp; zfid->zf_len = size; for (i = 0; i < sizeof (zfid->zf_object); i++) zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); /* Must have a non-zero generation number to distinguish from .zfs */ if (gen == 0) gen = 1; for (i = 0; i < sizeof (zfid->zf_gen); i++) zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); if (size == LONG_FID_LEN) { uint64_t objsetid = dmu_objset_id(zsb->z_os); zfid_long_t *zlfid; zlfid = (zfid_long_t *)fidp; for (i = 0; i < sizeof (zlfid->zf_setid); i++) zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); /* XXX - this should be the generation number for the objset */ for (i = 0; i < sizeof (zlfid->zf_setgen); i++) zlfid->zf_setgen[i] = 0; } ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_fid); /*ARGSUSED*/ int zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); error = zfs_getacl(zp, vsecp, skipaclchk, cr); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_getsecattr); /*ARGSUSED*/ int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; zilog_t *zilog = zsb->z_log; ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); error = zfs_setacl(zp, vsecp, skipaclchk, cr); if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_setsecattr); #ifdef HAVE_UIO_ZEROCOPY /* * Tunable, both must be a power of 2. * * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf * zcr_blksz_max: if set to less than the file block size, allow loaning out of * an arcbuf for a partial block read */ int zcr_blksz_min = (1 << 10); /* 1K */ int zcr_blksz_max = (1 << 17); /* 128K */ /*ARGSUSED*/ static int zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ITOZSB(ip); int max_blksz = zsb->z_max_blksz; uio_t *uio = &xuio->xu_uio; ssize_t size = uio->uio_resid; offset_t offset = uio->uio_loffset; int blksz; int fullblk, i; arc_buf_t *abuf; ssize_t maxsize; int preamble, postamble; if (xuio->xu_type != UIOTYPE_ZEROCOPY) return (SET_ERROR(EINVAL)); ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); switch (ioflag) { case UIO_WRITE: /* * Loan out an arc_buf for write if write size is bigger than * max_blksz, and the file's block size is also max_blksz. */ blksz = max_blksz; if (size < blksz || zp->z_blksz != blksz) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } /* * Caller requests buffers for write before knowing where the * write offset might be (e.g. NFS TCP write). */ if (offset == -1) { preamble = 0; } else { preamble = P2PHASE(offset, blksz); if (preamble) { preamble = blksz - preamble; size -= preamble; } } postamble = P2PHASE(size, blksz); size -= postamble; fullblk = size / blksz; (void) dmu_xuio_init(xuio, (preamble != 0) + fullblk + (postamble != 0)); /* * Have to fix iov base/len for partial buffers. They * currently represent full arc_buf's. */ if (preamble) { /* data begins in the middle of the arc_buf */ abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), blksz); ASSERT(abuf); (void) dmu_xuio_add(xuio, abuf, blksz - preamble, preamble); } for (i = 0; i < fullblk; i++) { abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), blksz); ASSERT(abuf); (void) dmu_xuio_add(xuio, abuf, 0, blksz); } if (postamble) { /* data ends in the middle of the arc_buf */ abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), blksz); ASSERT(abuf); (void) dmu_xuio_add(xuio, abuf, 0, postamble); } break; case UIO_READ: /* * Loan out an arc_buf for read if the read size is larger than * the current file block size. Block alignment is not * considered. Partial arc_buf will be loaned out for read. */ blksz = zp->z_blksz; if (blksz < zcr_blksz_min) blksz = zcr_blksz_min; if (blksz > zcr_blksz_max) blksz = zcr_blksz_max; /* avoid potential complexity of dealing with it */ if (blksz > max_blksz) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } maxsize = zp->z_size - uio->uio_loffset; if (size > maxsize) size = maxsize; if (size < blksz) { ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } break; default: ZFS_EXIT(zsb); return (SET_ERROR(EINVAL)); } uio->uio_extflg = UIO_XUIO; XUIO_XUZC_RW(xuio) = ioflag; ZFS_EXIT(zsb); return (0); } /*ARGSUSED*/ static int zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr) { int i; arc_buf_t *abuf; int ioflag = XUIO_XUZC_RW(xuio); ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); i = dmu_xuio_cnt(xuio); while (i-- > 0) { abuf = dmu_xuio_arcbuf(xuio, i); /* * if abuf == NULL, it must be a write buffer * that has been returned in zfs_write(). */ if (abuf) dmu_return_arcbuf(abuf); ASSERT(abuf || ioflag == UIO_WRITE); } dmu_xuio_fini(xuio); return (0); } #endif /* HAVE_UIO_ZEROCOPY */ #if defined(_KERNEL) && defined(HAVE_SPL) module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); module_param(zfs_read_chunk_size, long, 0644); MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk"); #endif diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index ebf512a84f0a..a4d1520b160c 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1,2147 +1,2147 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ /* Portions Copyright 2007 Jeremy Teo */ #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fs/fs_subr.h" #include #include #include #include #include #include #include #include #include #include #include #endif /* _KERNEL */ #include #include #include #include #include #include #include #include #include #include #include "zfs_prop.h" #include "zfs_comutil.h" /* * Define ZNODE_STATS to turn on statistic gathering. By default, it is only * turned on when DEBUG is also defined. */ #ifdef DEBUG #define ZNODE_STATS #endif /* DEBUG */ #ifdef ZNODE_STATS #define ZNODE_STAT_ADD(stat) ((stat)++) #else #define ZNODE_STAT_ADD(stat) /* nothing */ #endif /* ZNODE_STATS */ /* * Functions needed for userland (ie: libzpool) are not put under * #ifdef_KERNEL; the rest of the functions have dependencies * (such as VFS logic) that will not compile easily in userland. */ #ifdef _KERNEL static kmem_cache_t *znode_cache = NULL; static kmem_cache_t *znode_hold_cache = NULL; unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ; /*ARGSUSED*/ static int zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) { znode_t *zp = buf; inode_init_once(ZTOI(zp)); list_link_init(&zp->z_link_node); mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL); mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL); zfs_rlock_init(&zp->z_range_lock); zp->z_dirlocks = NULL; zp->z_acl_cached = NULL; zp->z_xattr_cached = NULL; zp->z_xattr_parent = 0; zp->z_moved = 0; return (0); } /*ARGSUSED*/ static void zfs_znode_cache_destructor(void *buf, void *arg) { znode_t *zp = buf; ASSERT(!list_link_active(&zp->z_link_node)); mutex_destroy(&zp->z_lock); rw_destroy(&zp->z_parent_lock); rw_destroy(&zp->z_name_lock); mutex_destroy(&zp->z_acl_lock); rw_destroy(&zp->z_xattr_lock); zfs_rlock_destroy(&zp->z_range_lock); ASSERT(zp->z_dirlocks == NULL); ASSERT(zp->z_acl_cached == NULL); ASSERT(zp->z_xattr_cached == NULL); } static int zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags) { znode_hold_t *zh = buf; mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL); refcount_create(&zh->zh_refcount); zh->zh_obj = ZFS_NO_OBJECT; return (0); } static void zfs_znode_hold_cache_destructor(void *buf, void *arg) { znode_hold_t *zh = buf; mutex_destroy(&zh->zh_lock); refcount_destroy(&zh->zh_refcount); } void zfs_znode_init(void) { /* * Initialize zcache. The KMC_SLAB hint is used in order that it be * backed by kmalloc() when on the Linux slab in order that any * wait_on_bit() operations on the related inode operate properly. */ ASSERT(znode_cache == NULL); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB); ASSERT(znode_hold_cache == NULL); znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache", sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor, zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0); } void zfs_znode_fini(void) { /* * Cleanup zcache */ if (znode_cache) kmem_cache_destroy(znode_cache); znode_cache = NULL; if (znode_hold_cache) kmem_cache_destroy(znode_hold_cache); znode_hold_cache = NULL; } /* * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to * serialize access to a znode and its SA buffer while the object is being * created or destroyed. This kind of locking would normally reside in the * znode itself but in this case that's impossible because the znode and SA * buffer may not yet exist. Therefore the locking is handled externally * with an array of mutexs and AVLs trees which contain per-object locks. * * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted * in to the correct AVL tree and finally the per-object lock is held. In * zfs_znode_hold_exit() the process is reversed. The per-object lock is * released, removed from the AVL tree and destroyed if there are no waiters. * * This scheme has two important properties: * * 1) No memory allocations are performed while holding one of the z_hold_locks. * This ensures evict(), which can be called from direct memory reclaim, will * never block waiting on a z_hold_locks which just happens to have hashed * to the same index. * * 2) All locks used to serialize access to an object are per-object and never * shared. This minimizes lock contention without creating a large number * of dedicated locks. * * On the downside it does require znode_lock_t structures to be frequently * allocated and freed. However, because these are backed by a kmem cache * and very short lived this cost is minimal. */ int zfs_znode_hold_compare(const void *a, const void *b) { const znode_hold_t *zh_a = (const znode_hold_t *)a; const znode_hold_t *zh_b = (const znode_hold_t *)b; return (AVL_CMP(zh_a->zh_obj, zh_b->zh_obj)); } boolean_t zfs_znode_held(zfs_sb_t *zsb, uint64_t obj) { znode_hold_t *zh, search; int i = ZFS_OBJ_HASH(zsb, obj); boolean_t held; search.zh_obj = obj; mutex_enter(&zsb->z_hold_locks[i]); zh = avl_find(&zsb->z_hold_trees[i], &search, NULL); held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE; mutex_exit(&zsb->z_hold_locks[i]); return (held); } static znode_hold_t * zfs_znode_hold_enter(zfs_sb_t *zsb, uint64_t obj) { znode_hold_t *zh, *zh_new, search; int i = ZFS_OBJ_HASH(zsb, obj); boolean_t found = B_FALSE; zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP); zh_new->zh_obj = obj; search.zh_obj = obj; mutex_enter(&zsb->z_hold_locks[i]); zh = avl_find(&zsb->z_hold_trees[i], &search, NULL); if (likely(zh == NULL)) { zh = zh_new; avl_add(&zsb->z_hold_trees[i], zh); } else { ASSERT3U(zh->zh_obj, ==, obj); found = B_TRUE; } refcount_add(&zh->zh_refcount, NULL); mutex_exit(&zsb->z_hold_locks[i]); if (found == B_TRUE) kmem_cache_free(znode_hold_cache, zh_new); ASSERT(MUTEX_NOT_HELD(&zh->zh_lock)); ASSERT3S(refcount_count(&zh->zh_refcount), >, 0); mutex_enter(&zh->zh_lock); return (zh); } static void zfs_znode_hold_exit(zfs_sb_t *zsb, znode_hold_t *zh) { int i = ZFS_OBJ_HASH(zsb, zh->zh_obj); boolean_t remove = B_FALSE; ASSERT(zfs_znode_held(zsb, zh->zh_obj)); ASSERT3S(refcount_count(&zh->zh_refcount), >, 0); mutex_exit(&zh->zh_lock); mutex_enter(&zsb->z_hold_locks[i]); if (refcount_remove(&zh->zh_refcount, NULL) == 0) { avl_remove(&zsb->z_hold_trees[i], zh); remove = B_TRUE; } mutex_exit(&zsb->z_hold_locks[i]); if (remove == B_TRUE) kmem_cache_free(znode_hold_cache, zh); } int zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx) { #ifdef HAVE_SMB_SHARE zfs_acl_ids_t acl_ids; vattr_t vattr; znode_t *sharezp; vnode_t *vp; znode_t *zp; int error; vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; vattr.va_mode = S_IFDIR | 0555; vattr.va_uid = crgetuid(kcred); vattr.va_gid = crgetgid(kcred); sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); sharezp->z_moved = 0; sharezp->z_unlinked = 0; sharezp->z_atime_dirty = 0; sharezp->z_zfsvfs = zfsvfs; sharezp->z_is_sa = zfsvfs->z_use_sa; vp = ZTOV(sharezp); vn_reinit(vp); vp->v_type = VDIR; VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, kcred, NULL, &acl_ids)); zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids); ASSERT3P(zp, ==, sharezp); ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */ POINTER_INVALIDATE(&sharezp->z_zfsvfs); error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); zfsvfs->z_shares_dir = sharezp->z_id; zfs_acl_ids_free(&acl_ids); // ZTOV(sharezp)->v_count = 0; sa_handle_destroy(sharezp->z_sa_hdl); kmem_cache_free(znode_cache, sharezp); return (error); #else return (0); #endif /* HAVE_SMB_SHARE */ } static void zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp, dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl) { ASSERT(zfs_znode_held(zsb, zp->z_id)); mutex_enter(&zp->z_lock); ASSERT(zp->z_sa_hdl == NULL); ASSERT(zp->z_acl_cached == NULL); if (sa_hdl == NULL) { VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, zp, SA_HDL_SHARED, &zp->z_sa_hdl)); } else { zp->z_sa_hdl = sa_hdl; sa_set_userp(sa_hdl, zp); } zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; mutex_exit(&zp->z_lock); } void zfs_znode_dmu_fini(znode_t *zp) { ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked || RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock)); sa_handle_destroy(zp->z_sa_hdl); zp->z_sa_hdl = NULL; } /* * Called by new_inode() to allocate a new inode. */ int zfs_inode_alloc(struct super_block *sb, struct inode **ip) { znode_t *zp; zp = kmem_cache_alloc(znode_cache, KM_SLEEP); *ip = ZTOI(zp); return (0); } /* * Called in multiple places when an inode should be destroyed. */ void zfs_inode_destroy(struct inode *ip) { znode_t *zp = ITOZ(ip); zfs_sb_t *zsb = ZTOZSB(zp); mutex_enter(&zsb->z_znodes_lock); if (list_link_active(&zp->z_link_node)) { list_remove(&zsb->z_all_znodes, zp); zsb->z_nr_znodes--; } mutex_exit(&zsb->z_znodes_lock); if (zp->z_acl_cached) { zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = NULL; } if (zp->z_xattr_cached) { nvlist_free(zp->z_xattr_cached); zp->z_xattr_cached = NULL; } kmem_cache_free(znode_cache, zp); } static void zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip) { uint64_t rdev = 0; switch (ip->i_mode & S_IFMT) { case S_IFREG: ip->i_op = &zpl_inode_operations; ip->i_fop = &zpl_file_operations; ip->i_mapping->a_ops = &zpl_address_space_operations; break; case S_IFDIR: ip->i_op = &zpl_dir_inode_operations; ip->i_fop = &zpl_dir_file_operations; ITOZ(ip)->z_zn_prefetch = B_TRUE; break; case S_IFLNK: ip->i_op = &zpl_symlink_inode_operations; break; /* * rdev is only stored in a SA only for device files. */ case S_IFCHR: case S_IFBLK: (void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb), &rdev, sizeof (rdev)); /*FALLTHROUGH*/ case S_IFIFO: case S_IFSOCK: init_special_inode(ip, ip->i_mode, rdev); ip->i_op = &zpl_special_inode_operations; break; default: zfs_panic_recover("inode %llu has invalid mode: 0x%x\n", (u_longlong_t)ip->i_ino, ip->i_mode); /* Assume the inode is a file and attempt to continue */ ip->i_mode = S_IFREG | 0644; ip->i_op = &zpl_inode_operations; ip->i_fop = &zpl_file_operations; ip->i_mapping->a_ops = &zpl_address_space_operations; break; } } /* * Update the embedded inode given the znode. We should work toward * eliminating this function as soon as possible by removing values * which are duplicated between the znode and inode. If the generic * inode has the correct field it should be used, and the ZFS code * updated to access the inode. This can be done incrementally. */ void zfs_inode_update(znode_t *zp) { zfs_sb_t *zsb; struct inode *ip; uint32_t blksize; u_longlong_t i_blocks; ASSERT(zp != NULL); zsb = ZTOZSB(zp); ip = ZTOI(zp); /* Skip .zfs control nodes which do not exist on disk. */ if (zfsctl_is_node(ip)) return; dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks); spin_lock(&ip->i_lock); ip->i_blocks = i_blocks; i_size_write(ip, zp->z_size); spin_unlock(&ip->i_lock); } /* * Construct a znode+inode and initialize. * * This does not do a call to dmu_set_user() that is * up to the caller to do, in case you don't want to * return the znode */ static znode_t * zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl) { znode_t *zp; struct inode *ip; uint64_t mode; uint64_t parent; uint64_t tmp_gen; uint64_t links; uint64_t z_uid, z_gid; uint64_t atime[2], mtime[2], ctime[2]; sa_bulk_attr_t bulk[11]; int count = 0; ASSERT(zsb != NULL); ip = new_inode(zsb->z_sb); if (ip == NULL) return (NULL); zp = ITOZ(ip); ASSERT(zp->z_dirlocks == NULL); ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_moved = 0; zp->z_sa_hdl = NULL; zp->z_unlinked = 0; zp->z_atime_dirty = 0; zp->z_mapcnt = 0; zp->z_id = db->db_object; zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; zp->z_is_mapped = B_FALSE; zp->z_is_ctldir = B_FALSE; zp->z_is_stale = B_FALSE; zp->z_range_lock.zr_size = &zp->z_size; zp->z_range_lock.zr_blksz = &zp->z_blksz; zp->z_range_lock.zr_max_blksz = &ZTOZSB(zp)->z_max_blksz; zfs_znode_sa_init(zsb, zp, db, obj_type, hdl); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &tmp_gen, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &links, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, &parent, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &z_uid, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &z_gid, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0) { if (hdl == NULL) sa_handle_destroy(zp->z_sa_hdl); zp->z_sa_hdl = NULL; goto error; } zp->z_mode = ip->i_mode = mode; ip->i_generation = (uint32_t)tmp_gen; ip->i_blkbits = SPA_MINBLOCKSHIFT; set_nlink(ip, (uint32_t)links); zfs_uid_write(ip, z_uid); zfs_gid_write(ip, z_gid); /* Cache the xattr parent id */ if (zp->z_pflags & ZFS_XATTR) zp->z_xattr_parent = parent; ZFS_TIME_DECODE(&ip->i_atime, atime); ZFS_TIME_DECODE(&ip->i_mtime, mtime); ZFS_TIME_DECODE(&ip->i_ctime, ctime); ip->i_ino = obj; zfs_inode_update(zp); zfs_inode_set_ops(zsb, ip); /* * The only way insert_inode_locked() can fail is if the ip->i_ino * number is already hashed for this super block. This can never * happen because the inode numbers map 1:1 with the object numbers. * * The one exception is rolling back a mounted file system, but in * this case all the active inode are unhashed during the rollback. */ VERIFY3S(insert_inode_locked(ip), ==, 0); mutex_enter(&zsb->z_znodes_lock); list_insert_tail(&zsb->z_all_znodes, zp); zsb->z_nr_znodes++; membar_producer(); mutex_exit(&zsb->z_znodes_lock); unlock_new_inode(ip); return (zp); error: iput(ip); return (NULL); } /* * Safely mark an inode dirty. Inodes which are part of a read-only * file system or snapshot may not be dirtied. */ void zfs_mark_inode_dirty(struct inode *ip) { zfs_sb_t *zsb = ITOZSB(ip); if (zfs_is_readonly(zsb) || dmu_objset_is_snapshot(zsb->z_os)) return; mark_inode_dirty(ip); } static uint64_t empty_xattr; static uint64_t pad[4]; static zfs_acl_phys_t acl_phys; /* * Create a new DMU object to hold a zfs znode. * * IN: dzp - parent directory for new znode * vap - file attributes for new znode * tx - dmu transaction id for zap operations * cr - credentials of caller * flag - flags: * IS_ROOT_NODE - new object will be root * IS_XATTR - new object is an attribute * bonuslen - length of bonus buffer * setaclp - File/Dir initial ACL * fuidp - Tracks fuid allocation. * * OUT: zpp - allocated znode * */ void zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) { uint64_t crtime[2], atime[2], mtime[2], ctime[2]; uint64_t mode, size, links, parent, pflags; uint64_t dzp_pflags = 0; uint64_t rdev = 0; zfs_sb_t *zsb = ZTOZSB(dzp); dmu_buf_t *db; timestruc_t now; uint64_t gen, obj; int bonuslen; int dnodesize; sa_handle_t *sa_hdl; dmu_object_type_t obj_type; sa_bulk_attr_t *sa_attrs; int cnt = 0; zfs_acl_locator_cb_t locate = { 0 }; znode_hold_t *zh; if (zsb->z_replay) { obj = vap->va_nodeid; now = vap->va_ctime; /* see zfs_replay_create() */ gen = vap->va_nblocks; /* ditto */ dnodesize = vap->va_fsid; /* ditto */ } else { obj = 0; gethrestime(&now); gen = dmu_tx_get_txg(tx); dnodesize = dmu_objset_dnodesize(zsb->z_os); } if (dnodesize == 0) dnodesize = DNODE_MIN_SIZE; obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; bonuslen = (obj_type == DMU_OT_SA) ? DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE; /* * Create a new DMU object. */ /* * There's currently no mechanism for pre-reading the blocks that will * be needed to allocate a new object, so we accept the small chance * that there will be an i/o error and we will fail one of the * assertions below. */ if (S_ISDIR(vap->va_mode)) { if (zsb->z_replay) { VERIFY0(zap_create_claim_norm_dnsize(zsb->z_os, obj, zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, obj_type, bonuslen, dnodesize, tx)); } else { obj = zap_create_norm_dnsize(zsb->z_os, zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, obj_type, bonuslen, dnodesize, tx); } } else { if (zsb->z_replay) { VERIFY0(dmu_object_claim_dnsize(zsb->z_os, obj, DMU_OT_PLAIN_FILE_CONTENTS, 0, obj_type, bonuslen, dnodesize, tx)); } else { obj = dmu_object_alloc_dnsize(zsb->z_os, DMU_OT_PLAIN_FILE_CONTENTS, 0, obj_type, bonuslen, dnodesize, tx); } } zh = zfs_znode_hold_enter(zsb, obj); VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db)); /* * If this is the root, fix up the half-initialized parent pointer * to reference the just-allocated physical data area. */ if (flag & IS_ROOT_NODE) { dzp->z_id = obj; } else { dzp_pflags = dzp->z_pflags; } /* * If parent is an xattr, so am I. */ if (dzp_pflags & ZFS_XATTR) { flag |= IS_XATTR; } if (zsb->z_use_fuids) pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; else pflags = 0; if (S_ISDIR(vap->va_mode)) { size = 2; /* contents ("." and "..") */ links = 2; } else { size = 0; - links = 1; + links = (flag & IS_TMPFILE) ? 0 : 1; } if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) rdev = vap->va_rdev; parent = dzp->z_id; mode = acl_ids->z_mode; if (flag & IS_XATTR) pflags |= ZFS_XATTR; /* * No execs denied will be deterimed when zfs_mode_compute() is called. */ pflags |= acl_ids->z_aclp->z_hints & (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); ZFS_TIME_ENCODE(&now, crtime); ZFS_TIME_ENCODE(&now, ctime); if (vap->va_mask & ATTR_ATIME) { ZFS_TIME_ENCODE(&vap->va_atime, atime); } else { ZFS_TIME_ENCODE(&now, atime); } if (vap->va_mask & ATTR_MTIME) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); } else { ZFS_TIME_ENCODE(&now, mtime); } /* Now add in all of the "SA" attributes */ VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED, &sa_hdl)); /* * Setup the array of attributes to be replaced/set on the new file * * order for DMU_OT_ZNODE is critical since it needs to be constructed * in the old znode_phys_t format. Don't change this ordering */ sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); } else { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, &acl_ids->z_fuid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, &acl_ids->z_fgid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); } SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL, &empty_xattr, 8); } if (obj_type == DMU_OT_ZNODE || (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); } if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, &acl_ids->z_fuid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, &acl_ids->z_fgid, 8); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad, sizeof (uint64_t) * 4); SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL, &acl_phys, sizeof (zfs_acl_phys_t)); } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL, &acl_ids->z_aclp->z_acl_count, 8); locate.cb_aclp = acl_ids->z_aclp; SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb), zfs_acl_data_locator, &locate, acl_ids->z_aclp->z_acl_bytes); mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, acl_ids->z_fuid, acl_ids->z_fgid); } VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); if (!(flag & IS_ROOT_NODE)) { *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl); VERIFY(*zpp != NULL); VERIFY(dzp != NULL); } else { /* * If we are creating the root node, the "parent" we * passed in is the znode for the root. */ *zpp = dzp; (*zpp)->z_sa_hdl = sa_hdl; } (*zpp)->z_pflags = pflags; (*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode; (*zpp)->z_dnodesize = dnodesize; if (obj_type == DMU_OT_ZNODE || acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); } kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); zfs_znode_hold_exit(zsb, zh); } /* * Update in-core attributes. It is assumed the caller will be doing an * sa_bulk_update to push the changes out. */ void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) { xoptattr_t *xoap; xoap = xva_getxoptattr(xvap); ASSERT(xoap); if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { uint64_t times[2]; ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), ×, sizeof (times), tx); XVA_SET_RTN(xvap, XAT_CREATETIME); } if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_READONLY); } if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_HIDDEN); } if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_SYSTEM); } if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_ARCHIVE); } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_IMMUTABLE); ZTOI(zp)->i_flags |= S_IMMUTABLE; } else { ZTOI(zp)->i_flags &= ~S_IMMUTABLE; } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_APPENDONLY); ZTOI(zp)->i_flags |= S_APPEND; } else { ZTOI(zp)->i_flags &= ~S_APPEND; } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_OPAQUE); } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, xoap->xoa_av_quarantined, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_AV_MODIFIED); } if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { zfs_sa_set_scanstamp(zp, xvap, tx); XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); } if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_REPARSE); } if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_OFFLINE); } if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_SPARSE); } } int zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp) { dmu_object_info_t doi; dmu_buf_t *db; znode_t *zp; znode_hold_t *zh; int err; sa_handle_t *hdl; *zpp = NULL; again: zh = zfs_znode_hold_enter(zsb, obj_num); err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); if (err) { zfs_znode_hold_exit(zsb, zh); return (err); } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_SA && (doi.doi_bonus_type != DMU_OT_ZNODE || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); zfs_znode_hold_exit(zsb, zh); return (SET_ERROR(EINVAL)); } hdl = dmu_buf_get_user(db); if (hdl != NULL) { zp = sa_get_userdata(hdl); /* * Since "SA" does immediate eviction we * should never find a sa handle that doesn't * know about the znode. */ ASSERT3P(zp, !=, NULL); mutex_enter(&zp->z_lock); ASSERT3U(zp->z_id, ==, obj_num); /* * If igrab() returns NULL the VFS has independently * determined the inode should be evicted and has * called iput_final() to start the eviction process. * The SA handle is still valid but because the VFS * requires that the eviction succeed we must drop * our locks and references to allow the eviction to * complete. The zfs_zget() may then be retried. * * This unlikely case could be optimized by registering * a sops->drop_inode() callback. The callback would * need to detect the active SA hold thereby informing * the VFS that this inode should not be evicted. */ if (igrab(ZTOI(zp)) == NULL) { mutex_exit(&zp->z_lock); sa_buf_rele(db, NULL); zfs_znode_hold_exit(zsb, zh); /* inode might need this to finish evict */ cond_resched(); goto again; } *zpp = zp; err = 0; mutex_exit(&zp->z_lock); sa_buf_rele(db, NULL); zfs_znode_hold_exit(zsb, zh); return (err); } /* * Not found create new znode/vnode but only if file exists. * * There is a small window where zfs_vget() could * find this object while a file create is still in * progress. This is checked for in zfs_znode_alloc() * * if zfs_znode_alloc() fails it will drop the hold on the * bonus buffer. */ zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size, doi.doi_bonus_type, obj_num, NULL); if (zp == NULL) { err = SET_ERROR(ENOENT); } else { *zpp = zp; } zfs_znode_hold_exit(zsb, zh); return (err); } int zfs_rezget(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); dmu_object_info_t doi; dmu_buf_t *db; uint64_t obj_num = zp->z_id; uint64_t mode; uint64_t links; sa_bulk_attr_t bulk[10]; int err; int count = 0; uint64_t gen; uint64_t z_uid, z_gid; uint64_t atime[2], mtime[2], ctime[2]; znode_hold_t *zh; /* * skip ctldir, otherwise they will always get invalidated. This will * cause funny behaviour for the mounted snapdirs. Especially for * Linux >= 3.18, d_invalidate will detach the mountpoint and prevent * anyone automount it again as long as someone is still using the * detached mount. */ if (zp->z_is_ctldir) return (0); zh = zfs_znode_hold_enter(zsb, obj_num); mutex_enter(&zp->z_acl_lock); if (zp->z_acl_cached) { zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = NULL; } mutex_exit(&zp->z_acl_lock); rw_enter(&zp->z_xattr_lock, RW_WRITER); if (zp->z_xattr_cached) { nvlist_free(zp->z_xattr_cached); zp->z_xattr_cached = NULL; } rw_exit(&zp->z_xattr_lock); ASSERT(zp->z_sa_hdl == NULL); err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); if (err) { zfs_znode_hold_exit(zsb, zh); return (err); } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_SA && (doi.doi_bonus_type != DMU_OT_ZNODE || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); zfs_znode_hold_exit(zsb, zh); return (SET_ERROR(EINVAL)); } zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL); /* reload cached values */ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &gen, sizeof (gen)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, sizeof (zp->z_size)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &links, sizeof (links)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &z_uid, sizeof (z_uid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &z_gid, sizeof (z_gid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, sizeof (mode)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &atime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { zfs_znode_dmu_fini(zp); zfs_znode_hold_exit(zsb, zh); return (SET_ERROR(EIO)); } zp->z_mode = ZTOI(zp)->i_mode = mode; zfs_uid_write(ZTOI(zp), z_uid); zfs_gid_write(ZTOI(zp), z_gid); ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime); ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime); ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime); if (gen != ZTOI(zp)->i_generation) { zfs_znode_dmu_fini(zp); zfs_znode_hold_exit(zsb, zh); return (SET_ERROR(EIO)); } zp->z_unlinked = (ZTOI(zp)->i_nlink == 0); set_nlink(ZTOI(zp), (uint32_t)links); zp->z_blksz = doi.doi_data_block_size; zp->z_atime_dirty = 0; zfs_inode_update(zp); zfs_znode_hold_exit(zsb, zh); return (0); } void zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) { zfs_sb_t *zsb = ZTOZSB(zp); objset_t *os = zsb->z_os; uint64_t obj = zp->z_id; uint64_t acl_obj = zfs_external_acl(zp); znode_hold_t *zh; zh = zfs_znode_hold_enter(zsb, obj); if (acl_obj) { VERIFY(!zp->z_is_sa); VERIFY(0 == dmu_object_free(os, acl_obj, tx)); } VERIFY(0 == dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); zfs_znode_hold_exit(zsb, zh); } void zfs_zinactive(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); uint64_t z_id = zp->z_id; znode_hold_t *zh; ASSERT(zp->z_sa_hdl); /* * Don't allow a zfs_zget() while were trying to release this znode. */ zh = zfs_znode_hold_enter(zsb, z_id); mutex_enter(&zp->z_lock); /* * If this was the last reference to a file with no links, * remove the file from the file system. */ if (zp->z_unlinked) { mutex_exit(&zp->z_lock); zfs_znode_hold_exit(zsb, zh); zfs_rmnode(zp); return; } mutex_exit(&zp->z_lock); zfs_znode_dmu_fini(zp); zfs_znode_hold_exit(zsb, zh); } static inline int zfs_compare_timespec(struct timespec *t1, struct timespec *t2) { if (t1->tv_sec < t2->tv_sec) return (-1); if (t1->tv_sec > t2->tv_sec) return (1); return (t1->tv_nsec - t2->tv_nsec); } /* * Prepare to update znode time stamps. * * IN: zp - znode requiring timestamp update * flag - ATTR_MTIME, ATTR_CTIME flags * * OUT: zp - z_seq * mtime - new mtime * ctime - new ctime * * Note: We don't update atime here, because we rely on Linux VFS to do * atime updating. */ void zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], uint64_t ctime[2]) { timestruc_t now; gethrestime(&now); zp->z_seq++; if (flag & ATTR_MTIME) { ZFS_TIME_ENCODE(&now, mtime); ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime); if (ZTOZSB(zp)->z_use_fuids) { zp->z_pflags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED); } } if (flag & ATTR_CTIME) { ZFS_TIME_ENCODE(&now, ctime); ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime); if (ZTOZSB(zp)->z_use_fuids) zp->z_pflags |= ZFS_ARCHIVE; } } /* * Grow the block size for a file. * * IN: zp - znode of file to free data in. * size - requested block size * tx - open transaction. * * NOTE: this function assumes that the znode is write locked. */ void zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) { int error; u_longlong_t dummy; if (size <= zp->z_blksz) return; /* * If the file size is already greater than the current blocksize, * we will not grow. If there is more than one block in a file, * the blocksize cannot change. */ if (zp->z_blksz && zp->z_size > zp->z_blksz) return; error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id, size, 0, tx); if (error == ENOTSUP) return; ASSERT0(error); /* What blocksize did we actually get? */ dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); } /* * Increase the file length * * IN: zp - znode of file to free data in. * end - new end-of-file * * RETURN: 0 on success, error code on failure */ static int zfs_extend(znode_t *zp, uint64_t end) { zfs_sb_t *zsb = ZTOZSB(zp); dmu_tx_t *tx; rl_t *rl; uint64_t newblksz; int error; /* * We will change zp_size, lock the whole file. */ rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end <= zp->z_size) { zfs_range_unlock(rl); return (0); } tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); if (end > zp->z_blksz && (!ISP2(zp->z_blksz) || zp->z_blksz < zsb->z_max_blksz)) { /* * We are growing the file past the current block size. */ if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) { /* * File's blocksize is already larger than the * "recordsize" property. Only let it grow to * the next power of 2. */ ASSERT(!ISP2(zp->z_blksz)); newblksz = MIN(end, 1 << highbit64(zp->z_blksz)); } else { newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz); } dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); } else { newblksz = 0; } error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); zfs_range_unlock(rl); return (error); } if (newblksz) zfs_grow_blocksize(zp, newblksz, tx); zp->z_size = end; VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), &zp->z_size, sizeof (zp->z_size), tx)); zfs_range_unlock(rl); dmu_tx_commit(tx); return (0); } /* * zfs_zero_partial_page - Modeled after update_pages() but * with different arguments and semantics for use by zfs_freesp(). * * Zeroes a piece of a single page cache entry for zp at offset * start and length len. * * Caller must acquire a range lock on the file for the region * being zeroed in order that the ARC and page cache stay in sync. */ static void zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len) { struct address_space *mp = ZTOI(zp)->i_mapping; struct page *pp; int64_t off; void *pb; ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK)); off = start & (PAGE_SIZE - 1); start &= PAGE_MASK; pp = find_lock_page(mp, start >> PAGE_SHIFT); if (pp) { if (mapping_writably_mapped(mp)) flush_dcache_page(pp); pb = kmap(pp); bzero(pb + off, len); kunmap(pp); if (mapping_writably_mapped(mp)) flush_dcache_page(pp); mark_page_accessed(pp); SetPageUptodate(pp); ClearPageError(pp); unlock_page(pp); put_page(pp); } } /* * Free space in a file. * * IN: zp - znode of file to free data in. * off - start of section to free. * len - length of section to free. * * RETURN: 0 on success, error code on failure */ static int zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) { zfs_sb_t *zsb = ZTOZSB(zp); rl_t *rl; int error; /* * Lock the range being freed. */ rl = zfs_range_lock(&zp->z_range_lock, off, len, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (off >= zp->z_size) { zfs_range_unlock(rl); return (0); } if (off + len > zp->z_size) len = zp->z_size - off; error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len); /* * Zero partial page cache entries. This must be done under a * range lock in order to keep the ARC and page cache in sync. */ if (zp->z_is_mapped) { loff_t first_page, last_page, page_len; loff_t first_page_offset, last_page_offset; /* first possible full page in hole */ first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT; /* last page of hole */ last_page = (off + len) >> PAGE_SHIFT; /* offset of first_page */ first_page_offset = first_page << PAGE_SHIFT; /* offset of last_page */ last_page_offset = last_page << PAGE_SHIFT; /* truncate whole pages */ if (last_page_offset > first_page_offset) { truncate_inode_pages_range(ZTOI(zp)->i_mapping, first_page_offset, last_page_offset - 1); } /* truncate sub-page ranges */ if (first_page > last_page) { /* entire punched area within a single page */ zfs_zero_partial_page(zp, off, len); } else { /* beginning of punched area at the end of a page */ page_len = first_page_offset - off; if (page_len > 0) zfs_zero_partial_page(zp, off, page_len); /* end of punched area at the beginning of a page */ page_len = off + len - last_page_offset; if (page_len > 0) zfs_zero_partial_page(zp, last_page_offset, page_len); } } zfs_range_unlock(rl); return (error); } /* * Truncate a file * * IN: zp - znode of file to free data in. * end - new end-of-file. * * RETURN: 0 on success, error code on failure */ static int zfs_trunc(znode_t *zp, uint64_t end) { zfs_sb_t *zsb = ZTOZSB(zp); dmu_tx_t *tx; rl_t *rl; int error; sa_bulk_attr_t bulk[2]; int count = 0; /* * We will change zp_size, lock the whole file. */ rl = zfs_range_lock(&zp->z_range_lock, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end >= zp->z_size) { zfs_range_unlock(rl); return (0); } error = dmu_free_long_range(zsb->z_os, zp->z_id, end, -1); if (error) { zfs_range_unlock(rl); return (error); } tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); zfs_range_unlock(rl); return (error); } zp->z_size = end; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, sizeof (zp->z_size)); if (end == 0) { zp->z_pflags &= ~ZFS_SPARSE; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); } VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); dmu_tx_commit(tx); zfs_range_unlock(rl); return (0); } /* * Free space in a file * * IN: zp - znode of file to free data in. * off - start of range * len - end of range (0 => EOF) * flag - current file open mode flags. * log - TRUE if this action should be logged * * RETURN: 0 on success, error code on failure */ int zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) { dmu_tx_t *tx; zfs_sb_t *zsb = ZTOZSB(zp); zilog_t *zilog = zsb->z_log; uint64_t mode; uint64_t mtime[2], ctime[2]; sa_bulk_attr_t bulk[3]; int count = 0; int error; if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zsb), &mode, sizeof (mode))) != 0) return (error); if (off > zp->z_size) { error = zfs_extend(zp, off+len); if (error == 0 && log) goto log; goto out; } if (len == 0) { error = zfs_trunc(zp, off); } else { if ((error = zfs_free_range(zp, off, len)) == 0 && off + len > zp->z_size) error = zfs_extend(zp, off+len); } if (error || !log) goto out; log: tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); goto out; } SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); ASSERT(error == 0); zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); dmu_tx_commit(tx); zfs_inode_update(zp); error = 0; out: /* * Truncate the page cache - for file truncate operations, use * the purpose-built API for truncations. For punching operations, * the truncation is handled under a range lock in zfs_free_range. */ if (len == 0) truncate_setsize(ZTOI(zp), off); return (error); } void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) { struct super_block *sb; zfs_sb_t *zsb; uint64_t moid, obj, sa_obj, version; uint64_t sense = ZFS_CASE_SENSITIVE; uint64_t norm = 0; nvpair_t *elem; int size; int error; int i; znode_t *rootzp = NULL; vattr_t vattr; znode_t *zp; zfs_acl_ids_t acl_ids; /* * First attempt to create master node. */ /* * In an empty objset, there are no blocks to read and thus * there can be no i/o errors (which we assert below). */ moid = MASTER_NODE_OBJ; error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, DMU_OT_NONE, 0, tx); ASSERT(error == 0); /* * Give dmu_object_alloc() a hint about where to start * allocating new objects. Otherwise, since the metadnode's * dnode_phys_t structure isn't initialized yet, dmu_object_next() * would fail and we'd have to skip to the next dnode block. */ os->os_obj_next = moid + 1; /* * Set starting attributes. */ version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); elem = NULL; while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { /* For the moment we expect all zpl props to be uint64_ts */ uint64_t val; char *name; ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); VERIFY(nvpair_value_uint64(elem, &val) == 0); name = nvpair_name(elem); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { if (val < version) version = val; } else { error = zap_update(os, moid, name, 8, 1, &val, tx); } ASSERT(error == 0); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) norm = val; else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) sense = val; } ASSERT(version != 0); error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); /* * Create zap object used for SA attribute registration */ if (version >= ZPL_VERSION_SA) { sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, DMU_OT_NONE, 0, tx); error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); ASSERT(error == 0); } else { sa_obj = 0; } /* * Create a delete queue. */ obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); ASSERT(error == 0); /* * Create root znode. Create minimal znode/inode/zsb/sb * to allow zfs_mknode to work. */ vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID; vattr.va_mode = S_IFDIR|0755; vattr.va_uid = crgetuid(cr); vattr.va_gid = crgetgid(cr); rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); rootzp->z_moved = 0; rootzp->z_unlinked = 0; rootzp->z_atime_dirty = 0; rootzp->z_is_sa = USE_SA(version, os); zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP); zsb->z_os = os; zsb->z_parent = zsb; zsb->z_version = version; zsb->z_use_fuids = USE_FUIDS(version, os); zsb->z_use_sa = USE_SA(version, os); zsb->z_norm = norm; sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP); sb->s_fs_info = zsb; ZTOI(rootzp)->i_sb = sb; error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, &zsb->z_attr_table); ASSERT(error == 0); /* * Fold case on file systems that are always or sometimes case * insensitive. */ if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) zsb->z_norm |= U8_TEXTPREP_TOUPPER; mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zsb->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX); zsb->z_hold_size = size; zsb->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, KM_SLEEP); zsb->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP); for (i = 0; i != size; i++) { avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare, sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node)); mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL); } VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, cr, NULL, &acl_ids)); zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); ASSERT3P(zp, ==, rootzp); error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); ASSERT(error == 0); zfs_acl_ids_free(&acl_ids); atomic_set(&ZTOI(rootzp)->i_count, 0); sa_handle_destroy(rootzp->z_sa_hdl); kmem_cache_free(znode_cache, rootzp); /* * Create shares directory */ error = zfs_create_share_dir(zsb, tx); ASSERT(error == 0); for (i = 0; i != size; i++) { avl_destroy(&zsb->z_hold_trees[i]); mutex_destroy(&zsb->z_hold_locks[i]); } vmem_free(zsb->z_hold_trees, sizeof (avl_tree_t) * size); vmem_free(zsb->z_hold_locks, sizeof (kmutex_t) * size); kmem_free(sb, sizeof (struct super_block)); kmem_free(zsb, sizeof (zfs_sb_t)); } #endif /* _KERNEL */ static int zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) { uint64_t sa_obj = 0; int error; error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); if (error != 0 && error != ENOENT) return (error); error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); return (error); } static int zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, dmu_buf_t **db, void *tag) { dmu_object_info_t doi; int error; if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) return (error); dmu_object_info_from_db(*db, &doi); if ((doi.doi_bonus_type != DMU_OT_SA && doi.doi_bonus_type != DMU_OT_ZNODE) || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t))) { sa_buf_rele(*db, tag); return (SET_ERROR(ENOTSUP)); } error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); if (error != 0) { sa_buf_rele(*db, tag); return (error); } return (0); } void zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) { sa_handle_destroy(hdl); sa_buf_rele(db, tag); } /* * Given an object number, return its parent object number and whether * or not the object is an extended attribute directory. */ static int zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp, int *is_xattrdir) { uint64_t parent; uint64_t pflags; uint64_t mode; uint64_t parent_mode; sa_bulk_attr_t bulk[3]; sa_handle_t *sa_hdl; dmu_buf_t *sa_db; int count = 0; int error; SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, &parent, sizeof (parent)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, &pflags, sizeof (pflags)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, &mode, sizeof (mode)); if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) return (error); /* * When a link is removed its parent pointer is not changed and will * be invalid. There are two cases where a link is removed but the * file stays around, when it goes to the delete queue and when there * are additional links. */ error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); if (error != 0) return (error); error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); zfs_release_sa_handle(sa_hdl, sa_db, FTAG); if (error != 0) return (error); *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); /* * Extended attributes can be applied to files, directories, etc. * Otherwise the parent must be a directory. */ if (!*is_xattrdir && !S_ISDIR(parent_mode)) return (EINVAL); *pobjp = parent; return (0); } /* * Given an object number, return some zpl level statistics */ static int zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, zfs_stat_t *sb) { sa_bulk_attr_t bulk[4]; int count = 0; SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, &sb->zs_mode, sizeof (sb->zs_mode)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, &sb->zs_gen, sizeof (sb->zs_gen)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, &sb->zs_links, sizeof (sb->zs_links)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, &sb->zs_ctime, sizeof (sb->zs_ctime)); return (sa_bulk_lookup(hdl, bulk, count)); } static int zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, sa_attr_type_t *sa_table, char *buf, int len) { sa_handle_t *sa_hdl; sa_handle_t *prevhdl = NULL; dmu_buf_t *prevdb = NULL; dmu_buf_t *sa_db = NULL; char *path = buf + len - 1; int error; *path = '\0'; sa_hdl = hdl; for (;;) { uint64_t pobj = 0; char component[MAXNAMELEN + 2]; size_t complen; int is_xattrdir = 0; if (prevdb) zfs_release_sa_handle(prevhdl, prevdb, FTAG); if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, &is_xattrdir)) != 0) break; if (pobj == obj) { if (path[0] != '/') *--path = '/'; break; } component[0] = '/'; if (is_xattrdir) { (void) sprintf(component + 1, ""); } else { error = zap_value_search(osp, pobj, obj, ZFS_DIRENT_OBJ(-1ULL), component + 1); if (error != 0) break; } complen = strlen(component); path -= complen; ASSERT(path >= buf); bcopy(component, path, complen); obj = pobj; if (sa_hdl != hdl) { prevhdl = sa_hdl; prevdb = sa_db; } error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); if (error != 0) { sa_hdl = prevhdl; sa_db = prevdb; break; } } if (sa_hdl != NULL && sa_hdl != hdl) { ASSERT(sa_db != NULL); zfs_release_sa_handle(sa_hdl, sa_db, FTAG); } if (error == 0) (void) memmove(buf, path, buf + len - path); return (error); } int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) { sa_attr_type_t *sa_table; sa_handle_t *hdl; dmu_buf_t *db; int error; error = zfs_sa_setup(osp, &sa_table); if (error != 0) return (error); error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); zfs_release_sa_handle(hdl, db, FTAG); return (error); } int zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, char *buf, int len) { char *path = buf + len - 1; sa_attr_type_t *sa_table; sa_handle_t *hdl; dmu_buf_t *db; int error; *path = '\0'; error = zfs_sa_setup(osp, &sa_table); if (error != 0) return (error); error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_stats_impl(hdl, sa_table, sb); if (error != 0) { zfs_release_sa_handle(hdl, db, FTAG); return (error); } error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); zfs_release_sa_handle(hdl, db, FTAG); return (error); } #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(zfs_create_fs); EXPORT_SYMBOL(zfs_obj_to_path); module_param(zfs_object_mutex_size, uint, 0644); MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array"); #endif diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index b8adda7a1b1b..b1f9b1f4e6d8 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -1,765 +1,807 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2011, Lawrence Livermore National Security, LLC. * Copyright (c) 2015 by Chunwei Chen. All rights reserved. */ #include #include #include #include #include #include #include #include static struct dentry * #ifdef HAVE_LOOKUP_NAMEIDATA zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) #else zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) #endif { cred_t *cr = CRED(); struct inode *ip; int error; fstrans_cookie_t cookie; pathname_t *ppn = NULL; pathname_t pn; int zfs_flags = 0; zfs_sb_t *zsb = dentry->d_sb->s_fs_info; if (dlen(dentry) > ZFS_MAX_DATASET_NAME_LEN) return (ERR_PTR(-ENAMETOOLONG)); crhold(cr); cookie = spl_fstrans_mark(); /* If we are a case insensitive fs, we need the real name */ if (zsb->z_case == ZFS_CASE_INSENSITIVE) { zfs_flags = FIGNORECASE; pn_alloc(&pn); ppn = &pn; } error = -zfs_lookup(dir, dname(dentry), &ip, zfs_flags, cr, NULL, ppn); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); crfree(cr); spin_lock(&dentry->d_lock); dentry->d_time = jiffies; #ifndef HAVE_S_D_OP d_set_d_op(dentry, &zpl_dentry_operations); #endif /* HAVE_S_D_OP */ spin_unlock(&dentry->d_lock); if (error) { /* * If we have a case sensitive fs, we do not want to * insert negative entries, so return NULL for ENOENT. * Fall through if the error is not ENOENT. Also free memory. */ if (ppn) { pn_free(ppn); if (error == -ENOENT) return (NULL); } if (error == -ENOENT) return (d_splice_alias(NULL, dentry)); else return (ERR_PTR(error)); } /* * If we are case insensitive, call the correct function * to install the name. */ if (ppn) { struct dentry *new_dentry; struct qstr ci_name; if (strcmp(dname(dentry), pn.pn_buf) == 0) { new_dentry = d_splice_alias(ip, dentry); } else { ci_name.name = pn.pn_buf; ci_name.len = strlen(pn.pn_buf); new_dentry = d_add_ci(dentry, ip, &ci_name); } pn_free(ppn); return (new_dentry); } else { return (d_splice_alias(ip, dentry)); } } void zpl_vap_init(vattr_t *vap, struct inode *dir, zpl_umode_t mode, cred_t *cr) { vap->va_mask = ATTR_MODE; vap->va_mode = mode; vap->va_uid = crgetfsuid(cr); if (dir && dir->i_mode & S_ISGID) { vap->va_gid = KGID_TO_SGID(dir->i_gid); if (S_ISDIR(mode)) vap->va_mode |= S_ISGID; } else { vap->va_gid = crgetfsgid(cr); } } static int #ifdef HAVE_CREATE_NAMEIDATA zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, struct nameidata *nd) #else zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, bool flag) #endif { cred_t *cr = CRED(); struct inode *ip; vattr_t *vap; int error; fstrans_cookie_t cookie; crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, mode, cr); cookie = spl_fstrans_mark(); error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL); if (error == 0) { d_instantiate(dentry, ip); error = zpl_xattr_security_init(ip, dir, &dentry->d_name); if (error == 0) error = zpl_init_acl(ip, dir); if (error) (void) zfs_remove(dir, dname(dentry), cr, 0); } spl_fstrans_unmark(cookie); kmem_free(vap, sizeof (vattr_t)); crfree(cr); ASSERT3S(error, <=, 0); return (error); } static int zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, dev_t rdev) { cred_t *cr = CRED(); struct inode *ip; vattr_t *vap; int error; fstrans_cookie_t cookie; /* * We currently expect Linux to supply rdev=0 for all sockets * and fifos, but we want to know if this behavior ever changes. */ if (S_ISSOCK(mode) || S_ISFIFO(mode)) ASSERT(rdev == 0); crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, mode, cr); vap->va_rdev = rdev; cookie = spl_fstrans_mark(); error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL); if (error == 0) { d_instantiate(dentry, ip); error = zpl_xattr_security_init(ip, dir, &dentry->d_name); if (error == 0) error = zpl_init_acl(ip, dir); if (error) (void) zfs_remove(dir, dname(dentry), cr, 0); } spl_fstrans_unmark(cookie); kmem_free(vap, sizeof (vattr_t)); crfree(cr); ASSERT3S(error, <=, 0); return (error); } +#ifdef HAVE_TMPFILE +static int +zpl_tmpfile(struct inode *dir, struct dentry *dentry, zpl_umode_t mode) +{ + cred_t *cr = CRED(); + struct inode *ip; + vattr_t *vap; + int error; + fstrans_cookie_t cookie; + + crhold(cr); + vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); + zpl_vap_init(vap, dir, mode, cr); + + cookie = spl_fstrans_mark(); + error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL); + if (error == 0) { + /* d_tmpfile will do drop_nlink, so we should set it first */ + set_nlink(ip, 1); + d_tmpfile(dentry, ip); + + error = zpl_xattr_security_init(ip, dir, &dentry->d_name); + if (error == 0) + error = zpl_init_acl(ip, dir); + /* + * don't need to handle error here, file is already in + * unlinked set. + */ + } + + spl_fstrans_unmark(cookie); + kmem_free(vap, sizeof (vattr_t)); + crfree(cr); + ASSERT3S(error, <=, 0); + + return (error); +} +#endif + static int zpl_unlink(struct inode *dir, struct dentry *dentry) { cred_t *cr = CRED(); int error; fstrans_cookie_t cookie; zfs_sb_t *zsb = dentry->d_sb->s_fs_info; crhold(cr); cookie = spl_fstrans_mark(); error = -zfs_remove(dir, dname(dentry), cr, 0); /* * For a CI FS we must invalidate the dentry to prevent the * creation of negative entries. */ if (error == 0 && zsb->z_case == ZFS_CASE_INSENSITIVE) d_invalidate(dentry); spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); return (error); } static int zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode) { cred_t *cr = CRED(); vattr_t *vap; struct inode *ip; int error; fstrans_cookie_t cookie; crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, mode | S_IFDIR, cr); cookie = spl_fstrans_mark(); error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL); if (error == 0) { d_instantiate(dentry, ip); error = zpl_xattr_security_init(ip, dir, &dentry->d_name); if (error == 0) error = zpl_init_acl(ip, dir); if (error) (void) zfs_rmdir(dir, dname(dentry), NULL, cr, 0); } spl_fstrans_unmark(cookie); kmem_free(vap, sizeof (vattr_t)); crfree(cr); ASSERT3S(error, <=, 0); return (error); } static int zpl_rmdir(struct inode * dir, struct dentry *dentry) { cred_t *cr = CRED(); int error; fstrans_cookie_t cookie; zfs_sb_t *zsb = dentry->d_sb->s_fs_info; crhold(cr); cookie = spl_fstrans_mark(); error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0); /* * For a CI FS we must invalidate the dentry to prevent the * creation of negative entries. */ if (error == 0 && zsb->z_case == ZFS_CASE_INSENSITIVE) d_invalidate(dentry); spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); return (error); } static int zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { int error; fstrans_cookie_t cookie; cookie = spl_fstrans_mark(); error = -zfs_getattr_fast(dentry->d_inode, stat); spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); return (error); } static int zpl_setattr(struct dentry *dentry, struct iattr *ia) { struct inode *ip = dentry->d_inode; cred_t *cr = CRED(); vattr_t *vap; int error; fstrans_cookie_t cookie; error = setattr_prepare(dentry, ia); if (error) return (error); crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK; vap->va_mode = ia->ia_mode; vap->va_uid = KUID_TO_SUID(ia->ia_uid); vap->va_gid = KGID_TO_SGID(ia->ia_gid); vap->va_size = ia->ia_size; vap->va_atime = ia->ia_atime; vap->va_mtime = ia->ia_mtime; vap->va_ctime = ia->ia_ctime; if (vap->va_mask & ATTR_ATIME) ip->i_atime = timespec_trunc(ia->ia_atime, ip->i_sb->s_time_gran); cookie = spl_fstrans_mark(); error = -zfs_setattr(ip, vap, 0, cr); if (!error && (ia->ia_valid & ATTR_MODE)) error = zpl_chmod_acl(ip); spl_fstrans_unmark(cookie); kmem_free(vap, sizeof (vattr_t)); crfree(cr); ASSERT3S(error, <=, 0); return (error); } static int zpl_rename2(struct inode *sdip, struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry, unsigned int flags) { cred_t *cr = CRED(); int error; fstrans_cookie_t cookie; /* We don't have renameat2(2) support */ if (flags) return (-EINVAL); crhold(cr); cookie = spl_fstrans_mark(); error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0); spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); return (error); } #ifndef HAVE_RENAME_WANTS_FLAGS static int zpl_rename(struct inode *sdip, struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry) { return (zpl_rename2(sdip, sdentry, tdip, tdentry, 0)); } #endif static int zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) { cred_t *cr = CRED(); vattr_t *vap; struct inode *ip; int error; fstrans_cookie_t cookie; crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr); cookie = spl_fstrans_mark(); error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0); if (error == 0) { d_instantiate(dentry, ip); error = zpl_xattr_security_init(ip, dir, &dentry->d_name); if (error) (void) zfs_remove(dir, dname(dentry), cr, 0); } spl_fstrans_unmark(cookie); kmem_free(vap, sizeof (vattr_t)); crfree(cr); ASSERT3S(error, <=, 0); return (error); } #if defined(HAVE_PUT_LINK_COOKIE) static void zpl_put_link(struct inode *unused, void *cookie) { kmem_free(cookie, MAXPATHLEN); } #elif defined(HAVE_PUT_LINK_NAMEIDATA) static void zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) { const char *link = nd_get_link(nd); if (!IS_ERR(link)) kmem_free(link, MAXPATHLEN); } #elif defined(HAVE_PUT_LINK_DELAYED) static void zpl_put_link(void *ptr) { kmem_free(ptr, MAXPATHLEN); } #endif static int zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link) { fstrans_cookie_t cookie; cred_t *cr = CRED(); struct iovec iov; uio_t uio; int error; crhold(cr); *link = NULL; iov.iov_len = MAXPATHLEN; iov.iov_base = kmem_zalloc(MAXPATHLEN, KM_SLEEP); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_skip = 0; uio.uio_resid = (MAXPATHLEN - 1); uio.uio_segflg = UIO_SYSSPACE; cookie = spl_fstrans_mark(); error = -zfs_readlink(ip, &uio, cr); spl_fstrans_unmark(cookie); crfree(cr); if (error) kmem_free(iov.iov_base, MAXPATHLEN); else *link = iov.iov_base; return (error); } #if defined(HAVE_GET_LINK_DELAYED) const char * zpl_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { char *link = NULL; int error; if (!dentry) return (ERR_PTR(-ECHILD)); error = zpl_get_link_common(dentry, inode, &link); if (error) return (ERR_PTR(error)); set_delayed_call(done, zpl_put_link, link); return (link); } #elif defined(HAVE_GET_LINK_COOKIE) const char * zpl_get_link(struct dentry *dentry, struct inode *inode, void **cookie) { char *link = NULL; int error; if (!dentry) return (ERR_PTR(-ECHILD)); error = zpl_get_link_common(dentry, inode, &link); if (error) return (ERR_PTR(error)); return (*cookie = link); } #elif defined(HAVE_FOLLOW_LINK_COOKIE) const char * zpl_follow_link(struct dentry *dentry, void **cookie) { char *link = NULL; int error; error = zpl_get_link_common(dentry, dentry->d_inode, &link); if (error) return (ERR_PTR(error)); return (*cookie = link); } #elif defined(HAVE_FOLLOW_LINK_NAMEIDATA) static void * zpl_follow_link(struct dentry *dentry, struct nameidata *nd) { char *link = NULL; int error; error = zpl_get_link_common(dentry, dentry->d_inode, &link); if (error) nd_set_link(nd, ERR_PTR(error)); else nd_set_link(nd, link); return (NULL); } #endif static int zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { cred_t *cr = CRED(); struct inode *ip = old_dentry->d_inode; int error; fstrans_cookie_t cookie; if (ip->i_nlink >= ZFS_LINK_MAX) return (-EMLINK); crhold(cr); ip->i_ctime = CURRENT_TIME_SEC; igrab(ip); /* Use ihold() if available */ cookie = spl_fstrans_mark(); error = -zfs_link(dir, ip, dname(dentry), cr, 0); if (error) { iput(ip); goto out; } d_instantiate(dentry, ip); out: spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); return (error); } #ifdef HAVE_INODE_TRUNCATE_RANGE static void zpl_truncate_range(struct inode *ip, loff_t start, loff_t end) { cred_t *cr = CRED(); flock64_t bf; fstrans_cookie_t cookie; ASSERT3S(start, <=, end); /* * zfs_freesp() will interpret (len == 0) as meaning "truncate until * the end of the file". We don't want that. */ if (start == end) return; crhold(cr); bf.l_type = F_WRLCK; bf.l_whence = 0; bf.l_start = start; bf.l_len = end - start; bf.l_pid = 0; cookie = spl_fstrans_mark(); zfs_space(ip, F_FREESP, &bf, FWRITE, start, cr); spl_fstrans_unmark(cookie); crfree(cr); } #endif /* HAVE_INODE_TRUNCATE_RANGE */ #ifdef HAVE_INODE_FALLOCATE static long zpl_fallocate(struct inode *ip, int mode, loff_t offset, loff_t len) { return (zpl_fallocate_common(ip, mode, offset, len)); } #endif /* HAVE_INODE_FALLOCATE */ static int #ifdef HAVE_D_REVALIDATE_NAMEIDATA zpl_revalidate(struct dentry *dentry, struct nameidata *nd) { unsigned int flags = (nd ? nd->flags : 0); #else zpl_revalidate(struct dentry *dentry, unsigned int flags) { #endif /* HAVE_D_REVALIDATE_NAMEIDATA */ zfs_sb_t *zsb = dentry->d_sb->s_fs_info; int error; if (flags & LOOKUP_RCU) return (-ECHILD); /* * Automounted snapshots rely on periodic dentry revalidation * to defer snapshots from being automatically unmounted. */ if (zsb->z_issnap) { if (time_after(jiffies, zsb->z_snap_defer_time + MAX(zfs_expire_snapshot * HZ / 2, HZ))) { zsb->z_snap_defer_time = jiffies; zfsctl_snapshot_unmount_delay(zsb->z_os->os_spa, dmu_objset_id(zsb->z_os), zfs_expire_snapshot); } } /* * After a rollback negative dentries created before the rollback * time must be invalidated. Otherwise they can obscure files which * are only present in the rolled back dataset. */ if (dentry->d_inode == NULL) { spin_lock(&dentry->d_lock); error = time_before(dentry->d_time, zsb->z_rollback_time); spin_unlock(&dentry->d_lock); if (error) return (0); } /* * The dentry may reference a stale inode if a mounted file system * was rolled back to a point in time where the object didn't exist. */ if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale) return (0); return (1); } const struct inode_operations zpl_inode_operations = { .setattr = zpl_setattr, .getattr = zpl_getattr, #ifdef HAVE_GENERIC_SETXATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .removexattr = generic_removexattr, #endif .listxattr = zpl_xattr_list, #ifdef HAVE_INODE_TRUNCATE_RANGE .truncate_range = zpl_truncate_range, #endif /* HAVE_INODE_TRUNCATE_RANGE */ #ifdef HAVE_INODE_FALLOCATE .fallocate = zpl_fallocate, #endif /* HAVE_INODE_FALLOCATE */ #if defined(CONFIG_FS_POSIX_ACL) #if defined(HAVE_GET_ACL) .get_acl = zpl_get_acl, #elif defined(HAVE_CHECK_ACL) .check_acl = zpl_check_acl, #elif defined(HAVE_PERMISSION) .permission = zpl_permission, #endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */ #endif /* CONFIG_FS_POSIX_ACL */ }; const struct inode_operations zpl_dir_inode_operations = { .create = zpl_create, .lookup = zpl_lookup, .link = zpl_link, .unlink = zpl_unlink, .symlink = zpl_symlink, .mkdir = zpl_mkdir, .rmdir = zpl_rmdir, .mknod = zpl_mknod, #ifdef HAVE_RENAME_WANTS_FLAGS .rename = zpl_rename2, #else .rename = zpl_rename, +#endif +#ifdef HAVE_TMPFILE + .tmpfile = zpl_tmpfile, #endif .setattr = zpl_setattr, .getattr = zpl_getattr, #ifdef HAVE_GENERIC_SETXATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .removexattr = generic_removexattr, #endif .listxattr = zpl_xattr_list, #if defined(CONFIG_FS_POSIX_ACL) #if defined(HAVE_GET_ACL) .get_acl = zpl_get_acl, #elif defined(HAVE_CHECK_ACL) .check_acl = zpl_check_acl, #elif defined(HAVE_PERMISSION) .permission = zpl_permission, #endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */ #endif /* CONFIG_FS_POSIX_ACL */ }; const struct inode_operations zpl_symlink_inode_operations = { .readlink = generic_readlink, #if defined(HAVE_GET_LINK_DELAYED) || defined(HAVE_GET_LINK_COOKIE) .get_link = zpl_get_link, #elif defined(HAVE_FOLLOW_LINK_COOKIE) || defined(HAVE_FOLLOW_LINK_NAMEIDATA) .follow_link = zpl_follow_link, #endif #if defined(HAVE_PUT_LINK_COOKIE) || defined(HAVE_PUT_LINK_NAMEIDATA) .put_link = zpl_put_link, #endif .setattr = zpl_setattr, .getattr = zpl_getattr, #ifdef HAVE_GENERIC_SETXATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .removexattr = generic_removexattr, #endif .listxattr = zpl_xattr_list, }; const struct inode_operations zpl_special_inode_operations = { .setattr = zpl_setattr, .getattr = zpl_getattr, #ifdef HAVE_GENERIC_SETXATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .removexattr = generic_removexattr, #endif .listxattr = zpl_xattr_list, #if defined(CONFIG_FS_POSIX_ACL) #if defined(HAVE_GET_ACL) .get_acl = zpl_get_acl, #elif defined(HAVE_CHECK_ACL) .check_acl = zpl_check_acl, #elif defined(HAVE_PERMISSION) .permission = zpl_permission, #endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */ #endif /* CONFIG_FS_POSIX_ACL */ }; dentry_operations_t zpl_dentry_operations = { .d_revalidate = zpl_revalidate, }; diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 9d63d68535cb..4290984357e1 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -1,638 +1,641 @@ # # This file and its contents are supplied under the terms of the # Common Development and Distribution License ("CDDL"), version 1.0. # You may only use this file in accordance with the terms of version # 1.0 of the CDDL. # # A full copy of the text of the CDDL should have accompanied this # source. A copy of the CDDL is also available via the Internet at # http://www.illumos.org/license/CDDL. # [DEFAULT] pre = setup quiet = False pre_user = root user = root timeout = 600 post_user = root post = cleanup outputdir = /var/tmp/test_results # DISABLED: update to use ZFS_ACL_* variables and user_run helper. # posix_001_pos # posix_002_pos [tests/functional/acl/posix] tests = ['posix_003_pos'] [tests/functional/atime] tests = ['atime_001_pos', 'atime_002_neg', 'atime_003_pos'] # DISABLED: # bootfs_006_pos - needs investigation # bootfs_008_neg - needs investigation [tests/functional/bootfs] tests = ['bootfs_001_pos', 'bootfs_002_neg', 'bootfs_003_pos', 'bootfs_004_neg', 'bootfs_005_neg', 'bootfs_007_neg'] # DISABLED: # cache_001_pos - needs investigation # cache_010_neg - needs investigation [tests/functional/cache] tests = ['cache_002_pos', 'cache_003_pos', 'cache_004_neg', 'cache_005_neg', 'cache_006_pos', 'cache_007_neg', 'cache_008_neg', 'cache_009_pos', 'cache_011_pos'] # DISABLED: needs investigation #[tests/functional/cachefile] #tests = ['cachefile_001_pos', 'cachefile_002_pos', 'cachefile_003_pos', # 'cachefile_004_pos'] #pre = #post = # DISABLED: needs investigation # 'sensitive_none_lookup', 'sensitive_none_delete', # 'sensitive_formd_lookup', 'sensitive_formd_delete', # 'insensitive_none_lookup', 'insensitive_none_delete', # 'insensitive_formd_lookup', 'insensitive_formd_delete', # 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete', # 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete'] [tests/functional/casenorm] tests = ['case_all_values', 'norm_all_values'] [tests/functional/checksum] tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'filetest_001_pos'] [tests/functional/clean_mirror] tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos', 'clean_mirror_003_pos', 'clean_mirror_004_pos'] [tests/functional/cli_root/zdb] tests = ['zdb_001_neg'] pre = post = [tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg'] # DISABLED: # zfs_clone_005_pos - busy unmount [tests/functional/cli_root/zfs_clone] tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos', 'zfs_clone_004_pos', 'zfs_clone_006_pos', 'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg', 'zfs_clone_010_pos'] # DISABLED: # zfs_copies_003_pos - zpool on zvol # zfs_copies_005_neg - nested pools [tests/functional/cli_root/zfs_copies] tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_004_neg', 'zfs_copies_006_pos'] [tests/functional/cli_root/zfs_create] tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', 'zfs_create_013_pos'] # DISABLED: # zfs_destroy_005_neg - busy mountpoint behavior [tests/functional/cli_root/zfs_destroy] tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', 'zfs_destroy_004_pos','zfs_destroy_006_neg', 'zfs_destroy_007_neg', 'zfs_destroy_008_pos','zfs_destroy_009_pos', 'zfs_destroy_010_pos', 'zfs_destroy_011_pos','zfs_destroy_012_pos', 'zfs_destroy_013_neg', 'zfs_destroy_014_pos','zfs_destroy_015_pos', 'zfs_destroy_016_pos'] # DISABLED: # zfs_get_004_pos - nested pools # zfs_get_006_neg - needs investigation [tests/functional/cli_root/zfs_get] tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos', 'zfs_get_005_neg', 'zfs_get_007_neg', 'zfs_get_008_pos', 'zfs_get_009_pos', 'zfs_get_010_neg'] [tests/functional/cli_root/zfs_inherit] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos'] # DISABLED: # zfs_mount_006_pos - needs investigation # zfs_mount_007_pos - needs investigation # zfs_mount_009_neg - needs investigation # zfs_mount_all_001_pos - needs investigation [tests/functional/cli_root/zfs_mount] tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_008_pos', 'zfs_mount_010_neg', 'zfs_mount_011_neg'] [tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', 'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg', 'zfs_promote_007_neg', 'zfs_promote_008_pos'] # DISABLED: # zfs_written_property_001_pos - sync(1) does not force txg under Linux [tests/functional/cli_root/zfs_property] tests = [] # DISABLED: # zfs_receive_004_neg - Fails for OpenZFS on illumos [tests/functional/cli_root/zfs_receive] tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_005_neg', 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', 'zfs_receive_013_pos'] [tests/functional/cli_root/zfs_rename] tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', 'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos', 'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_009_neg', 'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg', 'zfs_rename_013_pos'] [tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] # DISABLED: # zfs_rollback_001_pos - busy mountpoint behavior # zfs_rollback_002_pos - busy mountpoint behavior [tests/functional/cli_root/zfs_rollback] tests = ['zfs_rollback_003_neg', 'zfs_rollback_004_neg'] [tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', 'zfs_send_007_pos'] # DISABLED: # mountpoint_003_pos - needs investigation # ro_props_001_pos - https://github.com/zfsonlinux/zfs/issues/5201 # user_property_002_pos - needs investigation [tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', 'canmount_002_pos', 'canmount_003_pos', 'canmount_004_pos', 'checksum_001_pos', 'compression_001_pos', 'mountpoint_001_pos', 'mountpoint_002_pos', 'reservation_001_neg', 'share_mount_001_neg', 'snapdir_001_pos', 'onoffs_001_pos', 'user_property_001_pos', 'user_property_003_neg', 'readonly_001_pos', 'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg', 'zfs_set_002_neg', 'zfs_set_003_neg', 'property_alias_001_pos'] # DISABLED: Tests need to be updated for Linux share behavior #[tests/functional/cli_root/zfs_share] #tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', # 'zfs_share_004_pos', 'zfs_share_005_pos', 'zfs_share_006_pos', # 'zfs_share_007_neg', 'zfs_share_008_neg', 'zfs_share_009_neg', # 'zfs_share_010_neg', 'zfs_share_011_pos'] [tests/functional/cli_root/zfs_snapshot] tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', 'zfs_snapshot_003_neg', 'zfs_snapshot_004_neg', 'zfs_snapshot_005_neg', 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg', 'zfs_snapshot_009_pos'] # DISABLED: # zfs_unmount_005_pos - needs investigation # zfs_unmount_009_pos - needs investigation # zfs_unmount_all_001_pos - needs investigation [tests/functional/cli_root/zfs_unmount] tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos', 'zfs_unmount_004_pos', 'zfs_unmount_006_pos', 'zfs_unmount_007_neg', 'zfs_unmount_008_neg'] # DISABLED: Tests need to be updated for Linux unshare behavior #[tests/functional/cli_root/zfs_unshare] #tests = ['zfs_unshare_001_pos', 'zfs_unshare_002_pos', 'zfs_unshare_003_pos', # 'zfs_unshare_004_neg', 'zfs_unshare_005_neg'] [tests/functional/cli_root/zfs_upgrade] tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_003_pos', 'zfs_upgrade_004_pos', 'zfs_upgrade_005_pos', 'zfs_upgrade_006_neg', 'zfs_upgrade_007_neg'] [tests/functional/cli_root/zpool] tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos'] # DISABLED: # zpool_add_004_pos - nested pools # zpool_add_005_pos - no 'dumpadm' command. # zpool_add_006_pos - nested pools [tests/functional/cli_root/zpool_add] tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos', 'zpool_add_007_neg', 'zpool_add_008_neg', 'zpool_add_009_neg'] [tests/functional/cli_root/zpool_attach] tests = ['zpool_attach_001_neg'] [tests/functional/cli_root/zpool_clear] tests = ['zpool_clear_001_pos', 'zpool_clear_002_neg', 'zpool_clear_003_neg'] # DISABLED: # zpool_create_001_pos - needs investigation # zpool_create_002_pos - needs investigation # zpool_create_004_pos - needs investigation # zpool_create_006_pos - nested pools # zpool_create_008_pos - uses VTOC labels (?) and 'overlapping slices' # zpool_create_011_neg - tries to access /etc/vfstab etc # zpool_create_012_neg - swap devices # zpool_create_014_neg - swap devices # zpool_create_015_neg - swap devices # zpool_create_016_pos - no dumadm command. # zpool_create_020_pos - needs investigation [tests/functional/cli_root/zpool_create] tests = [ 'zpool_create_003_pos', 'zpool_create_005_pos', 'zpool_create_007_neg', 'zpool_create_009_neg', 'zpool_create_010_neg', 'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', 'zpool_create_024_pos', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg', 'zpool_create_features_005_pos'] # DISABLED: # zpool_destroy_001_pos - failure should be investigated # zpool_destroy_002_pos - update for Linux fource unmount behavior [tests/functional/cli_root/zpool_destroy] tests = [ 'zpool_destroy_003_neg'] pre = post = [tests/functional/cli_root/zpool_detach] tests = ['zpool_detach_001_neg'] # DISABLED: Requires full FMA support in ZED #[tests/functional/cli_root/zpool_expand] #tests = ['zpool_expand_001_pos', 'zpool_expand_002_pos', # 'zpool_expand_003_neg'] # DISABLED: # zpool_export_004_pos - nested pools [tests/functional/cli_root/zpool_export] tests = ['zpool_export_001_pos', 'zpool_export_002_pos', 'zpool_export_003_neg'] [tests/functional/cli_root/zpool_get] tests = ['zpool_get_001_pos', 'zpool_get_002_pos', 'zpool_get_003_pos', 'zpool_get_004_neg'] [tests/functional/cli_root/zpool_history] tests = ['zpool_history_001_neg', 'zpool_history_002_pos'] # DISABLED: # zpool_import_002_pos - https://github.com/zfsonlinux/zfs/issues/5202 # zpool_import_012_pos - sharenfs issue # zpool_import_all_001_pos - partition issue [tests/functional/cli_root/zpool_import] tests = ['zpool_import_001_pos', 'zpool_import_003_pos', 'zpool_import_004_pos', 'zpool_import_005_pos', 'zpool_import_006_pos', 'zpool_import_007_pos', 'zpool_import_008_pos', 'zpool_import_009_neg', 'zpool_import_010_pos', 'zpool_import_011_neg', 'zpool_import_013_neg', 'zpool_import_features_001_pos', 'zpool_import_features_002_neg', 'zpool_import_features_003_pos','zpool_import_missing_001_pos', 'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos', 'zpool_import_rename_001_pos'] [tests/functional/cli_root/zpool_offline] tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg'] [tests/functional/cli_root/zpool_online] tests = ['zpool_online_001_pos', 'zpool_online_002_neg'] # DISABLED: # zpool_remove_003_pos - needs investigation [tests/functional/cli_root/zpool_remove] tests = ['zpool_remove_001_neg', 'zpool_remove_002_pos'] [tests/functional/cli_root/zpool_replace] tests = ['zpool_replace_001_neg'] [tests/functional/cli_root/zpool_scrub] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos'] [tests/functional/cli_root/zpool_set] tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg'] pre = post = [tests/functional/cli_root/zpool_status] tests = ['zpool_status_001_pos', 'zpool_status_002_pos'] # DISABLED: # zpool_upgrade_002_pos - Issue zfsonlinux/zfs#4034 # zpool_upgrade_004_pos - Issue zfsonlinux/zfs#4034 # zpool_upgrade_007_pos - needs investigation [tests/functional/cli_root/zpool_upgrade] tests = ['zpool_upgrade_001_pos', 'zpool_upgrade_003_pos', 'zpool_upgrade_005_neg', 'zpool_upgrade_006_neg', 'zpool_upgrade_008_pos', 'zpool_upgrade_009_neg'] # DISABLED: # zfs_share_001_neg - requires additional dependencies # zfs_unshare_001_neg - requires additional dependencies [tests/functional/cli_user/misc] tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg', 'zfs_clone_001_neg', 'zfs_create_001_neg', 'zfs_destroy_001_neg', 'zfs_get_001_neg', 'zfs_inherit_001_neg', 'zfs_mount_001_neg', 'zfs_promote_001_neg', 'zfs_receive_001_neg', 'zfs_rename_001_neg', 'zfs_rollback_001_neg', 'zfs_send_001_neg', 'zfs_set_001_neg', 'zfs_snapshot_001_neg', 'zfs_unallow_001_neg', 'zfs_unmount_001_neg', 'zfs_upgrade_001_neg', 'zpool_001_neg', 'zpool_add_001_neg', 'zpool_attach_001_neg', 'zpool_clear_001_neg', 'zpool_create_001_neg', 'zpool_destroy_001_neg', 'zpool_detach_001_neg', 'zpool_export_001_neg', 'zpool_get_001_neg', 'zpool_history_001_neg', 'zpool_import_001_neg', 'zpool_import_002_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg', 'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg', 'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos', 'arc_summary_001_pos', 'dbufstat_001_pos'] user = [tests/functional/cli_user/zfs_list] tests = ['zfs_list_001_pos', 'zfs_list_002_pos', 'zfs_list_003_pos', 'zfs_list_004_neg', 'zfs_list_007_pos', 'zfs_list_008_neg'] user = [tests/functional/cli_user/zpool_iostat] tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos', 'zpool_iostat_003_neg', 'zpool_iostat_004_pos'] user = [tests/functional/cli_user/zpool_list] tests = ['zpool_list_001_pos', 'zpool_list_002_neg'] user = [tests/functional/compression] tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', 'compress_004_pos'] [tests/functional/ctime] tests = ['ctime_001_pos' ] [tests/functional/delegate] tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos', 'zfs_allow_007_pos', 'zfs_allow_008_pos', 'zfs_allow_009_neg', 'zfs_allow_010_pos', 'zfs_allow_011_neg', 'zfs_allow_012_neg', 'zfs_unallow_001_pos', 'zfs_unallow_002_pos', 'zfs_unallow_003_pos', 'zfs_unallow_004_pos', 'zfs_unallow_005_pos', 'zfs_unallow_006_pos', 'zfs_unallow_007_neg', 'zfs_unallow_008_neg'] # DISABLED: # devices_001_pos - needs investigation # devices_002_neg - needs investigation [tests/functional/devices] tests = ['devices_003_pos'] # DISABLED: # exec_002_neg - needs investigation [tests/functional/exec] tests = ['exec_001_pos'] [tests/functional/features/async_destroy] tests = ['async_destroy_001_pos'] [tests/functional/features/large_dnode] tests = ['large_dnode_001_pos', 'large_dnode_002_pos', 'large_dnode_003_pos', 'large_dnode_004_neg', 'large_dnode_005_pos', 'large_dnode_006_pos', 'large_dnode_007_neg'] # DISABLED: needs investigation #[tests/functional/grow_pool] #tests = ['grow_pool_001_pos'] #pre = #post = # DISABLED: needs investigation #[tests/functional/grow_replicas] #tests = ['grow_replicas_001_pos'] #pre = #post = [tests/functional/history] tests = ['history_001_pos', 'history_002_pos', 'history_003_pos', 'history_004_pos', 'history_005_neg', 'history_006_neg', 'history_007_pos', 'history_008_pos', 'history_009_pos', 'history_010_pos'] [tests/functional/inheritance] tests = ['inherit_001_pos'] pre = # DISABLED: # inuse_001_pos, inuse_007_pos - no dumpadm command # inuse_005_pos - partition issue # inuse_006_pos - partition issue # inuse_008_pos - partition issue # inuse_009_pos - partition issue [tests/functional/inuse] tests = ['inuse_004_pos'] post = # DISABLED: needs investigation #[tests/functional/large_files] #tests = ['large_files_001_pos'] # DISABLED: needs investigation #[tests/functional/largest_pool] #tests = ['largest_pool_001_pos'] #pre = #post = # DISABLED: needs investigation #[tests/functional/link_count] #tests = ['link_count_001'] [tests/functional/migration] tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos', 'migration_004_pos', 'migration_005_pos', 'migration_006_pos', 'migration_007_pos', 'migration_008_pos', 'migration_009_pos', 'migration_010_pos', 'migration_011_pos', 'migration_012_pos'] # DISABLED: # mmap_write_001_pos: needs investigation [tests/functional/mmap] tests = ['mmap_read_001_pos'] # DISABLED: # umountall_001 - Doesn't make sence in Linux - no umountall command. [tests/functional/mount] tests = ['umount_001'] [tests/functional/mv_files] tests = ['mv_files_001_pos', 'mv_files_002_pos'] [tests/functional/nestedfs] tests = ['nestedfs_001_pos'] [tests/functional/no_space] tests = ['enospc_001_pos'] # DISABLED: # nopwrite_varying_compression - needs investigation [tests/functional/nopwrite] tests = ['nopwrite_copies', 'nopwrite_mtime', 'nopwrite_negative', 'nopwrite_promoted_clone', 'nopwrite_recsize', 'nopwrite_sync', 'nopwrite_volume'] # DISABLED: needs investigation #[tests/functional/online_offline] #tests = ['online_offline_001_pos', 'online_offline_002_neg', # 'online_offline_003_neg'] [tests/functional/pool_names] tests = ['pool_names_001_pos', 'pool_names_002_neg'] pre = post = [tests/functional/poolversion] tests = ['poolversion_001_pos', 'poolversion_002_pos'] # DISABLED: Doesn't make sense on Linux - no pfexec command or 'RBAC profile' #[tests/functional/privilege] #tests = ['privilege_001_pos', 'privilege_002_pos'] [tests/functional/quota] tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos', 'quota_005_pos', 'quota_006_neg'] [tests/functional/raidz] tests = ['raidz_001_neg', 'raidz_002_pos'] [tests/functional/redundancy] tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos'] [tests/functional/refquota] tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos', 'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg'] # DISABLED: # refreserv_004_pos - needs investigation [tests/functional/refreserv] tests = ['refreserv_001_pos', 'refreserv_002_pos', 'refreserv_003_pos', 'refreserv_005_pos'] # DISABLED: nested pool #[tests/functional/rename_dirs] #tests = ['rename_dirs_001_pos'] [tests/functional/replacement] tests = ['replacement_001_pos', 'replacement_002_pos', 'replacement_003_pos'] [tests/functional/reservation] tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos', 'reservation_004_pos', 'reservation_005_pos', 'reservation_006_pos', 'reservation_007_pos', 'reservation_008_pos', 'reservation_009_pos', 'reservation_010_pos', 'reservation_011_pos', 'reservation_012_pos', 'reservation_013_pos', 'reservation_014_pos', 'reservation_015_pos', 'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos'] # DISABLED: Root pools must be handled differently under Linux #[tests/functional/rootpool] #tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_neg'] # DISABLED: # rsend_008_pos - Fails for OpenZFS on illumos # rsend_009_pos - Fails for OpenZFS on illumos # rsend_020_pos - ASSERTs in dump_record() [tests/functional/rsend] tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'rsend_005_pos', 'rsend_006_pos', 'rsend_007_pos', 'rsend_010_pos', 'rsend_011_pos', 'rsend_012_pos', 'rsend_013_pos', 'rsend_014_pos', 'rsend_019_pos', 'rsend_021_pos', 'rsend_022_pos', 'rsend_024_pos'] [tests/functional/scrub_mirror] tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', 'scrub_mirror_003_pos', 'scrub_mirror_004_pos'] # DISABLED: Scripts need to be updated. # slog_012_neg - needs investigation # slog_013_pos - Linux doesn't have a 'lofiadm' command. # slog_014_pos - needs investigation [tests/functional/slog] tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg'] # DISABLED: # clone_001_pos - nested pools # rollback_003_pos - Hangs in unmount and spins. # snapshot_016_pos - Problem with automount [tests/functional/snapshot] tests = ['rollback_001_pos', 'rollback_002_pos', 'snapshot_001_pos', 'snapshot_002_pos', 'snapshot_003_pos', 'snapshot_004_pos', 'snapshot_005_pos', 'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos', 'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos', 'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos', 'snapshot_015_pos', 'snapshot_017_pos'] [tests/functional/snapused] tests = ['snapused_001_pos', 'snapused_002_pos', 'snapused_003_pos', 'snapused_004_pos', 'snapused_005_pos'] [tests/functional/sparse] tests = ['sparse_001_pos'] # DISABLED: needs investigation #[tests/functional/threadsappend] #tests = ['threadsappend_001_pos'] +[tests/functional/tmpfile] +tests = ['tmpfile_001_pos', 'tmpfile_002_pos', 'tmpfile_003_pos'] + [tests/functional/truncate] tests = ['truncate_001_pos', 'truncate_002_pos'] [tests/functional/upgrade] tests = [ 'upgrade_userobj_001_pos' ] [tests/functional/userquota] tests = [ 'userquota_001_pos', 'userquota_002_pos', 'userquota_003_pos', 'userquota_004_pos', 'userquota_005_neg', 'userquota_006_pos', 'userquota_007_pos', 'userquota_008_pos', 'userquota_009_pos', 'userquota_010_pos', 'userquota_011_pos', 'userquota_012_neg', 'userquota_013_pos', 'userspace_001_pos', 'userspace_002_pos', 'userspace_003_pos', 'groupspace_001_pos', 'groupspace_002_pos', 'groupspace_003_pos' ] # DISABLED: # vdev_zaps_007_pos -- fails due to a pre-existing issue with zpool split # [tests/functional/vdev_zaps] tests = ['vdev_zaps_001_pos', 'vdev_zaps_002_pos', 'vdev_zaps_003_pos', 'vdev_zaps_004_pos', 'vdev_zaps_005_pos', 'vdev_zaps_006_pos'] # DISABLED: # write_dirs_002_pos - needs investigation [tests/functional/write_dirs] tests = ['write_dirs_001_pos'] # DISABLED: No 'runat' command, replace the Linux equivilant and add xattrtest #[tests/functional/xattr] #tests = ['xattr_001_pos', 'xattr_002_neg', 'xattr_003_neg', 'xattr_004_pos', # 'xattr_005_pos', 'xattr_006_pos', 'xattr_007_neg', 'xattr_008_pos', # 'xattr_009_neg', 'xattr_010_neg', 'xattr_011_pos', 'xattr_012_pos', # 'xattr_013_pos'] [tests/functional/zvol/zvol_ENOSPC] tests = ['zvol_ENOSPC_001_pos'] [tests/functional/zvol/zvol_cli] tests = ['zvol_cli_001_pos', 'zvol_cli_002_pos', 'zvol_cli_003_neg'] # DISABLED: requires dumpadm #[tests/functional/zvol/zvol_misc] #tests = ['zvol_misc_001_neg', 'zvol_misc_002_pos', 'zvol_misc_003_neg', # 'zvol_misc_004_pos', 'zvol_misc_005_neg', 'zvol_misc_006_pos'] # DISABLED: requires updated for Linux #[tests/functional/zvol/zvol_swap] #tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_003_pos', # 'zvol_swap_004_pos', 'zvol_swap_005_pos', 'zvol_swap_006_pos'] diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index ffba71b51990..670f994fe823 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -1,59 +1,60 @@ SUBDIRS = \ acl \ atime \ bootfs \ cache \ cachefile \ casenorm \ checksum \ clean_mirror \ cli_root \ cli_user \ compression \ ctime \ delegate \ devices \ exec \ features \ grow_pool \ grow_replicas \ history \ inheritance \ inuse \ large_files \ largest_pool \ link_count \ migration \ mmap \ mount \ mv_files \ nestedfs \ no_space \ nopwrite \ online_offline \ pool_names \ poolversion \ privilege \ quota \ raidz \ redundancy \ refquota \ refreserv \ rename_dirs \ replacement \ reservation \ rootpool \ rsend \ scrub_mirror \ slog \ snapshot \ snapused \ sparse \ threadsappend \ + tmpfile \ truncate \ upgrade \ userquota \ vdev_zaps \ write_dirs \ xattr \ zvol diff --git a/tests/zfs-tests/tests/functional/tmpfile/.gitignore b/tests/zfs-tests/tests/functional/tmpfile/.gitignore new file mode 100644 index 000000000000..59115815499a --- /dev/null +++ b/tests/zfs-tests/tests/functional/tmpfile/.gitignore @@ -0,0 +1,3 @@ +/tmpfile_test +/tmpfile_001_pos +/tmpfile_002_pos diff --git a/tests/zfs-tests/tests/functional/tmpfile/Makefile.am b/tests/zfs-tests/tests/functional/tmpfile/Makefile.am new file mode 100644 index 000000000000..411445217a6d --- /dev/null +++ b/tests/zfs-tests/tests/functional/tmpfile/Makefile.am @@ -0,0 +1,15 @@ +include $(top_srcdir)/config/Rules.am + +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/tmpfile + +dist_pkgdata_SCRIPTS = \ + cleanup.ksh \ + setup.ksh + +pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/tmpfile + +pkgexec_PROGRAMS = tmpfile_test tmpfile_001_pos tmpfile_002_pos tmpfile_003_pos +tmpfile_test_SOURCES= tmpfile_test.c +tmpfile_001_pos_SOURCES = tmpfile_001_pos.c +tmpfile_002_pos_SOURCES = tmpfile_002_pos.c +tmpfile_003_pos_SOURCES = tmpfile_003_pos.c diff --git a/tests/zfs-tests/tests/functional/tmpfile/cleanup.ksh b/tests/zfs-tests/tests/functional/tmpfile/cleanup.ksh new file mode 100755 index 000000000000..3166bd6ec16e --- /dev/null +++ b/tests/zfs-tests/tests/functional/tmpfile/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/tmpfile/setup.ksh b/tests/zfs-tests/tests/functional/tmpfile/setup.ksh new file mode 100755 index 000000000000..243a5b7792f3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/tmpfile/setup.ksh @@ -0,0 +1,39 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +if ! $STF_SUITE/tests/functional/tmpfile/tmpfile_test /tmp; then + log_unsupported "The kernel doesn't support O_TMPFILE." +fi + +DISK=${DISKS%% *} +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c new file mode 100644 index 000000000000..c2c02c5d4fd0 --- /dev/null +++ b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* backward compat in case it's not defined */ +#ifndef O_TMPFILE +#define O_TMPFILE (020000000|O_DIRECTORY) +#endif + +/* + * DESCRIPTION: + * Verify we can create tmpfile. + * + * STRATEGY: + * 1. open(2) with O_TMPFILE. + * 2. write(2) random data to it, then read(2) and compare. + * 3. fsetxattr(2) random data, then fgetxattr(2) and compare. + * 4. Verify the above operations run successfully. + * + */ + +#define BSZ 64 + +void +fill_random(char *buf, int len) +{ + int i; + srand(time(NULL)); + for (i = 0; i < len; i++) { + buf[i] = (char)rand(); + } +} + +int +main(int argc, char *argv[]) +{ + int i, fd; + char buf1[BSZ], buf2[BSZ] = {}; + char *penv[] = {"TESTDIR"}; + + (void) fprintf(stdout, "Verify O_TMPFILE is working properly.\n"); + + /* + * Get the environment variable values. + */ + for (i = 0; i < sizeof (penv) / sizeof (char *); i++) { + if ((penv[i] = getenv(penv[i])) == NULL) { + (void) fprintf(stderr, "getenv(penv[%d])\n", i); + exit(1); + } + } + + fill_random(buf1, BSZ); + + fd = open(penv[0], O_RDWR|O_TMPFILE, 0666); + if (fd < 0) { + perror("open"); + exit(2); + } + + if (write(fd, buf1, BSZ) < 0) { + perror("write"); + close(fd); + exit(3); + } + + if (pread(fd, buf2, BSZ, 0) < 0) { + perror("pread"); + close(fd); + exit(4); + } + + if (memcmp(buf1, buf2, BSZ) != 0) { + fprintf(stderr, "data corrupted\n"); + close(fd); + exit(5); + } + + memset(buf2, 0, BSZ); + + if (fsetxattr(fd, "user.test", buf1, BSZ, 0) < 0) { + perror("fsetxattr"); + close(fd); + exit(6); + } + + if (fgetxattr(fd, "user.test", buf2, BSZ) < 0) { + perror("fgetxattr"); + close(fd); + exit(7); + } + + if (memcmp(buf1, buf2, BSZ) != 0) { + fprintf(stderr, "xattr corrupted\n"); + close(fd); + exit(8); + } + + close(fd); + + return (0); +} diff --git a/tests/zfs-tests/tests/functional/tmpfile/tmpfile_002_pos.c b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_002_pos.c new file mode 100644 index 000000000000..c92e6127d6a3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_002_pos.c @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* backward compat in case it's not defined */ +#ifndef O_TMPFILE +#define O_TMPFILE (020000000|O_DIRECTORY) +#endif + +/* + * DESCRIPTION: + * Verify we can link tmpfile. + * + * STRATEGY: + * 1. open(2) with O_TMPFILE. + * 2. linkat(2). + * 3. freeze the pool, export and re-import the pool. + * 3. stat(2) the path to verify it has been created. + * + */ + +int +main(int argc, char *argv[]) +{ + int i, fd, ret; + char spath[1024], dpath[1024]; + char *penv[] = {"TESTDIR", "TESTFILE0"}; + struct stat sbuf; + + (void) fprintf(stdout, "Verify O_TMPFILE file can be linked.\n"); + + /* + * Get the environment variable values. + */ + for (i = 0; i < sizeof (penv) / sizeof (char *); i++) { + if ((penv[i] = getenv(penv[i])) == NULL) { + (void) fprintf(stderr, "getenv(penv[%d])\n", i); + exit(1); + } + } + + fd = open(penv[0], O_RDWR|O_TMPFILE, 0666); + if (fd < 0) { + perror("open"); + exit(2); + } + + snprintf(spath, 1024, "/proc/self/fd/%d", fd); + snprintf(dpath, 1024, "%s/%s", penv[0], penv[1]); + if (linkat(AT_FDCWD, spath, AT_FDCWD, dpath, AT_SYMLINK_FOLLOW) < 0) { + perror("linkat"); + close(fd); + exit(3); + } + + if ((ret = system("sudo zpool freeze $TESTPOOL"))) { + if (ret == -1) + perror("system \"zpool freeze\""); + else + fprintf(stderr, "zpool freeze exits with %d\n", + WEXITSTATUS(ret)); + exit(4); + } + + close(fd); + + if ((ret = system("sudo zpool export $TESTPOOL"))) { + if (ret == -1) + perror("system \"zpool export\""); + else + fprintf(stderr, "zpool export exits with %d\n", + WEXITSTATUS(ret)); + exit(4); + } + + if ((ret = system("sudo zpool import $TESTPOOL"))) { + if (ret == -1) + perror("system \"zpool import\""); + else + fprintf(stderr, "zpool import exits with %d\n", + WEXITSTATUS(ret)); + exit(4); + } + + if (stat(dpath, &sbuf) < 0) { + perror("stat"); + unlink(dpath); + exit(5); + } + unlink(dpath); + + return (0); +} diff --git a/tests/zfs-tests/tests/functional/tmpfile/tmpfile_003_pos.c b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_003_pos.c new file mode 100644 index 000000000000..477ef3f81948 --- /dev/null +++ b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_003_pos.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* backward compat in case it's not defined */ +#ifndef O_TMPFILE +#define O_TMPFILE (020000000|O_DIRECTORY) +#endif + +/* + * DESCRIPTION: + * Verify O_EXCL tmpfile cannot be linked. + * + * STRATEGY: + * 1. open(2) with O_TMPFILE|O_EXCL. + * 2. linkat(2). + * 3. stat(2) the path to verify it wasn't created. + * + */ + +int +main(int argc, char *argv[]) +{ + int i, fd; + char spath[1024], dpath[1024]; + char *penv[] = {"TESTDIR", "TESTFILE0"}; + struct stat sbuf; + + (void) fprintf(stdout, "Verify O_EXCL tmpfile cannot be linked.\n"); + + /* + * Get the environment variable values. + */ + for (i = 0; i < sizeof (penv) / sizeof (char *); i++) { + if ((penv[i] = getenv(penv[i])) == NULL) { + (void) fprintf(stderr, "getenv(penv[%d])\n", i); + exit(1); + } + } + + fd = open(penv[0], O_RDWR|O_TMPFILE|O_EXCL, 0666); + if (fd < 0) { + perror("open"); + exit(2); + } + + snprintf(spath, 1024, "/proc/self/fd/%d", fd); + snprintf(dpath, 1024, "%s/%s", penv[0], penv[1]); + if (linkat(AT_FDCWD, spath, AT_FDCWD, dpath, AT_SYMLINK_FOLLOW) == 0) { + fprintf(stderr, "linkat returns successfully\n"); + close(fd); + exit(3); + } + + if (stat(dpath, &sbuf) == 0) { + fprintf(stderr, "stat returns successfully\n"); + close(fd); + exit(4); + } + close(fd); + + return (0); +} diff --git a/tests/zfs-tests/tests/functional/tmpfile/tmpfile_test.c b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_test.c new file mode 100644 index 000000000000..5fb67b47f7b2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/tmpfile/tmpfile_test.c @@ -0,0 +1,52 @@ +#include +#include +#include +#include +#include +#include + +/* backward compat in case it's not defined */ +#ifndef O_TMPFILE +#define O_TMPFILE (020000000|O_DIRECTORY) +#endif + +/* + * DESCRIPTION: + * Check if the kernel support O_TMPFILE. + */ + +int +main(int argc, char *argv[]) +{ + int fd; + struct stat buf; + + if (argc < 2) { + fprintf(stderr, "Usage: %s dir\n", argv[0]); + return (2); + } + if (stat(argv[1], &buf) < 0) { + perror("stat"); + return (2); + } + if (!S_ISDIR(buf.st_mode)) { + fprintf(stderr, "\"%s\" is not a directory\n", argv[1]); + return (2); + } + + fd = open(argv[1], O_TMPFILE | O_WRONLY, 0666); + if (fd < 0) { + /* + * Only fail on EISDIR. If we get EOPNOTSUPP, that means + * kernel support O_TMPFILE, but the path at argv[1] doesn't. + */ + if (errno == EISDIR) { + fprintf(stderr, "kernel doesn't support O_TMPFILE\n"); + return (1); + } + perror("open"); + } else { + close(fd); + } + return (0); +}