diff --git a/usr.sbin/makefs/tests/makefs_zfs_tests.sh b/usr.sbin/makefs/tests/makefs_zfs_tests.sh index 9196232ed3b3..aeda889d9a5c 100644 --- a/usr.sbin/makefs/tests/makefs_zfs_tests.sh +++ b/usr.sbin/makefs/tests/makefs_zfs_tests.sh @@ -1,868 +1,868 @@ #- # SPDX-License-Identifier: BSD-2-Clause # # Copyright (c) 2022-2023 The FreeBSD Foundation # # This software was developed by Mark Johnston under sponsorship from # the FreeBSD Foundation. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -MAKEFS="makefs -t zfs -o verify-txgs=true -o nowarn=true" +MAKEFS="makefs -t zfs -o verify-txgs=true" ZFS_POOL_NAME="makefstest$$" TEST_ZFS_POOL_NAME="$TMPDIR/poolname" . 
"$(dirname "$0")/makefs_tests_common.sh" common_cleanup() { local pool md # Try to force a TXG, this can help catch bugs by triggering a panic. sync pool=$(cat $TEST_ZFS_POOL_NAME) if zpool list "$pool" >/dev/null; then zpool destroy "$pool" fi md=$(cat $TEST_MD_DEVICE_FILE) if [ -c /dev/"$md" ]; then mdconfig -d -u "$md" fi } import_image() { atf_check -e empty -o save:$TEST_MD_DEVICE_FILE -s exit:0 \ mdconfig -a -f $TEST_IMAGE atf_check -o ignore -e empty -s exit:0 \ zdb -e -p /dev/$(cat $TEST_MD_DEVICE_FILE) -mmm -ddddd $ZFS_POOL_NAME atf_check zpool import -R $TEST_MOUNT_DIR $ZFS_POOL_NAME echo "$ZFS_POOL_NAME" > $TEST_ZFS_POOL_NAME } # # Test autoexpansion of the vdev. # # The pool is initially 10GB, so we get 10GB minus one metaslab's worth of # usable space for data. Then the pool is expanded to 50GB, and the amount of # usable space is 50GB minus one metaslab. # atf_test_case autoexpand cleanup autoexpand_body() { local mssize poolsize poolsize1 newpoolsize create_test_inputs mssize=$((128 * 1024 * 1024)) poolsize=$((10 * 1024 * 1024 * 1024)) atf_check $MAKEFS -s $poolsize -o mssize=$mssize -o rootpath=/ \ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR newpoolsize=$((50 * 1024 * 1024 * 1024)) truncate -s $newpoolsize $TEST_IMAGE import_image check_image_contents poolsize1=$(zpool list -Hp -o size $ZFS_POOL_NAME) atf_check [ $((poolsize1 + $mssize)) -eq $poolsize ] atf_check zpool online -e $ZFS_POOL_NAME /dev/$(cat $TEST_MD_DEVICE_FILE) check_image_contents poolsize1=$(zpool list -Hp -o size $ZFS_POOL_NAME) atf_check [ $((poolsize1 + $mssize)) -eq $newpoolsize ] } autoexpand_cleanup() { common_cleanup } # # Test with some default layout defined by the common code. 
# atf_test_case basic cleanup basic_body() { create_test_inputs atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents } basic_cleanup() { common_cleanup } atf_test_case dataset_removal cleanup dataset_removal_body() { create_test_dirs cd $TEST_INPUTS_DIR mkdir dir cd - atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ -o fs=${ZFS_POOL_NAME}/dir \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents atf_check zfs destroy ${ZFS_POOL_NAME}/dir } dataset_removal_cleanup() { common_cleanup } # # Make sure that we can create and remove an empty directory. # atf_test_case empty_dir cleanup empty_dir_body() { create_test_dirs cd $TEST_INPUTS_DIR mkdir dir cd - atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents atf_check rmdir ${TEST_MOUNT_DIR}/dir } empty_dir_cleanup() { common_cleanup } atf_test_case empty_fs cleanup empty_fs_body() { create_test_dirs atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents } empty_fs_cleanup() { common_cleanup } atf_test_case file_extend cleanup file_extend_body() { local i start create_test_dirs # Create a file slightly longer than the maximum block size. start=132 dd if=/dev/random of=${TEST_INPUTS_DIR}/foo bs=1k count=$start md5 -q ${TEST_INPUTS_DIR}/foo > foo.md5 atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents i=0 while [ $i -lt 1000 ]; do dd if=/dev/random of=${TEST_MOUNT_DIR}/foo bs=1k count=1 \ seek=$(($i + $start)) conv=notrunc # Make sure that the first $start blocks are unmodified. 
dd if=${TEST_MOUNT_DIR}/foo bs=1k count=$start of=foo.copy atf_check -o file:foo.md5 md5 -q foo.copy i=$(($i + 1)) done } file_extend_cleanup() { common_cleanup } atf_test_case file_sizes cleanup file_sizes_body() { local i create_test_dirs cd $TEST_INPUTS_DIR i=1 while [ $i -lt $((1 << 20)) ]; do truncate -s $i ${i}.1 truncate -s $(($i - 1)) ${i}.2 truncate -s $(($i + 1)) ${i}.3 i=$(($i << 1)) done cd - # XXXMJ this creates sparse files, make sure makefs doesn't # preserve the sparseness. # XXXMJ need to test with larger files (at least 128MB for L2 indirs) atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents } file_sizes_cleanup() { common_cleanup } atf_test_case hard_links cleanup hard_links_body() { local f create_test_dirs cd $TEST_INPUTS_DIR mkdir dir echo "hello" > 1 ln 1 2 ln 1 dir/1 echo "goodbye" > dir/a ln dir/a dir/b ln dir/a a cd - atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents stat -f '%i' ${TEST_MOUNT_DIR}/1 > ./ino stat -f '%l' ${TEST_MOUNT_DIR}/1 > ./nlink for f in 1 2 dir/1; do atf_check -o file:./nlink -e empty -s exit:0 \ stat -f '%l' ${TEST_MOUNT_DIR}/${f} atf_check -o file:./ino -e empty -s exit:0 \ stat -f '%i' ${TEST_MOUNT_DIR}/${f} atf_check cmp -s ${TEST_INPUTS_DIR}/1 ${TEST_MOUNT_DIR}/${f} done stat -f '%i' ${TEST_MOUNT_DIR}/dir/a > ./ino stat -f '%l' ${TEST_MOUNT_DIR}/dir/a > ./nlink for f in dir/a dir/b a; do atf_check -o file:./nlink -e empty -s exit:0 \ stat -f '%l' ${TEST_MOUNT_DIR}/${f} atf_check -o file:./ino -e empty -s exit:0 \ stat -f '%i' ${TEST_MOUNT_DIR}/${f} atf_check cmp -s ${TEST_INPUTS_DIR}/dir/a ${TEST_MOUNT_DIR}/${f} done } hard_links_cleanup() { common_cleanup } # Allocate enough dnodes from an object set that the meta dnode needs to use # indirect blocks. 
atf_test_case indirect_dnode_array cleanup indirect_dnode_array_body() { local count i # How many dnodes do we need to allocate? Well, the data block size # for meta dnodes is always 16KB, so with a dnode size of 512B we get # 32 dnodes per direct block. The maximum indirect block size is 128KB # and that can fit 1024 block pointers, so we need at least 32 * 1024 # files to force the use of two levels of indirection. # # Unfortunately that number of files makes the test run quite slowly, # so we settle for a single indirect block for now... count=$(jot -r 1 32 1024) create_test_dirs cd $TEST_INPUTS_DIR for i in $(seq 1 $count); do touch $i done cd - atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents } indirect_dnode_array_cleanup() { common_cleanup } # # Create some files with long names, so as to test fat ZAP handling. # atf_test_case long_file_name cleanup long_file_name_body() { local dir i create_test_dirs cd $TEST_INPUTS_DIR # micro ZAP keys can be at most 50 bytes. for i in $(seq 1 60); do touch $(jot -s '' $i 1 1) done dir=$(jot -s '' 61 1 1) mkdir $dir for i in $(seq 1 60); do touch ${dir}/$(jot -s '' $i 1 1) done cd - atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents # Add a directory entry in the hope that OpenZFS might catch a bug # in makefs' fat ZAP encoding. touch ${TEST_MOUNT_DIR}/foo } long_file_name_cleanup() { common_cleanup } # # Exercise handling of multiple datasets. # atf_test_case multi_dataset_1 cleanup multi_dataset_1_body() { create_test_dirs cd $TEST_INPUTS_DIR mkdir dir1 echo a > dir1/a mkdir dir2 echo b > dir2/b cd - atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ -o fs=${ZFS_POOL_NAME}/dir1 -o fs=${ZFS_POOL_NAME}/dir2 \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents # Make sure that we have three datasets with the expected mount points. 
atf_check -o inline:${ZFS_POOL_NAME}\\n -e empty -s exit:0 \ zfs list -H -o name ${ZFS_POOL_NAME} atf_check -o inline:${TEST_MOUNT_DIR}\\n -e empty -s exit:0 \ zfs list -H -o mountpoint ${ZFS_POOL_NAME} atf_check -o inline:${ZFS_POOL_NAME}/dir1\\n -e empty -s exit:0 \ zfs list -H -o name ${ZFS_POOL_NAME}/dir1 atf_check -o inline:${TEST_MOUNT_DIR}/dir1\\n -e empty -s exit:0 \ zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1 atf_check -o inline:${ZFS_POOL_NAME}/dir2\\n -e empty -s exit:0 \ zfs list -H -o name ${ZFS_POOL_NAME}/dir2 atf_check -o inline:${TEST_MOUNT_DIR}/dir2\\n -e empty -s exit:0 \ zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir2 } multi_dataset_1_cleanup() { common_cleanup } # # Create a pool with two datasets, where the root dataset is mounted below # the child dataset. # atf_test_case multi_dataset_2 cleanup multi_dataset_2_body() { create_test_dirs cd $TEST_INPUTS_DIR mkdir dir1 echo a > dir1/a mkdir dir2 echo b > dir2/b cd - atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ -o fs=${ZFS_POOL_NAME}/dir1\;mountpoint=/ \ -o fs=${ZFS_POOL_NAME}\;mountpoint=/dir1 \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents } multi_dataset_2_cleanup() { common_cleanup } # # Create a dataset with a non-existent mount point. # atf_test_case multi_dataset_3 cleanup multi_dataset_3_body() { create_test_dirs cd $TEST_INPUTS_DIR mkdir dir1 echo a > dir1/a cd - atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ -o fs=${ZFS_POOL_NAME}/dir1 \ -o fs=${ZFS_POOL_NAME}/dir2 \ $TEST_IMAGE $TEST_INPUTS_DIR import_image atf_check -o inline:${TEST_MOUNT_DIR}/dir2\\n -e empty -s exit:0 \ zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir2 # Mounting dir2 should have created a directory called dir2. Go # back and create it in the staging tree before comparing. atf_check mkdir ${TEST_INPUTS_DIR}/dir2 check_image_contents } multi_dataset_3_cleanup() { common_cleanup } # # Create an unmounted dataset. 
# atf_test_case multi_dataset_4 cleanup multi_dataset_4_body() { create_test_dirs cd $TEST_INPUTS_DIR mkdir dir1 echo a > dir1/a cd - atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ -o fs=${ZFS_POOL_NAME}/dir1\;canmount=noauto\;mountpoint=none \ $TEST_IMAGE $TEST_INPUTS_DIR import_image atf_check -o inline:none\\n -e empty -s exit:0 \ zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1 check_image_contents atf_check zfs set mountpoint=/dir1 ${ZFS_POOL_NAME}/dir1 atf_check zfs mount ${ZFS_POOL_NAME}/dir1 atf_check -o inline:${TEST_MOUNT_DIR}/dir1\\n -e empty -s exit:0 \ zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1 # dir1/a should be part of the root dataset, not dir1. atf_check -s not-exit:0 -e not-empty stat ${TEST_MOUNT_DIR}dir1/a } multi_dataset_4_cleanup() { common_cleanup } # # Validate handling of multiple staging directories. # atf_test_case multi_staging_1 cleanup multi_staging_1_body() { local tmpdir create_test_dirs cd $TEST_INPUTS_DIR mkdir dir1 echo a > a echo a > dir1/a echo z > z cd - tmpdir=$(mktemp -d) cd $tmpdir mkdir dir2 dir2/dir3 echo b > dir2/b echo c > dir2/dir3/c ln -s dir2/dir3c s cd - atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE ${TEST_INPUTS_DIR} $tmpdir import_image check_image_contents -d $tmpdir } multi_staging_1_cleanup() { common_cleanup } atf_test_case multi_staging_2 cleanup multi_staging_2_body() { local tmpdir create_test_dirs cd $TEST_INPUTS_DIR mkdir dir echo a > dir/foo echo b > dir/bar cd - tmpdir=$(mktemp -d) cd $tmpdir mkdir dir echo c > dir/baz cd - atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE ${TEST_INPUTS_DIR} $tmpdir import_image # check_image_contents can't easily handle merged directories, so # just check that the merged directory contains the files we expect. 
atf_check -o not-empty stat ${TEST_MOUNT_DIR}/dir/foo atf_check -o not-empty stat ${TEST_MOUNT_DIR}/dir/bar atf_check -o not-empty stat ${TEST_MOUNT_DIR}/dir/baz if [ "$(ls ${TEST_MOUNT_DIR}/dir | wc -l)" -ne 3 ]; then atf_fail "Expected 3 files in ${TEST_MOUNT_DIR}/dir" fi } multi_staging_2_cleanup() { common_cleanup } # # Rudimentary test to verify that two ZFS images created using the same # parameters and input hierarchy are byte-identical. In particular, makefs(1) # does not preserve file access times. # atf_test_case reproducible cleanup reproducible_body() { create_test_inputs atf_check $MAKEFS -s 512m -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ ${TEST_IMAGE}.1 $TEST_INPUTS_DIR atf_check $MAKEFS -s 512m -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ ${TEST_IMAGE}.2 $TEST_INPUTS_DIR # XXX-MJ cmp(1) is really slow atf_check cmp ${TEST_IMAGE}.1 ${TEST_IMAGE}.2 } reproducible_cleanup() { } # # Verify that we can take a snapshot of a generated dataset. # atf_test_case snapshot cleanup snapshot_body() { create_test_dirs cd $TEST_INPUTS_DIR mkdir dir echo "hello" > dir/hello echo "goodbye" > goodbye cd - atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image atf_check zfs snapshot ${ZFS_POOL_NAME}@1 } snapshot_cleanup() { common_cleanup } # # Check handling of symbolic links. # atf_test_case soft_links cleanup soft_links_body() { create_test_dirs cd $TEST_INPUTS_DIR mkdir dir ln -s a a ln -s dir/../a a ln -s dir/b b echo 'c' > dir ln -s dir/c c # XXX-MJ overflows bonus buffer ln -s $(jot -s '' 320 1 1) 1 cd - atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents } soft_links_cleanup() { common_cleanup } # # Verify that we can set properties on the root dataset. 
# atf_test_case root_props cleanup root_props_body() { create_test_inputs atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ -o fs=${ZFS_POOL_NAME}\;atime=off\;setuid=off \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents atf_check -o inline:off\\n -e empty -s exit:0 \ zfs get -H -o value atime $ZFS_POOL_NAME atf_check -o inline:local\\n -e empty -s exit:0 \ zfs get -H -o source atime $ZFS_POOL_NAME atf_check -o inline:off\\n -e empty -s exit:0 \ zfs get -H -o value setuid $ZFS_POOL_NAME atf_check -o inline:local\\n -e empty -s exit:0 \ zfs get -H -o source setuid $ZFS_POOL_NAME } root_props_cleanup() { common_cleanup } # # Verify that usedds and usedchild props are set properly. # atf_test_case used_space_props cleanup used_space_props_body() { local used usedds usedchild local rootmb childmb totalmb fudge local status create_test_dirs cd $TEST_INPUTS_DIR mkdir dir rootmb=17 childmb=39 totalmb=$(($rootmb + $childmb)) fudge=$((2 * 1024 * 1024)) atf_check -e ignore dd if=/dev/random of=foo bs=1M count=$rootmb atf_check -e ignore dd if=/dev/random of=dir/bar bs=1M count=$childmb cd - atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ -o fs=${ZFS_POOL_NAME}/dir \ $TEST_IMAGE $TEST_INPUTS_DIR import_image # Make sure that each dataset's space usage is no more than 2MB larger # than their files. This number is magic and might need to change # someday. usedds=$(zfs list -o usedds -Hp ${ZFS_POOL_NAME}) atf_check test $usedds -gt $(($rootmb * 1024 * 1024)) -a \ $usedds -le $(($rootmb * 1024 * 1024 + $fudge)) usedds=$(zfs list -o usedds -Hp ${ZFS_POOL_NAME}/dir) atf_check test $usedds -gt $(($childmb * 1024 * 1024)) -a \ $usedds -le $(($childmb * 1024 * 1024 + $fudge)) # Make sure that the usedchild property value makes sense: the parent's # value corresponds to the size of the child, and the child has no # children. 
usedchild=$(zfs list -o usedchild -Hp ${ZFS_POOL_NAME}) atf_check test $usedchild -gt $(($childmb * 1024 * 1024)) -a \ $usedchild -le $(($childmb * 1024 * 1024 + $fudge)) atf_check -o inline:'0\n' \ zfs list -Hp -o usedchild ${ZFS_POOL_NAME}/dir # Make sure that the used property value makes sense: the parent's # value is the sum of the two sizes, and the child's value is the # same as its usedds value, which has already been checked. used=$(zfs list -o used -Hp ${ZFS_POOL_NAME}) atf_check test $used -gt $(($totalmb * 1024 * 1024)) -a \ $used -le $(($totalmb * 1024 * 1024 + 2 * $fudge)) used=$(zfs list -o used -Hp ${ZFS_POOL_NAME}/dir) atf_check -o inline:$used'\n' \ zfs list -Hp -o usedds ${ZFS_POOL_NAME}/dir # Both datasets do not have snapshots. atf_check -o inline:'0\n' zfs list -Hp -o usedsnap ${ZFS_POOL_NAME} atf_check -o inline:'0\n' zfs list -Hp -o usedsnap ${ZFS_POOL_NAME}/dir } used_space_props_cleanup() { common_cleanup } # Verify that file permissions are set properly. Make sure that non-executable # files can't be executed. 
atf_test_case perms cleanup perms_body() { local mode create_test_dirs cd $TEST_INPUTS_DIR for mode in $(seq 0 511); do mode=$(printf "%04o\n" $mode) echo 'echo a' > $mode atf_check chmod $mode $mode done cd - atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ $TEST_IMAGE $TEST_INPUTS_DIR import_image check_image_contents for mode in $(seq 0 511); do mode=$(printf "%04o\n" $mode) if [ $(($mode & 0111)) -eq 0 ]; then atf_check -s not-exit:0 -e match:"Permission denied" \ ${TEST_INPUTS_DIR}/$mode fi if [ $(($mode & 0001)) -eq 0 ]; then atf_check -s not-exit:0 -e match:"Permission denied" \ su -m tests -c ${TEST_INPUTS_DIR}/$mode fi done } perms_cleanup() { common_cleanup } atf_init_test_cases() { atf_add_test_case autoexpand atf_add_test_case basic atf_add_test_case dataset_removal atf_add_test_case empty_dir atf_add_test_case empty_fs atf_add_test_case file_extend atf_add_test_case file_sizes atf_add_test_case hard_links atf_add_test_case indirect_dnode_array atf_add_test_case long_file_name atf_add_test_case multi_dataset_1 atf_add_test_case multi_dataset_2 atf_add_test_case multi_dataset_3 atf_add_test_case multi_dataset_4 atf_add_test_case multi_staging_1 atf_add_test_case multi_staging_2 atf_add_test_case reproducible atf_add_test_case snapshot atf_add_test_case soft_links atf_add_test_case root_props atf_add_test_case used_space_props atf_add_test_case perms # XXXMJ tests: # - test with different ashifts (at least, 9 and 12), different image sizes # - create datasets in imported pool } diff --git a/usr.sbin/makefs/zfs.c b/usr.sbin/makefs/zfs.c index 43469c2358be..66e7f8dafc9c 100644 --- a/usr.sbin/makefs/zfs.c +++ b/usr.sbin/makefs/zfs.c @@ -1,810 +1,804 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * * This software was developed by Mark Johnston under sponsorship from * the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "makefs.h" #include "zfs.h" #define VDEV_LABEL_SPACE \ ((off_t)(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) _Static_assert(VDEV_LABEL_SPACE <= MINDEVSIZE, ""); #define MINMSSIZE ((off_t)1 << 24) /* 16MB */ #define DFLTMSSIZE ((off_t)1 << 29) /* 512MB */ #define MAXMSSIZE ((off_t)1 << 34) /* 16GB */ #define INDIR_LEVELS 6 /* Indirect blocks are always 128KB. 
*/ #define BLKPTR_PER_INDIR (MAXBLOCKSIZE / sizeof(blkptr_t)) struct dnode_cursor { char inddir[INDIR_LEVELS][MAXBLOCKSIZE]; off_t indloc; off_t indspace; dnode_phys_t *dnode; off_t dataoff; off_t datablksz; }; void zfs_prep_opts(fsinfo_t *fsopts) { zfs_opt_t *zfs; size_t align; align = alignof(uint64_t); zfs = aligned_alloc(align, roundup2(sizeof(*zfs), align)); if (zfs == NULL) err(1, "aligned_alloc"); memset(zfs, 0, sizeof(*zfs)); const option_t zfs_options[] = { { '\0', "bootfs", &zfs->bootfs, OPT_STRPTR, 0, 0, "Bootable dataset" }, { '\0', "mssize", &zfs->mssize, OPT_INT64, MINMSSIZE, MAXMSSIZE, "Metaslab size" }, { '\0', "poolname", &zfs->poolname, OPT_STRPTR, 0, 0, "ZFS pool name" }, { '\0', "rootpath", &zfs->rootpath, OPT_STRPTR, 0, 0, "Prefix for all dataset mount points" }, { '\0', "ashift", &zfs->ashift, OPT_INT32, MINBLOCKSHIFT, MAXBLOCKSHIFT, "ZFS pool ashift" }, { '\0', "verify-txgs", &zfs->verify_txgs, OPT_BOOL, 0, 0, "Make OpenZFS verify data upon import" }, { '\0', "nowarn", &zfs->nowarn, OPT_BOOL, - 0, 0, "Suppress warning about experimental ZFS support" }, + 0, 0, "Provided for backwards compatibility, ignored" }, { .name = NULL } }; STAILQ_INIT(&zfs->datasetdescs); fsopts->fs_specific = zfs; fsopts->fs_options = copy_opts(zfs_options); } int zfs_parse_opts(const char *option, fsinfo_t *fsopts) { zfs_opt_t *zfs; struct dataset_desc *dsdesc; char buf[BUFSIZ], *opt, *val; int rv; zfs = fsopts->fs_specific; opt = val = estrdup(option); opt = strsep(&val, "="); if (strcmp(opt, "fs") == 0) { if (val == NULL) errx(1, "invalid filesystem parameters `%s'", option); /* * Dataset descriptions will be parsed later, in dsl_init(). * Just stash them away for now. */ dsdesc = ecalloc(1, sizeof(*dsdesc)); dsdesc->params = estrdup(val); free(opt); STAILQ_INSERT_TAIL(&zfs->datasetdescs, dsdesc, next); return (1); } free(opt); rv = set_option(fsopts->fs_options, option, buf, sizeof(buf)); return (rv == -1 ? 
0 : 1); } static void zfs_size_vdev(fsinfo_t *fsopts) { zfs_opt_t *zfs; off_t asize, mssize, vdevsize, vdevsize1; zfs = fsopts->fs_specific; assert(fsopts->maxsize != 0); assert(zfs->ashift != 0); /* * Figure out how big the vdev should be. */ vdevsize = rounddown2(fsopts->maxsize, 1 << zfs->ashift); if (vdevsize < MINDEVSIZE) errx(1, "maximum image size is too small"); if (vdevsize < fsopts->minsize || vdevsize > fsopts->maxsize) { errx(1, "image size bounds must be multiples of %d", 1 << zfs->ashift); } asize = vdevsize - VDEV_LABEL_SPACE; /* * Size metaslabs according to the following heuristic: * - provide at least 8 metaslabs, * - without using a metaslab size larger than 512MB. * This approximates what OpenZFS does without being complicated. In * practice we expect pools to be expanded upon first use, and OpenZFS * does not resize metaslabs in that case, so there is no right answer * here. In general we want to provide large metaslabs even if the * image size is small, and 512MB is a reasonable size for pools up to * several hundred gigabytes. * * The user may override this heuristic using the "-o mssize" option. */ mssize = zfs->mssize; if (mssize == 0) { mssize = MAX(MIN(asize / 8, DFLTMSSIZE), MINMSSIZE); if (!powerof2(mssize)) mssize = 1l << (flsll(mssize) - 1); } if (!powerof2(mssize)) errx(1, "metaslab size must be a power of 2"); /* * If we have some slop left over, try to cover it by resizing the vdev, * subject to the maxsize and minsize parameters. */ if (asize % mssize != 0) { vdevsize1 = rounddown2(asize, mssize) + VDEV_LABEL_SPACE; if (vdevsize1 < fsopts->minsize) vdevsize1 = roundup2(asize, mssize) + VDEV_LABEL_SPACE; if (vdevsize1 <= fsopts->maxsize) vdevsize = vdevsize1; } asize = vdevsize - VDEV_LABEL_SPACE; zfs->asize = asize; zfs->vdevsize = vdevsize; zfs->mssize = mssize; zfs->msshift = flsll(mssize) - 1; zfs->mscount = asize / mssize; } /* * Validate options and set some default values. 
*/ static void zfs_check_opts(fsinfo_t *fsopts) { zfs_opt_t *zfs; zfs = fsopts->fs_specific; if (fsopts->offset != 0) errx(1, "unhandled offset option"); if (fsopts->maxsize == 0) errx(1, "an image size must be specified"); if (zfs->poolname == NULL) errx(1, "a pool name must be specified"); if (!isalpha(zfs->poolname[0])) errx(1, "the pool name must begin with a letter"); for (size_t i = 0, len = strlen(zfs->poolname); i < len; i++) { if (!isalnum(zfs->poolname[i]) && zfs->poolname[i] != '_') errx(1, "invalid character '%c' in pool name", zfs->poolname[i]); } if (strcmp(zfs->poolname, "mirror") == 0 || strcmp(zfs->poolname, "raidz") == 0 || strcmp(zfs->poolname, "draid") == 0) { errx(1, "pool name '%s' is reserved and cannot be used", zfs->poolname); } if (zfs->rootpath == NULL) easprintf(&zfs->rootpath, "/%s", zfs->poolname); if (zfs->rootpath[0] != '/') errx(1, "mountpoint `%s' must be absolute", zfs->rootpath); if (zfs->ashift == 0) zfs->ashift = 12; zfs_size_vdev(fsopts); } void zfs_cleanup_opts(fsinfo_t *fsopts) { struct dataset_desc *d, *tmp; zfs_opt_t *zfs; zfs = fsopts->fs_specific; free(zfs->rootpath); free(zfs->bootfs); free(__DECONST(void *, zfs->poolname)); STAILQ_FOREACH_SAFE(d, &zfs->datasetdescs, next, tmp) { free(d->params); free(d); } free(zfs); free(fsopts->fs_options); } static size_t nvlist_size(const nvlist_t *nvl) { return (sizeof(nvl->nv_header) + nvl->nv_size); } static void nvlist_copy(const nvlist_t *nvl, char *buf, size_t sz) { assert(sz >= nvlist_size(nvl)); memcpy(buf, &nvl->nv_header, sizeof(nvl->nv_header)); memcpy(buf + sizeof(nvl->nv_header), nvl->nv_data, nvl->nv_size); } /* * Avoid returning a GUID of 0, just to avoid the possibility that something * will interpret that as meaning that the GUID is uninitialized. 
*/ uint64_t randomguid(void) { uint64_t ret; do { ret = ((uint64_t)random() << 32) | random(); } while (ret == 0); return (ret); } static nvlist_t * pool_config_nvcreate(zfs_opt_t *zfs) { nvlist_t *featuresnv, *poolnv; poolnv = nvlist_create(NV_UNIQUE_NAME); nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_TXG, TXG); nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VERSION, SPA_VERSION); nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_EXPORTED); nvlist_add_string(poolnv, ZPOOL_CONFIG_POOL_NAME, zfs->poolname); nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_GUID, zfs->poolguid); nvlist_add_uint64(poolnv, ZPOOL_CONFIG_TOP_GUID, zfs->vdevguid); nvlist_add_uint64(poolnv, ZPOOL_CONFIG_GUID, zfs->vdevguid); nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VDEV_CHILDREN, 1); featuresnv = nvlist_create(NV_UNIQUE_NAME); nvlist_add_nvlist(poolnv, ZPOOL_CONFIG_FEATURES_FOR_READ, featuresnv); nvlist_destroy(featuresnv); return (poolnv); } static nvlist_t * pool_disk_vdev_config_nvcreate(zfs_opt_t *zfs) { nvlist_t *diskvdevnv; assert(zfs->objarrid != 0); diskvdevnv = nvlist_create(NV_UNIQUE_NAME); nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK); nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASHIFT, zfs->ashift); nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASIZE, zfs->asize); nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_GUID, zfs->vdevguid); nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ID, 0); nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_PATH, "/dev/null"); nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_WHOLE_DISK, 1); nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG); nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_ARRAY, zfs->objarrid); nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_SHIFT, zfs->msshift); return (diskvdevnv); } static nvlist_t * pool_root_vdev_config_nvcreate(zfs_opt_t *zfs) { nvlist_t *diskvdevnv, *rootvdevnv; diskvdevnv = pool_disk_vdev_config_nvcreate(zfs); rootvdevnv = nvlist_create(NV_UNIQUE_NAME); nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_ID, 0); 
nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_GUID, zfs->poolguid); nvlist_add_string(rootvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT); nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG); nvlist_add_nvlist_array(rootvdevnv, ZPOOL_CONFIG_CHILDREN, &diskvdevnv, 1); nvlist_destroy(diskvdevnv); return (rootvdevnv); } /* * Create the pool's "config" object, which contains an nvlist describing pool * parameters and the vdev topology. It is similar but not identical to the * nvlist stored in vdev labels. The main difference is that vdev labels do not * describe the full vdev tree and in particular do not contain the "root" * meta-vdev. */ static void pool_init_objdir_config(zfs_opt_t *zfs, zfs_zap_t *objdir) { dnode_phys_t *dnode; nvlist_t *poolconfig, *vdevconfig; void *configbuf; uint64_t dnid; off_t configloc, configblksz; int error; dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_PACKED_NVLIST, DMU_OT_PACKED_NVLIST_SIZE, sizeof(uint64_t), &dnid); poolconfig = pool_config_nvcreate(zfs); vdevconfig = pool_root_vdev_config_nvcreate(zfs); nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig); nvlist_destroy(vdevconfig); error = nvlist_export(poolconfig); if (error != 0) errc(1, error, "nvlist_export"); configblksz = nvlist_size(poolconfig); configloc = objset_space_alloc(zfs, zfs->mos, &configblksz); configbuf = ecalloc(1, configblksz); nvlist_copy(poolconfig, configbuf, configblksz); vdev_pwrite_dnode_data(zfs, dnode, configbuf, configblksz, configloc); dnode->dn_datablkszsec = configblksz >> MINBLOCKSHIFT; dnode->dn_flags = DNODE_FLAG_USED_BYTES; *(uint64_t *)DN_BONUS(dnode) = nvlist_size(poolconfig); zap_add_uint64(objdir, DMU_POOL_CONFIG, dnid); nvlist_destroy(poolconfig); free(configbuf); } /* * Add objects block pointer list objects, used for deferred frees. We don't do * anything with them, but they need to be present or OpenZFS will refuse to * import the pool. 
*/ static void pool_init_objdir_bplists(zfs_opt_t *zfs __unused, zfs_zap_t *objdir) { uint64_t dnid; (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR, BPOBJ_SIZE_V2, &dnid); zap_add_uint64(objdir, DMU_POOL_FREE_BPOBJ, dnid); (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR, BPOBJ_SIZE_V2, &dnid); zap_add_uint64(objdir, DMU_POOL_SYNC_BPLIST, dnid); } /* * Add required feature metadata objects. We don't know anything about ZFS * features, so the objects are just empty ZAPs. */ static void pool_init_objdir_feature_maps(zfs_opt_t *zfs, zfs_zap_t *objdir) { dnode_phys_t *dnode; uint64_t dnid; dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_READ, dnid); zap_write(zfs, zap_alloc(zfs->mos, dnode)); dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_WRITE, dnid); zap_write(zfs, zap_alloc(zfs->mos, dnode)); dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); zap_add_uint64(objdir, DMU_POOL_FEATURE_DESCRIPTIONS, dnid); zap_write(zfs, zap_alloc(zfs->mos, dnode)); } static void pool_init_objdir_dsl(zfs_opt_t *zfs, zfs_zap_t *objdir) { zap_add_uint64(objdir, DMU_POOL_ROOT_DATASET, dsl_dir_id(zfs->rootdsldir)); } static void pool_init_objdir_poolprops(zfs_opt_t *zfs, zfs_zap_t *objdir) { dnode_phys_t *dnode; uint64_t id; dnode = objset_dnode_alloc(zfs->mos, DMU_OT_POOL_PROPS, &id); zap_add_uint64(objdir, DMU_POOL_PROPS, id); zfs->poolprops = zap_alloc(zfs->mos, dnode); } /* * Initialize the MOS object directory, the root of virtually all of the pool's * data and metadata. 
 */
static void
pool_init_objdir(zfs_opt_t *zfs)
{
	zfs_zap_t *zap;
	dnode_phys_t *objdir;

	/* Dnode 1 of the MOS was reserved for the directory in pool_init(). */
	objdir = objset_dnode_lookup(zfs->mos, DMU_POOL_DIRECTORY_OBJECT);

	zap = zap_alloc(zfs->mos, objdir);
	pool_init_objdir_config(zfs, zap);
	pool_init_objdir_bplists(zfs, zap);
	pool_init_objdir_feature_maps(zfs, zap);
	pool_init_objdir_dsl(zfs, zap);
	pool_init_objdir_poolprops(zfs, zap);
	zap_write(zfs, zap);
}

/*
 * Initialize the meta-object set (MOS) and immediately write out several
 * special objects whose contents are already finalized, including the object
 * directory.
 *
 * Once the MOS is finalized, it'll look roughly like this:
 *
 *	object directory (ZAP)
 *	|-> vdev config object (nvlist)
 *	|-> features for read
 *	|-> features for write
 *	|-> feature descriptions
 *	|-> sync bplist
 *	|-> free bplist
 *	|-> pool properties
 *	L-> root DSL directory
 *	    |-> DSL child directory (ZAP)
 *	    |   |-> $MOS (DSL dir)
 *	    |   |   |-> child map
 *	    |   |   L-> props (ZAP)
 *	    |   |-> $FREE (DSL dir)
 *	    |   |   |-> child map
 *	    |   |   L-> props (ZAP)
 *	    |   |-> $ORIGIN (DSL dir)
 *	    |   |   |-> child map
 *	    |   |   |-> dataset
 *	    |   |   |   L-> deadlist
 *	    |   |   |-> snapshot
 *	    |   |   |   |-> deadlist
 *	    |   |   |   L-> snapshot names
 *	    |   |   |-> props (ZAP)
 *	    |   |   L-> clones (ZAP)
 *	    |   |-> dataset 1 (DSL dir)
 *	    |   |   |-> DSL dataset
 *	    |   |   |   |-> snapshot names
 *	    |   |   |   L-> deadlist
 *	    |   |   |-> child map
 *	    |   |   |   L-> ...
 *	    |   |   L-> props
 *	    |   |-> dataset 2
 *	    |   |   L-> ...
 *	    |   |-> ...
 *	    |   L-> dataset n
 *	    |-> DSL root dataset
 *	    |   |-> snapshot names
 *	    |   L-> deadlist
 *	    L-> props (ZAP)
 *	space map object array
 *	|-> space map 1
 *	|-> space map 2
 *	|-> ...
 *	L-> space map n (zfs->mscount)
 *
 * The space map object array is pointed to by the "msarray" property in the
 * pool configuration.
*/ static void pool_init(zfs_opt_t *zfs) { uint64_t dnid; zfs->poolguid = randomguid(); zfs->vdevguid = randomguid(); zfs->mos = objset_alloc(zfs, DMU_OST_META); (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_DIRECTORY, &dnid); assert(dnid == DMU_POOL_DIRECTORY_OBJECT); (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_ARRAY, &zfs->objarrid); dsl_init(zfs); pool_init_objdir(zfs); } static void pool_labels_write(zfs_opt_t *zfs) { uberblock_t *ub; vdev_label_t *label; nvlist_t *poolconfig, *vdevconfig; int error; label = ecalloc(1, sizeof(*label)); /* * Assemble the vdev configuration and store it in the label. */ poolconfig = pool_config_nvcreate(zfs); vdevconfig = pool_disk_vdev_config_nvcreate(zfs); nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig); nvlist_destroy(vdevconfig); error = nvlist_export(poolconfig); if (error != 0) errc(1, error, "nvlist_export"); nvlist_copy(poolconfig, label->vl_vdev_phys.vp_nvlist, sizeof(label->vl_vdev_phys.vp_nvlist)); nvlist_destroy(poolconfig); /* * Fill out the uberblock. Just make each one the same. The embedded * checksum is calculated in vdev_label_write(). */ for (size_t uoff = 0; uoff < sizeof(label->vl_uberblock); uoff += (1 << zfs->ashift)) { ub = (uberblock_t *)(&label->vl_uberblock[0] + uoff); ub->ub_magic = UBERBLOCK_MAGIC; ub->ub_version = SPA_VERSION; /* * Upon import, OpenZFS will perform metadata verification of * the last TXG by default. If all data is written in the same * TXG, it'll all get verified, which can be painfully slow in * some cases, e.g., initial boot in a cloud environment with * slow storage. So, fabricate additional TXGs to avoid this * overhead, unless the user requests otherwise. 
*/ ub->ub_txg = TXG; if (!zfs->verify_txgs) ub->ub_txg += TXG_SIZE; ub->ub_guid_sum = zfs->poolguid + zfs->vdevguid; ub->ub_timestamp = 0; ub->ub_software_version = SPA_VERSION; ub->ub_mmp_magic = MMP_MAGIC; ub->ub_mmp_delay = 0; ub->ub_mmp_config = 0; ub->ub_checkpoint_txg = 0; objset_root_blkptr_copy(zfs->mos, &ub->ub_rootbp); } /* * Write out four copies of the label: two at the beginning of the vdev * and two at the end. */ for (int i = 0; i < VDEV_LABELS; i++) vdev_label_write(zfs, i, label); free(label); } static void pool_fini(zfs_opt_t *zfs) { zap_write(zfs, zfs->poolprops); dsl_write(zfs); objset_write(zfs, zfs->mos); pool_labels_write(zfs); } struct dnode_cursor * dnode_cursor_init(zfs_opt_t *zfs, zfs_objset_t *os, dnode_phys_t *dnode, off_t size, off_t blksz) { struct dnode_cursor *c; uint64_t nbppindir, indlevel, ndatablks, nindblks; assert(dnode->dn_nblkptr == 1); assert(blksz <= MAXBLOCKSIZE); if (blksz == 0) { /* Must be between 1<ashift, powerof2(size) ? size : (1l << flsll(size)))); } assert(powerof2(blksz)); /* * Do we need indirect blocks? Figure out how many levels are needed * (indlevel == 1 means no indirect blocks) and how much space is needed * (it has to be allocated up-front to break the dependency cycle * described in objset_write()). */ ndatablks = size == 0 ? 0 : howmany(size, blksz); nindblks = 0; for (indlevel = 1, nbppindir = 1; ndatablks > nbppindir; indlevel++) { nbppindir *= BLKPTR_PER_INDIR; nindblks += howmany(ndatablks, indlevel * nbppindir); } assert(indlevel < INDIR_LEVELS); dnode->dn_nlevels = (uint8_t)indlevel; dnode->dn_maxblkid = ndatablks > 0 ? 
ndatablks - 1 : 0; dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT; c = ecalloc(1, sizeof(*c)); if (nindblks > 0) { c->indspace = nindblks * MAXBLOCKSIZE; c->indloc = objset_space_alloc(zfs, os, &c->indspace); } c->dnode = dnode; c->dataoff = 0; c->datablksz = blksz; return (c); } static void _dnode_cursor_flush(zfs_opt_t *zfs, struct dnode_cursor *c, unsigned int levels) { blkptr_t *bp, *pbp; void *buf; uint64_t fill; off_t blkid, blksz, loc; assert(levels > 0); assert(levels <= c->dnode->dn_nlevels - 1U); blksz = MAXBLOCKSIZE; blkid = (c->dataoff / c->datablksz) / BLKPTR_PER_INDIR; for (unsigned int level = 1; level <= levels; level++) { buf = c->inddir[level - 1]; if (level == c->dnode->dn_nlevels - 1U) { pbp = &c->dnode->dn_blkptr[0]; } else { uint64_t iblkid; iblkid = blkid & (BLKPTR_PER_INDIR - 1); pbp = (blkptr_t *) &c->inddir[level][iblkid * sizeof(blkptr_t)]; } /* * Space for indirect blocks is allocated up-front; see the * comment in objset_write(). */ loc = c->indloc; c->indloc += blksz; assert(c->indspace >= blksz); c->indspace -= blksz; bp = buf; fill = 0; for (size_t i = 0; i < BLKPTR_PER_INDIR; i++) fill += BP_GET_FILL(&bp[i]); vdev_pwrite_dnode_indir(zfs, c->dnode, level, fill, buf, blksz, loc, pbp); memset(buf, 0, MAXBLOCKSIZE); blkid /= BLKPTR_PER_INDIR; } } blkptr_t * dnode_cursor_next(zfs_opt_t *zfs, struct dnode_cursor *c, off_t off) { off_t blkid, l1id; unsigned int levels; if (c->dnode->dn_nlevels == 1) { assert(off < MAXBLOCKSIZE); return (&c->dnode->dn_blkptr[0]); } assert(off % c->datablksz == 0); /* Do we need to flush any full indirect blocks? 
*/ if (off > 0) { blkid = off / c->datablksz; for (levels = 0; levels < c->dnode->dn_nlevels - 1U; levels++) { if (blkid % BLKPTR_PER_INDIR != 0) break; blkid /= BLKPTR_PER_INDIR; } if (levels > 0) _dnode_cursor_flush(zfs, c, levels); } c->dataoff = off; l1id = (off / c->datablksz) & (BLKPTR_PER_INDIR - 1); return ((blkptr_t *)&c->inddir[0][l1id * sizeof(blkptr_t)]); } void dnode_cursor_finish(zfs_opt_t *zfs, struct dnode_cursor *c) { unsigned int levels; assert(c->dnode->dn_nlevels > 0); levels = c->dnode->dn_nlevels - 1; if (levels > 0) _dnode_cursor_flush(zfs, c, levels); assert(c->indspace == 0); free(c); } void zfs_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts) { zfs_opt_t *zfs; int dirfd; zfs = fsopts->fs_specific; /* * Use a fixed seed to provide reproducible pseudo-random numbers for * on-disk structures when needed (e.g., GUIDs, ZAP hash salts). */ srandom(1729); zfs_check_opts(fsopts); - if (!zfs->nowarn) { - fprintf(stderr, - "ZFS support is currently considered experimental. " - "Do not use it for anything critical.\n"); - } - dirfd = open(dir, O_DIRECTORY | O_RDONLY); if (dirfd < 0) err(1, "open(%s)", dir); vdev_init(zfs, image); pool_init(zfs); fs_build(zfs, dirfd, root); pool_fini(zfs); vdev_fini(zfs); } diff --git a/usr.sbin/makefs/zfs/zfs.h b/usr.sbin/makefs/zfs/zfs.h index 924fad83f63e..33694e2bdbee 100644 --- a/usr.sbin/makefs/zfs/zfs.h +++ b/usr.sbin/makefs/zfs/zfs.h @@ -1,176 +1,176 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * * This software was developed by Mark Johnston under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _MAKEFS_ZFS_H_ #define _MAKEFS_ZFS_H_ #include #include #include #include #include #include #include "makefs.h" #include "zfs/nvlist.h" #define ASSERT assert #include "zfs/zfsimpl.h" #define MAXBLOCKSHIFT 17 /* 128KB */ #define MAXBLOCKSIZE ((off_t)(1 << MAXBLOCKSHIFT)) _Static_assert(MAXBLOCKSIZE == SPA_OLDMAXBLOCKSIZE, ""); #define MINBLOCKSHIFT 9 /* 512B */ #define MINBLOCKSIZE ((off_t)(1 << MINBLOCKSHIFT)) _Static_assert(MINBLOCKSIZE == SPA_MINBLOCKSIZE, ""); #define MINDEVSIZE ((off_t)SPA_MINDEVSIZE) /* All data was written in this transaction group. */ #define TXG 4 #define TXG_SIZE 4 typedef struct zfs_dsl_dataset zfs_dsl_dataset_t; typedef struct zfs_dsl_dir zfs_dsl_dir_t; typedef struct zfs_objset zfs_objset_t; typedef struct zfs_zap zfs_zap_t; struct dataset_desc { char *params; STAILQ_ENTRY(dataset_desc) next; }; typedef struct { /* * Block buffer, needs to be aligned for various on-disk structures, * ZAPs, etc.. 
*/ char filebuf[MAXBLOCKSIZE] __aligned(alignof(uint64_t)); - bool nowarn; + bool nowarn; /* ignored */ /* Pool parameters. */ const char *poolname; char *rootpath; /* implicit mount point prefix */ char *bootfs; /* bootable dataset, pool property */ int ashift; /* vdev block size */ uint64_t mssize; /* metaslab size */ STAILQ_HEAD(, dataset_desc) datasetdescs; /* non-root dataset descrs */ bool verify_txgs; /* verify data upon import */ /* Pool state. */ uint64_t poolguid; /* pool and root vdev GUID */ zfs_zap_t *poolprops; /* MOS state. */ zfs_objset_t *mos; /* meta object set */ uint64_t objarrid; /* space map object array */ /* DSL state. */ zfs_dsl_dir_t *rootdsldir; /* root DSL directory */ zfs_dsl_dataset_t *rootds; zfs_dsl_dir_t *origindsldir; /* $ORIGIN */ zfs_dsl_dataset_t *originds; zfs_dsl_dataset_t *snapds; zfs_zap_t *cloneszap; zfs_dsl_dir_t *freedsldir; /* $FREE */ zfs_dsl_dir_t *mosdsldir; /* $MOS */ /* vdev state. */ int fd; /* vdev disk fd */ uint64_t vdevguid; /* disk vdev GUID */ off_t vdevsize; /* vdev size, including labels */ off_t asize; /* vdev size, excluding labels */ bitstr_t *spacemap; /* space allocation tracking */ int spacemapbits; /* one bit per ashift-sized block */ uint64_t msshift; /* log2(metaslab size) */ uint64_t mscount; /* number of metaslabs for this vdev */ } zfs_opt_t; /* dsl.c */ void dsl_init(zfs_opt_t *); const char *dsl_dir_fullname(const zfs_dsl_dir_t *); uint64_t dsl_dir_id(zfs_dsl_dir_t *); uint64_t dsl_dir_dataset_id(zfs_dsl_dir_t *); void dsl_dir_foreach(zfs_opt_t *, zfs_dsl_dir_t *, void (*)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *); int dsl_dir_get_canmount(zfs_dsl_dir_t *, uint64_t *); char *dsl_dir_get_mountpoint(zfs_opt_t *, zfs_dsl_dir_t *); bool dsl_dir_has_dataset(zfs_dsl_dir_t *); bool dsl_dir_dataset_has_objset(zfs_dsl_dir_t *); void dsl_dir_dataset_write(zfs_opt_t *, zfs_objset_t *, zfs_dsl_dir_t *); void dsl_dir_root_finalize(zfs_opt_t *, uint64_t); void dsl_write(zfs_opt_t *); /* fs.c */ void 
fs_build(zfs_opt_t *, int, fsnode *); /* objset.c */ zfs_objset_t *objset_alloc(zfs_opt_t *zfs, uint64_t type); off_t objset_space_alloc(zfs_opt_t *, zfs_objset_t *, off_t *); dnode_phys_t *objset_dnode_alloc(zfs_objset_t *, uint8_t, uint64_t *); dnode_phys_t *objset_dnode_bonus_alloc(zfs_objset_t *, uint8_t, uint8_t, uint16_t, uint64_t *); dnode_phys_t *objset_dnode_lookup(zfs_objset_t *, uint64_t); void objset_root_blkptr_copy(const zfs_objset_t *, blkptr_t *); uint64_t objset_space(const zfs_objset_t *); void objset_write(zfs_opt_t *zfs, zfs_objset_t *os); /* vdev.c */ void vdev_init(zfs_opt_t *, const char *); off_t vdev_space_alloc(zfs_opt_t *zfs, off_t *lenp); void vdev_pwrite_data(zfs_opt_t *zfs, uint8_t datatype, uint8_t cksumtype, uint8_t level, uint64_t fill, const void *data, off_t sz, off_t loc, blkptr_t *bp); void vdev_pwrite_dnode_indir(zfs_opt_t *zfs, dnode_phys_t *dnode, uint8_t level, uint64_t fill, const void *data, off_t sz, off_t loc, blkptr_t *bp); void vdev_pwrite_dnode_data(zfs_opt_t *zfs, dnode_phys_t *dnode, const void *data, off_t sz, off_t loc); void vdev_label_write(zfs_opt_t *zfs, int ind, const vdev_label_t *labelp); void vdev_spacemap_write(zfs_opt_t *); void vdev_fini(zfs_opt_t *zfs); /* zap.c */ zfs_zap_t *zap_alloc(zfs_objset_t *, dnode_phys_t *); void zap_add(zfs_zap_t *, const char *, size_t, size_t, const uint8_t *); void zap_add_uint64(zfs_zap_t *, const char *, uint64_t); void zap_add_uint64_self(zfs_zap_t *, uint64_t); void zap_add_string(zfs_zap_t *, const char *, const char *); bool zap_entry_exists(zfs_zap_t *, const char *); void zap_write(zfs_opt_t *, zfs_zap_t *); /* zfs.c */ struct dnode_cursor *dnode_cursor_init(zfs_opt_t *, zfs_objset_t *, dnode_phys_t *, off_t, off_t); blkptr_t *dnode_cursor_next(zfs_opt_t *, struct dnode_cursor *, off_t); void dnode_cursor_finish(zfs_opt_t *, struct dnode_cursor *); uint64_t randomguid(void); #endif /* !_MAKEFS_ZFS_H_ */