diff --git a/cddl/usr.sbin/zfsd/case_file.cc b/cddl/usr.sbin/zfsd/case_file.cc
--- a/cddl/usr.sbin/zfsd/case_file.cc
+++ b/cddl/usr.sbin/zfsd/case_file.cc
@@ -360,7 +360,7 @@
 {
         bool consumed(false);

-        if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
+        if (event.Value("type") == "sysevent.fs.zfs.vdev_remove") {
                 /*
                  * The Vdev we represent has been removed from the
                  * configuration. This case is no longer of value.
@@ -368,12 +368,12 @@
                 Close();

                 return (/*consumed*/true);
-        } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
+        } else if (event.Value("type") == "sysevent.fs.zfs.pool_destroy") {
                 /* This Pool has been destroyed. Discard the case */
                 Close();

                 return (/*consumed*/true);
-        } else if (event.Value("type") == "misc.fs.zfs.config_sync") {
+        } else if (event.Value("type") == "sysevent.fs.zfs.config_sync") {
                 RefreshVdevState();
                 if (VdevState() < VDEV_STATE_HEALTHY)
                         consumed = ActivateSpare();
diff --git a/cddl/usr.sbin/zfsd/zfsd.cc b/cddl/usr.sbin/zfsd/zfsd.cc
--- a/cddl/usr.sbin/zfsd/zfsd.cc
+++ b/cddl/usr.sbin/zfsd/zfsd.cc
@@ -248,7 +248,7 @@

                 snprintf(evString, 160, "!system=ZFS subsystem=ZFS "
-                    "type=misc.fs.zfs.config_sync sub_type=synthesized "
+                    "type=sysevent.fs.zfs.config_sync sub_type=synthesized "
                     "pool_name=%s pool_guid=%" PRIu64 "\n", poolname, poolGUID);
                 event = Event::CreateEvent(GetFactory(), string(evString));
                 if (event != NULL) {
diff --git a/cddl/usr.sbin/zfsd/zfsd_event.cc b/cddl/usr.sbin/zfsd/zfsd_event.cc
--- a/cddl/usr.sbin/zfsd/zfsd_event.cc
+++ b/cddl/usr.sbin/zfsd/zfsd_event.cc
@@ -281,7 +281,7 @@
         }

         /* On config syncs, replay any queued events first. */
-        if (Value("type").find("misc.fs.zfs.config_sync") == 0) {
+        if (Value("type").find("sysevent.fs.zfs.config_sync") == 0) {
                 /*
                  * Even if saved events are unconsumed the second time
                  * around, drop them. Any events that still can't be
@@ -292,7 +292,7 @@
                 CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
         }

-        if (Value("type").find("misc.fs.zfs.") == 0) {
+        if (Value("type").find("sysevent.fs.zfs.") == 0) {
                 /* Configuration changes, resilver events, etc. */
                 ProcessPoolEvent();
                 return (false);
@@ -405,7 +405,7 @@
         bool degradedDevice(false);

         /* The pool is destroyed. Discard any open cases */
-        if (Value("type") == "misc.fs.zfs.pool_destroy") {
+        if (Value("type") == "sysevent.fs.zfs.pool_destroy") {
                 Log(LOG_INFO);
                 CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
                 return;
@@ -420,7 +420,7 @@
                 Log(LOG_INFO);
                 caseFile->ReEvaluate(*this);
         }
-        else if (Value("type") == "misc.fs.zfs.resilver_finish")
+        else if (Value("type") == "sysevent.fs.zfs.resilver_finish")
         {
                 /*
                  * It's possible to get a resilver_finish event with no
@@ -431,7 +431,7 @@
                 CleanupSpares();
         }

-        if (Value("type") == "misc.fs.zfs.vdev_remove"
+        if (Value("type") == "sysevent.fs.zfs.vdev_remove"
             && degradedDevice == false) {
                 /* See if any other cases can make use of this device. */
diff --git a/tests/sys/cddl/zfs/include/libgnop.kshlib b/tests/sys/cddl/zfs/include/libgnop.kshlib
--- a/tests/sys/cddl/zfs/include/libgnop.kshlib
+++ b/tests/sys/cddl/zfs/include/libgnop.kshlib
@@ -85,6 +85,12 @@

         # Use "-f" so we can destroy a gnop with a consumer (like ZFS)
         gnop destroy -f ${disk}.nop
+
+        # Wait for it to disappear
+        for i in `seq 5`; do
+                gnop status ${disk}.nop >/dev/null 2>/dev/null || break
+                sleep $i
+        done
 }

 # Destroy multiple gnop devices.  Attempt to destroy them all, ignoring errors
diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib b/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
--- a/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
+++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib
@@ -67,19 +67,32 @@
         typeset pool=$1
         typeset vdev=$2
         typeset file=$3
+        typeset -li start=0
+        typeset -li now=0
+        typeset -li timeout=60

         # do some IO on the pool
         log_must $DD if=/dev/zero of=$file bs=1024k count=64
         $FSYNC $file

-        # scribble on the underlying file to corrupt the vdev
-        log_must $DD if=/dev/urandom of=$vdev bs=1024k count=64 conv=notrunc
+        # ZFS rate limits checksum errors to about 20 per second. So in order
+        # to ensure that we reach zfsd's threshold, we must alternately
+        # scribble and scrub.
+        while (( "$now" - "$start" < "$timeout" )); do
+                # scribble on the underlying file to corrupt the vdev
+                log_must $DD if=/dev/urandom of=$vdev bs=1024k count=64 conv=notrunc

-        # Scrub the pool to detect the corruption
-        log_must $ZPOOL scrub $pool
-        wait_until_scrubbed $pool
+                # Scrub the pool to detect and repair the corruption
+                log_must $ZPOOL scrub $pool
+                wait_until_scrubbed $pool
+                now=`date +%s`
+                if [ "$start" -eq 0 ]; then
+                        start=`date +%s`
+                fi
+                check_state "$pool" "$vdev" DEGRADED && return
+                $SLEEP 1
+        done

-        # ZFSD can take up to 60 seconds to degrade an array in response to
-        # errors (though it's usually faster).
-        wait_for_pool_dev_state_change 60 $vdev DEGRADED
+        log_must $ZPOOL status "$pool"
+        log_fail "ERROR: Disk $vdev not marked as DEGRADED in $pool"
 }
diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_001_pos.ksh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_001_pos.ksh
--- a/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_001_pos.ksh
+++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_001_pos.ksh
@@ -27,6 +27,7 @@
 # $FreeBSD$

 . $STF_SUITE/include/libtest.kshlib
+. $STF_SUITE/include/libgnop.kshlib

 ################################################################################
 #
@@ -39,8 +40,7 @@
 #
 #
 # STRATEGY:
-# 1. Create a storage pool. Only use the da driver (FreeBSD's SCSI disk
-#    driver) because it has a special interface for simulating IO errors.
+# 1. Create a storage pool. Use gnop vdevs so we can inject I/O errors.
 # 2. Inject IO errors while doing IO to the pool.
 # 3. Verify that the vdev becomes FAULTED.
 # 4. ONLINE it and verify that it resilvers and joins the pool.
@@ -57,65 +57,28 @@

 verify_runnable "global"

-function cleanup
-{
-        # Disable error injection, if still active
-        sysctl kern.cam.da.$TMPDISKNUM.error_inject=0 > /dev/null
-
-        if poolexists $TESTPOOL; then
-                # We should not get here if the test passed. Print the output
-                # of zpool status to assist in debugging.
-                $ZPOOL status
-                # Clear out artificially generated errors and destroy the pool
-                $ZPOOL clear $TESTPOOL
-                destroy_pool $TESTPOOL
-        fi
-}
-
 log_assert "ZFS will fault a vdev that produces IO errors"
-log_onexit cleanup

 ensure_zfsd_running

-# Make sure that at least one of the disks is using the da driver, and use
-# that disk for inject errors
-typeset TMPDISK=""
-for d in $DISKS
-do
-        b=`basename $d`
-        if test ${b%%[0-9]*} == da
-        then
-                TMPDISK=$b
-                TMPDISKNUM=${b##da}
-                break
-        fi
-done
-if test -z $TMPDISK
-then
-        log_unsupported "This test requires at least one disk to use the da driver"
-fi
+DISK0_NOP=${DISK0}.nop
+DISK1_NOP=${DISK1}.nop
+log_must create_gnops $DISK0 $DISK1

 for type in "raidz" "mirror"; do
         log_note "Testing raid type $type"

         # Create a pool on the supplied disks
-        create_pool $TESTPOOL $type $DISKS
+        create_pool $TESTPOOL $type "$DISK0_NOP" "$DISK1_NOP"
         log_must $ZFS create $TESTPOOL/$TESTFS

         # Cause some IO errors writing to the pool
         while true; do
-                # Running zpool status after every dd operation is too slow.
-                # So we will run several dd's in a row before checking zpool
-                # status. sync between dd operations to ensure that the disk
-                # gets IO
-                for ((i=0; $i<64; i=$i+1)); do
-                        sysctl kern.cam.da.$TMPDISKNUM.error_inject=1 > \
-                            /dev/null
-                        $DD if=/dev/zero bs=128k count=1 >> \
-                            /$TESTPOOL/$TESTFS/$TESTFILE 2> /dev/null
-                        $FSYNC /$TESTPOOL/$TESTFS/$TESTFILE
-                done
+                log_must gnop configure -e 5 -w 100 "$DISK1_NOP"
+                $DD if=/dev/zero bs=128k count=1 >> \
+                    /$TESTPOOL/$TESTFS/$TESTFILE 2> /dev/null
+                $FSYNC /$TESTPOOL/$TESTFS/$TESTFILE
                 # Check to see if the pool is faulted yet
                 $ZPOOL status $TESTPOOL | grep -q 'state: DEGRADED'
                 if [ $? == 0 ]
@@ -127,15 +90,9 @@

         log_must check_state $TESTPOOL $TMPDISK "FAULTED"

-        #find the failed disk guid
-        typeset FAILED_VDEV=`$ZPOOL status $TESTPOOL |
-                awk "/^[[:space:]]*$TMPDISK[[:space:]]*FAULTED/ {print \\$1}"`
-
-        # Reattach the failed disk
-        $ZPOOL online $TESTPOOL $FAILED_VDEV > /dev/null
-        if [ $? != 0 ]; then
-                log_fail "Could not reattach $FAILED_VDEV"
-        fi
+        # Heal and reattach the failed disk
+        log_must gnop configure -w 0 "$DISK1_NOP"
+        log_must $ZPOOL online $TESTPOOL "$DISK1_NOP"

         # Verify that the pool resilvers and goes to the ONLINE state
         for (( retries=60; $retries>0; retries=$retries+1 ))
diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh
--- a/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh
+++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh
@@ -30,12 +30,14 @@
 zfsd_fault_001_pos_head()
 {
         atf_set "descr" "ZFS will fault a vdev that produces IO errors"
-        atf_set "require.progs" "ksh93 zfs zpool zfsd"
+        atf_set "require.progs" "ksh93 gnop zfs zpool zfsd"
         atf_set "timeout" 300
 }
 zfsd_fault_001_pos_body()
 {
         . $(atf_get_srcdir)/../../include/default.cfg
+        . $(atf_get_srcdir)/../hotspare/hotspare.kshlib
+        . $(atf_get_srcdir)/../hotspare/hotspare.cfg
         . $(atf_get_srcdir)/zfsd.cfg

         verify_disk_count "$DISKS" 2
@@ -212,7 +214,7 @@
 zfsd_hotspare_004_pos_head()
 {
         atf_set "descr" "Removing a disk from a pool results in the spare activating"
-        atf_set "require.progs" "ksh93 gnop zpool camcontrol zfsd"
+        atf_set "require.progs" "ksh93 gnop zpool"
         atf_set "timeout" 3600
 }
 zfsd_hotspare_004_pos_body()
@@ -303,7 +305,7 @@
 zfsd_hotspare_007_pos_head()
 {
         atf_set "descr" "zfsd will swap failed drives at startup"
-        atf_set "require.progs" "ksh93 gnop zpool camcontrol zfsd"
+        atf_set "require.progs" "ksh93 gnop zpool"
         atf_set "timeout" 3600
 }
 zfsd_hotspare_007_pos_body()
@@ -364,7 +366,7 @@
 zfsd_autoreplace_001_neg_head()
 {
         atf_set "descr" "A pool without autoreplace set will not replace by physical path"
-        atf_set "require.progs" "ksh93 zpool camcontrol zfsd gnop"
+        atf_set "require.progs" "ksh93 zpool gnop"
         atf_set "timeout" 3600
 }
 zfsd_autoreplace_001_neg_body()
@@ -425,7 +427,7 @@
 zfsd_autoreplace_003_pos_head()
 {
         atf_set "descr" "A pool with autoreplace set will replace by physical path even if a spare is active"
-        atf_set "require.progs" "ksh93 zpool camcontrol zfsd gnop"
+        atf_set "require.progs" "ksh93 zpool gnop"
         atf_set "timeout" 3600
 }
 zfsd_autoreplace_003_pos_body()
@@ -456,7 +458,7 @@
 zfsd_replace_001_pos_head()
 {
         atf_set "descr" "ZFSD will automatically replace a SAS disk that disappears and reappears in the same location, with the same devname"
-        atf_set "require.progs" "ksh93 zpool camcontrol zfsd zfs gnop"
+        atf_set "require.progs" "ksh93 zpool zfs gnop"
 }
 zfsd_replace_001_pos_body()
 {
@@ -485,7 +487,7 @@
 zfsd_replace_002_pos_head()
 {
         atf_set "descr" "zfsd will reactivate a pool after all disks are failed and reappeared"
-        atf_set "require.progs" "ksh93 zpool camcontrol zfsd zfs"
+        atf_set "require.progs" "ksh93 zpool zfs"
 }
 zfsd_replace_002_pos_body()
 {
@@ -514,7 +516,7 @@
 zfsd_replace_003_pos_head()
 {
         atf_set "descr" "ZFSD will correctly replace disks that dissapear and reappear with different devnames"
-        atf_set "require.progs" "ksh93 zpool camcontrol zfsd zfs gnop"
+        atf_set "require.progs" "ksh93 zpool zfs gnop"
 }
 zfsd_replace_003_pos_body()
 {
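
Note for reviewers unfamiliar with gnop(8): the updated tests inject I/O errors by layering a ".nop" provider over each disk and toggling its write-failure probability, instead of poking the da(4) error_inject sysctl. The sketch below illustrates that pattern outside the test framework; the disk names (da0, da1), pool name, and file path are hypothetical and not taken from this patch.

# Illustrative only -- adjust device names to your system.
gnop create da0 da1                     # exposes /dev/da0.nop and /dev/da1.nop
zpool create testpool mirror da0.nop da1.nop
gnop configure -e 5 -w 100 da1.nop      # fail 100% of writes with errno 5 (EIO)
dd if=/dev/zero of=/testpool/junk bs=128k count=1
fsync /testpool/junk                    # force the failing writes to hit the vdev
gnop configure -w 0 da1.nop             # stop injecting errors ("heal" the disk)
zpool online testpool da1.nop           # allow the pool to resilver
gnop destroy -f da0.nop da1.nop         # tear down the nop layer when finished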