Summary: tests/sys/geom/class/mirror, revision 363462 -> 363463.

Every place where these tests arm a debug.fail_point.g_mirror_* sysctl with
"1*return(<errno>)" now appends a "[pid N]" qualifier.  N is the output of the
new conf.sh helper gmirror_worker_pid(), which runs
pgrep -S -n "g_mirror mirror\..*".  Affected: 9_test.sh through 13_test.sh and
both cases in sync_error.sh.  Presumably this limits the injected error to the
mirror's own worker thread; see fail(9) and confirm.

NOTE(review): this text was recovered from a copy whose line breaks had been
collapsed.  Tab indentation and blank context lines were re-inferred so that
each hunk matches its @@ line counts; verify against the repository before
applying.

Index: head/tests/sys/geom/class/mirror/10_test.sh
===================================================================
--- head/tests/sys/geom/class/mirror/10_test.sh	(revision 363462)
+++ head/tests/sys/geom/class/mirror/10_test.sh	(revision 363463)
@@ -1,69 +1,69 @@
 #!/bin/sh
 # $FreeBSD$
 
 # Test handling of read errors.
 
 . $(dirname $0)/conf.sh
 
 echo 1..3
 
 set -e
 
 ddbs=2048
 regreadfp="debug.fail_point.g_mirror_regular_request_read"
 m1=$(mktemp $base.XXXXXX)
 m2=$(mktemp $base.XXXXXX)
 
 dd if=/dev/random of=$m1 bs=$ddbs count=1024 >/dev/null 2>&1
 dd if=/dev/zero of=$m2 bs=$ddbs count=1024 >/dev/null 2>&1
 
 us0=$(attach_md -t vnode -f $m1)
 us1=$(attach_md -t vnode -f $m2)
 
 gmirror label $name /dev/$us0
 gmirror insert $name /dev/$us1
 devwait
 syncwait
 
 tmp1=$(mktemp $base.XXXXXX)
 tmp2=$(mktemp $base.XXXXXX)
 
 EIO=5
 # gmirror should retry a failed read from the other mirror.
-sysctl ${regreadfp}="1*return(${EIO})"
+sysctl ${regreadfp}="1*return(${EIO})[pid $(gmirror_worker_pid)]"
 dd if=/dev/mirror/$name of=$tmp1 iseek=256 bs=$ddbs count=1 >/dev/null 2>&1
 dd if=/dev/$us1 of=$tmp2 iseek=256 bs=$ddbs count=1 >/dev/null 2>&1
 sysctl ${regreadfp}='off'
 
 if cmp -s $tmp1 $tmp2; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 
 # Make sure that one of the mirrors was marked broken.
 genid1=$(gmirror dump /dev/$us0 | awk '/^[[:space:]]*genid: /{print $2}')
 genid2=$(gmirror dump /dev/$us1 | awk '/^[[:space:]]*genid: /{print $2}')
 if [ $genid1 -eq $(($genid2 + 1)) -o $genid2 -eq $(($genid1 + 1)) ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 
 # Force a retaste of the disconnected component.
 if [ $(gmirror status -s $name | awk '{print $3}') = $us0 ]; then
 	detach_md $us1
 	us1=$(attach_md -t vnode -f $m2)
 else
 	detach_md $us0
 	us0=$(attach_md -t vnode -f $m1)
 fi
 
 # Make sure that the component wasn't re-added to the gmirror.
 if [ $(gmirror status -s $name | wc -l) -eq 1 ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
 rm -f $m1 $m2 $tmp1 $tmp2
Index: head/tests/sys/geom/class/mirror/11_test.sh
===================================================================
--- head/tests/sys/geom/class/mirror/11_test.sh	(revision 363462)
+++ head/tests/sys/geom/class/mirror/11_test.sh	(revision 363463)
@@ -1,84 +1,84 @@
 #!/bin/sh
 # $FreeBSD$
 
 # Test handling of read errors.
 
 . $(dirname $0)/conf.sh
 
 echo 1..4
 
 set -e
 
 ddbs=2048
 regreadfp="debug.fail_point.g_mirror_regular_request_read"
 m1=$(mktemp $base.XXXXXX)
 m2=$(mktemp $base.XXXXXX)
 
 dd if=/dev/random of=$m1 bs=$ddbs count=1024 >/dev/null 2>&1
 dd if=/dev/zero of=$m2 bs=$ddbs count=1024 >/dev/null 2>&1
 
 us0=$(attach_md -t vnode -f $m1)
 us1=$(attach_md -t vnode -f $m2)
 
 gmirror label $name /dev/$us0
 gmirror insert $name /dev/$us1
 devwait
 syncwait
 
 tmp1=$(mktemp $base.XXXXXX)
 tmp2=$(mktemp $base.XXXXXX)
 
 ENXIO=6
 # gmirror has special handling for ENXIO. It does not mark the failed component
 # as broken, allowing it to rejoin the mirror automatically when it appears.
-sysctl ${regreadfp}="1*return(${ENXIO})"
+sysctl ${regreadfp}="1*return(${ENXIO})[pid $(gmirror_worker_pid)]"
 dd if=/dev/mirror/$name of=$tmp1 iseek=512 bs=$ddbs count=1 >/dev/null 2>&1
 dd if=/dev/$us1 of=$tmp2 iseek=512 bs=$ddbs count=1 >/dev/null 2>&1
 sysctl ${regreadfp}='off'
 
 if cmp -s $tmp1 $tmp2; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 
 # Verify that the genids still match after ENXIO.
 genid1=$(gmirror dump /dev/$us0 | awk '/^[[:space:]]*genid: /{print $2}')
 genid2=$(gmirror dump /dev/$us1 | awk '/^[[:space:]]*genid: /{print $2}')
 if [ $genid1 -eq $genid2 ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 
 # Trigger a syncid bump.
 dd if=/dev/zero of=/dev/mirror/$name bs=$ddbs count=1 >/dev/null 2>&1
 
 # The ENXIO+write should have caused a syncid bump.
 syncid1=$(gmirror dump /dev/$us0 | awk '/^[[:space:]]*syncid: /{print $2}')
 syncid2=$(gmirror dump /dev/$us1 | awk '/^[[:space:]]*syncid: /{print $2}')
 if [ $syncid1 -eq $(($syncid2 + 1)) -o $syncid2 -eq $(($syncid1 + 1)) ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
 # Force a retaste of the disconnected component.
 if [ $(gmirror status -s $name | awk '{print $3}') = $us0 ]; then
 	detach_md $us1
 	us1=$(attach_md -t vnode -f $m2)
 else
 	detach_md $us0
 	us0=$(attach_md -t vnode -f $m1)
 fi
 
 # Make sure that the retaste caused the mirror to automatically be re-added.
 if [ $(gmirror status -s $name | wc -l) -eq 2 ]; then
 	echo "ok 4"
 else
 	echo "not ok 4"
 fi
 
 syncwait
 
 rm -f $m1 $m2 $tmp1 $tmp2
Index: head/tests/sys/geom/class/mirror/12_test.sh
===================================================================
--- head/tests/sys/geom/class/mirror/12_test.sh	(revision 363462)
+++ head/tests/sys/geom/class/mirror/12_test.sh	(revision 363463)
@@ -1,68 +1,68 @@
 #!/bin/sh
 # $FreeBSD$
 
 # Test handling of write errors.
 
 . $(dirname $0)/conf.sh
 
 echo 1..3
 
 set -e
 
 ddbs=2048
 regwritefp="debug.fail_point.g_mirror_regular_request_write"
 m1=$(mktemp $base.XXXXXX)
 m2=$(mktemp $base.XXXXXX)
 
 dd if=/dev/zero of=$m1 bs=$ddbs count=1024 >/dev/null 2>&1
 dd if=/dev/zero of=$m2 bs=$ddbs count=1024 >/dev/null 2>&1
 
 us0=$(attach_md -t vnode -f $m1)
 us1=$(attach_md -t vnode -f $m2)
 
 gmirror label $name /dev/$us0 /dev/$us1
 devwait
 
 tmp1=$(mktemp $base.XXXXXX)
 tmp2=$(mktemp $base.XXXXXX)
 dd if=/dev/random of=$tmp1 bs=$ddbs count=1 >/dev/null 2>&1
 
 EIO=5
 # gmirror should kick one of the mirrors out after hitting EIO.
-sysctl ${regwritefp}="1*return(${EIO})"
+sysctl ${regwritefp}="1*return(${EIO})[pid $(gmirror_worker_pid)]"
 dd if=$tmp1 of=/dev/mirror/$name bs=$ddbs count=1 >/dev/null 2>&1
 dd if=/dev/mirror/$name of=$tmp2 bs=$ddbs count=1 >/dev/null 2>&1
 sysctl ${regwritefp}='off'
 
 if cmp -s $tmp1 $tmp2; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 
 # Make sure that one of the mirrors was marked broken.
 genid1=$(gmirror dump /dev/$us0 | awk '/^[[:space:]]*genid: /{print $2}')
 genid2=$(gmirror dump /dev/$us1 | awk '/^[[:space:]]*genid: /{print $2}')
 if [ $genid1 -eq $(($genid2 + 1)) -o $genid2 -eq $(($genid1 + 1)) ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 
 # Force a retaste of the disconnected component.
 if [ $(gmirror status -s $name | awk '{print $3}') = $us0 ]; then
 	detach_md $us1
 	us1=$(attach_md -t vnode -f $m2)
 else
 	detach_md $us0
 	us0=$(attach_md -t vnode -f $m1)
 fi
 
 # Make sure that the component wasn't re-added to the gmirror.
 if [ $(gmirror status -s $name | wc -l) -eq 1 ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
 rm -f $m1 $m2 $tmp1 $tmp2
Index: head/tests/sys/geom/class/mirror/13_test.sh
===================================================================
--- head/tests/sys/geom/class/mirror/13_test.sh	(revision 363462)
+++ head/tests/sys/geom/class/mirror/13_test.sh	(revision 363463)
@@ -1,81 +1,81 @@
 #!/bin/sh
 # $FreeBSD$
 
 # Test handling of write errors.
 
 . $(dirname $0)/conf.sh
 
 echo 1..4
 
 set -e
 
 ddbs=2048
 regwritefp="debug.fail_point.g_mirror_regular_request_write"
 m1=$(mktemp $base.XXXXXX)
 m2=$(mktemp $base.XXXXXX)
 
 dd if=/dev/random of=$m1 bs=$ddbs count=1024 >/dev/null 2>&1
 dd if=/dev/zero of=$m2 bs=$ddbs count=1024 >/dev/null 2>&1
 
 us0=$(attach_md -t vnode -f $m1)
 us1=$(attach_md -t vnode -f $m2)
 
 gmirror label $name /dev/$us0 /dev/$us1
 devwait
 
 tmp1=$(mktemp $base.XXXXXX)
 tmp2=$(mktemp $base.XXXXXX)
 
 dd if=/dev/random of=$tmp1 bs=$ddbs count=1 >/dev/null 2>&1
 
 ENXIO=6
 # gmirror has special handling for ENXIO. It does not mark the failed component
 # as broken, allowing it to rejoin the mirror automatically when it appears.
-sysctl ${regwritefp}="1*return(${ENXIO})"
+sysctl ${regwritefp}="1*return(${ENXIO})[pid $(gmirror_worker_pid)]"
 dd if=$tmp1 of=/dev/mirror/$name bs=$ddbs count=1 >/dev/null 2>&1
 dd if=/dev/mirror/$name of=$tmp2 bs=$ddbs count=1 >/dev/null 2>&1
 sysctl ${regwritefp}='off'
 
 if cmp -s $tmp1 $tmp2; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 
 # Verify that the genids still match after ENXIO.
 genid1=$(gmirror dump /dev/$us0 | awk '/^[[:space:]]*genid: /{print $2}')
 genid2=$(gmirror dump /dev/$us1 | awk '/^[[:space:]]*genid: /{print $2}')
 if [ $genid1 -eq $genid2 ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 
 # The ENXIO should have caused a syncid bump.
 syncid1=$(gmirror dump /dev/$us0 | awk '/^[[:space:]]*syncid: /{print $2}')
 syncid2=$(gmirror dump /dev/$us1 | awk '/^[[:space:]]*syncid: /{print $2}')
 if [ $syncid1 -eq $(($syncid2 + 1)) -o $syncid2 -eq $(($syncid1 + 1)) ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
 # Force a retaste of the disconnected component.
 if [ $(gmirror status -s $name | awk '{print $3}') = $us0 ]; then
 	detach_md $us1
 	us1=$(attach_md -t vnode -f $m2)
 else
 	detach_md $us0
 	us0=$(attach_md -t vnode -f $m1)
 fi
 
 # Make sure that the retaste caused the mirror to automatically be re-added.
 if [ $(gmirror status -s $name | wc -l) -eq 2 ]; then
 	echo "ok 4"
 else
 	echo "not ok 4"
 fi
 
 syncwait
 
 rm -f $m1 $m2 $tmp1 $tmp2
Index: head/tests/sys/geom/class/mirror/9_test.sh
===================================================================
--- head/tests/sys/geom/class/mirror/9_test.sh	(revision 363462)
+++ head/tests/sys/geom/class/mirror/9_test.sh	(revision 363463)
@@ -1,58 +1,58 @@
 #!/bin/sh
 # $FreeBSD$
 
 # Regression test for r306743.
 
 . `dirname $0`/conf.sh
 
 echo 1..1
 
 ddbs=2048
 m1=`mktemp $base.XXXXXX` || exit 1
 m2=`mktemp $base.XXXXXX` || exit 1
 m3=`mktemp $base.XXXXXX` || exit 1
 
 dd if=/dev/zero of=$m1 bs=$ddbs count=1024 >/dev/null 2>&1
 dd if=/dev/zero of=$m2 bs=$ddbs count=1024 >/dev/null 2>&1
 dd if=/dev/zero of=$m3 bs=$ddbs count=1024 >/dev/null 2>&1
 
 us0=$(attach_md -t vnode -f $m1) || exit 1
 us1=$(attach_md -t vnode -f $m2) || exit 1
 us2=$(attach_md -t vnode -f $m3) || exit 1
 
 gmirror label $name /dev/$us0 /dev/$us1 || exit 1
 devwait
 
 # Break one of the mirrors by forcing a single metadata write error.
 # When dd closes the mirror provider, gmirror will attempt to mark the mirrors
 # clean, and will kick one of the mirrors out upon hitting the error.
-sysctl debug.fail_point.g_mirror_metadata_write='1*return(5)' || exit 1
+sysctl debug.fail_point.g_mirror_metadata_write="1*return(5)[pid $(gmirror_worker_pid)]" || exit 1
 dd if=/dev/random of=/dev/mirror/$name bs=$ddbs count=1 >/dev/null 2>&1
 sysctl debug.fail_point.g_mirror_metadata_write='off' || exit 1
 
 # Replace the broken mirror, and then stop the gmirror.
 gmirror forget $name || exit 1
 gmirror insert $name /dev/$us2 || exit 1
 syncwait
 gmirror stop $name || exit 1
 
 # Restart the gmirror on the original two mirrors. One of them is broken,
 # so we should end up with a degraded gmirror.
 gmirror activate $name /dev/$us0 /dev/$us1 || exit 1
 devwait
 dd if=/dev/random of=/dev/mirror/$name bs=$ddbs count=1 >/dev/null 2>&1
 
 # Re-add the replacement mirror and verify the two mirrors are synchronized.
 # Destroy the gmirror first so that the mirror metadata is wiped; otherwise
 # the metadata blocks will fail the comparison. It would be nice to do this
 # with a "gmirror verify" command instead.
 gmirror activate $name /dev/$us2 || exit 1
 syncwait
 gmirror destroy $name || exit 1
 if cmp -s $m1 $m3; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 
 rm -f $m1 $m2 $m3
Index: head/tests/sys/geom/class/mirror/conf.sh
===================================================================
--- head/tests/sys/geom/class/mirror/conf.sh	(revision 363462)
+++ head/tests/sys/geom/class/mirror/conf.sh	(revision 363463)
@@ -1,53 +1,58 @@
 #!/bin/sh
 # $FreeBSD$
 
 name="$(mktemp -u mirror.XXXXXX)"
 class="mirror"
 base=`basename $0`
 
 gmirror_test_cleanup()
 {
 	[ -c /dev/$class/$name ] && gmirror destroy $name
 	geom_test_cleanup
 }
 trap gmirror_test_cleanup ABRT EXIT INT TERM
 
+gmirror_worker_pid()
+{
+	pgrep -S -n "g_mirror mirror\..*"
+}
+
 syncwait()
 {
 	while $(gmirror status -s $name | grep -q SYNCHRONIZING); do
 		sleep 0.1;
 	done
 }
 
 consumerrefs()
 {
 	gclass=$1
 	geom=$2
 
 	if [ $# -ne 2 ]; then
 		echo "Bad usage consumerrefs" >&2
 		exit 1
 	fi
 
 	geom "${gclass}" list "${geom}" | \
 	    grep -A5 ^Consumers | \
 	    grep Mode | \
 	    cut -d: -f2
 }
 
 disconnectwait()
 {
 	gclass=$1
 	geom=$2
 
 	if [ $# -ne 2 ]; then
 		echo "Bad usage disconnectwait" >&2
 		exit 1
 	fi
 
 	while [ $(consumerrefs "$gclass" "$geom") != r0w0e0 ]; do
 		sleep 0.05
 	done
 }
 
 . `dirname $0`/../geom_subr.sh
Index: head/tests/sys/geom/class/mirror/sync_error.sh
===================================================================
--- head/tests/sys/geom/class/mirror/sync_error.sh	(revision 363462)
+++ head/tests/sys/geom/class/mirror/sync_error.sh	(revision 363463)
@@ -1,109 +1,109 @@
 # $FreeBSD$
 
 ATF_TEST=true
 . $(atf_get_srcdir)/conf.sh
 
 REG_READ_FP=debug.fail_point.g_mirror_regular_request_read
 
 atf_test_case sync_read_error_2_disks cleanup
 sync_read_error_2_disks_head()
 {
 	atf_set "descr" \
 		"Ensure that we properly handle read errors during synchronization."
 	atf_set "require.user" "root"
 }
 sync_read_error_2_disks_body()
 {
 	geom_atf_test_setup
 
 	f1=$(mktemp ${base}.XXXXXX)
 	f2=$(mktemp ${base}.XXXXXX)
 
 	atf_check dd if=/dev/zero bs=1M count=32 of=$f1 status=none
 	atf_check truncate -s 32M $f2
 
 	md1=$(attach_md -t vnode -f ${f1})
 	md2=$(attach_md -t vnode -f ${f2})
 
 	atf_check gmirror label $name $md1
 	devwait
 
-	atf_check -s ignore -e empty -o not-empty sysctl ${REG_READ_FP}='1*return(5)'
+	atf_check -s ignore -e empty -o not-empty sysctl ${REG_READ_FP}="1*return(5)[pid $(gmirror_worker_pid)]"
 
 	# If a read error occurs while synchronizing and the mirror contains
 	# a single active disk, gmirror has no choice but to fail the
 	# synchronization and kick the new disk out of the mirror.
 	atf_check gmirror insert $name $md2
 	sleep 0.1
 	syncwait
 	atf_check [ $(gmirror status -s $name | wc -l) -eq 1 ]
 	atf_check -s exit:0 -o match:"DEGRADED $md1 \(ACTIVE\)" \
 	    gmirror status -s $name
 }
 sync_read_error_2_disks_cleanup()
 {
 	atf_check -s ignore -e ignore -o ignore sysctl ${REG_READ_FP}='off'
 	gmirror_test_cleanup
 }
 
 atf_test_case sync_read_error_3_disks cleanup
 sync_read_error_3_disks_head()
 {
 	atf_set "descr" \
 		"Ensure that we properly handle read errors during synchronization."
 	atf_set "require.user" "root"
 }
 sync_read_error_3_disks_body()
 {
 	geom_atf_test_setup
 
 	f1=$(mktemp ${base}.XXXXXX)
 	f2=$(mktemp ${base}.XXXXXX)
 	f3=$(mktemp ${base}.XXXXXX)
 
 	atf_check dd if=/dev/random bs=1M count=32 of=$f1 status=none
 	atf_check truncate -s 32M $f2
 	atf_check truncate -s 32M $f3
 
 	md1=$(attach_md -t vnode -f ${f1})
 	md2=$(attach_md -t vnode -f ${f2})
 	md3=$(attach_md -t vnode -f ${f3})
 
 	atf_check gmirror label $name $md1
 	devwait
 	atf_check gmirror insert $name $md2
 	syncwait
 
-	atf_check -s exit:0 -e empty -o not-empty sysctl ${REG_READ_FP}='1*return(5)'
+	atf_check -s exit:0 -e empty -o not-empty sysctl ${REG_READ_FP}="1*return(5)[pid $(gmirror_worker_pid)]"
 
 	# If a read error occurs while synchronizing a new disk, and we have
 	# multiple active disks, we retry the read after an error. The disk
 	# which returned the read error is kicked out of the mirror.
 	atf_check gmirror insert $name $md3
 	syncwait
 	atf_check [ $(gmirror status -s $name | wc -l) -eq 2 ]
 	atf_check -s exit:0 -o match:"DEGRADED $md3 \(ACTIVE\)" \
 	    gmirror status -s $name
 
 	# Make sure that the two active disks are identical. Destroy the
 	# mirror first so that the metadata sectors are wiped.
 	if $(gmirror status -s $name | grep -q $md1); then
 		active=$md1
 	else
 		active=$md2
 	fi
 
 	atf_check gmirror destroy $name
 	atf_check cmp /dev/$active /dev/$md3
 }
 sync_read_error_3_disks_cleanup()
 {
 	atf_check -s ignore -e ignore -o ignore sysctl ${REG_READ_FP}='off'
 	gmirror_test_cleanup
 }
 
 atf_init_test_cases()
 {
 	atf_add_test_case sync_read_error_2_disks
 	atf_add_test_case sync_read_error_3_disks
 }