D44043.id135419.diff

diff --git a/cddl/usr.sbin/zfsd/case_file.h b/cddl/usr.sbin/zfsd/case_file.h
--- a/cddl/usr.sbin/zfsd/case_file.h
+++ b/cddl/usr.sbin/zfsd/case_file.h
@@ -235,18 +235,27 @@
*/
int IsSpare();
+ /**
+ * \brief Get case vdev's specified property
+ */
+ int GetVdevProp(vdev_prop_t) const;
+
protected:
enum {
+ /*
+ * Use these defaults if we can't get the corresponding vdev
+ * prop or if the prop is not set
+ */
/**
* The number of soft errors on a vdev required
* to transition a vdev from healthy to degraded
- * status.
+ * status
*/
- ZFS_DEGRADE_IO_COUNT = 50,
+ DEFAULT_ZFS_DEGRADE_IO_COUNT = 50,
/**
* The number of delay errors on a vdev required to fault it
*/
- ZFS_FAULT_DELAY_COUNT = 8,
+ DEFAULT_ZFS_FAULT_SLOW_IO_COUNT = 8,
};
static CalloutFunc_t OnGracePeriodEnded;
@@ -379,12 +388,6 @@
*/
static const string s_caseFilePath;
- /**
- * \brief The time ZFSD waits before promoting a tentative event
- * into a permanent event.
- */
- static const timeval s_removeGracePeriod;
-
/**
* \brief A list of soft error events counted against the health of
* a vdev.
@@ -404,6 +407,7 @@
string m_poolGUIDString;
string m_vdevGUIDString;
string m_vdevPhysPath;
+ string m_vdevName;
int m_is_spare;
/**
diff --git a/cddl/usr.sbin/zfsd/case_file.cc b/cddl/usr.sbin/zfsd/case_file.cc
--- a/cddl/usr.sbin/zfsd/case_file.cc
+++ b/cddl/usr.sbin/zfsd/case_file.cc
@@ -53,6 +53,7 @@
#include <syslog.h>
#include <unistd.h>
+#include <libzutil.h>
#include <libzfs.h>
#include <list>
@@ -91,7 +92,6 @@
CaseFileList CaseFile::s_activeCases;
const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
-const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
//- CaseFile Static Public Methods ---------------------------------------------
CaseFile *
@@ -255,6 +255,7 @@
m_vdevState = vd.State();
m_vdevPhysPath = vd.PhysicalPath();
+ m_vdevName = vd.Name(casePool, false);
return (true);
}
@@ -610,15 +611,55 @@
return (Replace(vdev_type, devPath, /*isspare*/true));
}
+/* Does the argument event refer to a checksum error? */
+static bool
+IsChecksumEvent(const Event* const event)
+{
+ return ("ereport.fs.zfs.checksum" == event->Value("type"));
+}
+
+/* Does the argument event refer to an IO error? */
+static bool
+IsIOEvent(const Event* const event)
+{
+ return ("ereport.fs.zfs.io" == event->Value("type"));
+}
+
+/* Does the argument event refer to an IO delay? */
+static bool
+IsDelayEvent(const Event* const event)
+{
+ return ("ereport.fs.zfs.delay" == event->Value("type"));
+}
+
void
CaseFile::RegisterCallout(const Event &event)
{
timeval now, countdown, elapsed, timestamp, zero, remaining;
+ /**
+ * The time ZFSD waits before promoting a tentative event
+ * into a permanent event.
+ */
+ int sec = -1;
+ if (IsChecksumEvent(&event))
+ sec = CaseFile::GetVdevProp(VDEV_PROP_CHECKSUM_T);
+ else if (IsIOEvent(&event))
+ sec = CaseFile::GetVdevProp(VDEV_PROP_IO_T);
+ else if (IsDelayEvent(&event))
+ sec = CaseFile::GetVdevProp(VDEV_PROP_SLOW_IO_T);
+
+ if (sec == -1)
+ sec = 60; /* default */
+
+ timeval removeGracePeriod = {
+ sec, /*sec*/
+ 0 /*usec*/
+ };
gettimeofday(&now, 0);
timestamp = event.GetTimestamp();
timersub(&now, &timestamp, &elapsed);
- timersub(&s_removeGracePeriod, &elapsed, &countdown);
+ timersub(&removeGracePeriod, &elapsed, &countdown);
/*
* If countdown is <= zero, Reset the timer to the
* smallest positive time value instead
@@ -827,6 +868,10 @@
guidString << m_poolGUID;
m_poolGUIDString = guidString.str();
+ ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+ zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
+ m_vdevName = vdev.Name(zhp, false);
+
s_activeCases.push_back(this);
syslog(LOG_INFO, "Creating new CaseFile:\n");
@@ -1158,43 +1203,54 @@
return (retval);
}
-/* Does the argument event refer to a checksum error? */
-static bool
-IsChecksumEvent(const Event* const event)
+/* Look up the vdev prop. Used for checksum, IO, or slow IO props */
+int
+CaseFile::GetVdevProp(vdev_prop_t vdev_prop) const
{
- return ("ereport.fs.zfs.checksum" == event->Value("type"));
-}
+ char val[ZFS_MAXPROPLEN];
+ zprop_source_t srctype;
+ DevdCtl::Guid poolGUID = PoolGUID();
+ ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
+ zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
-/* Does the argument event refer to an IO error? */
-static bool
-IsIOEvent(const Event* const event)
-{
- return ("ereport.fs.zfs.io" == event->Value("type"));
-}
+ char *prop_str = (char *) vdev_prop_to_name(vdev_prop);
+ if (zhp == NULL || zpool_get_vdev_prop(zhp, m_vdevName.c_str(),
+ vdev_prop, prop_str, val, sizeof (val), &srctype, B_FALSE) != 0)
+ return (-1);
-/* Does the argument event refer to an IO delay? */
-static bool
-IsDelayEvent(const Event* const event)
-{
- return ("ereport.fs.zfs.delay" == event->Value("type"));
+ /* we'll get "-" from libzfs for a prop that is not set */
+ if (zfs_isnumber(val) == B_FALSE)
+ return (-1);
+
+ return (atoi(val));
}
bool
CaseFile::ShouldDegrade() const
{
+ int checksum_n = GetVdevProp(VDEV_PROP_CHECKSUM_N);
+ if (checksum_n == -1)
+ checksum_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
return (std::count_if(m_events.begin(), m_events.end(),
- IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
+ IsChecksumEvent) > checksum_n);
}
bool
CaseFile::ShouldFault() const
{
bool should_fault_for_io, should_fault_for_delay;
+ int io_n = GetVdevProp(VDEV_PROP_IO_N);
+ int slow_io_n = GetVdevProp(VDEV_PROP_SLOW_IO_N);
+
+ if (io_n == -1)
+ io_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
+ if (slow_io_n == -1)
+ slow_io_n = DEFAULT_ZFS_FAULT_SLOW_IO_COUNT;
should_fault_for_io = std::count_if(m_events.begin(), m_events.end(),
- IsIOEvent) > ZFS_DEGRADE_IO_COUNT;
+ IsIOEvent) > io_n;
should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(),
- IsDelayEvent) > ZFS_FAULT_DELAY_COUNT;
+ IsDelayEvent) > slow_io_n;
return (should_fault_for_io || should_fault_for_delay);
}
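
The property lookup above can be read in isolation: query the per-vdev property through libzfs, and fall back to a compiled-in default when the pool handle is unavailable, the lookup fails, or the property is unset (libzfs reports "-" for an unset property). Below is a minimal standalone sketch of that pattern, using only the libzfs/libzutil calls that appear in the patch; the helper name lookup_vdev_threshold and its fallback parameter are illustrative, not part of the change.

#include <cstdlib>
#include <libzutil.h>
#include <libzfs.h>

/*
 * Return the numeric value of a vdev property, or `fallback' when the
 * pool handle is missing, the lookup fails, or the property is unset.
 */
static int
lookup_vdev_threshold(zpool_handle_t *zhp, const char *vdev_name,
    vdev_prop_t prop, int fallback)
{
	char val[ZFS_MAXPROPLEN];
	zprop_source_t srctype;

	if (zhp == NULL || zpool_get_vdev_prop(zhp, vdev_name, prop,
	    (char *)vdev_prop_to_name(prop), val, sizeof (val),
	    &srctype, B_FALSE) != 0)
		return (fallback);

	/* libzfs reports "-" for a property that is not set. */
	if (zfs_isnumber(val) == B_FALSE)
		return (fallback);

	return (atoi(val));
}

Called as lookup_vdev_threshold(zhp, "da0", VDEV_PROP_CHECKSUM_N, 50), with an illustrative vdev name, this reproduces the threshold selection in ShouldDegrade() above.
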
diff --git a/cddl/usr.sbin/zfsd/zfsd.8 b/cddl/usr.sbin/zfsd/zfsd.8
--- a/cddl/usr.sbin/zfsd/zfsd.8
+++ b/cddl/usr.sbin/zfsd/zfsd.8
@@ -23,7 +23,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd April 18, 2020
+.Dd February 20, 2024
.Dt ZFSD 8
.Os
.Sh NAME
@@ -55,7 +55,7 @@
Instead, they control its behavior indirectly through zpool configuration.
There are two ways to influence
.Nm :
-assigning hotspares and setting pool properties.
+assigning hot spares and setting pool properties.
Currently, only the
.Em autoreplace
property has any effect.
@@ -69,7 +69,7 @@
.It device removal
When a leaf vdev disappears,
.Nm
-will activate any available hotspare.
+will activate any available hot spare.
.It device arrival
When a new GEOM device appears,
.Nm
@@ -77,40 +77,71 @@
If it matches a previously removed vdev on an active pool,
.Nm
will online it.
-Once resilvering completes, any active hotspare will detach automatically.
+Once resilvering completes, any active hot spare will detach automatically.
.Pp
If the new device has no ZFS label but its physical path matches the
physical path of a previously removed vdev on an active pool, and that
pool has the autoreplace property set, then
.Nm
will replace the missing vdev with the newly arrived device.
-Once resilvering completes, any active hotspare will detach automatically.
+Once resilvering completes, any active hot spare will detach automatically.
.It vdev degrade or fault events
If a vdev becomes degraded or faulted,
.Nm
-will activate any available hotspare.
+will activate any available hot spare.
.It I/O errors
-If a leaf vdev generates more than 50 I/O errors in a 60 second period, then
+By default, if a leaf vdev generates more than 50 I/O errors in a 60 second
+period, then
+.Nm
+will mark that vdev as
+.Em FAULTED .
+ZFS will no longer issue any I/Os to it.
+.Nm
+will activate a hot spare if one is available. The defaults can be changed by
+setting the
+.Em io_n
+and/or
+.Em io_t
+vdev properties. See
+.Xr vdevprops 7
+for details.
+.It I/O delays
+By default, if a leaf vdev generates more than 8 delayed I/O events in a 60
+second period, then
.Nm
will mark that vdev as
.Em FAULTED .
ZFS will no longer issue any I/Os to it.
.Nm
-will activate a hotspare if one is available.
+will activate a hot spare if one is available. The defaults can be changed by
+setting the
+.Em slow_io_n
+and/or
+.Em slow_io_t
+vdev properties. See
+.Xr vdevprops 7
+for details.
.It Checksum errors
-If a leaf vdev generates more than 50 checksum errors in a 60 second
-period, then
+By default, if a leaf vdev generates more than 50 checksum errors in a 60
+second period, then
.Nm
will mark that vdev as
.Em DEGRADED .
-ZFS will still use it, but zfsd will activate a spare anyway.
+ZFS will still use it, but zfsd will also activate a hot spare if one is
+available. The defaults can be changed by setting the
+.Em checksum_n
+and/or
+.Em checksum_t
+vdev properties. See
+.Xr vdevprops 7
+for details.
.It Spare addition
-If the system administrator adds a hotspare to a pool that is already degraded,
+If the system administrator adds a hot spare to a pool that is already degraded,
.Nm
will activate the spare.
.It Resilver complete
.Nm
-will detach any hotspare once a permanent replacement finishes resilvering.
+will detach any hot spare once a permanent replacement finishes resilvering.
.It Physical path change
If the physical path of an existing disk changes,
.Nm
@@ -134,6 +165,7 @@
.El
.Sh SEE ALSO
.Xr devctl 4 ,
+.Xr vdevprops 7 ,
.Xr zpool 8
.Sh HISTORY
.Nm
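
With this change, the fault and degrade thresholds that zfsd previously hard-coded become tunable per leaf vdev through the checksum_n/checksum_t, io_n/io_t, and slow_io_n/slow_io_t vdev properties, with the old constants retained only as defaults. For example (the pool name tank and vdev name da0 are illustrative), zpool set io_n=20 tank da0 would make zfsd fault that disk only after more than 20 I/O errors within the grace period, and zpool set io_t=120 tank da0 would widen that period to 120 seconds; see vdevprops(7) for the property semantics.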
