Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F108573258
D44043.id135419.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
9 KB
Referenced Files
None
Subscribers
None
D44043.id135419.diff
View Options
diff --git a/cddl/usr.sbin/zfsd/case_file.h b/cddl/usr.sbin/zfsd/case_file.h
--- a/cddl/usr.sbin/zfsd/case_file.h
+++ b/cddl/usr.sbin/zfsd/case_file.h
@@ -235,18 +235,27 @@
*/
int IsSpare();
+ /**
+ * \brief Get case vdev's specified property
+ */
+ int GetVdevProp(vdev_prop_t) const;
+
protected:
enum {
+ /*
+ * Use these defaults if we can't get the corresponding vdev
+ * prop or if the prop is not set
+ */
/**
* The number of soft errors on a vdev required
* to transition a vdev from healthy to degraded
- * status.
+ * status
*/
- ZFS_DEGRADE_IO_COUNT = 50,
+ DEFAULT_ZFS_DEGRADE_IO_COUNT = 50,
/**
* The number of delay errors on a vdev required to fault it
*/
- ZFS_FAULT_DELAY_COUNT = 8,
+ DEFAULT_ZFS_FAULT_SLOW_IO_COUNT = 8,
};
static CalloutFunc_t OnGracePeriodEnded;
@@ -379,12 +388,6 @@
*/
static const string s_caseFilePath;
- /**
- * \brief The time ZFSD waits before promoting a tentative event
- * into a permanent event.
- */
- static const timeval s_removeGracePeriod;
-
/**
* \brief A list of soft error events counted against the health of
* a vdev.
@@ -404,6 +407,7 @@
string m_poolGUIDString;
string m_vdevGUIDString;
string m_vdevPhysPath;
+ string m_vdevName;
int m_is_spare;
/**
diff --git a/cddl/usr.sbin/zfsd/case_file.cc b/cddl/usr.sbin/zfsd/case_file.cc
--- a/cddl/usr.sbin/zfsd/case_file.cc
+++ b/cddl/usr.sbin/zfsd/case_file.cc
@@ -53,6 +53,7 @@
#include <syslog.h>
#include <unistd.h>
+#include <libzutil.h>
#include <libzfs.h>
#include <list>
@@ -91,7 +92,6 @@
CaseFileList CaseFile::s_activeCases;
const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
-const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
//- CaseFile Static Public Methods ---------------------------------------------
CaseFile *
@@ -255,6 +255,7 @@
m_vdevState = vd.State();
m_vdevPhysPath = vd.PhysicalPath();
+ m_vdevName = vd.Name(casePool, false);
return (true);
}
@@ -610,15 +611,55 @@
return (Replace(vdev_type, devPath, /*isspare*/true));
}
+/* Does the argument event refer to a checksum error? */
+static bool
+IsChecksumEvent(const Event* const event)
+{
+ return ("ereport.fs.zfs.checksum" == event->Value("type"));
+}
+
+/* Does the argument event refer to an IO error? */
+static bool
+IsIOEvent(const Event* const event)
+{
+ return ("ereport.fs.zfs.io" == event->Value("type"));
+}
+
+/* Does the argument event refer to an IO delay? */
+static bool
+IsDelayEvent(const Event* const event)
+{
+ return ("ereport.fs.zfs.delay" == event->Value("type"));
+}
+
void
CaseFile::RegisterCallout(const Event &event)
{
timeval now, countdown, elapsed, timestamp, zero, remaining;
+ /**
+ * The time ZFSD waits before promoting a tentative event
+ * into a permanent event.
+ */
+ int sec = -1;
+ if (IsChecksumEvent(&event))
+ sec = CaseFile::GetVdevProp(VDEV_PROP_CHECKSUM_T);
+ else if (IsIOEvent(&event))
+ sec = CaseFile::GetVdevProp(VDEV_PROP_IO_T);
+ else if (IsDelayEvent(&event))
+ sec = CaseFile::GetVdevProp(VDEV_PROP_SLOW_IO_T);
+
+ if (sec == -1)
+ sec = 60; /* default */
+
+ timeval removeGracePeriod = {
+ sec, /*sec*/
+ 0 /*usec*/
+ };
gettimeofday(&now, 0);
timestamp = event.GetTimestamp();
timersub(&now, ×tamp, &elapsed);
- timersub(&s_removeGracePeriod, &elapsed, &countdown);
+ timersub(&removeGracePeriod, &elapsed, &countdown);
/*
* If countdown is <= zero, Reset the timer to the
* smallest positive time value instead
@@ -827,6 +868,10 @@
guidString << m_poolGUID;
m_poolGUIDString = guidString.str();
+ ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+ zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
+ m_vdevName = vdev.Name(zhp, false);
+
s_activeCases.push_back(this);
syslog(LOG_INFO, "Creating new CaseFile:\n");
@@ -1158,43 +1203,54 @@
return (retval);
}
-/* Does the argument event refer to a checksum error? */
-static bool
-IsChecksumEvent(const Event* const event)
+/* Lookup the vdev prop. Used for checksum, IO, or slow IO props */
+int
+CaseFile::GetVdevProp(vdev_prop_t vdev_prop) const
{
- return ("ereport.fs.zfs.checksum" == event->Value("type"));
-}
+ char val[ZFS_MAXPROPLEN];
+ zprop_source_t srctype;
+ DevdCtl::Guid poolGUID = PoolGUID();
+ ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
+ zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
-/* Does the argument event refer to an IO error? */
-static bool
-IsIOEvent(const Event* const event)
-{
- return ("ereport.fs.zfs.io" == event->Value("type"));
-}
+ char *prop_str = (char *) vdev_prop_to_name(vdev_prop);
+ if (zhp == NULL || zpool_get_vdev_prop(zhp, m_vdevName.c_str(),
+ vdev_prop, prop_str, val, sizeof (val), &srctype, B_FALSE) != 0)
+ return (-1);
-/* Does the argument event refer to an IO delay? */
-static bool
-IsDelayEvent(const Event* const event)
-{
- return ("ereport.fs.zfs.delay" == event->Value("type"));
+ /* we'll get "-" from libzfs for a prop that is not set */
+ if (zfs_isnumber(val) == B_FALSE)
+ return (-1);
+
+ return (atoi(val));
}
bool
CaseFile::ShouldDegrade() const
{
+ int checksum_n = GetVdevProp(VDEV_PROP_CHECKSUM_N);
+ if (checksum_n == -1)
+ checksum_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
return (std::count_if(m_events.begin(), m_events.end(),
- IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
+ IsChecksumEvent) > checksum_n);
}
bool
CaseFile::ShouldFault() const
{
bool should_fault_for_io, should_fault_for_delay;
+ int io_n = GetVdevProp(VDEV_PROP_IO_N);
+ int slow_io_n = GetVdevProp(VDEV_PROP_SLOW_IO_N);
+
+ if (io_n == -1)
+ io_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
+ if (slow_io_n == -1)
+ slow_io_n = DEFAULT_ZFS_FAULT_SLOW_IO_COUNT;
should_fault_for_io = std::count_if(m_events.begin(), m_events.end(),
- IsIOEvent) > ZFS_DEGRADE_IO_COUNT;
+ IsIOEvent) > io_n;
should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(),
- IsDelayEvent) > ZFS_FAULT_DELAY_COUNT;
+ IsDelayEvent) > slow_io_n;
return (should_fault_for_io || should_fault_for_delay);
}
diff --git a/cddl/usr.sbin/zfsd/zfsd.8 b/cddl/usr.sbin/zfsd/zfsd.8
--- a/cddl/usr.sbin/zfsd/zfsd.8
+++ b/cddl/usr.sbin/zfsd/zfsd.8
@@ -23,7 +23,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd April 18, 2020
+.Dd February 20, 2024
.Dt ZFSD 8
.Os
.Sh NAME
@@ -55,7 +55,7 @@
Instead, they control its behavior indirectly through zpool configuration.
There are two ways to influence
.Nm :
-assigning hotspares and setting pool properties.
+assigning hot spares and setting pool properties.
Currently, only the
.Em autoreplace
property has any effect.
@@ -69,7 +69,7 @@
.It device removal
When a leaf vdev disappears,
.Nm
-will activate any available hotspare.
+will activate any available hot spare.
.It device arrival
When a new GEOM device appears,
.Nm
@@ -77,40 +77,71 @@
If it matches a previously removed vdev on an active pool,
.Nm
will online it.
-Once resilvering completes, any active hotspare will detach automatically.
+Once resilvering completes, any active hot spare will detach automatically.
.Pp
If the new device has no ZFS label but its physical path matches the
physical path of a previously removed vdev on an active pool, and that
pool has the autoreplace property set, then
.Nm
will replace the missing vdev with the newly arrived device.
-Once resilvering completes, any active hotspare will detach automatically.
+Once resilvering completes, any active hot spare will detach automatically.
.It vdev degrade or fault events
If a vdev becomes degraded or faulted,
.Nm
-will activate any available hotspare.
+will activate any available hot spare.
.It I/O errors
-If a leaf vdev generates more than 50 I/O errors in a 60 second period, then
+By default, if a leaf vdev generates more than 50 I/O errors in a 60 second
+period, then
+.Nm
+will mark that vdev as
+.Em FAULTED .
+ZFS will no longer issue any I/Os to it.
+.Nm
+will activate a hot spare if one is available. The defaults can be changed by
+setting the
+.Em io_n
+and/or
+.Em io_t
+vdev properties. See
+.Xr vdevprops 7
+for details.
+.It I/O delays
+By default, if a leaf vdev generates more than delayed 8 I/O events in a 60
+second period, then
.Nm
will mark that vdev as
.Em FAULTED .
ZFS will no longer issue any I/Os to it.
.Nm
-will activate a hotspare if one is available.
+will activate a hot spare if one is available. The defaults can be changed by
+setting the
+.Em slow_io_n
+and/or
+.Em slow_io_t
+vdev properties. See
+.Xr vdevprops 7
+for details.
.It Checksum errors
-If a leaf vdev generates more than 50 checksum errors in a 60 second
-period, then
+By default, if a leaf vdev generates more than 50 checksum errors in a 60
+second period, then
.Nm
will mark that vdev as
.Em DEGRADED .
-ZFS will still use it, but zfsd will activate a spare anyway.
+ZFS will still use it, but zfsd will also activate a hot spare if one is
+available. The defaults can be changed by setting the
+.Em checksum_n
+and/or
+.Em checksum_t
+vdev properties. See
+.Xr vdevprops 7
+for details.
.It Spare addition
-If the system administrator adds a hotspare to a pool that is already degraded,
+If the system administrator adds a hot spare to a pool that is already degraded,
.Nm
will activate the spare.
.It Resilver complete
.Nm
-will detach any hotspare once a permanent replacement finishes resilvering.
+will detach any hot spare once a permanent replacement finishes resilvering.
.It Physical path change
If the physical path of an existing disk changes,
.Nm
@@ -134,6 +165,7 @@
.El
.Sh SEE ALSO
.Xr devctl 4 ,
+.Xr vdevprops 7 ,
.Xr zpool 8
.Sh HISTORY
.Nm
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Jan 27, 12:29 PM (4 h, 47 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16168077
Default Alt Text
D44043.id135419.diff (9 KB)
Attached To
Mode
D44043: zfsd: Use vdev prop values for fault/degrade thresholds
Attached
Detach File
Event Timeline
Log In to Comment