diff --git a/cddl/usr.sbin/zfsd/case_file.cc b/cddl/usr.sbin/zfsd/case_file.cc index ad3bb0ffc538..f6b6a0228a50 100644 --- a/cddl/usr.sbin/zfsd/case_file.cc +++ b/cddl/usr.sbin/zfsd/case_file.cc @@ -1,1164 +1,1178 @@ /*- * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file case_file.cc * * We keep case files for any leaf vdev that is not in the optimal state. * However, we only serialize to disk those events that need to be preserved * across reboots. For now, this is just a log of soft errors which we * accumulate in order to mark a device as degraded. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "callout.h" #include "vdev_iterator.h" #include "zfsd_event.h" #include "case_file.h" #include "vdev.h" #include "zfsd.h" #include "zfsd_exception.h" #include "zpool_list.h" /*============================ Namespace Control =============================*/ using std::hex; using std::ifstream; using std::stringstream; using std::setfill; using std::setw; using DevdCtl::Event; using DevdCtl::EventFactory; using DevdCtl::EventList; using DevdCtl::Guid; using DevdCtl::ParseException; /*--------------------------------- CaseFile ---------------------------------*/ //- CaseFile Static Data ------------------------------------------------------- CaseFileList CaseFile::s_activeCases; const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; //- CaseFile Static Public Methods --------------------------------------------- CaseFile * CaseFile::Find(Guid poolGUID, Guid vdevGUID) { for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) { if (((*curCase)->PoolGUID() != poolGUID && Guid::InvalidGuid() != poolGUID) || (*curCase)->VdevGUID() != vdevGUID) continue; /* * We only carry one active case per-vdev. */ return (*curCase); } return (NULL); } CaseFile * CaseFile::Find(const string &physPath) { CaseFile *result = NULL; for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) { if ((*curCase)->PhysicalPath() != physPath) continue; if (result != NULL) { syslog(LOG_WARNING, "Multiple casefiles found for " "physical path %s. " "This is most likely a bug in zfsd", physPath.c_str()); } result = *curCase; } return (result); } void CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) { CaseFileList::iterator casefile; for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ CaseFileList::iterator next = casefile; next++; if (poolGUID == (*casefile)->PoolGUID()) (*casefile)->ReEvaluate(event); casefile = next; } } CaseFile & CaseFile::Create(Vdev &vdev) { CaseFile *activeCase; activeCase = Find(vdev.PoolGUID(), vdev.GUID()); if (activeCase == NULL) activeCase = new CaseFile(vdev); return (*activeCase); } void CaseFile::DeSerialize() { struct dirent **caseFiles; int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, DeSerializeSelector, /*compar*/NULL)); if (numCaseFiles == -1) return; if (numCaseFiles == 0) { free(caseFiles); return; } for (int i = 0; i < numCaseFiles; i++) { DeSerializeFile(caseFiles[i]->d_name); free(caseFiles[i]); } free(caseFiles); } bool CaseFile::Empty() { return (s_activeCases.empty()); } void CaseFile::LogAll() { for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) (*curCase)->Log(); } void CaseFile::PurgeAll() { /* * Serialize casefiles before deleting them so that they can be reread * and revalidated during BuildCaseFiles. * CaseFiles remove themselves from this list on destruction. */ while (s_activeCases.size() != 0) { CaseFile *casefile = s_activeCases.front(); casefile->Serialize(); delete casefile; } } //- CaseFile Public Methods ---------------------------------------------------- bool CaseFile::RefreshVdevState() { ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); if (casePool == NULL) return (false); Vdev vd(casePool, CaseVdev(casePool)); if (vd.DoesNotExist()) return (false); m_vdevState = vd.State(); m_vdevPhysPath = vd.PhysicalPath(); return (true); } bool CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) { ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); if (pool == NULL || !RefreshVdevState()) { /* * The pool or vdev for this case file is no longer * part of the configuration. This can happen * if we process a device arrival notification * before seeing the ZFS configuration change * event. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " "Closing\n", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); /* * Since this event was not used to close this * case, do not report it as consumed. */ return (/*consumed*/false); } if (VdevState() > VDEV_STATE_CANT_OPEN) { /* * For now, newly discovered devices only help for * devices that are missing. In the future, we might * use a newly inserted spare to replace a degraded * or faulted device. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", PoolGUIDString().c_str(), VdevGUIDString().c_str()); return (/*consumed*/false); } if (vdev != NULL && ( vdev->PoolGUID() == m_poolGUID || vdev->PoolGUID() == Guid::InvalidGuid()) && vdev->GUID() == m_vdevGUID) { if (zpool_vdev_online(pool, vdev->GUIDString().c_str(), ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &m_vdevState) != 0) { syslog(LOG_ERR, "Failed to online vdev(%s/%s:%s): %s: %s\n", zpool_get_name(pool), vdev->GUIDString().c_str(), devPath.c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); return (/*consumed*/false); } syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", zpool_get_name(pool), vdev->GUIDString().c_str(), devPath.c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); /* * Check the vdev state post the online action to see * if we can retire this case. */ CloseIfSolved(); return (/*consumed*/true); } /* * If the auto-replace policy is enabled, and we have physical * path information, try a physical path replacement. */ if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): AutoReplace not set. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } if (PhysicalPath().empty()) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): No physical path information. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } if (physPath != PhysicalPath()) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): Physical path mismatch. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } /* Write a label on the newly inserted disk. */ if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s) by physical path (label): %s: %s\n", zpool_get_name(pool), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); return (/*consumed*/false); } syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", PoolGUIDString().c_str(), VdevGUIDString().c_str(), devPath.c_str()); return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); } bool CaseFile::ReEvaluate(const ZfsEvent &event) { bool consumed(false); if (event.Value("type") == "sysevent.fs.zfs.vdev_remove") { /* * The Vdev we represent has been removed from the * configuration. This case is no longer of value. */ Close(); return (/*consumed*/true); } else if (event.Value("type") == "sysevent.fs.zfs.pool_destroy") { /* This Pool has been destroyed. Discard the case */ Close(); return (/*consumed*/true); } else if (event.Value("type") == "sysevent.fs.zfs.config_sync") { RefreshVdevState(); if (VdevState() < VDEV_STATE_HEALTHY) consumed = ActivateSpare(); } if (event.Value("class") == "resource.fs.zfs.removed") { bool spare_activated; if (!RefreshVdevState()) { /* * The pool or vdev for this case file is no longer * part of the configuration. This can happen * if we process a device arrival notification * before seeing the ZFS configuration change * event. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " "unconfigured. Closing\n", PoolGUIDString().c_str(), VdevGUIDString().c_str()); /* * Close the case now so we won't waste cycles in the * system rescan */ Close(); /* * Since this event was not used to close this * case, do not report it as consumed. */ return (/*consumed*/false); } /* * Discard any tentative I/O error events for * this case. They were most likely caused by the * hot-unplug of this device. */ PurgeTentativeEvents(); /* Try to activate spares if they are available */ spare_activated = ActivateSpare(); /* * Rescan the drives in the system to see if a recent * drive arrival can be used to solve this case. */ ZfsDaemon::RequestSystemRescan(); /* * Consume the event if we successfully activated a spare. * Otherwise, leave it in the unconsumed events list so that the * future addition of a spare to this pool might be able to * close the case */ consumed = spare_activated; } else if (event.Value("class") == "resource.fs.zfs.statechange") { RefreshVdevState(); /* * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to * activate a hotspare. Otherwise, ignore the event */ if (VdevState() == VDEV_STATE_FAULTED || VdevState() == VDEV_STATE_DEGRADED || VdevState() == VDEV_STATE_CANT_OPEN) (void) ActivateSpare(); consumed = true; } else if (event.Value("class") == "ereport.fs.zfs.io" || - event.Value("class") == "ereport.fs.zfs.checksum") { + event.Value("class") == "ereport.fs.zfs.checksum" || + event.Value("class") == "ereport.fs.zfs.delay") { m_tentativeEvents.push_front(event.DeepCopy()); RegisterCallout(event); consumed = true; } bool closed(CloseIfSolved()); return (consumed || closed); } /* Find a Vdev containing the vdev with the given GUID */ static nvlist_t* find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid) { nvlist_t **vdevChildren; int error; unsigned ch, numChildren; error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &vdevChildren, &numChildren); if (error != 0 || numChildren == 0) return (NULL); for (ch = 0; ch < numChildren; ch++) { nvlist *result; Vdev vdev(pool_config, vdevChildren[ch]); if (vdev.GUID() == child_guid) return (config); result = find_parent(pool_config, vdevChildren[ch], child_guid); if (result != NULL) return (result); } return (NULL); } bool CaseFile::ActivateSpare() { nvlist_t *config, *nvroot, *parent_config; nvlist_t **spares; char *devPath, *vdev_type; const char *poolname; u_int nspares, i; int error; ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); if (zhp == NULL) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); return (false); } poolname = zpool_get_name(zhp); config = zpool_get_config(zhp, NULL); if (config == NULL) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " "config for pool %s", poolname); return (false); } error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); if (error != 0){ syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " "tree for pool %s", poolname); return (false); } parent_config = find_parent(config, nvroot, m_vdevGUID); if (parent_config != NULL) { char *parent_type; /* * Don't activate spares for members of a "replacing" vdev. * They're already dealt with. Sparing them will just drag out * the resilver process. */ error = nvlist_lookup_string(parent_config, ZPOOL_CONFIG_TYPE, &parent_type); if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0) return (false); } nspares = 0; nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares); if (nspares == 0) { /* The pool has no spares configured */ syslog(LOG_INFO, "CaseFile::ActivateSpare: " "No spares available for pool %s", poolname); return (false); } for (i = 0; i < nspares; i++) { uint64_t *nvlist_array; vdev_stat_t *vs; uint_t nstats; if (nvlist_lookup_uint64_array(spares[i], ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " "find vdev stats for pool %s, spare %d", poolname, i); return (false); } vs = reinterpret_cast(nvlist_array); if ((vs->vs_aux != VDEV_AUX_SPARED) && (vs->vs_state == VDEV_STATE_HEALTHY)) { /* We found a usable spare */ break; } } if (i == nspares) { /* No available spares were found */ return (false); } error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); if (error != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " "the path of pool %s, spare %d. Error %d", poolname, i, error); return (false); } error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); if (error != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " "the vdev type of pool %s, spare %d. Error %d", poolname, i, error); return (false); } return (Replace(vdev_type, devPath, /*isspare*/true)); } void CaseFile::RegisterCallout(const Event &event) { timeval now, countdown, elapsed, timestamp, zero, remaining; gettimeofday(&now, 0); timestamp = event.GetTimestamp(); timersub(&now, ×tamp, &elapsed); timersub(&s_removeGracePeriod, &elapsed, &countdown); /* * If countdown is <= zero, Reset the timer to the * smallest positive time value instead */ timerclear(&zero); if (timercmp(&countdown, &zero, <=)) { timerclear(&countdown); countdown.tv_usec = 1; } remaining = m_tentativeTimer.TimeRemaining(); if (!m_tentativeTimer.IsPending() || timercmp(&countdown, &remaining, <)) m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); } bool CaseFile::CloseIfSolved() { if (m_events.empty() && m_tentativeEvents.empty()) { /* * We currently do not track or take actions on * devices in the degraded or faulted state. * Once we have support for spare pools, we'll * retain these cases so that any spares added in * the future can be applied to them. */ switch (VdevState()) { case VDEV_STATE_HEALTHY: /* No need to keep cases for healthy vdevs */ Close(); return (true); case VDEV_STATE_REMOVED: case VDEV_STATE_CANT_OPEN: /* * Keep open. We may solve it with a newly inserted * device. */ case VDEV_STATE_FAULTED: case VDEV_STATE_DEGRADED: /* * Keep open. We may solve it with the future * addition of a spare to the pool */ case VDEV_STATE_UNKNOWN: case VDEV_STATE_CLOSED: case VDEV_STATE_OFFLINE: /* * Keep open? This may not be the correct behavior, * but it's what we've always done */ ; } /* * Re-serialize the case in order to remove any * previous event data. */ Serialize(); } return (false); } void CaseFile::Log() { syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), PhysicalPath().c_str()); syslog(LOG_INFO, "\tVdev State = %s\n", zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); if (m_tentativeEvents.size() != 0) { syslog(LOG_INFO, "\t=== Tentative Events ===\n"); for (EventList::iterator event(m_tentativeEvents.begin()); event != m_tentativeEvents.end(); event++) (*event)->Log(LOG_INFO); } if (m_events.size() != 0) { syslog(LOG_INFO, "\t=== Events ===\n"); for (EventList::iterator event(m_events.begin()); event != m_events.end(); event++) (*event)->Log(LOG_INFO); } } //- CaseFile Static Protected Methods ------------------------------------------ void CaseFile::OnGracePeriodEnded(void *arg) { CaseFile &casefile(*static_cast(arg)); casefile.OnGracePeriodEnded(); } int CaseFile::DeSerializeSelector(const struct dirent *dirEntry) { uint64_t poolGUID; uint64_t vdevGUID; if (dirEntry->d_type == DT_REG && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", &poolGUID, &vdevGUID) == 2) return (1); return (0); } void CaseFile::DeSerializeFile(const char *fileName) { string fullName(s_caseFilePath + '/' + fileName); CaseFile *existingCaseFile(NULL); CaseFile *caseFile(NULL); try { uint64_t poolGUID; uint64_t vdevGUID; nvlist_t *vdevConf; if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", &poolGUID, &vdevGUID) != 2) { throw ZfsdException("CaseFile::DeSerialize: " "Unintelligible CaseFile filename %s.\n", fileName); } existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); if (existingCaseFile != NULL) { /* * If the vdev is already degraded or faulted, * there's no point in keeping the state around * that we use to put a drive into the degraded * state. However, if the vdev is simply missing, * preserve the case data in the hopes that it will * return. */ caseFile = existingCaseFile; vdev_state curState(caseFile->VdevState()); if (curState > VDEV_STATE_CANT_OPEN && curState < VDEV_STATE_HEALTHY) { unlink(fileName); return; } } else { ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (zpl.empty() || (vdevConf = VdevIterator(zpl.front()) .Find(vdevGUID)) == NULL) { /* * Either the pool no longer exists * or this vdev is no longer a member of * the pool. */ unlink(fullName.c_str()); return; } /* * Any vdev we find that does not have a case file * must be in the healthy state and thus worthy of * continued SERD data tracking. */ caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); } ifstream caseStream(fullName.c_str()); if (!caseStream) throw ZfsdException("CaseFile::DeSerialize: Unable to " "read %s.\n", fileName); caseFile->DeSerialize(caseStream); } catch (const ParseException &exp) { exp.Log(); if (caseFile != existingCaseFile) delete caseFile; /* * Since we can't parse the file, unlink it so we don't * trip over it again. */ unlink(fileName); } catch (const ZfsdException &zfsException) { zfsException.Log(); if (caseFile != existingCaseFile) delete caseFile; } } //- CaseFile Protected Methods ------------------------------------------------- CaseFile::CaseFile(const Vdev &vdev) : m_poolGUID(vdev.PoolGUID()), m_vdevGUID(vdev.GUID()), m_vdevState(vdev.State()), m_vdevPhysPath(vdev.PhysicalPath()) { stringstream guidString; guidString << m_vdevGUID; m_vdevGUIDString = guidString.str(); guidString.str(""); guidString << m_poolGUID; m_poolGUIDString = guidString.str(); s_activeCases.push_back(this); syslog(LOG_INFO, "Creating new CaseFile:\n"); Log(); } CaseFile::~CaseFile() { PurgeEvents(); PurgeTentativeEvents(); m_tentativeTimer.Stop(); s_activeCases.remove(this); } void CaseFile::PurgeEvents() { for (EventList::iterator event(m_events.begin()); event != m_events.end(); event++) delete *event; m_events.clear(); } void CaseFile::PurgeTentativeEvents() { for (EventList::iterator event(m_tentativeEvents.begin()); event != m_tentativeEvents.end(); event++) delete *event; m_tentativeEvents.clear(); } void CaseFile::SerializeEvList(const EventList events, int fd, const char* prefix) const { if (events.empty()) return; for (EventList::const_iterator curEvent = events.begin(); curEvent != events.end(); curEvent++) { const string &eventString((*curEvent)->GetEventString()); // TODO: replace many write(2) calls with a single writev(2) if (prefix) write(fd, prefix, strlen(prefix)); write(fd, eventString.c_str(), eventString.length()); } } void CaseFile::Serialize() { stringstream saveFile; saveFile << setfill('0') << s_caseFilePath << "/" << "pool_" << PoolGUIDString() << "_vdev_" << VdevGUIDString() << ".case"; if (m_events.empty() && m_tentativeEvents.empty()) { unlink(saveFile.str().c_str()); return; } int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); if (fd == -1) { syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", saveFile.str().c_str()); return; } SerializeEvList(m_events, fd); SerializeEvList(m_tentativeEvents, fd, "tentative "); close(fd); } /* * XXX: This method assumes that events may not contain embedded newlines. If * ever events can contain embedded newlines, then CaseFile must switch * serialization formats */ void CaseFile::DeSerialize(ifstream &caseStream) { string evString; const EventFactory &factory(ZfsDaemon::Get().GetFactory()); caseStream >> std::noskipws >> std::ws; while (caseStream.good()) { /* * Outline: * read the beginning of a line and check it for * "tentative". If found, discard "tentative". * Create a new event * continue */ EventList* destEvents; const string tentFlag("tentative "); string line; std::stringbuf lineBuf; caseStream.get(lineBuf); caseStream.ignore(); /*discard the newline character*/ line = lineBuf.str(); if (line.compare(0, tentFlag.size(), tentFlag) == 0) { /* Discard "tentative" */ line.erase(0, tentFlag.size()); destEvents = &m_tentativeEvents; } else { destEvents = &m_events; } Event *event(Event::CreateEvent(factory, line)); if (event != NULL) { destEvents->push_back(event); RegisterCallout(*event); } } } void CaseFile::Close() { /* * This case is no longer relevant. Clean up our * serialization file, and delete the case. */ syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); /* * Serialization of a Case with no event data, clears the * Serialization data for that event. */ PurgeEvents(); Serialize(); delete this; } void CaseFile::OnGracePeriodEnded() { bool should_fault, should_degrade; ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); m_events.splice(m_events.begin(), m_tentativeEvents); should_fault = ShouldFault(); should_degrade = ShouldDegrade(); if (should_fault || should_degrade) { if (zhp == NULL || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { /* * Either the pool no longer exists * or this vdev is no longer a member of * the pool. */ Close(); return; } } /* A fault condition has priority over a degrade condition */ if (ShouldFault()) { /* Fault the vdev and close the case. */ if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, VDEV_AUX_ERR_EXCEEDED) == 0) { syslog(LOG_INFO, "Faulting vdev(%s/%s)", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); return; } else { syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); } } else if (ShouldDegrade()) { /* Degrade the vdev and close the case. */ if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, VDEV_AUX_ERR_EXCEEDED) == 0) { syslog(LOG_INFO, "Degrading vdev(%s/%s)", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); return; } else { syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); } } Serialize(); } Vdev CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { Vdev vd(zhp, CaseVdev(zhp)); std::list children; std::list::iterator children_it; Vdev parent(vd.Parent()); Vdev replacing(NonexistentVdev); /* * To determine whether we are being replaced by another spare that * is still working, then make sure that it is currently spared and * that the spare is either resilvering or healthy. If any of these * conditions fail, then we are not being replaced by a spare. * * If the spare is healthy, then the case file should be closed very * soon after this check. */ if (parent.DoesNotExist() || parent.Name(zhp, /*verbose*/false) != "spare") return (NonexistentVdev); children = parent.Children(); children_it = children.begin(); for (;children_it != children.end(); children_it++) { Vdev child = *children_it; /* Skip our vdev. */ if (child.GUID() == VdevGUID()) continue; /* * Accept the first child that doesn't match our GUID, or * any resilvering/healthy device if one exists. */ if (replacing.DoesNotExist() || child.IsResilvering() || child.State() == VDEV_STATE_HEALTHY) replacing = child; } return (replacing); } bool CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { nvlist_t *nvroot, *newvd; const char *poolname; string oldstr(VdevGUIDString()); bool retval = true; /* Figure out what pool we're working on */ ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); if (zhp == NULL) { syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); return (false); } poolname = zpool_get_name(zhp); Vdev vd(zhp, CaseVdev(zhp)); Vdev replaced(BeingReplacedBy(zhp)); if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { /* If we are already being replaced by a working spare, pass. */ if (replaced.IsResilvering() || replaced.State() == VDEV_STATE_HEALTHY) { syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " "replaced", VdevGUIDString().c_str(), path); return (/*consumed*/false); } /* * If we have already been replaced by a spare, but that spare * is broken, we must spare the spare, not the original device. */ oldstr = replaced.GUIDString(); syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " "broken spare %s instead", VdevGUIDString().c_str(), path, oldstr.c_str()); } /* * Build a root vdev/leaf vdev configuration suitable for * zpool_vdev_attach. Only enough data for the kernel to find * the device (i.e. type and disk device node path) are needed. */ nvroot = NULL; newvd = NULL; if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " "configuration data.", poolname, oldstr.c_str()); if (nvroot != NULL) nvlist_free(nvroot); return (false); } if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd, 1) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " "configuration data.", poolname, oldstr.c_str()); nvlist_free(newvd); nvlist_free(nvroot); return (true); } /* Data was copied when added to the root vdev. */ nvlist_free(newvd); retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, /*replace*/B_TRUE, /*rebuild*/ B_FALSE) == 0); if (retval) syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", poolname, oldstr.c_str(), path); else syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); nvlist_free(nvroot); return (retval); } /* Does the argument event refer to a checksum error? */ static bool IsChecksumEvent(const Event* const event) { return ("ereport.fs.zfs.checksum" == event->Value("type")); } /* Does the argument event refer to an IO error? */ static bool IsIOEvent(const Event* const event) { return ("ereport.fs.zfs.io" == event->Value("type")); } +/* Does the argument event refer to an IO delay? */ +static bool +IsDelayEvent(const Event* const event) +{ + return ("ereport.fs.zfs.delay" == event->Value("type")); +} + bool CaseFile::ShouldDegrade() const { return (std::count_if(m_events.begin(), m_events.end(), IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); } bool CaseFile::ShouldFault() const { - return (std::count_if(m_events.begin(), m_events.end(), - IsIOEvent) > ZFS_DEGRADE_IO_COUNT); + bool should_fault_for_io, should_fault_for_delay; + + should_fault_for_io = std::count_if(m_events.begin(), m_events.end(), + IsIOEvent) > ZFS_DEGRADE_IO_COUNT; + should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(), + IsDelayEvent) > ZFS_FAULT_DELAY_COUNT; + + return (should_fault_for_io || should_fault_for_delay); } nvlist_t * CaseFile::CaseVdev(zpool_handle_t *zhp) const { return (VdevIterator(zhp).Find(VdevGUID())); } diff --git a/cddl/usr.sbin/zfsd/case_file.h b/cddl/usr.sbin/zfsd/case_file.h index 48eb33350a71..00d66ed76a67 100644 --- a/cddl/usr.sbin/zfsd/case_file.h +++ b/cddl/usr.sbin/zfsd/case_file.h @@ -1,431 +1,435 @@ /*- * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file case_file.h * * CaseFile objects aggregate vdev faults that may require ZFSD action * in order to maintain the health of a ZFS pool. * * Header requirements: * * #include * * #include "callout.h" * #include "zfsd_event.h" */ #ifndef _CASE_FILE_H_ #define _CASE_FILE_H_ /*=========================== Forward Declarations ===========================*/ class CaseFile; class Vdev; /*============================= Class Definitions ============================*/ /*------------------------------- CaseFileList -------------------------------*/ /** * CaseFileList is a specialization of the standard list STL container. */ typedef std::list< CaseFile *> CaseFileList; /*--------------------------------- CaseFile ---------------------------------*/ /** * A CaseFile object is instantiated anytime a vdev for an active pool * experiences an I/O error, is faulted by ZFS, or is determined to be * missing/removed. * * A vdev may have at most one CaseFile. * * CaseFiles are retired when a vdev leaves an active pool configuration * or an action is taken to resolve the issues recorded in the CaseFile. * * Logging a case against a vdev does not imply that an immediate action * to resolve a fault is required or even desired. For example, a CaseFile * must accumulate a number of I/O errors in order to flag a device as * degraded. * * Vdev I/O errors are not recorded in ZFS label inforamation. For this * reasons, CaseFile%%s with accumulated I/O error events are serialized * to the file system so that they survive across boots. Currently all * other fault types can be reconstructed from ZFS label information, so * CaseFile%%s for missing, faulted, or degradded members are just recreated * at ZFSD startup instead of being deserialized from the file system. */ class CaseFile { public: /** * \brief Find a CaseFile object by a vdev's pool/vdev GUID tuple. * * \param poolGUID Pool GUID for the vdev of the CaseFile to find. * If InvalidGuid, then only match the vdev GUID * instead of both pool and vdev GUIDs. * \param vdevGUID Vdev GUID for the vdev of the CaseFile to find. * * \return If found, a pointer to a valid CaseFile object. * Otherwise NULL. */ static CaseFile *Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID); /** * \brief Find a CaseFile object by a vdev's current/last known * physical path. * * \param physPath Physical path of the vdev of the CaseFile to find. * * \return If found, a pointer to a valid CaseFile object. * Otherwise NULL. */ static CaseFile *Find(const string &physPath); /** * \brief ReEvaluate all open cases whose pool guid matches the argument * * \param poolGUID Only reevaluate cases for this pool * \param event Try to consume this event with the casefile */ static void ReEvaluateByGuid(DevdCtl::Guid poolGUID, const ZfsEvent &event); /** * \brief Create or return an existing active CaseFile for the * specified vdev. * * \param vdev The vdev object for which to find/create a CaseFile. * * \return A reference to a valid CaseFile object. */ static CaseFile &Create(Vdev &vdev); /** * \brief Deserialize all serialized CaseFile objects found in * the file system. */ static void DeSerialize(); /** * \brief returns true if there are no CaseFiles */ static bool Empty(); /** * \brief Emit syslog data on all active CaseFile%%s in the system. */ static void LogAll(); /** * \brief Destroy the in-core cache of CaseFile data. * * This routine does not disturb the on disk, serialized, CaseFile * data. */ static void PurgeAll(); DevdCtl::Guid PoolGUID() const; DevdCtl::Guid VdevGUID() const; vdev_state VdevState() const; const string &PoolGUIDString() const; const string &VdevGUIDString() const; const string &PhysicalPath() const; /** * \brief Attempt to resolve this CaseFile using the disk * resource at the given device/physical path/vdev object * tuple. * * \param devPath The devfs path for the disk resource. * \param physPath The physical path information reported by * the disk resource. * \param vdev If the disk contains ZFS label information, * a pointer to the disk label's vdev object * data. Otherwise NULL. * * \return True if this event was consumed by this CaseFile. */ bool ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev); /** * \brief Update this CaseFile in light of the provided ZfsEvent. * * Must be virtual so it can be overridden in the unit tests * * \param event The ZfsEvent to evaluate. * * \return True if this event was consumed by this CaseFile. */ virtual bool ReEvaluate(const ZfsEvent &event); /** * \brief Register an itimer callout for the given event, if necessary */ virtual void RegisterCallout(const DevdCtl::Event &event); /** * \brief Close a case if it is no longer relevant. * * This method deals with cases tracking soft errors. Soft errors * will be discarded should a remove event occur within a short period * of the soft errors being reported. We also discard the events * if the vdev is marked degraded or failed. * * \return True if the case is closed. False otherwise. */ bool CloseIfSolved(); /** * \brief Emit data about this CaseFile via syslog(3). */ void Log(); /** * \brief Whether we should degrade this vdev */ bool ShouldDegrade() const; /** * \brief Whether we should fault this vdev */ bool ShouldFault() const; protected: enum { /** * The number of soft errors on a vdev required * to transition a vdev from healthy to degraded * status. */ - ZFS_DEGRADE_IO_COUNT = 50 + ZFS_DEGRADE_IO_COUNT = 50, + /** + * The number of delay errors on a vdev required to fault it + */ + ZFS_FAULT_DELAY_COUNT = 8, }; static CalloutFunc_t OnGracePeriodEnded; /** * \brief scandir(3) filter function used to find files containing * serialized CaseFile data. * * \param dirEntry Directory entry for the file to filter. * * \return Non-zero for a file to include in the selection, * otherwise 0. */ static int DeSerializeSelector(const struct dirent *dirEntry); /** * \brief Given the name of a file containing serialized events from a * CaseFile object, create/update an in-core CaseFile object * representing the serialized data. * * \param fileName The name of a file containing serialized events * from a CaseFile object. */ static void DeSerializeFile(const char *fileName); /** Constructor. */ CaseFile(const Vdev &vdev); /** * Destructor. * Must be virtual so it can be subclassed in the unit tests */ virtual ~CaseFile(); /** * \brief Reload state for the vdev associated with this CaseFile. * * \return True if the refresh was successful. False if the system * has no record of the pool or vdev for this CaseFile. */ virtual bool RefreshVdevState(); /** * \brief Free all events in the m_events list. */ void PurgeEvents(); /** * \brief Free all events in the m_tentativeEvents list. */ void PurgeTentativeEvents(); /** * \brief Commit to file system storage. */ void Serialize(); /** * \brief Retrieve event data from a serialization stream. * * \param caseStream The serializtion stream to parse. */ void DeSerialize(std::ifstream &caseStream); /** * \brief Serializes the supplied event list and writes it to fd * * \param prefix If not NULL, this prefix will be prepended to * every event in the file. */ void SerializeEvList(const DevdCtl::EventList events, int fd, const char* prefix=NULL) const; /** * \brief Unconditionally close a CaseFile. */ virtual void Close(); /** * \brief Callout callback invoked when the remove timer grace * period expires. * * If no remove events are received prior to the grace period * firing, then any tentative events are promoted and counted * against the health of the vdev. */ void OnGracePeriodEnded(); /** * \brief Attempt to activate a spare on this case's pool. * * Call this whenever a pool becomes degraded. It will look for any * spare devices and activate one to replace the casefile's vdev. It * will _not_ close the casefile; that should only happen when the * missing drive is replaced or the user promotes the spare. * * \return True if a spare was activated */ bool ActivateSpare(); /** * \brief replace a pool's vdev with another * * \param vdev_type The type of the new vdev. Usually either * VDEV_TYPE_DISK or VDEV_TYPE_FILE * \param path The file system path to the new vdev * \param isspare Whether the new vdev is a spare * * \return true iff the replacement was successful */ bool Replace(const char* vdev_type, const char* path, bool isspare); /** * \brief Which vdev, if any, is replacing ours. * * \param zhp Pool handle state from the caller context * * \return the vdev that is currently replacing ours, * or NonexistentVdev if there isn't one. */ Vdev BeingReplacedBy(zpool_handle_t *zhp); /** * \brief All CaseFiles being tracked by ZFSD. */ static CaseFileList s_activeCases; /** * \brief The file system path to serialized CaseFile data. */ static const string s_caseFilePath; /** * \brief The time ZFSD waits before promoting a tentative event * into a permanent event. */ static const timeval s_removeGracePeriod; /** * \brief A list of soft error events counted against the health of * a vdev. */ DevdCtl::EventList m_events; /** * \brief A list of soft error events waiting for a grace period * expiration before being counted against the health of * a vdev. */ DevdCtl::EventList m_tentativeEvents; DevdCtl::Guid m_poolGUID; DevdCtl::Guid m_vdevGUID; vdev_state m_vdevState; string m_poolGUIDString; string m_vdevGUIDString; string m_vdevPhysPath; /** * \brief Callout activated when a grace period */ Callout m_tentativeTimer; private: nvlist_t *CaseVdev(zpool_handle_t *zhp) const; }; inline DevdCtl::Guid CaseFile::PoolGUID() const { return (m_poolGUID); } inline DevdCtl::Guid CaseFile::VdevGUID() const { return (m_vdevGUID); } inline vdev_state CaseFile::VdevState() const { return (m_vdevState); } inline const string & CaseFile::PoolGUIDString() const { return (m_poolGUIDString); } inline const string & CaseFile::VdevGUIDString() const { return (m_vdevGUIDString); } inline const string & CaseFile::PhysicalPath() const { return (m_vdevPhysPath); } #endif /* _CASE_FILE_H_ */ diff --git a/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc b/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc index caeb077a3de8..d76abb54c9ed 100644 --- a/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc +++ b/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc @@ -1,805 +1,871 @@ /*- * Copyright (c) 2012, 2013, 2014 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Alan Somers (Spectra Logic Corporation) */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libmocks.h" /*================================== Macros ==================================*/ #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) /*============================ Namespace Control =============================*/ using std::string; using std::stringstream; using DevdCtl::Event; using DevdCtl::EventFactory; using DevdCtl::EventList; using DevdCtl::Guid; using DevdCtl::NVPairMap; /* redefine zpool_handle here because libzfs_impl.h is not includable */ struct zpool_handle { libzfs_handle_t *zpool_hdl; zpool_handle_t *zpool_next; char zpool_name[ZFS_MAX_DATASET_NAME_LEN]; int zpool_state; size_t zpool_config_size; nvlist_t *zpool_config; nvlist_t *zpool_old_config; nvlist_t *zpool_props; diskaddr_t zpool_start_block; }; class MockZfsEvent : public ZfsEvent { public: MockZfsEvent(Event::Type, NVPairMap&, const string&); virtual ~MockZfsEvent() {} static BuildMethod MockZfsEventBuilder; MOCK_CONST_METHOD0(ProcessPoolEvent, void()); static EventFactory::Record s_buildRecords[]; }; EventFactory::Record MockZfsEvent::s_buildRecords[] = { { Event::NOTIFY, "ZFS", &MockZfsEvent::MockZfsEventBuilder } }; MockZfsEvent::MockZfsEvent(Event::Type type, NVPairMap& map, const string& str) : ZfsEvent(type, map, str) { } Event * MockZfsEvent::MockZfsEventBuilder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new MockZfsEvent(type, nvpairs, eventString)); } /* * A dummy Vdev class used for testing other classes */ class MockVdev : public Vdev { public: MockVdev(nvlist_t *vdevConfig); virtual ~MockVdev() {} MOCK_CONST_METHOD0(GUID, Guid()); MOCK_CONST_METHOD0(PoolGUID, Guid()); MOCK_CONST_METHOD0(State, vdev_state()); MOCK_CONST_METHOD0(PhysicalPath, string()); }; MockVdev::MockVdev(nvlist_t *vdevConfig) : Vdev(vdevConfig) { } /* * A CaseFile class with side effects removed, for testing */ class TestableCaseFile : public CaseFile { public: static TestableCaseFile &Create(Vdev &vdev); TestableCaseFile(Vdev &vdev); virtual ~TestableCaseFile() {} MOCK_METHOD0(Close, void()); MOCK_METHOD1(RegisterCallout, void(const Event &event)); MOCK_METHOD0(RefreshVdevState, bool()); MOCK_METHOD1(ReEvaluate, bool(const ZfsEvent &event)); bool RealReEvaluate(const ZfsEvent &event) { return (CaseFile::ReEvaluate(event)); } /* * This splices the event lists, a procedure that would normally be done * by OnGracePeriodEnded, but we don't necessarily call that in the * unit tests */ void SpliceEvents(); /* * Used by some of our expectations. CaseFile does not publicize this */ static int getActiveCases() { return (s_activeCases.size()); } }; TestableCaseFile::TestableCaseFile(Vdev &vdev) : CaseFile(vdev) { } TestableCaseFile & TestableCaseFile::Create(Vdev &vdev) { TestableCaseFile *newCase; newCase = new TestableCaseFile(vdev); return (*newCase); } void TestableCaseFile::SpliceEvents() { m_events.splice(m_events.begin(), m_tentativeEvents); } /* * Test class ZfsdException */ class ZfsdExceptionTest : public ::testing::Test { protected: virtual void SetUp() { ASSERT_EQ(0, nvlist_alloc(&poolConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_string(poolConfig, ZPOOL_CONFIG_POOL_NAME, "unit_test_pool")); ASSERT_EQ(0, nvlist_add_uint64(poolConfig, ZPOOL_CONFIG_POOL_GUID, 0x1234)); ASSERT_EQ(0, nvlist_alloc(&vdevConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(vdevConfig, ZPOOL_CONFIG_GUID, 0x5678)); bzero(&poolHandle, sizeof(poolHandle)); poolHandle.zpool_config = poolConfig; } virtual void TearDown() { nvlist_free(poolConfig); nvlist_free(vdevConfig); } nvlist_t *poolConfig; nvlist_t *vdevConfig; zpool_handle_t poolHandle; }; TEST_F(ZfsdExceptionTest, StringConstructorNull) { ZfsdException ze(""); EXPECT_STREQ("", ze.GetString().c_str()); } TEST_F(ZfsdExceptionTest, StringConstructorFormatted) { ZfsdException ze(" %d %s", 55, "hello world"); EXPECT_STREQ(" 55 hello world", ze.GetString().c_str()); } TEST_F(ZfsdExceptionTest, LogSimple) { ZfsdException ze("unit test w/o vdev or pool"); ze.Log(); EXPECT_EQ(LOG_ERR, syslog_last_priority); EXPECT_STREQ("unit test w/o vdev or pool\n", syslog_last_message); } TEST_F(ZfsdExceptionTest, Pool) { const char msg[] = "Exception with pool name"; char expected[4096]; sprintf(expected, "Pool unit_test_pool: %s\n", msg); ZfsdException ze(poolConfig, msg); ze.Log(); EXPECT_STREQ(expected, syslog_last_message); } TEST_F(ZfsdExceptionTest, PoolHandle) { const char msg[] = "Exception with pool handle"; char expected[4096]; sprintf(expected, "Pool unit_test_pool: %s\n", msg); ZfsdException ze(&poolHandle, msg); ze.Log(); EXPECT_STREQ(expected, syslog_last_message); } /* * Test class Vdev */ class VdevTest : public ::testing::Test { protected: virtual void SetUp() { ASSERT_EQ(0, nvlist_alloc(&m_poolConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(m_poolConfig, ZPOOL_CONFIG_POOL_GUID, 0x1234)); ASSERT_EQ(0, nvlist_alloc(&m_vdevConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_GUID, 0x5678)); } virtual void TearDown() { nvlist_free(m_poolConfig); nvlist_free(m_vdevConfig); } nvlist_t *m_poolConfig; nvlist_t *m_vdevConfig; }; TEST_F(VdevTest, StateFromConfig) { vdev_stat_t vs; vs.vs_state = VDEV_STATE_OFFLINE; ASSERT_EQ(0, nvlist_add_uint64_array(m_vdevConfig, ZPOOL_CONFIG_VDEV_STATS, (uint64_t*)&vs, sizeof(vs) / sizeof(uint64_t))); Vdev vdev(m_poolConfig, m_vdevConfig); EXPECT_EQ(VDEV_STATE_OFFLINE, vdev.State()); } TEST_F(VdevTest, StateFaulted) { ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_FAULTED, 1)); Vdev vdev(m_poolConfig, m_vdevConfig); EXPECT_EQ(VDEV_STATE_FAULTED, vdev.State()); } /* * Test that we can construct a Vdev from the label information that is stored * on an available spare drive */ TEST_F(VdevTest, ConstructAvailSpare) { nvlist_t *labelConfig; ASSERT_EQ(0, nvlist_alloc(&labelConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_GUID, 1948339428197961030)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_SPARE)); EXPECT_NO_THROW(Vdev vdev(labelConfig)); nvlist_free(labelConfig); } /* Available spares will always show the HEALTHY state */ TEST_F(VdevTest, AvailSpareState) { nvlist_t *labelConfig; ASSERT_EQ(0, nvlist_alloc(&labelConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_GUID, 1948339428197961030)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_SPARE)); Vdev vdev(labelConfig); EXPECT_EQ(VDEV_STATE_HEALTHY, vdev.State()); nvlist_free(labelConfig); } /* Test the Vdev::IsSpare method */ TEST_F(VdevTest, IsSpare) { Vdev notSpare(m_poolConfig, m_vdevConfig); EXPECT_EQ(false, notSpare.IsSpare()); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_IS_SPARE, 1)); Vdev isSpare(m_poolConfig, m_vdevConfig); EXPECT_EQ(true, isSpare.IsSpare()); } /* * Test class ZFSEvent */ class ZfsEventTest : public ::testing::Test { protected: virtual void SetUp() { m_eventFactory = new EventFactory(); m_eventFactory->UpdateRegistry(MockZfsEvent::s_buildRecords, NUM_ELEMENTS(MockZfsEvent::s_buildRecords)); m_event = NULL; } virtual void TearDown() { delete m_eventFactory; delete m_event; } EventFactory *m_eventFactory; Event *m_event; }; TEST_F(ZfsEventTest, ProcessPoolEventGetsCalled) { string evString("!system=ZFS " "subsystem=ZFS " "type=sysevent.fs.zfs.vdev_remove " "pool_name=foo " "pool_guid=9756779504028057996 " "vdev_guid=1631193447431603339 " "vdev_path=/dev/da1 " "timestamp=1348871594"); m_event = Event::CreateEvent(*m_eventFactory, evString); MockZfsEvent *mock_event = static_cast(m_event); EXPECT_CALL(*mock_event, ProcessPoolEvent()).Times(1); mock_event->Process(); } /* * Test class CaseFile */ class CaseFileTest : public ::testing::Test { protected: virtual void SetUp() { m_eventFactory = new EventFactory(); m_eventFactory->UpdateRegistry(MockZfsEvent::s_buildRecords, NUM_ELEMENTS(MockZfsEvent::s_buildRecords)); m_event = NULL; nvlist_alloc(&m_vdevConfig, NV_UNIQUE_NAME, 0); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_GUID, 0xbeef)); m_vdev = new MockVdev(m_vdevConfig); ON_CALL(*m_vdev, GUID()) .WillByDefault(::testing::Return(Guid(123))); ON_CALL(*m_vdev, PoolGUID()) .WillByDefault(::testing::Return(Guid(456))); ON_CALL(*m_vdev, State()) .WillByDefault(::testing::Return(VDEV_STATE_HEALTHY)); m_caseFile = &TestableCaseFile::Create(*m_vdev); ON_CALL(*m_caseFile, ReEvaluate(::testing::_)) .WillByDefault(::testing::Invoke(m_caseFile, &TestableCaseFile::RealReEvaluate)); return; } virtual void TearDown() { delete m_caseFile; nvlist_free(m_vdevConfig); delete m_vdev; delete m_event; delete m_eventFactory; } nvlist_t *m_vdevConfig; MockVdev *m_vdev; TestableCaseFile *m_caseFile; Event *m_event; EventFactory *m_eventFactory; }; /* * A Vdev with no events should not be degraded or faulted */ TEST_F(CaseFileTest, HealthyVdev) { EXPECT_FALSE(m_caseFile->ShouldDegrade()); EXPECT_FALSE(m_caseFile->ShouldFault()); } /* * A Vdev with only one event should not be degraded or faulted * For performance reasons, RefreshVdevState should not be called. */ TEST_F(CaseFileTest, HealthyishVdev) { string evString("!system=ZFS " "class=ereport.fs.zfs.io " "ena=12091638756982918145 " "parent_guid=13237004955564865395 " "parent_type=raidz " "pool=testpool.4415 " "pool_context=0 " "pool_failmode=wait " "pool_guid=456 " "subsystem=ZFS " "timestamp=1348867914 " "type=ereport.fs.zfs.io " "vdev_guid=123 " "vdev_path=/dev/da400 " "vdev_type=disk " "zio_blkid=622 " "zio_err=1 " "zio_level=-2 " "zio_object=0 " "zio_objset=37 " "zio_offset=25598976 " "zio_size=1024"); m_event = Event::CreateEvent(*m_eventFactory, evString); ZfsEvent *zfs_event = static_cast(m_event); EXPECT_CALL(*m_caseFile, RefreshVdevState()) .Times(::testing::Exactly(0)); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); EXPECT_FALSE(m_caseFile->ShouldDegrade()); EXPECT_FALSE(m_caseFile->ShouldFault()); } /* The case file should be closed when its pool is destroyed */ TEST_F(CaseFileTest, PoolDestroy) { string evString("!system=ZFS " "pool_name=testpool.4415 " "pool_guid=456 " "subsystem=ZFS " "timestamp=1348867914 " "type=sysevent.fs.zfs.pool_destroy "); m_event = Event::CreateEvent(*m_eventFactory, evString); ZfsEvent *zfs_event = static_cast(m_event); EXPECT_CALL(*m_caseFile, Close()); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); } +/* + * A Vdev with a very large number of Delay errors should fault + * For performance reasons, RefreshVdevState should be called at most once + */ +TEST_F(CaseFileTest, VeryManyDelayErrors) +{ + EXPECT_CALL(*m_caseFile, RefreshVdevState()) + .Times(::testing::AtMost(1)) + .WillRepeatedly(::testing::Return(true)); + + for(int i=0; i<100; i++) { + stringstream evStringStream; + evStringStream << + "!system=ZFS " + "class=ereport.fs.zfs.delay " + "ena=12091638756982918145 " + "parent_guid=13237004955564865395 " + "parent_type=raidz " + "pool=testpool.4415 " + "pool_context=0 " + "pool_failmode=wait " + "pool_guid=456 " + "pool_state= 0" + "subsystem=ZFS " + "time="; + evStringStream << i << "0000000000000000 "; + evStringStream << "timestamp=" << i << " "; + evStringStream << + "type=ereport.fs.zfs.delay " + "vdev_ashift=12 " + "vdev_cksum_errors=0 " + "vdev_complete_ts=948336226469 " + "vdev_delays=77 " + "vdev_delta_ts=123998485899 " + "vdev_guid=123 " + "vdev_path=/dev/da400 " + "vdev_read_errors=0 " + "vdev_spare_guids= " + "vdev_type=disk " + "vdev_write_errors=0 " + "zio_blkid=622 " + "zio_delay=31000041101 " + "zio_delta=123998485899 " + "zio_err=0 " + "zio_flags=1572992 " + "zio_level=-2 " + "zio_object=0 " + "zio_objset=37 " + "zio_offset=25598976 " + "zio_pipeline=48234496 " + "zio_priority=3 " + "zio_size=1024" + "zio_stage=33554432 " + "zio_timestamp=824337740570 "; + Event *event(Event::CreateEvent(*m_eventFactory, + evStringStream.str())); + ZfsEvent *zfs_event = static_cast(event); + EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); + delete event; + } + + m_caseFile->SpliceEvents(); + EXPECT_FALSE(m_caseFile->ShouldDegrade()); + EXPECT_TRUE(m_caseFile->ShouldFault()); +} + /* * A Vdev with a very large number of IO errors should fault * For performance reasons, RefreshVdevState should be called at most once */ TEST_F(CaseFileTest, VeryManyIOErrors) { EXPECT_CALL(*m_caseFile, RefreshVdevState()) .Times(::testing::AtMost(1)) .WillRepeatedly(::testing::Return(true)); for(int i=0; i<100; i++) { stringstream evStringStream; evStringStream << "!system=ZFS " "class=ereport.fs.zfs.io " "ena=12091638756982918145 " "parent_guid=13237004955564865395 " "parent_type=raidz " "pool=testpool.4415 " "pool_context=0 " "pool_failmode=wait " "pool_guid=456 " "subsystem=ZFS " "timestamp="; evStringStream << i << " "; evStringStream << "type=ereport.fs.zfs.io " "vdev_guid=123 " "vdev_path=/dev/da400 " "vdev_type=disk " "zio_blkid=622 " "zio_err=1 " "zio_level=-2 " "zio_object=0 " "zio_objset=37 " "zio_offset=25598976 " "zio_size=1024"; Event *event(Event::CreateEvent(*m_eventFactory, evStringStream.str())); ZfsEvent *zfs_event = static_cast(event); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); delete event; } m_caseFile->SpliceEvents(); EXPECT_FALSE(m_caseFile->ShouldDegrade()); EXPECT_TRUE(m_caseFile->ShouldFault()); } /* * A Vdev with a very large number of checksum errors should degrade * For performance reasons, RefreshVdevState should be called at most once */ TEST_F(CaseFileTest, VeryManyChecksumErrors) { EXPECT_CALL(*m_caseFile, RefreshVdevState()) .Times(::testing::AtMost(1)) .WillRepeatedly(::testing::Return(true)); for(int i=0; i<100; i++) { stringstream evStringStream; evStringStream << "!system=ZFS " "bad_cleared_bits=03000000000000803f50b00000000000 " "bad_range_clears=0000000e " "bad_range_sets=00000000 " "bad_ranges=0000000000000010 " "bad_ranges_min_gap=8 " "bad_set_bits=00000000000000000000000000000000 " "class=ereport.fs.zfs.checksum " "ena=12272856582652437505 " "parent_guid=5838204195352909894 " "parent_type=raidz pool=testpool.7640 " "pool_context=0 " "pool_failmode=wait " "pool_guid=456 " "subsystem=ZFS timestamp="; evStringStream << i << " "; evStringStream << "type=ereport.fs.zfs.checksum " "vdev_guid=123 " "vdev_path=/mnt/tmp/file1.7702 " "vdev_type=file " "zio_blkid=0 " "zio_err=0 " "zio_level=0 " "zio_object=3 " "zio_objset=0 " "zio_offset=16896 " "zio_size=512"; Event *event(Event::CreateEvent(*m_eventFactory, evStringStream.str())); ZfsEvent *zfs_event = static_cast(event); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); delete event; } m_caseFile->SpliceEvents(); EXPECT_TRUE(m_caseFile->ShouldDegrade()); EXPECT_FALSE(m_caseFile->ShouldFault()); } /* * Test CaseFile::ReEvaluateByGuid */ class ReEvaluateByGuidTest : public ::testing::Test { protected: virtual void SetUp() { m_eventFactory = new EventFactory(); m_eventFactory->UpdateRegistry(MockZfsEvent::s_buildRecords, NUM_ELEMENTS(MockZfsEvent::s_buildRecords)); m_event = Event::CreateEvent(*m_eventFactory, s_evString); nvlist_alloc(&m_vdevConfig, NV_UNIQUE_NAME, 0); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_GUID, 0xbeef)); m_vdev456 = new ::testing::NiceMock(m_vdevConfig); m_vdev789 = new ::testing::NiceMock(m_vdevConfig); ON_CALL(*m_vdev456, GUID()) .WillByDefault(::testing::Return(Guid(123))); ON_CALL(*m_vdev456, PoolGUID()) .WillByDefault(::testing::Return(Guid(456))); ON_CALL(*m_vdev456, State()) .WillByDefault(::testing::Return(VDEV_STATE_HEALTHY)); ON_CALL(*m_vdev789, GUID()) .WillByDefault(::testing::Return(Guid(123))); ON_CALL(*m_vdev789, PoolGUID()) .WillByDefault(::testing::Return(Guid(789))); ON_CALL(*m_vdev789, State()) .WillByDefault(::testing::Return(VDEV_STATE_HEALTHY)); m_caseFile456 = NULL; m_caseFile789 = NULL; return; } virtual void TearDown() { delete m_caseFile456; delete m_caseFile789; nvlist_free(m_vdevConfig); delete m_vdev456; delete m_vdev789; delete m_event; delete m_eventFactory; } static string s_evString; nvlist_t *m_vdevConfig; ::testing::NiceMock *m_vdev456; ::testing::NiceMock *m_vdev789; TestableCaseFile *m_caseFile456; TestableCaseFile *m_caseFile789; Event *m_event; EventFactory *m_eventFactory; }; string ReEvaluateByGuidTest::s_evString( "!system=ZFS " "pool_guid=16271873792808333580 " "pool_name=foo " "subsystem=ZFS " "timestamp=1360620391 " "type=sysevent.fs.zfs.config_sync"); /* * Test the ReEvaluateByGuid method on an empty list of casefiles. * We must create one event, even though it never gets used, because it will * be passed by reference to ReEvaluateByGuid */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_empty) { ZfsEvent *zfs_event = static_cast(m_event); EXPECT_EQ(0, TestableCaseFile::getActiveCases()); CaseFile::ReEvaluateByGuid(Guid(456), *zfs_event); EXPECT_EQ(0, TestableCaseFile::getActiveCases()); } /* * Test the ReEvaluateByGuid method on a list of CaseFiles that contains only * one CaseFile, which doesn't match the criteria */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_oneFalse) { m_caseFile456 = &TestableCaseFile::Create(*m_vdev456); ZfsEvent *zfs_event = static_cast(m_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); EXPECT_CALL(*m_caseFile456, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); CaseFile::ReEvaluateByGuid(Guid(789), *zfs_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); } /* * Test the ReEvaluateByGuid method on a list of CaseFiles that contains only * one CaseFile, which does match the criteria */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_oneTrue) { m_caseFile456 = &TestableCaseFile::Create(*m_vdev456); ZfsEvent *zfs_event = static_cast(m_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); EXPECT_CALL(*m_caseFile456, ReEvaluate(::testing::_)) .Times(::testing::Exactly(1)) .WillRepeatedly(::testing::Return(false)); CaseFile::ReEvaluateByGuid(Guid(456), *zfs_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); } /* * Test the ReEvaluateByGuid method on a long list of CaseFiles that contains a * few cases which meet the criteria */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_five) { TestableCaseFile *CaseFile1 = &TestableCaseFile::Create(*m_vdev456); TestableCaseFile *CaseFile2 = &TestableCaseFile::Create(*m_vdev789); TestableCaseFile *CaseFile3 = &TestableCaseFile::Create(*m_vdev456); TestableCaseFile *CaseFile4 = &TestableCaseFile::Create(*m_vdev789); TestableCaseFile *CaseFile5 = &TestableCaseFile::Create(*m_vdev789); ZfsEvent *zfs_event = static_cast(m_event); EXPECT_EQ(5, TestableCaseFile::getActiveCases()); EXPECT_CALL(*CaseFile1, ReEvaluate(::testing::_)) .Times(::testing::Exactly(1)) .WillRepeatedly(::testing::Return(false)); EXPECT_CALL(*CaseFile3, ReEvaluate(::testing::_)) .Times(::testing::Exactly(1)) .WillRepeatedly(::testing::Return(false)); EXPECT_CALL(*CaseFile2, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); EXPECT_CALL(*CaseFile4, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); EXPECT_CALL(*CaseFile5, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); CaseFile::ReEvaluateByGuid(Guid(456), *zfs_event); EXPECT_EQ(5, TestableCaseFile::getActiveCases()); delete CaseFile1; delete CaseFile2; delete CaseFile3; delete CaseFile4; delete CaseFile5; } /* * Test VdevIterator */ class VdevIteratorTest : public ::testing::Test { }; bool VdevIteratorTestCB(Vdev &vdev, void *cbArg) { return (false); } /* * VdevIterator::Next should not crash when run on a pool that has a previously * removed vdev. Regression for * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=273663 */ TEST_F(VdevIteratorTest, VdevRemoval) { nvlist_t* poolConfig, *rootVdev; ASSERT_EQ(0, nvlist_alloc(&rootVdev, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(rootVdev, ZPOOL_CONFIG_GUID, 0x5678)); /* * Note: pools with previously-removed top-level VDEVs will contain a * TLV in their labels that has 0 children. */ ASSERT_EQ(0, nvlist_add_nvlist_array(rootVdev, ZPOOL_CONFIG_CHILDREN, NULL, 0)); ASSERT_EQ(0, nvlist_alloc(&poolConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(poolConfig, ZPOOL_CONFIG_POOL_GUID, 0x1234)); ASSERT_EQ(0, nvlist_add_nvlist(poolConfig, ZPOOL_CONFIG_VDEV_TREE, rootVdev)); VdevIterator(poolConfig).Each(VdevIteratorTestCB, NULL); } diff --git a/tests/sys/cddl/zfs/tests/zfsd/Makefile b/tests/sys/cddl/zfs/tests/zfsd/Makefile index c328cafecb6c..e2b709dc25c0 100644 --- a/tests/sys/cddl/zfs/tests/zfsd/Makefile +++ b/tests/sys/cddl/zfs/tests/zfsd/Makefile @@ -1,37 +1,38 @@ .include PACKAGE=tests TESTSDIR=${TESTSBASE}/sys/cddl/zfs/tests/zfsd FILESDIR=${TESTSDIR} ATF_TESTS_KSH93+= zfsd_test TEST_METADATA+= required_user="root" TEST_METADATA+= is_exclusive=true ${PACKAGE}FILES+= cleanup.ksh ${PACKAGE}FILES+= hotspare_cleanup.ksh ${PACKAGE}FILES+= hotspare_setup.ksh ${PACKAGE}FILES+= setup.ksh ${PACKAGE}FILES+= zfsd.cfg ${PACKAGE}FILES+= zfsd.kshlib ${PACKAGE}FILES+= zfsd_autoreplace_001_neg.ksh ${PACKAGE}FILES+= zfsd_autoreplace_002_pos.ksh ${PACKAGE}FILES+= zfsd_autoreplace_003_pos.ksh ${PACKAGE}FILES+= zfsd_degrade_001_pos.ksh ${PACKAGE}FILES+= zfsd_degrade_002_pos.ksh ${PACKAGE}FILES+= zfsd_fault_001_pos.ksh +${PACKAGE}FILES+= zfsd_fault_002_pos.ksh ${PACKAGE}FILES+= zfsd_hotspare_001_pos.ksh ${PACKAGE}FILES+= zfsd_hotspare_002_pos.ksh ${PACKAGE}FILES+= zfsd_hotspare_003_pos.ksh ${PACKAGE}FILES+= zfsd_hotspare_004_pos.ksh ${PACKAGE}FILES+= zfsd_hotspare_005_pos.ksh ${PACKAGE}FILES+= zfsd_hotspare_006_pos.ksh ${PACKAGE}FILES+= zfsd_hotspare_007_pos.ksh ${PACKAGE}FILES+= zfsd_hotspare_008_neg.ksh ${PACKAGE}FILES+= zfsd_import_001_pos.ksh ${PACKAGE}FILES+= zfsd_replace_001_pos.ksh ${PACKAGE}FILES+= zfsd_replace_002_pos.ksh ${PACKAGE}FILES+= zfsd_replace_003_pos.ksh .include diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_002_pos.ksh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_002_pos.ksh new file mode 100644 index 000000000000..d8de1ceaff4b --- /dev/null +++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_002_pos.ksh @@ -0,0 +1,99 @@ +#!/usr/local/bin/ksh93 -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 Axcient. All rights reserved. +# Use is subject to license terms. +# +# $FreeBSD$ + +. $STF_SUITE/include/libtest.kshlib +. $STF_SUITE/include/libgnop.kshlib + +################################################################################ +# +# __stc_assertion_start +# +# ID: zfsd_fault_002_pos +# +# DESCRIPTION: +# If a vdev experiences delayed I/O, it will become faulted. +# +# +# STRATEGY: +# 1. Create a storage pool. Use gnop vdevs so we can inject I/O delays. +# 2. Inject IO delays while doing IO to the pool. +# 3. Verify that the vdev becomes FAULTED. +# 4. ONLINE it and verify that it resilvers and joins the pool. +# +# TESTABILITY: explicit +# +# TEST_AUTOMATION_LEVEL: automated +# +# __stc_assertion_end +# +############################################################################### + +verify_runnable "global" + +log_assert "ZFS will fault a vdev that experiences delayed I/O" + +ensure_zfsd_running + +DISK0_NOP=${DISK0}.nop +DISK1_NOP=${DISK1}.nop + +log_must create_gnops $DISK0 $DISK1 + +for type in "raidz" "mirror"; do + log_note "Testing raid type $type" + + # Create a pool on the supplied disks + create_pool $TESTPOOL $type "$DISK0_NOP" "$DISK1_NOP" + log_must $ZFS create $TESTPOOL/$TESTFS + + # Cause some IO delays writing to the pool + while true; do + # ZFS currently considers an I/O to be "slow" if it's delayed + # for 30 seconds (zio_slow_io_ms). + log_must gnop configure -d 31000 -x 100 "$DISK1_NOP" + $DD if=/dev/zero bs=128k count=1 >> \ + /$TESTPOOL/$TESTFS/$TESTFILE 2> /dev/null + $FSYNC /$TESTPOOL/$TESTFS/$TESTFILE + # Check to see if the pool is faulted yet + $ZPOOL status $TESTPOOL | grep -q 'state: DEGRADED' + if [ $? == 0 ] + then + log_note "$TESTPOOL got degraded" + $ZPOOL status -s $TESTPOOL + break + fi + done + + log_must check_state $TESTPOOL $TMPDISK "FAULTED" + + log_must gnop configure -x 0 "$DISK1_NOP" + destroy_pool $TESTPOOL + log_must $RM -rf /$TESTPOOL +done + +log_pass diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh index b6dcfe97dd7b..7aa808c9eb82 100755 --- a/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh +++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh @@ -1,613 +1,645 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright 2012,2013 Spectra Logic. All rights reserved. # Use is subject to license terms. # atf_test_case zfsd_fault_001_pos cleanup zfsd_fault_001_pos_head() { atf_set "descr" "ZFS will fault a vdev that produces IO errors" atf_set "require.progs" "ksh93 gnop zfs zpool zfsd" atf_set "timeout" 300 } zfsd_fault_001_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg . $(atf_get_srcdir)/zfsd.cfg verify_disk_count "$DISKS" 2 verify_zfsd_running ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_fault_001_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_fault_001_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } +atf_test_case zfsd_fault_002_pos cleanup +zfsd_fault_002_pos_head() +{ + atf_set "descr" "ZFS will fault a vdev that experiences delayed I/O" + atf_set "require.progs" "ksh93 gnop zfs zpool zfsd" + atf_set "timeout" 300 +} +zfsd_fault_002_pos_body() +{ + . $(atf_get_srcdir)/../../include/default.cfg + . $(atf_get_srcdir)/../hotspare/hotspare.kshlib + . $(atf_get_srcdir)/../hotspare/hotspare.cfg + . $(atf_get_srcdir)/zfsd.cfg + + verify_disk_count "$DISKS" 2 + verify_zfsd_running + ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed" + ksh93 $(atf_get_srcdir)/zfsd_fault_002_pos.ksh + if [[ $? != 0 ]]; then + save_artifacts + atf_fail "Testcase failed" + fi +} +zfsd_fault_002_pos_cleanup() +{ + . $(atf_get_srcdir)/../../include/default.cfg + . $(atf_get_srcdir)/zfsd.cfg + + ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" +} + atf_test_case zfsd_degrade_001_pos cleanup zfsd_degrade_001_pos_head() { atf_set "descr" "ZFS will degrade a vdev that produces checksum errors" atf_set "require.progs" "ksh93 zpool zfsd" atf_set "timeout" 600 } zfsd_degrade_001_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg verify_disk_count "$DISKS" 2 verify_zfsd_running ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_degrade_001_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_degrade_001_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_degrade_002_pos cleanup zfsd_degrade_002_pos_head() { atf_set "descr" "ZFS will degrade a spare that produces checksum errors" atf_set "require.progs" "ksh93 zpool zfsd" atf_set "timeout" 600 } zfsd_degrade_002_pos_body() { atf_expect_fail "https://www.illumos.org/issues/8614 Checksum errors on a mirrored child of a raidz are incorrectly accounted" . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg verify_disk_count "$DISKS" 5 verify_zfsd_running ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_degrade_002_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_degrade_002_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_hotspare_001_pos cleanup zfsd_hotspare_001_pos_head() { atf_set "descr" "An active, damaged spare will be replaced by an available spare" atf_set "require.progs" "ksh93 zpool zfsd" atf_set "timeout" 3600 } zfsd_hotspare_001_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_zfsd_running ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_hotspare_001_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_hotspare_001_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/hotspare_cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_hotspare_002_pos cleanup zfsd_hotspare_002_pos_head() { atf_set "descr" "If a vdev becomes degraded, the spare will be activated." atf_set "require.progs" "ksh93 zpool zfsd zinject" atf_set "timeout" 3600 } zfsd_hotspare_002_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_zfsd_running ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_hotspare_002_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_hotspare_002_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/hotspare_cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_hotspare_003_pos cleanup zfsd_hotspare_003_pos_head() { atf_set "descr" "A faulted vdev will be replaced by an available spare" atf_set "require.progs" "ksh93 zpool zfsd zinject" atf_set "timeout" 3600 } zfsd_hotspare_003_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_disk_count "$DISKS" 5 verify_zfsd_running ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_hotspare_003_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_hotspare_003_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/hotspare_cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_hotspare_004_pos cleanup zfsd_hotspare_004_pos_head() { atf_set "descr" "Removing a disk from a pool results in the spare activating" atf_set "require.progs" "ksh93 gnop zpool" atf_set "timeout" 3600 } zfsd_hotspare_004_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_disk_count "$DISKS" 5 verify_zfsd_running ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_hotspare_004_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_hotspare_004_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_hotspare_005_pos cleanup zfsd_hotspare_005_pos_head() { atf_set "descr" "A spare that is added to a degraded pool will be activated" atf_set "require.progs" "ksh93 zpool zfsd zinject" atf_set "timeout" 3600 } zfsd_hotspare_005_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_zfsd_running ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_hotspare_005_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_hotspare_005_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/hotspare_cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_hotspare_006_pos cleanup zfsd_hotspare_006_pos_head() { atf_set "descr" "zfsd will replace two vdevs that fail simultaneously" atf_set "require.progs" "ksh93 zpool zfsd zinject" atf_set "timeout" 3600 } zfsd_hotspare_006_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_zfsd_running ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_hotspare_006_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_hotspare_006_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/hotspare_cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_hotspare_007_pos cleanup zfsd_hotspare_007_pos_head() { atf_set "descr" "zfsd will swap failed drives at startup" atf_set "require.progs" "ksh93 gnop zpool" atf_set "timeout" 3600 } zfsd_hotspare_007_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_disk_count "$DISKS" 5 verify_zfsd_running ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_hotspare_007_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_hotspare_007_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_hotspare_008_neg cleanup zfsd_hotspare_008_neg_head() { atf_set "descr" "zfsd will not use newly added spares on replacing vdevs" atf_set "require.progs" "ksh93 zpool zfsd" atf_set "timeout" 3600 } zfsd_hotspare_008_neg_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_disk_count "$DISKS" 4 ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_hotspare_008_neg.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_hotspare_008_neg_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/hotspare_cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_autoreplace_001_neg cleanup zfsd_autoreplace_001_neg_head() { atf_set "descr" "A pool without autoreplace set will not replace by physical path" atf_set "require.progs" "ksh93 zpool gnop" atf_set "timeout" 3600 } zfsd_autoreplace_001_neg_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_disk_count "$DISKS" 5 ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_autoreplace_001_neg.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_autoreplace_001_neg_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_autoreplace_002_pos cleanup zfsd_autoreplace_002_pos_head() { atf_set "descr" "A pool with autoreplace set will replace by physical path" atf_set "require.progs" "ksh93 gnop zpool zfsd" atf_set "timeout" 3600 } zfsd_autoreplace_002_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_disk_count "$DISKS" 5 verify_zfsd_running ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_autoreplace_002_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_autoreplace_002_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_autoreplace_003_pos cleanup zfsd_autoreplace_003_pos_head() { atf_set "descr" "A pool with autoreplace set will replace by physical path even if a spare is active" atf_set "require.progs" "ksh93 zpool gnop" atf_set "timeout" 3600 } zfsd_autoreplace_003_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_disk_count "$DISKS" 5 verify_zfsd_running ksh93 $(atf_get_srcdir)/hotspare_setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_autoreplace_003_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_autoreplace_003_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_replace_001_pos cleanup zfsd_replace_001_pos_head() { atf_set "descr" "ZFSD will automatically replace a SAS disk that disappears and reappears in the same location, with the same devname" atf_set "require.progs" "ksh93 zpool zfs gnop" } zfsd_replace_001_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg verify_disk_count "$DISKS" 3 verify_zfsd_running ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_replace_001_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_replace_001_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_replace_002_pos cleanup zfsd_replace_002_pos_head() { atf_set "descr" "zfsd will reactivate a pool after all disks are failed and reappeared" atf_set "require.progs" "ksh93 zpool zfs" } zfsd_replace_002_pos_body() { atf_expect_fail "Not yet implemented in zfsd" . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg verify_disk_count "$DISKS" 3 verify_zfsd_running ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_replace_002_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_replace_002_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_replace_003_pos cleanup zfsd_replace_003_pos_head() { atf_set "descr" "ZFSD will correctly replace disks that dissapear and reappear with different devnames" atf_set "require.progs" "ksh93 zpool zfs gnop" } zfsd_replace_003_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_zfsd_running ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_replace_003_pos.ksh if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_replace_003_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/zfsd.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_test_case zfsd_import_001_pos cleanup zfsd_import_001_pos_head() { atf_set "descr" "If a removed drive gets reinserted while the pool is exported, it will detach its spare when imported." atf_set "require.progs" "ksh93 gnop zfsd zpool" atf_set "timeout" 3600 } zfsd_import_001_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg verify_disk_count "$DISKS" 5 verify_zfsd_running ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed" ksh93 $(atf_get_srcdir)/zfsd_import_001_pos.ksh || atf_fail "Testcase failed" if [[ $? != 0 ]]; then save_artifacts atf_fail "Testcase failed" fi } zfsd_import_001_pos_cleanup() { . $(atf_get_srcdir)/../../include/default.cfg . $(atf_get_srcdir)/../hotspare/hotspare.kshlib . $(atf_get_srcdir)/../hotspare/hotspare.cfg ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed" } atf_init_test_cases() { atf_add_test_case zfsd_fault_001_pos + atf_add_test_case zfsd_fault_002_pos atf_add_test_case zfsd_degrade_001_pos atf_add_test_case zfsd_degrade_002_pos atf_add_test_case zfsd_hotspare_001_pos atf_add_test_case zfsd_hotspare_002_pos atf_add_test_case zfsd_hotspare_003_pos atf_add_test_case zfsd_hotspare_004_pos atf_add_test_case zfsd_hotspare_005_pos atf_add_test_case zfsd_hotspare_006_pos atf_add_test_case zfsd_hotspare_007_pos atf_add_test_case zfsd_hotspare_008_neg atf_add_test_case zfsd_autoreplace_001_neg atf_add_test_case zfsd_autoreplace_002_pos atf_add_test_case zfsd_autoreplace_003_pos atf_add_test_case zfsd_replace_001_pos atf_add_test_case zfsd_replace_002_pos atf_add_test_case zfsd_replace_003_pos atf_add_test_case zfsd_import_001_pos } save_artifacts() { # If ARTIFACTS_DIR is defined, save test artifacts for # post-mortem analysis if [[ -n $ARTIFACTS_DIR ]]; then TC_ARTIFACTS_DIR=${ARTIFACTS_DIR}/sys/cddl/zfs/tests/zfsd/$(atf_get ident) mkdir -p $TC_ARTIFACTS_DIR cp -a /var/log/zfsd.log* $TC_ARTIFACTS_DIR bzip2 $TC_ARTIFACTS_DIR/zfsd.log fi } verify_zfsd_running() { service zfsd onestatus || \ atf_skip "zfsd(8) must be enabled and running for this test" }