Index: head/cddl/usr.sbin/zfsd/case_file.cc =================================================================== --- head/cddl/usr.sbin/zfsd/case_file.cc (revision 329343) +++ head/cddl/usr.sbin/zfsd/case_file.cc (revision 329344) @@ -1,1152 +1,1158 @@ /*- * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file case_file.cc * * We keep case files for any leaf vdev that is not in the optimal state. * However, we only serialize to disk those events that need to be preserved * across reboots. 
For now, this is just a log of soft errors which we * accumulate in order to mark a device as degraded. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "callout.h" #include "vdev_iterator.h" #include "zfsd_event.h" #include "case_file.h" #include "vdev.h" #include "zfsd.h" #include "zfsd_exception.h" #include "zpool_list.h" __FBSDID("$FreeBSD$"); /*============================ Namespace Control =============================*/ using std::auto_ptr; using std::hex; using std::ifstream; using std::stringstream; using std::setfill; using std::setw; using DevdCtl::Event; using DevdCtl::EventFactory; using DevdCtl::EventList; using DevdCtl::Guid; using DevdCtl::ParseException; /*--------------------------------- CaseFile ---------------------------------*/ //- CaseFile Static Data ------------------------------------------------------- CaseFileList CaseFile::s_activeCases; const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; //- CaseFile Static Public Methods --------------------------------------------- CaseFile * CaseFile::Find(Guid poolGUID, Guid vdevGUID) { for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) { if (((*curCase)->PoolGUID() != poolGUID && Guid::InvalidGuid() != poolGUID) || (*curCase)->VdevGUID() != vdevGUID) continue; /* * We only carry one active case per-vdev. */ return (*curCase); } return (NULL); } CaseFile * CaseFile::Find(const string &physPath) { CaseFile *result = NULL; for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) { if ((*curCase)->PhysicalPath() != physPath) continue; if (result != NULL) { syslog(LOG_WARNING, "Multiple casefiles found for " "physical path %s. 
" "This is most likely a bug in zfsd", physPath.c_str()); } result = *curCase; } return (result); } void CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) { CaseFileList::iterator casefile; for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ CaseFileList::iterator next = casefile; next++; if (poolGUID == (*casefile)->PoolGUID()) (*casefile)->ReEvaluate(event); casefile = next; } } CaseFile & CaseFile::Create(Vdev &vdev) { CaseFile *activeCase; activeCase = Find(vdev.PoolGUID(), vdev.GUID()); if (activeCase == NULL) activeCase = new CaseFile(vdev); return (*activeCase); } void CaseFile::DeSerialize() { struct dirent **caseFiles; int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, DeSerializeSelector, /*compar*/NULL)); if (numCaseFiles == -1) return; if (numCaseFiles == 0) { free(caseFiles); return; } for (int i = 0; i < numCaseFiles; i++) { DeSerializeFile(caseFiles[i]->d_name); free(caseFiles[i]); } free(caseFiles); } +bool +CaseFile::Empty() +{ + return (s_activeCases.empty()); +} + void CaseFile::LogAll() { for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) (*curCase)->Log(); } void CaseFile::PurgeAll() { /* * Serialize casefiles before deleting them so that they can be reread * and revalidated during BuildCaseFiles. * CaseFiles remove themselves from this list on destruction. */ while (s_activeCases.size() != 0) { CaseFile *casefile = s_activeCases.front(); casefile->Serialize(); delete casefile; } } //- CaseFile Public Methods ---------------------------------------------------- bool CaseFile::RefreshVdevState() { ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *casePool(zpl.empty() ? 
NULL : zpl.front()); if (casePool == NULL) return (false); Vdev vd(casePool, CaseVdev(casePool)); if (vd.DoesNotExist()) return (false); m_vdevState = vd.State(); m_vdevPhysPath = vd.PhysicalPath(); return (true); } bool CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) { ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); if (pool == NULL || !RefreshVdevState()) { /* * The pool or vdev for this case file is no longer * part of the configuration. This can happen * if we process a device arrival notification * before seeing the ZFS configuration change * event. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " "Closing\n", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); /* * Since this event was not used to close this * case, do not report it as consumed. */ return (/*consumed*/false); } if (VdevState() > VDEV_STATE_CANT_OPEN) { /* * For now, newly discovered devices only help for * devices that are missing. In the future, we might * use a newly inserted spare to replace a degraded * or faulted device. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", PoolGUIDString().c_str(), VdevGUIDString().c_str()); return (/*consumed*/false); } if (vdev != NULL && ( vdev->PoolGUID() == m_poolGUID || vdev->PoolGUID() == Guid::InvalidGuid()) && vdev->GUID() == m_vdevGUID) { zpool_vdev_online(pool, vdev->GUIDString().c_str(), ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &m_vdevState); syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", zpool_get_name(pool), vdev->GUIDString().c_str(), devPath.c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); /* * Check the vdev state post the online action to see * if we can retire this case. */ CloseIfSolved(); return (/*consumed*/true); } /* * If the auto-replace policy is enabled, and we have physical * path information, try a physical path replacement. 
*/ if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): AutoReplace not set. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } if (PhysicalPath().empty()) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): No physical path information. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } if (physPath != PhysicalPath()) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): Physical path mismatch. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } /* Write a label on the newly inserted disk. */ if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s) by physical path (label): %s: %s\n", zpool_get_name(pool), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); return (/*consumed*/false); } syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", PoolGUIDString().c_str(), VdevGUIDString().c_str(), devPath.c_str()); return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); } bool CaseFile::ReEvaluate(const ZfsEvent &event) { bool consumed(false); if (event.Value("type") == "misc.fs.zfs.vdev_remove") { /* * The Vdev we represent has been removed from the * configuration. This case is no longer of value. */ Close(); return (/*consumed*/true); } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { /* This Pool has been destroyed. 
Discard the case */ Close(); return (/*consumed*/true); } else if (event.Value("type") == "misc.fs.zfs.config_sync") { RefreshVdevState(); if (VdevState() < VDEV_STATE_HEALTHY) consumed = ActivateSpare(); } if (event.Value("class") == "resource.fs.zfs.removed") { bool spare_activated; if (!RefreshVdevState()) { /* * The pool or vdev for this case file is no longer * part of the configuration. This can happen * if we process a device arrival notification * before seeing the ZFS configuration change * event. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " "unconfigured. Closing\n", PoolGUIDString().c_str(), VdevGUIDString().c_str()); /* * Close the case now so we won't waste cycles in the * system rescan */ Close(); /* * Since this event was not used to close this * case, do not report it as consumed. */ return (/*consumed*/false); } /* * Discard any tentative I/O error events for * this case. They were most likely caused by the * hot-unplug of this device. */ PurgeTentativeEvents(); /* Try to activate spares if they are available */ spare_activated = ActivateSpare(); /* * Rescan the drives in the system to see if a recent * drive arrival can be used to solve this case. */ ZfsDaemon::RequestSystemRescan(); /* * Consume the event if we successfully activated a spare. * Otherwise, leave it in the unconsumed events list so that the * future addition of a spare to this pool might be able to * close the case */ consumed = spare_activated; } else if (event.Value("class") == "resource.fs.zfs.statechange") { RefreshVdevState(); /* * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to * activate a hotspare. 
Otherwise, ignore the event */ if (VdevState() == VDEV_STATE_FAULTED || VdevState() == VDEV_STATE_DEGRADED || VdevState() == VDEV_STATE_CANT_OPEN) (void) ActivateSpare(); consumed = true; } else if (event.Value("class") == "ereport.fs.zfs.io" || event.Value("class") == "ereport.fs.zfs.checksum") { m_tentativeEvents.push_front(event.DeepCopy()); RegisterCallout(event); consumed = true; } bool closed(CloseIfSolved()); return (consumed || closed); } /* Find a Vdev containing the vdev with the given GUID */ static nvlist_t* find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid) { nvlist_t **vdevChildren; int error; unsigned ch, numChildren; error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &vdevChildren, &numChildren); if (error != 0 || numChildren == 0) return (NULL); for (ch = 0; ch < numChildren; ch++) { nvlist *result; Vdev vdev(pool_config, vdevChildren[ch]); if (vdev.GUID() == child_guid) return (config); result = find_parent(pool_config, vdevChildren[ch], child_guid); if (result != NULL) return (result); } return (NULL); } bool CaseFile::ActivateSpare() { nvlist_t *config, *nvroot, *parent_config; nvlist_t **spares; char *devPath, *vdev_type; const char *poolname; u_int nspares, i; int error; ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? 
NULL : zpl.front()); if (zhp == NULL) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); return (false); } poolname = zpool_get_name(zhp); config = zpool_get_config(zhp, NULL); if (config == NULL) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " "config for pool %s", poolname); return (false); } error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); if (error != 0){ syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " "tree for pool %s", poolname); return (false); } parent_config = find_parent(config, nvroot, m_vdevGUID); if (parent_config != NULL) { char *parent_type; /* * Don't activate spares for members of a "replacing" vdev. * They're already dealt with. Sparing them will just drag out * the resilver process. */ error = nvlist_lookup_string(parent_config, ZPOOL_CONFIG_TYPE, &parent_type); if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0) return (false); } nspares = 0; nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares); if (nspares == 0) { /* The pool has no spares configured */ syslog(LOG_INFO, "CaseFile::ActivateSpare: " "No spares available for pool %s", poolname); return (false); } for (i = 0; i < nspares; i++) { uint64_t *nvlist_array; vdev_stat_t *vs; uint_t nstats; if (nvlist_lookup_uint64_array(spares[i], ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " "find vdev stats for pool %s, spare %d", poolname, i); return (false); } vs = reinterpret_cast(nvlist_array); if ((vs->vs_aux != VDEV_AUX_SPARED) && (vs->vs_state == VDEV_STATE_HEALTHY)) { /* We found a usable spare */ break; } } if (i == nspares) { /* No available spares were found */ return (false); } error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); if (error != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " "the path of pool %s, spare %d. 
Error %d", poolname, i, error); return (false); } error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); if (error != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " "the vdev type of pool %s, spare %d. Error %d", poolname, i, error); return (false); } return (Replace(vdev_type, devPath, /*isspare*/true)); } void CaseFile::RegisterCallout(const Event &event) { timeval now, countdown, elapsed, timestamp, zero, remaining; gettimeofday(&now, 0); timestamp = event.GetTimestamp(); timersub(&now, ×tamp, &elapsed); timersub(&s_removeGracePeriod, &elapsed, &countdown); /* * If countdown is <= zero, Reset the timer to the * smallest positive time value instead */ timerclear(&zero); if (timercmp(&countdown, &zero, <=)) { timerclear(&countdown); countdown.tv_usec = 1; } remaining = m_tentativeTimer.TimeRemaining(); if (!m_tentativeTimer.IsPending() || timercmp(&countdown, &remaining, <)) m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); } bool CaseFile::CloseIfSolved() { if (m_events.empty() && m_tentativeEvents.empty()) { /* * We currently do not track or take actions on * devices in the degraded or faulted state. * Once we have support for spare pools, we'll * retain these cases so that any spares added in * the future can be applied to them. */ switch (VdevState()) { case VDEV_STATE_HEALTHY: /* No need to keep cases for healthy vdevs */ Close(); return (true); case VDEV_STATE_REMOVED: case VDEV_STATE_CANT_OPEN: /* * Keep open. We may solve it with a newly inserted * device. */ case VDEV_STATE_FAULTED: case VDEV_STATE_DEGRADED: /* * Keep open. We may solve it with the future * addition of a spare to the pool */ case VDEV_STATE_UNKNOWN: case VDEV_STATE_CLOSED: case VDEV_STATE_OFFLINE: /* * Keep open? This may not be the correct behavior, * but it's what we've always done */ ; } /* * Re-serialize the case in order to remove any * previous event data. 
*/ Serialize(); } return (false); } void CaseFile::Log() { syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), PhysicalPath().c_str()); syslog(LOG_INFO, "\tVdev State = %s\n", zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); if (m_tentativeEvents.size() != 0) { syslog(LOG_INFO, "\t=== Tentative Events ===\n"); for (EventList::iterator event(m_tentativeEvents.begin()); event != m_tentativeEvents.end(); event++) (*event)->Log(LOG_INFO); } if (m_events.size() != 0) { syslog(LOG_INFO, "\t=== Events ===\n"); for (EventList::iterator event(m_events.begin()); event != m_events.end(); event++) (*event)->Log(LOG_INFO); } } //- CaseFile Static Protected Methods ------------------------------------------ void CaseFile::OnGracePeriodEnded(void *arg) { CaseFile &casefile(*static_cast(arg)); casefile.OnGracePeriodEnded(); } int CaseFile::DeSerializeSelector(const struct dirent *dirEntry) { uint64_t poolGUID; uint64_t vdevGUID; if (dirEntry->d_type == DT_REG && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", &poolGUID, &vdevGUID) == 2) return (1); return (0); } void CaseFile::DeSerializeFile(const char *fileName) { string fullName(s_caseFilePath + '/' + fileName); CaseFile *existingCaseFile(NULL); CaseFile *caseFile(NULL); try { uint64_t poolGUID; uint64_t vdevGUID; nvlist_t *vdevConf; if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", &poolGUID, &vdevGUID) != 2) { throw ZfsdException("CaseFile::DeSerialize: " "Unintelligible CaseFile filename %s.\n", fileName); } existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); if (existingCaseFile != NULL) { /* * If the vdev is already degraded or faulted, * there's no point in keeping the state around * that we use to put a drive into the degraded * state. However, if the vdev is simply missing, * preserve the case data in the hopes that it will * return. 
*/ caseFile = existingCaseFile; vdev_state curState(caseFile->VdevState()); if (curState > VDEV_STATE_CANT_OPEN && curState < VDEV_STATE_HEALTHY) { unlink(fileName); return; } } else { ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (zpl.empty() || (vdevConf = VdevIterator(zpl.front()) .Find(vdevGUID)) == NULL) { /* * Either the pool no longer exists * or this vdev is no longer a member of * the pool. */ unlink(fullName.c_str()); return; } /* * Any vdev we find that does not have a case file * must be in the healthy state and thus worthy of * continued SERD data tracking. */ caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); } ifstream caseStream(fullName.c_str()); if (!caseStream) throw ZfsdException("CaseFile::DeSerialize: Unable to " "read %s.\n", fileName); caseFile->DeSerialize(caseStream); } catch (const ParseException &exp) { exp.Log(); if (caseFile != existingCaseFile) delete caseFile; /* * Since we can't parse the file, unlink it so we don't * trip over it again. */ unlink(fileName); } catch (const ZfsdException &zfsException) { zfsException.Log(); if (caseFile != existingCaseFile) delete caseFile; } } //- CaseFile Protected Methods ------------------------------------------------- CaseFile::CaseFile(const Vdev &vdev) : m_poolGUID(vdev.PoolGUID()), m_vdevGUID(vdev.GUID()), m_vdevState(vdev.State()), m_vdevPhysPath(vdev.PhysicalPath()) { stringstream guidString; guidString << m_vdevGUID; m_vdevGUIDString = guidString.str(); guidString.str(""); guidString << m_poolGUID; m_poolGUIDString = guidString.str(); s_activeCases.push_back(this); syslog(LOG_INFO, "Creating new CaseFile:\n"); Log(); } CaseFile::~CaseFile() { PurgeEvents(); PurgeTentativeEvents(); m_tentativeTimer.Stop(); s_activeCases.remove(this); } void CaseFile::PurgeEvents() { for (EventList::iterator event(m_events.begin()); event != m_events.end(); event++) delete *event; m_events.clear(); } void CaseFile::PurgeTentativeEvents() { for (EventList::iterator event(m_tentativeEvents.begin()); 
event != m_tentativeEvents.end(); event++) delete *event; m_tentativeEvents.clear(); } void CaseFile::SerializeEvList(const EventList events, int fd, const char* prefix) const { if (events.empty()) return; for (EventList::const_iterator curEvent = events.begin(); curEvent != events.end(); curEvent++) { const string &eventString((*curEvent)->GetEventString()); // TODO: replace many write(2) calls with a single writev(2) if (prefix) write(fd, prefix, strlen(prefix)); write(fd, eventString.c_str(), eventString.length()); } } void CaseFile::Serialize() { stringstream saveFile; saveFile << setfill('0') << s_caseFilePath << "/" << "pool_" << PoolGUIDString() << "_vdev_" << VdevGUIDString() << ".case"; if (m_events.empty() && m_tentativeEvents.empty()) { unlink(saveFile.str().c_str()); return; } int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); if (fd == -1) { syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", saveFile.str().c_str()); return; } SerializeEvList(m_events, fd); SerializeEvList(m_tentativeEvents, fd, "tentative "); close(fd); } /* * XXX: This method assumes that events may not contain embedded newlines. If * ever events can contain embedded newlines, then CaseFile must switch * serialization formats */ void CaseFile::DeSerialize(ifstream &caseStream) { string evString; const EventFactory &factory(ZfsDaemon::Get().GetFactory()); caseStream >> std::noskipws >> std::ws; while (caseStream.good()) { /* * Outline: * read the beginning of a line and check it for * "tentative". If found, discard "tentative". 
* Create a new event * continue */ EventList* destEvents; const string tentFlag("tentative "); string line; std::stringbuf lineBuf; caseStream.get(lineBuf); caseStream.ignore(); /*discard the newline character*/ line = lineBuf.str(); if (line.compare(0, tentFlag.size(), tentFlag) == 0) { /* Discard "tentative" */ line.erase(0, tentFlag.size()); destEvents = &m_tentativeEvents; } else { destEvents = &m_events; } Event *event(Event::CreateEvent(factory, line)); if (event != NULL) { destEvents->push_back(event); RegisterCallout(*event); } } } void CaseFile::Close() { /* * This case is no longer relevant. Clean up our * serialization file, and delete the case. */ syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); /* * Serialization of a Case with no event data, clears the * Serialization data for that event. */ PurgeEvents(); Serialize(); delete this; } void CaseFile::OnGracePeriodEnded() { bool should_fault, should_degrade; ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); m_events.splice(m_events.begin(), m_tentativeEvents); should_fault = ShouldFault(); should_degrade = ShouldDegrade(); if (should_fault || should_degrade) { if (zhp == NULL || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { /* * Either the pool no longer exists * or this vdev is no longer a member of * the pool. */ Close(); return; } } /* A fault condition has priority over a degrade condition */ if (ShouldFault()) { /* Fault the vdev and close the case. 
*/ if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, VDEV_AUX_ERR_EXCEEDED) == 0) { syslog(LOG_INFO, "Faulting vdev(%s/%s)", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); return; } else { syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); } } else if (ShouldDegrade()) { /* Degrade the vdev and close the case. */ if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, VDEV_AUX_ERR_EXCEEDED) == 0) { syslog(LOG_INFO, "Degrading vdev(%s/%s)", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); return; } else { syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); } } Serialize(); } Vdev CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { Vdev vd(zhp, CaseVdev(zhp)); std::list children; std::list::iterator children_it; Vdev parent(vd.Parent()); Vdev replacing(NonexistentVdev); /* * To determine whether we are being replaced by another spare that * is still working, then make sure that it is currently spared and * that the spare is either resilvering or healthy. If any of these * conditions fail, then we are not being replaced by a spare. * * If the spare is healthy, then the case file should be closed very * soon after this check. */ if (parent.DoesNotExist() || parent.Name(zhp, /*verbose*/false) != "spare") return (NonexistentVdev); children = parent.Children(); children_it = children.begin(); for (;children_it != children.end(); children_it++) { Vdev child = *children_it; /* Skip our vdev. */ if (child.GUID() == VdevGUID()) continue; /* * Accept the first child that doesn't match our GUID, or * any resilvering/healthy device if one exists. 
*/ if (replacing.DoesNotExist() || child.IsResilvering() || child.State() == VDEV_STATE_HEALTHY) replacing = child; } return (replacing); } bool CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { nvlist_t *nvroot, *newvd; const char *poolname; string oldstr(VdevGUIDString()); bool retval = true; /* Figure out what pool we're working on */ ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); if (zhp == NULL) { syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); return (false); } poolname = zpool_get_name(zhp); Vdev vd(zhp, CaseVdev(zhp)); Vdev replaced(BeingReplacedBy(zhp)); if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { /* If we are already being replaced by a working spare, pass. */ if (replaced.IsResilvering() || replaced.State() == VDEV_STATE_HEALTHY) { syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " "replaced", VdevGUIDString().c_str(), path); return (/*consumed*/false); } /* * If we have already been replaced by a spare, but that spare * is broken, we must spare the spare, not the original device. */ oldstr = replaced.GUIDString(); syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " "broken spare %s instead", VdevGUIDString().c_str(), path, oldstr.c_str()); } /* * Build a root vdev/leaf vdev configuration suitable for * zpool_vdev_attach. Only enough data for the kernel to find * the device (i.e. type and disk device node path) are needed. 
*/ nvroot = NULL; newvd = NULL; if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " "configuration data.", poolname, oldstr.c_str()); if (nvroot != NULL) nvlist_free(nvroot); return (false); } if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd, 1) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " "configuration data.", poolname, oldstr.c_str()); nvlist_free(newvd); nvlist_free(nvroot); return (true); } /* Data was copied when added to the root vdev. */ nvlist_free(newvd); retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, /*replace*/B_TRUE) == 0); if (retval) syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", poolname, oldstr.c_str(), path); else syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); nvlist_free(nvroot); return (retval); } /* Does the argument event refer to a checksum error? */ static bool IsChecksumEvent(const Event* const event) { return ("ereport.fs.zfs.checksum" == event->Value("type")); } /* Does the argument event refer to an IO error? 
*/ static bool IsIOEvent(const Event* const event) { return ("ereport.fs.zfs.io" == event->Value("type")); } bool CaseFile::ShouldDegrade() const { return (std::count_if(m_events.begin(), m_events.end(), IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); } bool CaseFile::ShouldFault() const { return (std::count_if(m_events.begin(), m_events.end(), IsIOEvent) > ZFS_DEGRADE_IO_COUNT); } nvlist_t * CaseFile::CaseVdev(zpool_handle_t *zhp) const { return (VdevIterator(zhp).Find(VdevGUID())); } Index: head/cddl/usr.sbin/zfsd/case_file.h =================================================================== --- head/cddl/usr.sbin/zfsd/case_file.h (revision 329343) +++ head/cddl/usr.sbin/zfsd/case_file.h (revision 329344) @@ -1,428 +1,433 @@ /*- * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * * $FreeBSD$ */ /** * \file case_file.h * * CaseFile objects aggregate vdev faults that may require ZFSD action * in order to maintain the health of a ZFS pool. * * Header requirements: * * #include * * #include "callout.h" * #include "zfsd_event.h" */ #ifndef _CASE_FILE_H_ #define _CASE_FILE_H_ /*=========================== Forward Declarations ===========================*/ class CaseFile; class Vdev; /*============================= Class Definitions ============================*/ /*------------------------------- CaseFileList -------------------------------*/ /** * CaseFileList is a specialization of the standard list STL container. */ typedef std::list< CaseFile *> CaseFileList; /*--------------------------------- CaseFile ---------------------------------*/ /** * A CaseFile object is instantiated anytime a vdev for an active pool * experiences an I/O error, is faulted by ZFS, or is determined to be * missing/removed. * * A vdev may have at most one CaseFile. * * CaseFiles are retired when a vdev leaves an active pool configuration * or an action is taken to resolve the issues recorded in the CaseFile. * * Logging a case against a vdev does not imply that an immediate action * to resolve a fault is required or even desired. For example, a CaseFile * must accumulate a number of I/O errors in order to flag a device as * degraded. * * Vdev I/O errors are not recorded in ZFS label information.
For this * reason, CaseFile%%s with accumulated I/O error events are serialized * to the file system so that they survive across boots. Currently all * other fault types can be reconstructed from ZFS label information, so * CaseFile%%s for missing, faulted, or degraded members are just recreated * at ZFSD startup instead of being deserialized from the file system. */ class CaseFile { public: /** * \brief Find a CaseFile object by a vdev's pool/vdev GUID tuple. * * \param poolGUID Pool GUID for the vdev of the CaseFile to find. * If InvalidGuid, then only match the vdev GUID * instead of both pool and vdev GUIDs. * \param vdevGUID Vdev GUID for the vdev of the CaseFile to find. * * \return If found, a pointer to a valid CaseFile object. * Otherwise NULL. */ static CaseFile *Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID); /** * \brief Find a CaseFile object by a vdev's current/last known * physical path. * * \param physPath Physical path of the vdev of the CaseFile to find. * * \return If found, a pointer to a valid CaseFile object. * Otherwise NULL. */ static CaseFile *Find(const string &physPath); /** * \brief ReEvaluate all open cases whose pool guid matches the argument * * \param poolGUID Only reevaluate cases for this pool * \param event Try to consume this event with the casefile */ static void ReEvaluateByGuid(DevdCtl::Guid poolGUID, const ZfsEvent &event); /** * \brief Create or return an existing active CaseFile for the * specified vdev. * * \param vdev The vdev object for which to find/create a CaseFile. * * \return A reference to a valid CaseFile object. */ static CaseFile &Create(Vdev &vdev); /** * \brief Deserialize all serialized CaseFile objects found in * the file system. */ static void DeSerialize(); /** + * \brief returns true if there are no CaseFiles + */ + static bool Empty(); + + /** * \brief Emit syslog data on all active CaseFile%%s in the system. */ static void LogAll(); /** * \brief Destroy the in-core cache of CaseFile data.
* * This routine does not disturb the on disk, serialized, CaseFile * data. */ static void PurgeAll(); DevdCtl::Guid PoolGUID() const; DevdCtl::Guid VdevGUID() const; vdev_state VdevState() const; const string &PoolGUIDString() const; const string &VdevGUIDString() const; const string &PhysicalPath() const; /** * \brief Attempt to resolve this CaseFile using the disk * resource at the given device/physical path/vdev object * tuple. * * \param devPath The devfs path for the disk resource. * \param physPath The physical path information reported by * the disk resource. * \param vdev If the disk contains ZFS label information, * a pointer to the disk label's vdev object * data. Otherwise NULL. * * \return True if this event was consumed by this CaseFile. */ bool ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev); /** * \brief Update this CaseFile in light of the provided ZfsEvent. * * Must be virtual so it can be overridden in the unit tests * * \param event The ZfsEvent to evaluate. * * \return True if this event was consumed by this CaseFile. */ virtual bool ReEvaluate(const ZfsEvent &event); /** * \brief Register an itimer callout for the given event, if necessary */ virtual void RegisterCallout(const DevdCtl::Event &event); /** * \brief Close a case if it is no longer relevant. * * This method deals with cases tracking soft errors. Soft errors * will be discarded should a remove event occur within a short period * of the soft errors being reported. We also discard the events * if the vdev is marked degraded or failed. * * \return True if the case is closed. False otherwise. */ bool CloseIfSolved(); /** * \brief Emit data about this CaseFile via syslog(3).
*/ void Log(); /** * \brief Whether we should degrade this vdev */ bool ShouldDegrade() const; /** * \brief Whether we should fault this vdev */ bool ShouldFault() const; protected: enum { /** * The number of soft errors on a vdev required * to transition a vdev from healthy to degraded * status. */ ZFS_DEGRADE_IO_COUNT = 50 }; static CalloutFunc_t OnGracePeriodEnded; /** * \brief scandir(3) filter function used to find files containing * serialized CaseFile data. * * \param dirEntry Directory entry for the file to filter. * * \return Non-zero for a file to include in the selection, * otherwise 0. */ static int DeSerializeSelector(const struct dirent *dirEntry); /** * \brief Given the name of a file containing serialized events from a * CaseFile object, create/update an in-core CaseFile object * representing the serialized data. * * \param fileName The name of a file containing serialized events * from a CaseFile object. */ static void DeSerializeFile(const char *fileName); /** Constructor. */ CaseFile(const Vdev &vdev); /** * Destructor. * Must be virtual so it can be subclassed in the unit tests */ virtual ~CaseFile(); /** * \brief Reload state for the vdev associated with this CaseFile. * * \return True if the refresh was successful. False if the system * has no record of the pool or vdev for this CaseFile. */ virtual bool RefreshVdevState(); /** * \brief Free all events in the m_events list. */ void PurgeEvents(); /** * \brief Free all events in the m_tentativeEvents list. */ void PurgeTentativeEvents(); /** * \brief Commit to file system storage. */ void Serialize(); /** * \brief Retrieve event data from a serialization stream. * * \param caseStream The serialization stream to parse. */ void DeSerialize(std::ifstream &caseStream); /** * \brief Serializes the supplied event list and writes it to fd * * \param events The event list to serialize. NOTE(review): taken by * value, so the list object is copied on every call; * a const reference would avoid that but needs the * definition in case_file.cc changed to match. * \param fd The fd the serialized events are written to. * \param prefix If not NULL, this prefix will be prepended to * every event in the file.
*/ void SerializeEvList(const DevdCtl::EventList events, int fd, const char* prefix=NULL) const; /** * \brief Unconditionally close a CaseFile. */ virtual void Close(); /** * \brief Callout callback invoked when the remove timer grace * period expires. * * If no remove events are received prior to the grace period * firing, then any tentative events are promoted and counted * against the health of the vdev. */ void OnGracePeriodEnded(); /** * \brief Attempt to activate a spare on this case's pool. * * Call this whenever a pool becomes degraded. It will look for any * spare devices and activate one to replace the casefile's vdev. It * will _not_ close the casefile; that should only happen when the * missing drive is replaced or the user promotes the spare. * * \return True if a spare was activated */ bool ActivateSpare(); /** * \brief Replace a pool's vdev with another * * \param vdev_type The type of the new vdev. Usually either * VDEV_TYPE_DISK or VDEV_TYPE_FILE * \param path The file system path to the new vdev * \param isspare Whether the new vdev is a spare * * \return true iff the replacement was successful */ bool Replace(const char* vdev_type, const char* path, bool isspare); /** * \brief Which vdev, if any, is replacing ours. * * \param zhp Pool handle state from the caller context * * \return the vdev that is currently replacing ours, * or NonexistentVdev if there isn't one. */ Vdev BeingReplacedBy(zpool_handle_t *zhp); /** * \brief All CaseFiles being tracked by ZFSD. */ static CaseFileList s_activeCases; /** * \brief The file system path to serialized CaseFile data. */ static const string s_caseFilePath; /** * \brief The time ZFSD waits before promoting a tentative event * into a permanent event. */ static const timeval s_removeGracePeriod; /** * \brief A list of soft error events counted against the health of * a vdev.
*/ DevdCtl::EventList m_events; /** * \brief A list of soft error events waiting for a grace period * expiration before being counted against the health of * a vdev. */ DevdCtl::EventList m_tentativeEvents; DevdCtl::Guid m_poolGUID; DevdCtl::Guid m_vdevGUID; vdev_state m_vdevState; string m_poolGUIDString; string m_vdevGUIDString; string m_vdevPhysPath; /** * \brief Callout used to time the tentative-event grace period. * * NOTE(review): the original sentence was truncated ("activated * when a grace period"); presumably this is the timer whose expiry * runs OnGracePeriodEnded -- confirm in case_file.cc. */ Callout m_tentativeTimer; private: nvlist_t *CaseVdev(zpool_handle_t *zhp) const; }; inline DevdCtl::Guid CaseFile::PoolGUID() const { return (m_poolGUID); } inline DevdCtl::Guid CaseFile::VdevGUID() const { return (m_vdevGUID); } inline vdev_state CaseFile::VdevState() const { return (m_vdevState); } inline const string & CaseFile::PoolGUIDString() const { return (m_poolGUIDString); } inline const string & CaseFile::VdevGUIDString() const { return (m_vdevGUIDString); } inline const string & CaseFile::PhysicalPath() const { return (m_vdevPhysPath); } #endif /* _CASE_FILE_H_ */ Index: head/cddl/usr.sbin/zfsd/zfsd_event.cc =================================================================== --- head/cddl/usr.sbin/zfsd/zfsd_event.cc (revision 329343) +++ head/cddl/usr.sbin/zfsd/zfsd_event.cc (revision 329344) @@ -1,473 +1,481 @@ /*- * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution.
* * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file zfsd_event.cc */ #include #include #include #include #include #include /* * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with * C++ flush methods */ #undef flush #include #include #include #include #include #include #include #include #include #include "callout.h" #include "vdev_iterator.h" #include "zfsd_event.h" #include "case_file.h" #include "vdev.h" #include "zfsd.h" #include "zfsd_exception.h" #include "zpool_list.h" __FBSDID("$FreeBSD$"); /*============================ Namespace Control =============================*/ using DevdCtl::Event; using DevdCtl::Guid; using DevdCtl::NVPairMap; using std::stringstream; /*=========================== Class Implementations ==========================*/ /*-------------------------------- GeomEvent --------------------------------*/ //- GeomEvent Static Public Methods ------------------------------------------- Event * GeomEvent::Builder(Event::Type type, NVPairMap &nvPairs, const string &eventString) { return (new GeomEvent(type, nvPairs, eventString)); } //- GeomEvent Virtual Public Methods ------------------------------------------ Event * GeomEvent::DeepCopy() const { return
(new GeomEvent(*this)); } bool GeomEvent::Process() const { /* - * We are only concerned with create arrivals and physical path changes, - * because those can be used to satisfy online and autoreplace operations + * We only use GEOM events to repair damaged pools. So return early if + * there are no damaged pools + */ + if (CaseFile::Empty()) + return (false); + + /* + * We are only concerned with arrivals and physical path changes, + * because those can be used to satisfy online and autoreplace + * operations */ if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE") return (false); /* Log the event since it is of interest. */ Log(LOG_INFO); string devPath; if (!DevPath(devPath)) return (false); int devFd(open(devPath.c_str(), O_RDONLY)); if (devFd == -1) return (false); bool inUse; bool degraded; nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded)); /* * NOTE(review): no nvlist_free(devLabel) is visible on any path * below or in OnlineByLabel(); confirm whether the label is * released elsewhere or is leaked once per processed event. */ string physPath; bool havePhysPath(PhysicalPath(physPath)); string devName; DevName(devName); close(devFd); if (inUse && devLabel != NULL) { OnlineByLabel(devPath, physPath, devLabel); } else if (degraded) { syslog(LOG_INFO, "%s is marked degraded.
Ignoring " "as a replace by physical path candidate.\n", devName.c_str()); } else if (havePhysPath) { /* * TODO: attempt to resolve events using every casefile * that matches this physpath */ CaseFile *caseFile(CaseFile::Find(physPath)); if (caseFile != NULL) { syslog(LOG_INFO, "Found CaseFile(%s:%s:%s) - ReEvaluating\n", caseFile->PoolGUIDString().c_str(), caseFile->VdevGUIDString().c_str(), zpool_state_to_name(caseFile->VdevState(), VDEV_AUX_NONE)); caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL); } } return (false); } //- GeomEvent Protected Methods ----------------------------------------------- GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : DevdCtl::GeomEvent(type, nvpairs, eventString) { } GeomEvent::GeomEvent(const GeomEvent &src) : DevdCtl::GeomEvent::GeomEvent(src) { } /* * Read the ZFS label from the open device devFd. * * Both outputs are first initialised to false. inUse is then set from * the flag reported by zpool_in_use(); degraded is set when the vdev * built from the label is not in VDEV_STATE_HEALTHY. * * Returns the label nvlist, or NULL when zpool_in_use() fails, when the * device does not carry exactly VDEV_LABELS labels, or when building a * Vdev from the label throws ZfsdException (the label is freed and the * exception logged in that case). */ nvlist_t * GeomEvent::ReadLabel(int devFd, bool &inUse, bool &degraded) { pool_state_t poolState; char *poolName; boolean_t b_inuse; int nlabels; inUse = false; degraded = false; poolName = NULL; if (zpool_in_use(g_zfsHandle, devFd, &poolState, &poolName, &b_inuse) == 0) { nvlist_t *devLabel = NULL; inUse = b_inuse == B_TRUE; if (poolName != NULL) free(poolName); nlabels = zpool_read_all_labels(devFd, &devLabel); /* * If we find a disk with fewer than the maximum number of * labels, it might be the whole disk of a partitioned disk * where ZFS resides on a partition. In that case, we should do * nothing and wait for the partition to appear. Or, the disk * might be damaged. In that case, zfsd should do nothing and * wait for the sysadmin to decide.
*/ if (nlabels != VDEV_LABELS || devLabel == NULL) { nvlist_free(devLabel); return (NULL); } try { Vdev vdev(devLabel); degraded = vdev.State() != VDEV_STATE_HEALTHY; return (devLabel); } catch (ZfsdException &exp) { string devName = fdevname(devFd); string devPath = _PATH_DEV + devName; string context("GeomEvent::ReadLabel: " + devPath + ": "); exp.GetString().insert(0, context); exp.Log(); nvlist_free(devLabel); } } return (NULL); } bool GeomEvent::OnlineByLabel(const string &devPath, const string& physPath, nvlist_t *devConfig) { try { /* * A device with ZFS label information has been * inserted. If it matches a device for which we * have a case, see if we can solve that case. */ syslog(LOG_INFO, "Interrogating VDEV label for %s\n", devPath.c_str()); Vdev vdev(devConfig); CaseFile *caseFile(CaseFile::Find(vdev.PoolGUID(), vdev.GUID())); if (caseFile != NULL) return (caseFile->ReEvaluate(devPath, physPath, &vdev)); } catch (ZfsdException &exp) { string context("GeomEvent::OnlineByLabel: " + devPath + ": "); exp.GetString().insert(0, context); exp.Log(); } return (false); } /*--------------------------------- ZfsEvent ---------------------------------*/ //- ZfsEvent Static Public Methods --------------------------------------------- DevdCtl::Event * ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new ZfsEvent(type, nvpairs, eventString)); } //- ZfsEvent Virtual Public Methods -------------------------------------------- Event * ZfsEvent::DeepCopy() const { return (new ZfsEvent(*this)); } bool ZfsEvent::Process() const { string logstr(""); if (!Contains("class") && !Contains("type")) { syslog(LOG_ERR, "ZfsEvent::Process: Missing class or type data."); return (false); } /* On config syncs, replay any queued events first. */ if (Value("type").find("misc.fs.zfs.config_sync") == 0) { /* * Even if saved events are unconsumed the second time * around, drop them.
Any events that still can't be * consumed are probably referring to vdevs or pools that * no longer exist. */ ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true); CaseFile::ReEvaluateByGuid(PoolGUID(), *this); } if (Value("type").find("misc.fs.zfs.") == 0) { /* Configuration changes, resilver events, etc. */ ProcessPoolEvent(); return (false); } if (!Contains("pool_guid") || !Contains("vdev_guid")) { /* Only currently interested in Vdev related events. */ return (false); } CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); if (caseFile != NULL) { Log(LOG_INFO); syslog(LOG_INFO, "Evaluating existing case file\n"); caseFile->ReEvaluate(*this); return (false); } /* Skip events that can't be handled. */ Guid poolGUID(PoolGUID()); /* If there are no replicas for a pool, then it's not manageable. */ if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) { stringstream msg; msg << "No replicas available for pool " << poolGUID; msg << ", ignoring"; Log(LOG_INFO); syslog(LOG_INFO, "%s", msg.str().c_str()); return (false); } /* * Create a case file for this vdev, and have it * evaluate the event.
*/ ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (zpl.empty()) { stringstream msg; int priority = LOG_INFO; msg << "ZfsEvent::Process: Event for unknown pool "; msg << poolGUID << " "; msg << "queued"; Log(LOG_INFO); syslog(priority, "%s", msg.str().c_str()); return (true); } nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID()); if (vdevConfig == NULL) { stringstream msg; int priority = LOG_INFO; msg << "ZfsEvent::Process: Event for unknown vdev "; msg << VdevGUID() << " "; msg << "queued"; Log(LOG_INFO); syslog(priority, "%s", msg.str().c_str()); return (true); } Vdev vdev(zpl.front(), vdevConfig); caseFile = &CaseFile::Create(vdev); if (caseFile->ReEvaluate(*this) == false) { stringstream msg; int priority = LOG_INFO; msg << "ZfsEvent::Process: Unconsumed event for vdev("; msg << zpool_get_name(zpl.front()) << ","; msg << vdev.GUID() << ") "; msg << "queued"; Log(LOG_INFO); syslog(priority, "%s", msg.str().c_str()); return (true); } return (false); } //- ZfsEvent Protected Methods ------------------------------------------------- ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : DevdCtl::ZfsEvent(type, nvpairs, eventString) { } ZfsEvent::ZfsEvent(const ZfsEvent &src) : DevdCtl::ZfsEvent(src) { } /* * Sometimes the kernel won't detach a spare when it is no longer needed. This * can happen for example if a drive is removed, then either the pool is * exported or the machine is powered off, then the drive is reinserted, then * the machine is powered on or the pool is imported. ZFSD must detach these * spares itself. */ void ZfsEvent::CleanupSpares() const { Guid poolGUID(PoolGUID()); ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (!zpl.empty()) { zpool_handle_t* hdl; hdl = zpl.front(); VdevIterator(hdl).Each(TryDetach, (void*)hdl); } } void ZfsEvent::ProcessPoolEvent() const { bool degradedDevice(false); /* The pool is destroyed.
Discard any open cases */ if (Value("type") == "misc.fs.zfs.pool_destroy") { Log(LOG_INFO); CaseFile::ReEvaluateByGuid(PoolGUID(), *this); return; } CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); if (caseFile != NULL) { if (caseFile->VdevState() != VDEV_STATE_UNKNOWN && caseFile->VdevState() < VDEV_STATE_HEALTHY) degradedDevice = true; Log(LOG_INFO); caseFile->ReEvaluate(*this); } else if (Value("type") == "misc.fs.zfs.resilver_finish") { /* * It's possible to get a resilver_finish event with no * corresponding casefile. For example, if a damaged pool were * exported, repaired, then reimported. */ Log(LOG_INFO); CleanupSpares(); } if (Value("type") == "misc.fs.zfs.vdev_remove" && degradedDevice == false) { /* See if any other cases can make use of this device. */ Log(LOG_INFO); ZfsDaemon::RequestSystemRescan(); } } bool ZfsEvent::TryDetach(Vdev &vdev, void *cbArg) { /* * Outline: * if this device is a spare, and its parent includes one healthy, * non-spare child, then detach this device. */ /* cbArg is the pool handle that CleanupSpares() passed to Each(). */ zpool_handle_t *hdl(static_cast<zpool_handle_t*>(cbArg)); if (vdev.IsSpare()) { std::list<Vdev> siblings; std::list<Vdev>::iterator siblings_it; boolean_t cleanup = B_FALSE; Vdev parent = vdev.Parent(); siblings = parent.Children(); /* Determine whether the parent should be cleaned up */ for (siblings_it = siblings.begin(); siblings_it != siblings.end(); siblings_it++) { Vdev sibling = *siblings_it; if (!sibling.IsSpare() && sibling.State() == VDEV_STATE_HEALTHY) { cleanup = B_TRUE; break; } } if (cleanup) { syslog(LOG_INFO, "Detaching spare vdev %s from pool %s", vdev.Path().c_str(), zpool_get_name(hdl)); zpool_vdev_detach(hdl, vdev.Path().c_str()); } } /* Always return false, because there may be other spares to detach */ return (false); }