Index: projects/zfsd/head/cddl/usr.sbin/zfsd/case_file.cc =================================================================== --- projects/zfsd/head/cddl/usr.sbin/zfsd/case_file.cc (revision 300689) +++ projects/zfsd/head/cddl/usr.sbin/zfsd/case_file.cc (revision 300690) @@ -1,1103 +1,1104 @@ /*- * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file case_file.cc * * We keep case files for any leaf vdev that is not in the optimal state. * However, we only serialize to disk those events that need to be preserved * across reboots. For now, this is just a log of soft errors which we * accumulate in order to mark a device as degraded. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "callout.h" #include "vdev_iterator.h" #include "zfsd_event.h" #include "case_file.h" #include "vdev.h" #include "zfsd.h" #include "zfsd_exception.h" #include "zpool_list.h" __FBSDID("$FreeBSD$"); /*============================ Namespace Control =============================*/ using std::auto_ptr; using std::hex; using std::ifstream; using std::stringstream; using std::setfill; using std::setw; using DevdCtl::Event; using DevdCtl::EventBuffer; using DevdCtl::EventFactory; using DevdCtl::EventList; using DevdCtl::Guid; using DevdCtl::ParseException; /*--------------------------------- CaseFile ---------------------------------*/ //- CaseFile Static Data ------------------------------------------------------- CaseFileList CaseFile::s_activeCases; const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; //- CaseFile Static Public Methods --------------------------------------------- CaseFile * CaseFile::Find(Guid poolGUID, Guid vdevGUID) { for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) { if ((*curCase)->PoolGUID() != poolGUID || (*curCase)->VdevGUID() != vdevGUID) continue; /* * We only carry one active case per-vdev. */ return (*curCase); } return (NULL); } CaseFile * CaseFile::Find(const string &physPath) { CaseFile *result = NULL; for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) { if ((*curCase)->PhysicalPath() != physPath) continue; if (result != NULL) { syslog(LOG_WARNING, "Multiple casefiles found for " "physical path %s. " "This is most likely a bug in zfsd", physPath.c_str()); } result = *curCase; } return (result); } void CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) { CaseFileList::iterator casefile; for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ - CaseFileList::iterator next = std::next(casefile); + CaseFileList::iterator next = casefile; + next++; if (poolGUID == (*casefile)->PoolGUID()) (*casefile)->ReEvaluate(event); casefile = next; } } CaseFile & CaseFile::Create(Vdev &vdev) { CaseFile *activeCase; activeCase = Find(vdev.PoolGUID(), vdev.GUID()); if (activeCase == NULL) activeCase = new CaseFile(vdev); return (*activeCase); } void CaseFile::DeSerialize() { struct dirent **caseFiles; int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, DeSerializeSelector, /*compar*/NULL)); if (numCaseFiles == -1) return; if (numCaseFiles == 0) { free(caseFiles); return; } for (int i = 0; i < numCaseFiles; i++) { DeSerializeFile(caseFiles[i]->d_name); free(caseFiles[i]); } free(caseFiles); } void CaseFile::LogAll() { for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) (*curCase)->Log(); } void CaseFile::PurgeAll() { /* * Serialize casefiles before deleting them so that they can be reread * and revalidated during BuildCaseFiles. * CaseFiles remove themselves from this list on destruction. */ while (s_activeCases.size() != 0) { CaseFile *casefile = s_activeCases.front(); casefile->Serialize(); delete casefile; } } //- CaseFile Public Methods ---------------------------------------------------- bool CaseFile::RefreshVdevState() { ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); if (casePool == NULL) return (false); Vdev vd(casePool, CaseVdev(casePool)); if (vd.DoesNotExist()) return (false); m_vdevState = vd.State(); m_vdevPhysPath = vd.PhysicalPath(); return (true); } bool CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) { ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); if (pool == NULL || !RefreshVdevState()) { /* * The pool or vdev for this case file is no longer * part of the configuration. This can happen * if we process a device arrival notification * before seeing the ZFS configuration change * event. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " "Closing\n", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); /* * Since this event was not used to close this * case, do not report it as consumed. */ return (/*consumed*/false); } if (VdevState() > VDEV_STATE_CANT_OPEN) { /* * For now, newly discovered devices only help for * devices that are missing. In the future, we might * use a newly inserted spare to replace a degraded * or faulted device. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", PoolGUIDString().c_str(), VdevGUIDString().c_str()); return (/*consumed*/false); } if (vdev != NULL && vdev->PoolGUID() == m_poolGUID && vdev->GUID() == m_vdevGUID) { zpool_vdev_online(pool, vdev->GUIDString().c_str(), ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &m_vdevState); syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", zpool_get_name(pool), vdev->GUIDString().c_str(), devPath.c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); /* * Check the vdev state post the online action to see * if we can retire this case. */ CloseIfSolved(); return (/*consumed*/true); } /* * If the auto-replace policy is enabled, and we have physical * path information, try a physical path replacement. */ if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): AutoReplace not set. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } if (PhysicalPath().empty()) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): No physical path information. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } if (physPath != PhysicalPath()) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): Physical path mismatch. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } /* Write a label on the newly inserted disk. */ if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s) by physical path (label): %s: %s\n", zpool_get_name(pool), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); return (/*consumed*/false); } syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", PoolGUIDString().c_str(), VdevGUIDString().c_str(), devPath.c_str()); return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); } bool CaseFile::ReEvaluate(const ZfsEvent &event) { bool consumed(false); if (event.Value("type") == "misc.fs.zfs.vdev_remove") { /* * The Vdev we represent has been removed from the * configuration. This case is no longer of value. */ Close(); return (/*consumed*/true); } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { /* This Pool has been destroyed. Discard the case */ Close(); return (/*consumed*/true); } else if (event.Value("type") == "misc.fs.zfs.config_sync") { RefreshVdevState(); if (VdevState() < VDEV_STATE_HEALTHY) consumed = ActivateSpare(); } if (event.Value("class") == "resource.fs.zfs.removed") { bool spare_activated; if (!RefreshVdevState()) { /* * The pool or vdev for this case file is no longer * part of the configuration. This can happen * if we process a device arrival notification * before seeing the ZFS configuration change * event. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " "unconfigured. Closing\n", PoolGUIDString().c_str(), VdevGUIDString().c_str()); /* * Close the case now so we won't waste cycles in the * system rescan */ Close(); /* * Since this event was not used to close this * case, do not report it as consumed. */ return (/*consumed*/false); } /* * Discard any tentative I/O error events for * this case. They were most likely caused by the * hot-unplug of this device. */ PurgeTentativeEvents(); /* Try to activate spares if they are available */ spare_activated = ActivateSpare(); /* * Rescan the drives in the system to see if a recent * drive arrival can be used to solve this case. */ ZfsDaemon::RequestSystemRescan(); /* * Consume the event if we successfully activated a spare. * Otherwise, leave it in the unconsumed events list so that the * future addition of a spare to this pool might be able to * close the case */ consumed = spare_activated; } else if (event.Value("class") == "resource.fs.zfs.statechange") { RefreshVdevState(); /* * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to * activate a hotspare. Otherwise, ignore the event */ if (VdevState() == VDEV_STATE_FAULTED || VdevState() == VDEV_STATE_DEGRADED || VdevState() == VDEV_STATE_CANT_OPEN) (void) ActivateSpare(); consumed = true; } else if (event.Value("class") == "ereport.fs.zfs.io" || event.Value("class") == "ereport.fs.zfs.checksum") { m_tentativeEvents.push_front(event.DeepCopy()); RegisterCallout(event); consumed = true; } bool closed(CloseIfSolved()); return (consumed || closed); } bool CaseFile::ActivateSpare() { nvlist_t *config, *nvroot; nvlist_t **spares; char *devPath, *vdev_type; const char *poolname; u_int nspares, i; int error; ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); if (zhp == NULL) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " "for pool_guid %"PRIu64".", (uint64_t)m_poolGUID); return (false); } poolname = zpool_get_name(zhp); config = zpool_get_config(zhp, NULL); if (config == NULL) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " "config for pool %s", poolname); return (false); } error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); if (error != 0){ syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " "tree for pool %s", poolname); return (false); } nspares = 0; nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares); if (nspares == 0) { /* The pool has no spares configured */ syslog(LOG_INFO, "CaseFile::ActivateSpare: " "No spares available for pool %s", poolname); return (false); } for (i = 0; i < nspares; i++) { uint64_t *nvlist_array; vdev_stat_t *vs; uint_t nstats; if (nvlist_lookup_uint64_array(spares[i], ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " "find vdev stats for pool %s, spare %d", poolname, i); return (false); } vs = reinterpret_cast(nvlist_array); if ((vs->vs_aux != VDEV_AUX_SPARED) && (vs->vs_state == VDEV_STATE_HEALTHY)) { /* We found a usable spare */ break; } } if (i == nspares) { /* No available spares were found */ return (false); } error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); if (error != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " "the path of pool %s, spare %d. Error %d", poolname, i, error); return (false); } error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); if (error != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " "the vdev type of pool %s, spare %d. Error %d", poolname, i, error); return (false); } return (Replace(vdev_type, devPath, /*isspare*/true)); } void CaseFile::RegisterCallout(const Event &event) { timeval now, countdown, elapsed, timestamp, zero, remaining; gettimeofday(&now, 0); timestamp = event.GetTimestamp(); timersub(&now, ×tamp, &elapsed); timersub(&s_removeGracePeriod, &elapsed, &countdown); /* * If countdown is <= zero, Reset the timer to the * smallest positive time value instead */ timerclear(&zero); if (timercmp(&countdown, &zero, <=)) { timerclear(&countdown); countdown.tv_usec = 1; } remaining = m_tentativeTimer.TimeRemaining(); if (!m_tentativeTimer.IsPending() || timercmp(&countdown, &remaining, <)) m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); } bool CaseFile::CloseIfSolved() { if (m_events.empty() && m_tentativeEvents.empty()) { /* * We currently do not track or take actions on * devices in the degraded or faulted state. * Once we have support for spare pools, we'll * retain these cases so that any spares added in * the future can be applied to them. */ switch (VdevState()) { case VDEV_STATE_HEALTHY: /* No need to keep cases for healthy vdevs */ Close(); return (true); case VDEV_STATE_REMOVED: case VDEV_STATE_CANT_OPEN: /* * Keep open. We may solve it with a newly inserted * device. */ case VDEV_STATE_FAULTED: case VDEV_STATE_DEGRADED: /* * Keep open. We may solve it with the future * addition of a spare to the pool */ case VDEV_STATE_UNKNOWN: case VDEV_STATE_CLOSED: case VDEV_STATE_OFFLINE: /* * Keep open? This may not be the correct behavior, * but it's what we've always done */ ; } /* * Re-serialize the case in order to remove any * previous event data. */ Serialize(); } return (false); } void CaseFile::Log() { syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), PhysicalPath().c_str()); syslog(LOG_INFO, "\tVdev State = %s\n", zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); if (m_tentativeEvents.size() != 0) { syslog(LOG_INFO, "\t=== Tentative Events ===\n"); for (EventList::iterator event(m_tentativeEvents.begin()); event != m_tentativeEvents.end(); event++) (*event)->Log(LOG_INFO); } if (m_events.size() != 0) { syslog(LOG_INFO, "\t=== Events ===\n"); for (EventList::iterator event(m_events.begin()); event != m_events.end(); event++) (*event)->Log(LOG_INFO); } } //- CaseFile Static Protected Methods ------------------------------------------ void CaseFile::OnGracePeriodEnded(void *arg) { CaseFile &casefile(*static_cast(arg)); casefile.OnGracePeriodEnded(); } int CaseFile::DeSerializeSelector(const struct dirent *dirEntry) { uint64_t poolGUID; uint64_t vdevGUID; if (dirEntry->d_type == DT_REG && sscanf(dirEntry->d_name, "pool_%"PRIu64"_vdev_%"PRIu64".case", &poolGUID, &vdevGUID) == 2) return (1); return (0); } void CaseFile::DeSerializeFile(const char *fileName) { string fullName(s_caseFilePath + '/' + fileName); CaseFile *existingCaseFile(NULL); CaseFile *caseFile(NULL); try { uint64_t poolGUID; uint64_t vdevGUID; nvlist_t *vdevConf; sscanf(fileName, "pool_%"PRIu64"_vdev_%"PRIu64".case", &poolGUID, &vdevGUID); existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); if (existingCaseFile != NULL) { /* * If the vdev is already degraded or faulted, * there's no point in keeping the state around * that we use to put a drive into the degraded * state. However, if the vdev is simply missing, * preseve the case data in the hopes that it will * return. */ caseFile = existingCaseFile; vdev_state curState(caseFile->VdevState()); if (curState > VDEV_STATE_CANT_OPEN && curState < VDEV_STATE_HEALTHY) { unlink(fileName); return; } } else { ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (zpl.empty() || (vdevConf = VdevIterator(zpl.front()) .Find(vdevGUID)) == NULL) { /* * Either the pool no longer exists * or this vdev is no longer a member of * the pool. */ unlink(fullName.c_str()); return; } /* * Any vdev we find that does not have a case file * must be in the healthy state and thus worthy of * continued SERD data tracking. */ caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); } ifstream caseStream(fullName.c_str()); if (!caseStream) throw ZfsdException("CaseFile::DeSerialize: Unable to " "read %s.\n", fileName); caseFile->DeSerialize(caseStream); } catch (const ParseException &exp) { exp.Log(); if (caseFile != existingCaseFile) delete caseFile; /* * Since we can't parse the file, unlink it so we don't * trip over it again. */ unlink(fileName); } catch (const ZfsdException &zfsException) { zfsException.Log(); if (caseFile != existingCaseFile) delete caseFile; } } //- CaseFile Protected Methods ------------------------------------------------- CaseFile::CaseFile(const Vdev &vdev) : m_poolGUID(vdev.PoolGUID()), m_vdevGUID(vdev.GUID()), m_vdevState(vdev.State()), m_vdevPhysPath(vdev.PhysicalPath()) { stringstream guidString; guidString << m_vdevGUID; m_vdevGUIDString = guidString.str(); guidString.str(""); guidString << m_poolGUID; m_poolGUIDString = guidString.str(); s_activeCases.push_back(this); syslog(LOG_INFO, "Creating new CaseFile:\n"); Log(); } CaseFile::~CaseFile() { PurgeEvents(); PurgeTentativeEvents(); m_tentativeTimer.Stop(); s_activeCases.remove(this); } void CaseFile::PurgeEvents() { for (EventList::iterator event(m_events.begin()); event != m_events.end(); event++) delete *event; m_events.clear(); } void CaseFile::PurgeTentativeEvents() { for (EventList::iterator event(m_tentativeEvents.begin()); event != m_tentativeEvents.end(); event++) delete *event; m_tentativeEvents.clear(); } void CaseFile::SerializeEvList(const EventList events, int fd, const char* prefix) const { if (events.empty()) return; for (EventList::const_iterator curEvent = events.begin(); curEvent != events.end(); curEvent++) { const string &eventString((*curEvent)->GetEventString()); // TODO: replace many write(2) calls with a single writev(2) if (prefix) write(fd, prefix, strlen(prefix)); write(fd, eventString.c_str(), eventString.length()); } } void CaseFile::Serialize() { stringstream saveFile; saveFile << setfill('0') << s_caseFilePath << "/" << "pool_" << PoolGUIDString() << "_vdev_" << VdevGUIDString() << ".case"; if (m_events.empty() && m_tentativeEvents.empty()) { unlink(saveFile.str().c_str()); return; } int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); if (fd == -1) { syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", saveFile.str().c_str()); return; } SerializeEvList(m_events, fd); SerializeEvList(m_tentativeEvents, fd, "tentative "); close(fd); } /* * XXX: This method assumes that events may not contain embedded newlines. If * ever events can contain embedded newlines, then CaseFile must switch * serialization formats */ void CaseFile::DeSerialize(ifstream &caseStream) { string evString; const EventFactory &factory(ZfsDaemon::Get().GetFactory()); caseStream >> std::noskipws >> std::ws; while (caseStream.good()) { /* * Outline: * read the beginning of a line and check it for * "tentative". If found, discard "tentative". * Create a new event * continue */ EventList* destEvents; const string tentFlag("tentative "); string line; std::stringbuf lineBuf; caseStream.get(lineBuf); caseStream.ignore(); /*discard the newline character*/ line = lineBuf.str(); if (line.compare(0, tentFlag.size(), tentFlag) == 0) { /* Discard "tentative" */ line.erase(0, tentFlag.size()); destEvents = &m_tentativeEvents; } else { destEvents = &m_events; } Event *event(Event::CreateEvent(factory, line)); if (event != NULL) { destEvents->push_back(event); RegisterCallout(*event); } } } void CaseFile::Close() { /* * This case is no longer relevant. Clean up our * serialization file, and delete the case. */ syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); /* * Serialization of a Case with no event data, clears the * Serialization data for that event. */ PurgeEvents(); Serialize(); delete this; } void CaseFile::OnGracePeriodEnded() { bool should_fault, should_degrade; ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); m_events.splice(m_events.begin(), m_tentativeEvents); should_fault = ShouldFault(); should_degrade = ShouldDegrade(); if (should_fault || should_degrade) { if (zhp == NULL || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { /* * Either the pool no longer exists * or this vdev is no longer a member of * the pool. */ Close(); return; } } /* A fault condition has priority over a degrade condition */ if (ShouldFault()) { /* Fault the vdev and close the case. */ if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, VDEV_AUX_ERR_EXCEEDED) == 0) { syslog(LOG_INFO, "Faulting vdev(%s/%s)", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); return; } else { syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); } } else if (ShouldDegrade()) { /* Degrade the vdev and close the case. */ if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, VDEV_AUX_ERR_EXCEEDED) == 0) { syslog(LOG_INFO, "Degrading vdev(%s/%s)", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); return; } else { syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); } } Serialize(); } Vdev CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { Vdev vd(zhp, CaseVdev(zhp)); std::list children; std::list::iterator children_it; Vdev parent(vd.Parent()); Vdev replacing(NonexistentVdev); /* * To determine whether we are being replaced by another spare that * is still working, then make sure that it is currently spared and * that the spare is either resilvering or healthy. If any of these * conditions fail, then we are not being replaced by a spare. * * If the spare is healthy, then the case file should be closed very * soon after this check. */ if (parent.DoesNotExist() || parent.Name(zhp, /*verbose*/false) != "spare") return (NonexistentVdev); children = parent.Children(); children_it = children.begin(); for (;children_it != children.end(); children_it++) { Vdev child = *children_it; /* Skip our vdev. */ if (child.GUID() == VdevGUID()) continue; /* * Accept the first child that doesn't match our GUID, or * any resilvering/healthy device if one exists. */ if (replacing.DoesNotExist() || child.IsResilvering() || child.State() == VDEV_STATE_HEALTHY) replacing = child; } return (replacing); } bool CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { nvlist_t *nvroot, *newvd; const char *poolname; string oldstr(VdevGUIDString()); bool retval = true; /* Figure out what pool we're working on */ ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); if (zhp == NULL) { syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " "pool_guid %"PRIu64".", (uint64_t)m_poolGUID); return (false); } poolname = zpool_get_name(zhp); Vdev vd(zhp, CaseVdev(zhp)); Vdev replaced(BeingReplacedBy(zhp)); if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { /* If we are already being replaced by a working spare, pass. */ if (replaced.IsResilvering() || replaced.State() == VDEV_STATE_HEALTHY) { syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " "replaced", VdevGUIDString().c_str(), path); return (/*consumed*/false); } /* * If we have already been replaced by a spare, but that spare * is broken, we must spare the spare, not the original device. */ oldstr = replaced.GUIDString(); syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " "broken spare %s instead", VdevGUIDString().c_str(), path, oldstr.c_str()); } /* * Build a root vdev/leaf vdev configuration suitable for * zpool_vdev_attach. Only enough data for the kernel to find * the device (i.e. type and disk device node path) are needed. */ nvroot = NULL; newvd = NULL; if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " "configuration data.", poolname, oldstr.c_str()); if (nvroot != NULL) nvlist_free(nvroot); return (false); } if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd, 1) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " "configuration data.", poolname, oldstr.c_str()); nvlist_free(newvd); nvlist_free(nvroot); return (true); } /* Data was copied when added to the root vdev. */ nvlist_free(newvd); retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, /*replace*/B_TRUE) == 0); if (retval) syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", poolname, oldstr.c_str(), path); else syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); nvlist_free(nvroot); return (retval); } /* Does the argument event refer to a checksum error? */ static bool IsChecksumEvent(const Event* const event) { return ("ereport.fs.zfs.checksum" == event->Value("type")); } /* Does the argument event refer to an IO error? */ static bool IsIOEvent(const Event* const event) { return ("ereport.fs.zfs.io" == event->Value("type")); } bool CaseFile::ShouldDegrade() const { return (std::count_if(m_events.begin(), m_events.end(), IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); } bool CaseFile::ShouldFault() const { return (std::count_if(m_events.begin(), m_events.end(), IsIOEvent) > ZFS_DEGRADE_IO_COUNT); } nvlist_t * CaseFile::CaseVdev(zpool_handle_t *zhp) const { return (VdevIterator(zhp).Find(VdevGUID())); } Index: projects/zfsd/head/cddl/usr.sbin/zfsd/vdev.cc =================================================================== --- projects/zfsd/head/cddl/usr.sbin/zfsd/vdev.cc (revision 300689) +++ projects/zfsd/head/cddl/usr.sbin/zfsd/vdev.cc (revision 300690) @@ -1,352 +1,357 @@ /*- * Copyright (c) 2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * * $FreeBSD$ */ /** * \file vdev.cc * * Implementation of the Vdev class. */ #include #include #include #include +/* + * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with + * C++ flush methods + */ +#undef flush #include #include #include #include #include #include #include #include #include #include "vdev.h" #include "vdev_iterator.h" #include "zfsd.h" #include "zfsd_exception.h" #include "zpool_list.h" __FBSDID("$FreeBSD$"); /*============================ Namespace Control =============================*/ using std::string; using std::stringstream; //- Special objects ----------------------------------------------------------- Vdev NonexistentVdev; //- Vdev Inline Public Methods ------------------------------------------------ /*=========================== Class Implementations ==========================*/ /*----------------------------------- Vdev -----------------------------------*/ /* Special constructor for NonexistentVdev. */ Vdev::Vdev() : m_poolConfig(NULL), m_config(NULL) {} bool Vdev::VdevLookupPoolGuid() { uint64_t guid; if (nvlist_lookup_uint64(m_poolConfig, ZPOOL_CONFIG_POOL_GUID, &guid)) return (false); m_poolGUID = guid; return (true); } void Vdev::VdevLookupGuid() { uint64_t guid; if (nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_GUID, &guid) != 0) throw ZfsdException("Unable to extract vdev GUID " "from vdev config data."); m_vdevGUID = guid; } Vdev::Vdev(zpool_handle_t *pool, nvlist_t *config) : m_poolConfig(zpool_get_config(pool, NULL)), m_config(config) { if (!VdevLookupPoolGuid()) throw ZfsdException("Can't extract pool GUID from handle."); VdevLookupGuid(); } Vdev::Vdev(nvlist_t *poolConfig, nvlist_t *config) : m_poolConfig(poolConfig), m_config(config) { if (!VdevLookupPoolGuid()) throw ZfsdException("Can't extract pool GUID from config."); VdevLookupGuid(); } Vdev::Vdev(nvlist_t *labelConfig) : m_poolConfig(labelConfig), m_config(labelConfig) { /* * Spares do not have a Pool GUID. Tolerate its absence. * Code accessing this Vdev in a context where the Pool GUID is * required will find it invalid (as it is upon Vdev construction) * and act accordingly. */ (void) VdevLookupPoolGuid(); VdevLookupGuid(); try { m_config = VdevIterator(labelConfig).Find(m_vdevGUID); } catch (const ZfsdException &exp) { /* * When reading a spare's label, it is normal not to find * a list of vdevs */ m_config = NULL; } } bool Vdev::IsSpare() const { uint64_t is_spare(0); if (m_config == NULL) return (false); (void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_IS_SPARE, &is_spare); return (bool(is_spare)); } vdev_state Vdev::State() const { uint64_t *nvlist_array; vdev_stat_t *vs; uint_t vsc; if (m_config == NULL) { /* * If we couldn't find the list of vdevs, that normally means * that this is an available hotspare. In that case, we will * presume it to be healthy. Even if this spare had formerly * been in use, been degraded, and been replaced, the act of * replacement wipes the degraded bit from the label. So we * have no choice but to presume that it is healthy. */ return (VDEV_STATE_HEALTHY); } if (nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &vsc) == 0) { vs = reinterpret_cast(nvlist_array); return (static_cast(vs->vs_state)); } /* * Stats are not available. This vdev was created from a label. * Synthesize a state based on available data. */ uint64_t faulted(0); uint64_t degraded(0); (void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_FAULTED, &faulted); (void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_DEGRADED, °raded); if (faulted) return (VDEV_STATE_FAULTED); if (degraded) return (VDEV_STATE_DEGRADED); return (VDEV_STATE_HEALTHY); } std::list Vdev::Children() { nvlist_t **vdevChildren; int result; u_int numChildren; std::list children; if (m_poolConfig == NULL || m_config == NULL) return (children); result = nvlist_lookup_nvlist_array(m_config, ZPOOL_CONFIG_CHILDREN, &vdevChildren, &numChildren); if (result != 0) return (children); for (u_int c = 0;c < numChildren; c++) children.push_back(Vdev(m_poolConfig, vdevChildren[c])); return (children); } Vdev Vdev::RootVdev() { nvlist_t *rootVdev; if (m_poolConfig == NULL) return (NonexistentVdev); if (nvlist_lookup_nvlist(m_poolConfig, ZPOOL_CONFIG_VDEV_TREE, &rootVdev) != 0) return (NonexistentVdev); return (Vdev(m_poolConfig, rootVdev)); } /* * Find our parent. This requires doing a traversal of the config; we can't * cache it as leaf vdevs may change their pool config location (spare, * replacing, mirror, etc). */ Vdev Vdev::Parent() { std::list to_examine; std::list children; std::list::iterator children_it; to_examine.push_back(RootVdev()); for (;;) { if (to_examine.empty()) return (NonexistentVdev); Vdev vd = to_examine.front(); if (vd.DoesNotExist()) return (NonexistentVdev); to_examine.pop_front(); children = vd.Children(); children_it = children.begin(); for (;children_it != children.end(); children_it++) { Vdev child = *children_it; if (child.GUID() == GUID()) return (vd); to_examine.push_front(child); } } } bool Vdev::IsAvailableSpare() const { /* If we have a pool guid, we cannot be an available spare. */ if (PoolGUID()) return (false); return (true); } bool Vdev::IsSpare() { uint64_t spare; if (nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_IS_SPARE, &spare) != 0) return (false); return (spare != 0); } bool Vdev::IsActiveSpare() const { vdev_stat_t *vs; uint_t c; if (m_poolConfig == NULL) return (false); (void) nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_VDEV_STATS, reinterpret_cast(&vs), &c); if (vs == NULL || vs->vs_aux != VDEV_AUX_SPARED) return (false); return (true); } bool Vdev::IsResilvering() const { pool_scan_stat_t *ps = NULL; uint_t c; if (State() != VDEV_STATE_HEALTHY) return (false); (void) nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_SCAN_STATS, reinterpret_cast(&ps), &c); if (ps == NULL || ps->pss_func != POOL_SCAN_RESILVER) return (false); return (true); } string Vdev::GUIDString() const { stringstream vdevGUIDString; vdevGUIDString << GUID(); return (vdevGUIDString.str()); } string Vdev::Name(zpool_handle_t *zhp, bool verbose) const { return (zpool_vdev_name(g_zfsHandle, zhp, m_config, verbose ? B_TRUE : B_FALSE)); } string Vdev::Path() const { char *path(NULL); if ((m_config != NULL) && (nvlist_lookup_string(m_config, ZPOOL_CONFIG_PATH, &path) == 0)) return (path); return (""); } string Vdev::PhysicalPath() const { char *path(NULL); if ((m_config != NULL) && (nvlist_lookup_string(m_config, ZPOOL_CONFIG_PHYS_PATH, &path) == 0)) return (path); return (""); } Index: projects/zfsd/head/cddl/usr.sbin/zfsd/zfsd_event.cc =================================================================== --- projects/zfsd/head/cddl/usr.sbin/zfsd/zfsd_event.cc (revision 300689) +++ projects/zfsd/head/cddl/usr.sbin/zfsd/zfsd_event.cc (revision 300690) @@ -1,530 +1,535 @@ /*- * Copyright (c) 2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file zfsd_event.cc */ #include #include #include #include #include +/* + * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with + * C++ flush methods + */ +#undef flush #include #include #include #include #include #include #include #include #include #include "callout.h" #include "vdev_iterator.h" #include "zfsd_event.h" #include "case_file.h" #include "vdev.h" #include "zfsd.h" #include "zfsd_exception.h" #include "zpool_list.h" __FBSDID("$FreeBSD$"); /*============================ Namespace Control =============================*/ using DevdCtl::Event; using DevdCtl::Guid; using DevdCtl::NVPairMap; using std::stringstream; /*=========================== Class Implementations ==========================*/ /*-------------------------------- DevfsEvent --------------------------------*/ //- DevfsEvent Static Public Methods ------------------------------------------- Event * DevfsEvent::Builder(Event::Type type, NVPairMap &nvPairs, const string &eventString) { return (new DevfsEvent(type, nvPairs, eventString)); } //- DevfsEvent Static Protected Methods ---------------------------------------- nvlist_t * DevfsEvent::ReadLabel(int devFd, bool &inUse, bool °raded) { pool_state_t poolState; char *poolName; boolean_t b_inuse; inUse = false; degraded = false; poolName = NULL; if (zpool_in_use(g_zfsHandle, devFd, &poolState, &poolName, &b_inuse) == 0) { nvlist_t *devLabel; inUse = b_inuse == B_TRUE; if (poolName != NULL) free(poolName); if (zpool_read_label(devFd, &devLabel) != 0 || devLabel == NULL) return (NULL); try { Vdev vdev(devLabel); degraded = vdev.State() != VDEV_STATE_HEALTHY; return (devLabel); } catch (ZfsdException &exp) { string devName = fdevname(devFd); string devPath = _PATH_DEV + devName; string context("DevfsEvent::ReadLabel: " + devPath + ": "); exp.GetString().insert(0, context); exp.Log(); } } return (NULL); } bool DevfsEvent::OnlineByLabel(const string &devPath, const string& physPath, nvlist_t *devConfig) { try { /* * A device with ZFS label information has been * inserted. If it matches a device for which we * have a case, see if we can solve that case. */ syslog(LOG_INFO, "Interrogating VDEV label for %s\n", devPath.c_str()); Vdev vdev(devConfig); CaseFile *caseFile(CaseFile::Find(vdev.PoolGUID(), vdev.GUID())); if (caseFile != NULL) return (caseFile->ReEvaluate(devPath, physPath, &vdev)); } catch (ZfsdException &exp) { string context("DevfsEvent::OnlineByLabel: " + devPath + ": "); exp.GetString().insert(0, context); exp.Log(); } return (false); } //- DevfsEvent Virtual Public Methods ------------------------------------------ Event * DevfsEvent::DeepCopy() const { return (new DevfsEvent(*this)); } bool DevfsEvent::Process() const { /* * We are only concerned with newly discovered * devices that can be ZFS vdevs. */ if (Value("type") != "CREATE" || !IsDiskDev()) return (false); /* Log the event since it is of interest. */ Log(LOG_INFO); string devPath; if (!DevPath(devPath)) return (false); int devFd(open(devPath.c_str(), O_RDONLY)); if (devFd == -1) return (false); bool inUse; bool degraded; nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded)); string physPath; bool havePhysPath(PhysicalPath(physPath)); string devName; DevName(devName); close(devFd); if (inUse && devLabel != NULL) { OnlineByLabel(devPath, physPath, devLabel); } else if (degraded) { syslog(LOG_INFO, "%s is marked degraded. Ignoring " "as a replace by physical path candidate.\n", devName.c_str()); } else if (havePhysPath && IsWholeDev()) { /* * TODO: attempt to resolve events using every casefile * that matches this physpath */ CaseFile *caseFile(CaseFile::Find(physPath)); if (caseFile != NULL) { syslog(LOG_INFO, "Found CaseFile(%s:%s:%s) - ReEvaluating\n", caseFile->PoolGUIDString().c_str(), caseFile->VdevGUIDString().c_str(), zpool_state_to_name(caseFile->VdevState(), VDEV_AUX_NONE)); caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL); } } if (devLabel != NULL) nvlist_free(devLabel); return (false); } //- DevfsEvent Protected Methods ----------------------------------------------- DevfsEvent::DevfsEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : DevdCtl::DevfsEvent(type, nvpairs, eventString) { } DevfsEvent::DevfsEvent(const DevfsEvent &src) : DevdCtl::DevfsEvent::DevfsEvent(src) { } /*-------------------------------- GeomEvent --------------------------------*/ //- GeomEvent Static Public Methods ------------------------------------------- Event * GeomEvent::Builder(Event::Type type, NVPairMap &nvPairs, const string &eventString) { return (new GeomEvent(type, nvPairs, eventString)); } //- GeomEvent Virtual Public Methods ------------------------------------------ Event * GeomEvent::DeepCopy() const { return (new GeomEvent(*this)); } bool GeomEvent::Process() const { /* * We are only concerned with physical path changes, because those can * be used to satisfy autoreplace operations */ if (Value("type") != "GEOM::physpath" || !IsDiskDev()) return (false); /* Log the event since it is of interest. */ Log(LOG_INFO); string devPath; if (!DevPath(devPath)) return (false); string physPath; bool havePhysPath(PhysicalPath(physPath)); string devName; DevName(devName); if (havePhysPath) { /* * TODO: attempt to resolve events using every casefile * that matches this physpath */ CaseFile *caseFile(CaseFile::Find(physPath)); if (caseFile != NULL) { syslog(LOG_INFO, "Found CaseFile(%s:%s:%s) - ReEvaluating\n", caseFile->PoolGUIDString().c_str(), caseFile->VdevGUIDString().c_str(), zpool_state_to_name(caseFile->VdevState(), VDEV_AUX_NONE)); caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL); } } return (false); } //- GeomEvent Protected Methods ----------------------------------------------- GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : DevdCtl::GeomEvent(type, nvpairs, eventString) { } GeomEvent::GeomEvent(const GeomEvent &src) : DevdCtl::GeomEvent::GeomEvent(src) { } /*--------------------------------- ZfsEvent ---------------------------------*/ //- ZfsEvent Static Public Methods --------------------------------------------- DevdCtl::Event * ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new ZfsEvent(type, nvpairs, eventString)); } //- ZfsEvent Virtual Public Methods -------------------------------------------- Event * ZfsEvent::DeepCopy() const { return (new ZfsEvent(*this)); } bool ZfsEvent::Process() const { string logstr(""); if (!Contains("class") && !Contains("type")) { syslog(LOG_ERR, "ZfsEvent::Process: Missing class or type data."); return (false); } /* On config syncs, replay any queued events first. */ if (Value("type").find("misc.fs.zfs.config_sync") == 0) { /* * Even if saved events are unconsumed the second time * around, drop them. Any events that still can't be * consumed are probably referring to vdevs or pools that * no longer exist. */ ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true); CaseFile::ReEvaluateByGuid(PoolGUID(), *this); } if (Value("type").find("misc.fs.zfs.") == 0) { /* Configuration changes, resilver events, etc. */ ProcessPoolEvent(); return (false); } if (!Contains("pool_guid") || !Contains("vdev_guid")) { /* Only currently interested in Vdev related events. */ return (false); } CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); if (caseFile != NULL) { Log(LOG_INFO); syslog(LOG_INFO, "Evaluating existing case file\n"); caseFile->ReEvaluate(*this); return (false); } /* Skip events that can't be handled. */ Guid poolGUID(PoolGUID()); /* If there are no replicas for a pool, then it's not manageable. */ if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) { stringstream msg; msg << "No replicas available for pool " << poolGUID; msg << ", ignoring"; Log(LOG_INFO); syslog(LOG_INFO, "%s", msg.str().c_str()); return (false); } /* * Create a case file for this vdev, and have it * evaluate the event. */ ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (zpl.empty()) { stringstream msg; int priority = LOG_INFO; msg << "ZfsEvent::Process: Event for unknown pool "; msg << poolGUID << " "; msg << "queued"; Log(LOG_INFO); syslog(priority, "%s", msg.str().c_str()); return (true); } nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID()); if (vdevConfig == NULL) { stringstream msg; int priority = LOG_INFO; msg << "ZfsEvent::Process: Event for unknown vdev "; msg << VdevGUID() << " "; msg << "queued"; Log(LOG_INFO); syslog(priority, "%s", msg.str().c_str()); return (true); } Vdev vdev(zpl.front(), vdevConfig); caseFile = &CaseFile::Create(vdev); if (caseFile->ReEvaluate(*this) == false) { stringstream msg; int priority = LOG_INFO; msg << "ZfsEvent::Process: Unconsumed event for vdev("; msg << zpool_get_name(zpl.front()) << ","; msg << vdev.GUID() << ") "; msg << "queued"; Log(LOG_INFO); syslog(priority, "%s", msg.str().c_str()); return (true); } return (false); } //- ZfsEvent Protected Methods ------------------------------------------------- ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : DevdCtl::ZfsEvent(type, nvpairs, eventString) { } ZfsEvent::ZfsEvent(const ZfsEvent &src) : DevdCtl::ZfsEvent(src) { } /* * Sometimes the kernel won't detach a spare when it is no longer needed. This * can happen for example if a drive is removed, then either the pool is * exported or the machine is powered off, then the drive is reinserted, then * the machine is powered on or the pool is imported. ZFSD must detach these * spares itself. */ void ZfsEvent::CleanupSpares() const { Guid poolGUID(PoolGUID()); ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (!zpl.empty()) { zpool_handle_t* hdl; hdl = zpl.front(); VdevIterator(hdl).Each(TryDetach, (void*)hdl); } } void ZfsEvent::ProcessPoolEvent() const { bool degradedDevice(false); /* The pool is destroyed. Discard any open cases */ if (Value("type") == "misc.fs.zfs.pool_destroy") { Log(LOG_INFO); CaseFile::ReEvaluateByGuid(PoolGUID(), *this); return; } CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); if (caseFile != NULL) { if (caseFile->VdevState() != VDEV_STATE_UNKNOWN && caseFile->VdevState() < VDEV_STATE_HEALTHY) degradedDevice = true; Log(LOG_INFO); caseFile->ReEvaluate(*this); } else if (Value("type") == "misc.fs.zfs.resilver_finish") { /* * It's possible to get a resilver_finish event with no * corresponding casefile. For example, if a damaged pool were * exported, repaired, then reimported. */ Log(LOG_INFO); CleanupSpares(); } if (Value("type") == "misc.fs.zfs.vdev_remove" && degradedDevice == false) { /* See if any other cases can make use of this device. */ Log(LOG_INFO); ZfsDaemon::RequestSystemRescan(); } } bool ZfsEvent::TryDetach(Vdev &vdev, void *cbArg) { /* * Outline: * if this device is a spare, and its parent includes one healthy, * non-spare child, then detach this device. */ zpool_handle_t *hdl(static_cast(cbArg)); if (vdev.IsSpare()) { std::list siblings; std::list::iterator siblings_it; boolean_t cleanup = B_FALSE; Vdev parent = vdev.Parent(); siblings = parent.Children(); /* Determine whether the parent should be cleaned up */ for (siblings_it = siblings.begin(); siblings_it != siblings.end(); siblings_it++) { Vdev sibling = *siblings_it; if (!sibling.IsSpare() && sibling.State() == VDEV_STATE_HEALTHY) { cleanup = B_TRUE; break; } } if (cleanup) { syslog(LOG_INFO, "Detaching spare vdev %s from pool %s", vdev.Path().c_str(), zpool_get_name(hdl)); zpool_vdev_detach(hdl, vdev.Path().c_str()); } } /* Always return false, because there may be other spares to detach */ return (false); } Index: projects/zfsd/head/lib/libdevdctl/event.cc =================================================================== --- projects/zfsd/head/lib/libdevdctl/event.cc (revision 300689) +++ projects/zfsd/head/lib/libdevdctl/event.cc (revision 300690) @@ -1,602 +1,602 @@ /*- * Copyright (c) 2011, 2012, 2013, 2016 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file event.cc * * Implementation of the class hierarchy used to express events * received via the devdctl API. */ #include #include #include #include #include #include #include +#include #include #include #include #include -#include #include #include #include #include #include #include #include #include "guid.h" #include "event.h" #include "event_factory.h" #include "exception.h" __FBSDID("$FreeBSD$"); /*================================== Macros ==================================*/ #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) /*============================ Namespace Control =============================*/ -using std::begin; using std::cout; -using std::end; using std::endl; using std::string; using std::stringstream; namespace DevdCtl { /*=========================== Class Implementations ==========================*/ /*----------------------------------- Event ----------------------------------*/ //- Event Static Protected Data ------------------------------------------------ const string Event::s_theEmptyString; Event::EventTypeRecord Event::s_typeTable[] = { { Event::NOTIFY, "Notify" }, { Event::NOMATCH, "No Driver Match" }, { Event::ATTACH, "Attach" }, { Event::DETACH, "Detach" } }; //- Event Static Public Methods ------------------------------------------------ Event * Event::Builder(Event::Type type, NVPairMap &nvPairs, const string &eventString) { return (new Event(type, nvPairs, eventString)); } Event * Event::CreateEvent(const EventFactory &factory, const string &eventString) { NVPairMap &nvpairs(*new NVPairMap); Type type(static_cast(eventString[0])); try { ParseEventString(type, eventString, nvpairs); } catch (const ParseException &exp) { if (exp.GetType() == ParseException::INVALID_FORMAT) exp.Log(); return (NULL); } /* * Allow entries in our table for events with no system specified. * These entries should specify the string "none". */ NVPairMap::iterator system_item(nvpairs.find("system")); if (system_item == nvpairs.end()) nvpairs["system"] = "none"; return (factory.Build(type, nvpairs, eventString)); } bool Event::DevName(std::string &name) const { return (false); } /* TODO: simplify this function with C++-11 methods */ bool Event::IsDiskDev() const { - static const char *diskDevNames[] = + const int numDrivers = 2; + static const char *diskDevNames[numDrivers] = { "da", "ada" }; const char **dName; string devName; if (! DevName(devName)) return false; size_t find_start = devName.rfind('/'); if (find_start == string::npos) { find_start = 0; } else { /* Just after the last '/'. */ find_start++; } - for (dName = begin(diskDevNames); dName <= end(diskDevNames); dName++) { + for (dName = &diskDevNames[0]; + dName <= &diskDevNames[numDrivers - 1]; dName++) { size_t loc(devName.find(*dName, find_start)); if (loc == find_start) { size_t prefixLen(strlen(*dName)); if (devName.length() - find_start >= prefixLen && isdigit(devName[find_start + prefixLen])) return (true); } } return (false); } const char * Event::TypeToString(Event::Type type) { EventTypeRecord *rec(s_typeTable); EventTypeRecord *lastRec(s_typeTable + NUM_ELEMENTS(s_typeTable) - 1); for (; rec <= lastRec; rec++) { if (rec->m_type == type) return (rec->m_typeName); } return ("Unknown"); } //- Event Public Methods ------------------------------------------------------- const string & Event::Value(const string &varName) const { NVPairMap::const_iterator item(m_nvPairs.find(varName)); if (item == m_nvPairs.end()) return (s_theEmptyString); return (item->second); } bool Event::Contains(const string &varName) const { return (m_nvPairs.find(varName) != m_nvPairs.end()); } string Event::ToString() const { stringstream result; NVPairMap::const_iterator devName(m_nvPairs.find("device-name")); if (devName != m_nvPairs.end()) result << devName->second << ": "; NVPairMap::const_iterator systemName(m_nvPairs.find("system")); if (systemName != m_nvPairs.end() && systemName->second != "none") result << systemName->second << ": "; result << TypeToString(GetType()) << ' '; for (NVPairMap::const_iterator curVar = m_nvPairs.begin(); curVar != m_nvPairs.end(); curVar++) { if (curVar == devName || curVar == systemName) continue; result << ' ' << curVar->first << "=" << curVar->second; } result << endl; return (result.str()); } void Event::Print() const { cout << ToString() << std::flush; } void Event::Log(int priority) const { syslog(priority, "%s", ToString().c_str()); } //- Event Virtual Public Methods ----------------------------------------------- Event::~Event() { delete &m_nvPairs; } Event * Event::DeepCopy() const { return (new Event(*this)); } bool Event::Process() const { return (false); } timeval Event::GetTimestamp() const { timeval tv_timestamp; struct tm tm_timestamp; if (!Contains("timestamp")) { throw Exception("Event contains no timestamp: %s", m_eventString.c_str()); } strptime(Value(string("timestamp")).c_str(), "%s", &tm_timestamp); tv_timestamp.tv_sec = mktime(&tm_timestamp); tv_timestamp.tv_usec = 0; return (tv_timestamp); } bool Event::DevPath(std::string &path) const { string devName; if (!DevName(devName)) return (false); string devPath(_PATH_DEV + devName); int devFd(open(devPath.c_str(), O_RDONLY)); if (devFd == -1) return (false); /* Normalize the device name in case the DEVFS event is for a link. */ devName = fdevname(devFd); path = _PATH_DEV + devName; close(devFd); return (true); } bool Event::PhysicalPath(std::string &path) const { string devPath; if (!DevPath(devPath)) return (false); int devFd(open(devPath.c_str(), O_RDONLY)); if (devFd == -1) return (false); char physPath[MAXPATHLEN]; physPath[0] = '\0'; bool result(ioctl(devFd, DIOCGPHYSPATH, physPath) == 0); close(devFd); if (result) path = physPath; return (result); } //- Event Protected Methods ---------------------------------------------------- Event::Event(Type type, NVPairMap &map, const string &eventString) : m_type(type), m_nvPairs(map), m_eventString(eventString) { } Event::Event(const Event &src) : m_type(src.m_type), m_nvPairs(*new NVPairMap(src.m_nvPairs)), m_eventString(src.m_eventString) { } void Event::ParseEventString(Event::Type type, const string &eventString, NVPairMap& nvpairs) { size_t start; size_t end; switch (type) { case ATTACH: case DETACH: /* * \ * at \ * on * * Handle all data that doesn't conform to the * "name=value" format, and let the generic parser * below handle the rest. * * Type is a single char. Skip it. */ start = 1; end = eventString.find_first_of(" \t\n", start); if (end == string::npos) throw ParseException(ParseException::INVALID_FORMAT, eventString, start); nvpairs["device-name"] = eventString.substr(start, end - start); start = eventString.find(" on ", end); if (end == string::npos) throw ParseException(ParseException::INVALID_FORMAT, eventString, start); start += 4; end = eventString.find_first_of(" \t\n", start); nvpairs["parent"] = eventString.substr(start, end); break; case NOTIFY: break; case NOMATCH: throw ParseException(ParseException::DISCARDED_EVENT_TYPE, eventString); default: throw ParseException(ParseException::UNKNOWN_EVENT_TYPE, eventString); } /* Process common "key=value" format. */ for (start = 1; start < eventString.length(); start = end + 1) { /* Find the '=' in the middle of the key/value pair. */ end = eventString.find('=', start); if (end == string::npos) break; /* * Find the start of the key by backing up until * we hit whitespace or '!' (event type "notice"). * Due to the devdctl format, all key/value pair must * start with one of these two characters. */ start = eventString.find_last_of("! \t\n", end); if (start == string::npos) throw ParseException(ParseException::INVALID_FORMAT, eventString, end); start++; string key(eventString.substr(start, end - start)); /* * Walk forward from the '=' until either we exhaust * the buffer or we hit whitespace. */ start = end + 1; if (start >= eventString.length()) throw ParseException(ParseException::INVALID_FORMAT, eventString, end); end = eventString.find_first_of(" \t\n", start); if (end == string::npos) end = eventString.length() - 1; string value(eventString.substr(start, end - start)); nvpairs[key] = value; } } void Event::TimestampEventString(std::string &eventString) { if (eventString.size() > 0) { /* * Add a timestamp as the final field of the event if it is * not already present. */ if (eventString.find("timestamp=") == string::npos) { const size_t bufsize = 32; // Long enough for a 64-bit int timeval now; char timebuf[bufsize]; size_t eventEnd(eventString.find_last_not_of('\n') + 1); if (gettimeofday(&now, NULL) != 0) err(1, "gettimeofday"); snprintf(timebuf, bufsize, " timestamp=%"PRId64, (int64_t) now.tv_sec); eventString.insert(eventEnd, timebuf); } } } /*-------------------------------- DevfsEvent --------------------------------*/ //- DevfsEvent Static Public Methods ------------------------------------------- Event * DevfsEvent::Builder(Event::Type type, NVPairMap &nvPairs, const string &eventString) { return (new DevfsEvent(type, nvPairs, eventString)); } //- DevfsEvent Static Protected Methods ---------------------------------------- bool DevfsEvent::IsWholeDev(const string &devName) { string::const_iterator i(devName.begin()); size_t start = devName.rfind('/'); if (start == string::npos) { start = 0; } else { /* Just after the last '/'. */ start++; } i += start; /* alpha prefix followed only by digits. */ for (; i < devName.end() && !isdigit(*i); i++) ; if (i == devName.end()) return (false); for (; i < devName.end() && isdigit(*i); i++) ; return (i == devName.end()); } //- DevfsEvent Virtual Public Methods ------------------------------------------ Event * DevfsEvent::DeepCopy() const { return (new DevfsEvent(*this)); } bool DevfsEvent::Process() const { return (true); } //- DevfsEvent Public Methods -------------------------------------------------- bool DevfsEvent::IsWholeDev() const { string devName; return (DevName(devName) && IsDiskDev() && IsWholeDev(devName)); } bool DevfsEvent::DevName(std::string &name) const { if (Value("subsystem") != "CDEV") return (false); name = Value("cdev"); return (!name.empty()); } //- DevfsEvent Protected Methods ----------------------------------------------- DevfsEvent::DevfsEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : Event(type, nvpairs, eventString) { } DevfsEvent::DevfsEvent(const DevfsEvent &src) : Event(src) { } /*--------------------------------- GeomEvent --------------------------------*/ //- GeomEvent Static Public Methods -------------------------------------------- Event * GeomEvent::Builder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new GeomEvent(type, nvpairs, eventString)); } //- GeomEvent Virtual Public Methods ------------------------------------------- Event * GeomEvent::DeepCopy() const { return (new GeomEvent(*this)); } bool GeomEvent::DevName(std::string &name) const { name = Value("devname"); return (!name.empty()); } //- GeomEvent Protected Methods ------------------------------------------------ GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : Event(type, nvpairs, eventString), m_devname(Value("devname")) { } GeomEvent::GeomEvent(const GeomEvent &src) : Event(src), m_devname(src.m_devname) { } /*--------------------------------- ZfsEvent ---------------------------------*/ //- ZfsEvent Static Public Methods --------------------------------------------- Event * ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new ZfsEvent(type, nvpairs, eventString)); } //- ZfsEvent Virtual Public Methods -------------------------------------------- Event * ZfsEvent::DeepCopy() const { return (new ZfsEvent(*this)); } bool ZfsEvent::DevName(std::string &name) const { return (false); } //- ZfsEvent Protected Methods ------------------------------------------------- ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : Event(type, nvpairs, eventString), m_poolGUID(Guid(Value("pool_guid"))), m_vdevGUID(Guid(Value("vdev_guid"))) { } ZfsEvent::ZfsEvent(const ZfsEvent &src) : Event(src), m_poolGUID(src.m_poolGUID), m_vdevGUID(src.m_vdevGUID) { } } // namespace DevdCtl