Index: stable/11/cddl/usr.sbin/zfsd/case_file.cc =================================================================== --- stable/11/cddl/usr.sbin/zfsd/case_file.cc (revision 330732) +++ stable/11/cddl/usr.sbin/zfsd/case_file.cc (revision 330733) @@ -1,1152 +1,1158 @@ /*- * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file case_file.cc * * We keep case files for any leaf vdev that is not in the optimal state. * However, we only serialize to disk those events that need to be preserved * across reboots. For now, this is just a log of soft errors which we * accumulate in order to mark a device as degraded. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "callout.h" #include "vdev_iterator.h" #include "zfsd_event.h" #include "case_file.h" #include "vdev.h" #include "zfsd.h" #include "zfsd_exception.h" #include "zpool_list.h" __FBSDID("$FreeBSD$"); /*============================ Namespace Control =============================*/ using std::auto_ptr; using std::hex; using std::ifstream; using std::stringstream; using std::setfill; using std::setw; using DevdCtl::Event; using DevdCtl::EventFactory; using DevdCtl::EventList; using DevdCtl::Guid; using DevdCtl::ParseException; /*--------------------------------- CaseFile ---------------------------------*/ //- CaseFile Static Data ------------------------------------------------------- CaseFileList CaseFile::s_activeCases; const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; //- CaseFile Static Public Methods --------------------------------------------- CaseFile * CaseFile::Find(Guid poolGUID, Guid vdevGUID) { for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) { if (((*curCase)->PoolGUID() != poolGUID && Guid::InvalidGuid() != poolGUID) || (*curCase)->VdevGUID() != vdevGUID) continue; /* * We only carry one active case per-vdev. */ return (*curCase); } return (NULL); } CaseFile * CaseFile::Find(const string &physPath) { CaseFile *result = NULL; for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) { if ((*curCase)->PhysicalPath() != physPath) continue; if (result != NULL) { syslog(LOG_WARNING, "Multiple casefiles found for " "physical path %s. " "This is most likely a bug in zfsd", physPath.c_str()); } result = *curCase; } return (result); } void CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) { CaseFileList::iterator casefile; for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ CaseFileList::iterator next = casefile; next++; if (poolGUID == (*casefile)->PoolGUID()) (*casefile)->ReEvaluate(event); casefile = next; } } CaseFile & CaseFile::Create(Vdev &vdev) { CaseFile *activeCase; activeCase = Find(vdev.PoolGUID(), vdev.GUID()); if (activeCase == NULL) activeCase = new CaseFile(vdev); return (*activeCase); } void CaseFile::DeSerialize() { struct dirent **caseFiles; int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, DeSerializeSelector, /*compar*/NULL)); if (numCaseFiles == -1) return; if (numCaseFiles == 0) { free(caseFiles); return; } for (int i = 0; i < numCaseFiles; i++) { DeSerializeFile(caseFiles[i]->d_name); free(caseFiles[i]); } free(caseFiles); } +bool +CaseFile::Empty() +{ + return (s_activeCases.empty()); +} + void CaseFile::LogAll() { for (CaseFileList::iterator curCase = s_activeCases.begin(); curCase != s_activeCases.end(); curCase++) (*curCase)->Log(); } void CaseFile::PurgeAll() { /* * Serialize casefiles before deleting them so that they can be reread * and revalidated during BuildCaseFiles. * CaseFiles remove themselves from this list on destruction. */ while (s_activeCases.size() != 0) { CaseFile *casefile = s_activeCases.front(); casefile->Serialize(); delete casefile; } } //- CaseFile Public Methods ---------------------------------------------------- bool CaseFile::RefreshVdevState() { ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); if (casePool == NULL) return (false); Vdev vd(casePool, CaseVdev(casePool)); if (vd.DoesNotExist()) return (false); m_vdevState = vd.State(); m_vdevPhysPath = vd.PhysicalPath(); return (true); } bool CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) { ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); if (pool == NULL || !RefreshVdevState()) { /* * The pool or vdev for this case file is no longer * part of the configuration. This can happen * if we process a device arrival notification * before seeing the ZFS configuration change * event. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " "Closing\n", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); /* * Since this event was not used to close this * case, do not report it as consumed. */ return (/*consumed*/false); } if (VdevState() > VDEV_STATE_CANT_OPEN) { /* * For now, newly discovered devices only help for * devices that are missing. In the future, we might * use a newly inserted spare to replace a degraded * or faulted device. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", PoolGUIDString().c_str(), VdevGUIDString().c_str()); return (/*consumed*/false); } if (vdev != NULL && ( vdev->PoolGUID() == m_poolGUID || vdev->PoolGUID() == Guid::InvalidGuid()) && vdev->GUID() == m_vdevGUID) { zpool_vdev_online(pool, vdev->GUIDString().c_str(), ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &m_vdevState); syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", zpool_get_name(pool), vdev->GUIDString().c_str(), devPath.c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); /* * Check the vdev state post the online action to see * if we can retire this case. */ CloseIfSolved(); return (/*consumed*/true); } /* * If the auto-replace policy is enabled, and we have physical * path information, try a physical path replacement. */ if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): AutoReplace not set. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } if (PhysicalPath().empty()) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): No physical path information. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } if (physPath != PhysicalPath()) { syslog(LOG_INFO, "CaseFile(%s:%s:%s): Physical path mismatch. " "Ignoring device insertion.\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); return (/*consumed*/false); } /* Write a label on the newly inserted disk. */ if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s) by physical path (label): %s: %s\n", zpool_get_name(pool), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); return (/*consumed*/false); } syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", PoolGUIDString().c_str(), VdevGUIDString().c_str(), devPath.c_str()); return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); } bool CaseFile::ReEvaluate(const ZfsEvent &event) { bool consumed(false); if (event.Value("type") == "misc.fs.zfs.vdev_remove") { /* * The Vdev we represent has been removed from the * configuration. This case is no longer of value. */ Close(); return (/*consumed*/true); } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { /* This Pool has been destroyed. Discard the case */ Close(); return (/*consumed*/true); } else if (event.Value("type") == "misc.fs.zfs.config_sync") { RefreshVdevState(); if (VdevState() < VDEV_STATE_HEALTHY) consumed = ActivateSpare(); } if (event.Value("class") == "resource.fs.zfs.removed") { bool spare_activated; if (!RefreshVdevState()) { /* * The pool or vdev for this case file is no longer * part of the configuration. This can happen * if we process a device arrival notification * before seeing the ZFS configuration change * event. */ syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " "unconfigured. Closing\n", PoolGUIDString().c_str(), VdevGUIDString().c_str()); /* * Close the case now so we won't waste cycles in the * system rescan */ Close(); /* * Since this event was not used to close this * case, do not report it as consumed. */ return (/*consumed*/false); } /* * Discard any tentative I/O error events for * this case. They were most likely caused by the * hot-unplug of this device. */ PurgeTentativeEvents(); /* Try to activate spares if they are available */ spare_activated = ActivateSpare(); /* * Rescan the drives in the system to see if a recent * drive arrival can be used to solve this case. */ ZfsDaemon::RequestSystemRescan(); /* * Consume the event if we successfully activated a spare. * Otherwise, leave it in the unconsumed events list so that the * future addition of a spare to this pool might be able to * close the case */ consumed = spare_activated; } else if (event.Value("class") == "resource.fs.zfs.statechange") { RefreshVdevState(); /* * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to * activate a hotspare. Otherwise, ignore the event */ if (VdevState() == VDEV_STATE_FAULTED || VdevState() == VDEV_STATE_DEGRADED || VdevState() == VDEV_STATE_CANT_OPEN) (void) ActivateSpare(); consumed = true; } else if (event.Value("class") == "ereport.fs.zfs.io" || event.Value("class") == "ereport.fs.zfs.checksum") { m_tentativeEvents.push_front(event.DeepCopy()); RegisterCallout(event); consumed = true; } bool closed(CloseIfSolved()); return (consumed || closed); } /* Find a Vdev containing the vdev with the given GUID */ static nvlist_t* find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid) { nvlist_t **vdevChildren; int error; unsigned ch, numChildren; error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &vdevChildren, &numChildren); if (error != 0 || numChildren == 0) return (NULL); for (ch = 0; ch < numChildren; ch++) { nvlist *result; Vdev vdev(pool_config, vdevChildren[ch]); if (vdev.GUID() == child_guid) return (config); result = find_parent(pool_config, vdevChildren[ch], child_guid); if (result != NULL) return (result); } return (NULL); } bool CaseFile::ActivateSpare() { nvlist_t *config, *nvroot, *parent_config; nvlist_t **spares; char *devPath, *vdev_type; const char *poolname; u_int nspares, i; int error; ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); if (zhp == NULL) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); return (false); } poolname = zpool_get_name(zhp); config = zpool_get_config(zhp, NULL); if (config == NULL) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " "config for pool %s", poolname); return (false); } error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); if (error != 0){ syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " "tree for pool %s", poolname); return (false); } parent_config = find_parent(config, nvroot, m_vdevGUID); if (parent_config != NULL) { char *parent_type; /* * Don't activate spares for members of a "replacing" vdev. * They're already dealt with. Sparing them will just drag out * the resilver process. */ error = nvlist_lookup_string(parent_config, ZPOOL_CONFIG_TYPE, &parent_type); if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0) return (false); } nspares = 0; nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares); if (nspares == 0) { /* The pool has no spares configured */ syslog(LOG_INFO, "CaseFile::ActivateSpare: " "No spares available for pool %s", poolname); return (false); } for (i = 0; i < nspares; i++) { uint64_t *nvlist_array; vdev_stat_t *vs; uint_t nstats; if (nvlist_lookup_uint64_array(spares[i], ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " "find vdev stats for pool %s, spare %d", poolname, i); return (false); } vs = reinterpret_cast(nvlist_array); if ((vs->vs_aux != VDEV_AUX_SPARED) && (vs->vs_state == VDEV_STATE_HEALTHY)) { /* We found a usable spare */ break; } } if (i == nspares) { /* No available spares were found */ return (false); } error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); if (error != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " "the path of pool %s, spare %d. Error %d", poolname, i, error); return (false); } error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); if (error != 0) { syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " "the vdev type of pool %s, spare %d. Error %d", poolname, i, error); return (false); } return (Replace(vdev_type, devPath, /*isspare*/true)); } void CaseFile::RegisterCallout(const Event &event) { timeval now, countdown, elapsed, timestamp, zero, remaining; gettimeofday(&now, 0); timestamp = event.GetTimestamp(); timersub(&now, ×tamp, &elapsed); timersub(&s_removeGracePeriod, &elapsed, &countdown); /* * If countdown is <= zero, Reset the timer to the * smallest positive time value instead */ timerclear(&zero); if (timercmp(&countdown, &zero, <=)) { timerclear(&countdown); countdown.tv_usec = 1; } remaining = m_tentativeTimer.TimeRemaining(); if (!m_tentativeTimer.IsPending() || timercmp(&countdown, &remaining, <)) m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); } bool CaseFile::CloseIfSolved() { if (m_events.empty() && m_tentativeEvents.empty()) { /* * We currently do not track or take actions on * devices in the degraded or faulted state. * Once we have support for spare pools, we'll * retain these cases so that any spares added in * the future can be applied to them. */ switch (VdevState()) { case VDEV_STATE_HEALTHY: /* No need to keep cases for healthy vdevs */ Close(); return (true); case VDEV_STATE_REMOVED: case VDEV_STATE_CANT_OPEN: /* * Keep open. We may solve it with a newly inserted * device. */ case VDEV_STATE_FAULTED: case VDEV_STATE_DEGRADED: /* * Keep open. We may solve it with the future * addition of a spare to the pool */ case VDEV_STATE_UNKNOWN: case VDEV_STATE_CLOSED: case VDEV_STATE_OFFLINE: /* * Keep open? This may not be the correct behavior, * but it's what we've always done */ ; } /* * Re-serialize the case in order to remove any * previous event data. */ Serialize(); } return (false); } void CaseFile::Log() { syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), PhysicalPath().c_str()); syslog(LOG_INFO, "\tVdev State = %s\n", zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); if (m_tentativeEvents.size() != 0) { syslog(LOG_INFO, "\t=== Tentative Events ===\n"); for (EventList::iterator event(m_tentativeEvents.begin()); event != m_tentativeEvents.end(); event++) (*event)->Log(LOG_INFO); } if (m_events.size() != 0) { syslog(LOG_INFO, "\t=== Events ===\n"); for (EventList::iterator event(m_events.begin()); event != m_events.end(); event++) (*event)->Log(LOG_INFO); } } //- CaseFile Static Protected Methods ------------------------------------------ void CaseFile::OnGracePeriodEnded(void *arg) { CaseFile &casefile(*static_cast(arg)); casefile.OnGracePeriodEnded(); } int CaseFile::DeSerializeSelector(const struct dirent *dirEntry) { uint64_t poolGUID; uint64_t vdevGUID; if (dirEntry->d_type == DT_REG && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", &poolGUID, &vdevGUID) == 2) return (1); return (0); } void CaseFile::DeSerializeFile(const char *fileName) { string fullName(s_caseFilePath + '/' + fileName); CaseFile *existingCaseFile(NULL); CaseFile *caseFile(NULL); try { uint64_t poolGUID; uint64_t vdevGUID; nvlist_t *vdevConf; if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", &poolGUID, &vdevGUID) != 2) { throw ZfsdException("CaseFile::DeSerialize: " "Unintelligible CaseFile filename %s.\n", fileName); } existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); if (existingCaseFile != NULL) { /* * If the vdev is already degraded or faulted, * there's no point in keeping the state around * that we use to put a drive into the degraded * state. However, if the vdev is simply missing, * preserve the case data in the hopes that it will * return. */ caseFile = existingCaseFile; vdev_state curState(caseFile->VdevState()); if (curState > VDEV_STATE_CANT_OPEN && curState < VDEV_STATE_HEALTHY) { unlink(fileName); return; } } else { ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (zpl.empty() || (vdevConf = VdevIterator(zpl.front()) .Find(vdevGUID)) == NULL) { /* * Either the pool no longer exists * or this vdev is no longer a member of * the pool. */ unlink(fullName.c_str()); return; } /* * Any vdev we find that does not have a case file * must be in the healthy state and thus worthy of * continued SERD data tracking. */ caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); } ifstream caseStream(fullName.c_str()); if (!caseStream) throw ZfsdException("CaseFile::DeSerialize: Unable to " "read %s.\n", fileName); caseFile->DeSerialize(caseStream); } catch (const ParseException &exp) { exp.Log(); if (caseFile != existingCaseFile) delete caseFile; /* * Since we can't parse the file, unlink it so we don't * trip over it again. */ unlink(fileName); } catch (const ZfsdException &zfsException) { zfsException.Log(); if (caseFile != existingCaseFile) delete caseFile; } } //- CaseFile Protected Methods ------------------------------------------------- CaseFile::CaseFile(const Vdev &vdev) : m_poolGUID(vdev.PoolGUID()), m_vdevGUID(vdev.GUID()), m_vdevState(vdev.State()), m_vdevPhysPath(vdev.PhysicalPath()) { stringstream guidString; guidString << m_vdevGUID; m_vdevGUIDString = guidString.str(); guidString.str(""); guidString << m_poolGUID; m_poolGUIDString = guidString.str(); s_activeCases.push_back(this); syslog(LOG_INFO, "Creating new CaseFile:\n"); Log(); } CaseFile::~CaseFile() { PurgeEvents(); PurgeTentativeEvents(); m_tentativeTimer.Stop(); s_activeCases.remove(this); } void CaseFile::PurgeEvents() { for (EventList::iterator event(m_events.begin()); event != m_events.end(); event++) delete *event; m_events.clear(); } void CaseFile::PurgeTentativeEvents() { for (EventList::iterator event(m_tentativeEvents.begin()); event != m_tentativeEvents.end(); event++) delete *event; m_tentativeEvents.clear(); } void CaseFile::SerializeEvList(const EventList events, int fd, const char* prefix) const { if (events.empty()) return; for (EventList::const_iterator curEvent = events.begin(); curEvent != events.end(); curEvent++) { const string &eventString((*curEvent)->GetEventString()); // TODO: replace many write(2) calls with a single writev(2) if (prefix) write(fd, prefix, strlen(prefix)); write(fd, eventString.c_str(), eventString.length()); } } void CaseFile::Serialize() { stringstream saveFile; saveFile << setfill('0') << s_caseFilePath << "/" << "pool_" << PoolGUIDString() << "_vdev_" << VdevGUIDString() << ".case"; if (m_events.empty() && m_tentativeEvents.empty()) { unlink(saveFile.str().c_str()); return; } int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); if (fd == -1) { syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", saveFile.str().c_str()); return; } SerializeEvList(m_events, fd); SerializeEvList(m_tentativeEvents, fd, "tentative "); close(fd); } /* * XXX: This method assumes that events may not contain embedded newlines. If * ever events can contain embedded newlines, then CaseFile must switch * serialization formats */ void CaseFile::DeSerialize(ifstream &caseStream) { string evString; const EventFactory &factory(ZfsDaemon::Get().GetFactory()); caseStream >> std::noskipws >> std::ws; while (caseStream.good()) { /* * Outline: * read the beginning of a line and check it for * "tentative". If found, discard "tentative". * Create a new event * continue */ EventList* destEvents; const string tentFlag("tentative "); string line; std::stringbuf lineBuf; caseStream.get(lineBuf); caseStream.ignore(); /*discard the newline character*/ line = lineBuf.str(); if (line.compare(0, tentFlag.size(), tentFlag) == 0) { /* Discard "tentative" */ line.erase(0, tentFlag.size()); destEvents = &m_tentativeEvents; } else { destEvents = &m_events; } Event *event(Event::CreateEvent(factory, line)); if (event != NULL) { destEvents->push_back(event); RegisterCallout(*event); } } } void CaseFile::Close() { /* * This case is no longer relevant. Clean up our * serialization file, and delete the case. */ syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); /* * Serialization of a Case with no event data, clears the * Serialization data for that event. */ PurgeEvents(); Serialize(); delete this; } void CaseFile::OnGracePeriodEnded() { bool should_fault, should_degrade; ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); m_events.splice(m_events.begin(), m_tentativeEvents); should_fault = ShouldFault(); should_degrade = ShouldDegrade(); if (should_fault || should_degrade) { if (zhp == NULL || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { /* * Either the pool no longer exists * or this vdev is no longer a member of * the pool. */ Close(); return; } } /* A fault condition has priority over a degrade condition */ if (ShouldFault()) { /* Fault the vdev and close the case. */ if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, VDEV_AUX_ERR_EXCEEDED) == 0) { syslog(LOG_INFO, "Faulting vdev(%s/%s)", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); return; } else { syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); } } else if (ShouldDegrade()) { /* Degrade the vdev and close the case. */ if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, VDEV_AUX_ERR_EXCEEDED) == 0) { syslog(LOG_INFO, "Degrading vdev(%s/%s)", PoolGUIDString().c_str(), VdevGUIDString().c_str()); Close(); return; } else { syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", PoolGUIDString().c_str(), VdevGUIDString().c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); } } Serialize(); } Vdev CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { Vdev vd(zhp, CaseVdev(zhp)); std::list children; std::list::iterator children_it; Vdev parent(vd.Parent()); Vdev replacing(NonexistentVdev); /* * To determine whether we are being replaced by another spare that * is still working, then make sure that it is currently spared and * that the spare is either resilvering or healthy. If any of these * conditions fail, then we are not being replaced by a spare. * * If the spare is healthy, then the case file should be closed very * soon after this check. */ if (parent.DoesNotExist() || parent.Name(zhp, /*verbose*/false) != "spare") return (NonexistentVdev); children = parent.Children(); children_it = children.begin(); for (;children_it != children.end(); children_it++) { Vdev child = *children_it; /* Skip our vdev. */ if (child.GUID() == VdevGUID()) continue; /* * Accept the first child that doesn't match our GUID, or * any resilvering/healthy device if one exists. */ if (replacing.DoesNotExist() || child.IsResilvering() || child.State() == VDEV_STATE_HEALTHY) replacing = child; } return (replacing); } bool CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { nvlist_t *nvroot, *newvd; const char *poolname; string oldstr(VdevGUIDString()); bool retval = true; /* Figure out what pool we're working on */ ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); if (zhp == NULL) { syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); return (false); } poolname = zpool_get_name(zhp); Vdev vd(zhp, CaseVdev(zhp)); Vdev replaced(BeingReplacedBy(zhp)); if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { /* If we are already being replaced by a working spare, pass. */ if (replaced.IsResilvering() || replaced.State() == VDEV_STATE_HEALTHY) { syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " "replaced", VdevGUIDString().c_str(), path); return (/*consumed*/false); } /* * If we have already been replaced by a spare, but that spare * is broken, we must spare the spare, not the original device. */ oldstr = replaced.GUIDString(); syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " "broken spare %s instead", VdevGUIDString().c_str(), path, oldstr.c_str()); } /* * Build a root vdev/leaf vdev configuration suitable for * zpool_vdev_attach. Only enough data for the kernel to find * the device (i.e. type and disk device node path) are needed. */ nvroot = NULL; newvd = NULL; if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " "configuration data.", poolname, oldstr.c_str()); if (nvroot != NULL) nvlist_free(nvroot); return (false); } if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd, 1) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " "configuration data.", poolname, oldstr.c_str()); nvlist_free(newvd); nvlist_free(nvroot); return (true); } /* Data was copied when added to the root vdev. */ nvlist_free(newvd); retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, /*replace*/B_TRUE) == 0); if (retval) syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", poolname, oldstr.c_str(), path); else syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), libzfs_error_description(g_zfsHandle)); nvlist_free(nvroot); return (retval); } /* Does the argument event refer to a checksum error? */ static bool IsChecksumEvent(const Event* const event) { return ("ereport.fs.zfs.checksum" == event->Value("type")); } /* Does the argument event refer to an IO error? */ static bool IsIOEvent(const Event* const event) { return ("ereport.fs.zfs.io" == event->Value("type")); } bool CaseFile::ShouldDegrade() const { return (std::count_if(m_events.begin(), m_events.end(), IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); } bool CaseFile::ShouldFault() const { return (std::count_if(m_events.begin(), m_events.end(), IsIOEvent) > ZFS_DEGRADE_IO_COUNT); } nvlist_t * CaseFile::CaseVdev(zpool_handle_t *zhp) const { return (VdevIterator(zhp).Find(VdevGUID())); } Index: stable/11/cddl/usr.sbin/zfsd/case_file.h =================================================================== --- stable/11/cddl/usr.sbin/zfsd/case_file.h (revision 330732) +++ stable/11/cddl/usr.sbin/zfsd/case_file.h (revision 330733) @@ -1,428 +1,433 @@ /*- * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * * $FreeBSD$ */ /** * \file case_file.h * * CaseFile objects aggregate vdev faults that may require ZFSD action * in order to maintain the health of a ZFS pool. * * Header requirements: * * #include * * #include "callout.h" * #include "zfsd_event.h" */ #ifndef _CASE_FILE_H_ #define _CASE_FILE_H_ /*=========================== Forward Declarations ===========================*/ class CaseFile; class Vdev; /*============================= Class Definitions ============================*/ /*------------------------------- CaseFileList -------------------------------*/ /** * CaseFileList is a specialization of the standard list STL container. */ typedef std::list< CaseFile *> CaseFileList; /*--------------------------------- CaseFile ---------------------------------*/ /** * A CaseFile object is instantiated anytime a vdev for an active pool * experiences an I/O error, is faulted by ZFS, or is determined to be * missing/removed. * * A vdev may have at most one CaseFile. * * CaseFiles are retired when a vdev leaves an active pool configuration * or an action is taken to resolve the issues recorded in the CaseFile. * * Logging a case against a vdev does not imply that an immediate action * to resolve a fault is required or even desired. For example, a CaseFile * must accumulate a number of I/O errors in order to flag a device as * degraded. * * Vdev I/O errors are not recorded in ZFS label inforamation. For this * reasons, CaseFile%%s with accumulated I/O error events are serialized * to the file system so that they survive across boots. Currently all * other fault types can be reconstructed from ZFS label information, so * CaseFile%%s for missing, faulted, or degradded members are just recreated * at ZFSD startup instead of being deserialized from the file system. */ class CaseFile { public: /** * \brief Find a CaseFile object by a vdev's pool/vdev GUID tuple. * * \param poolGUID Pool GUID for the vdev of the CaseFile to find. * If InvalidGuid, then only match the vdev GUID * instead of both pool and vdev GUIDs. * \param vdevGUID Vdev GUID for the vdev of the CaseFile to find. * * \return If found, a pointer to a valid CaseFile object. * Otherwise NULL. */ static CaseFile *Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID); /** * \brief Find a CaseFile object by a vdev's current/last known * physical path. * * \param physPath Physical path of the vdev of the CaseFile to find. * * \return If found, a pointer to a valid CaseFile object. * Otherwise NULL. */ static CaseFile *Find(const string &physPath); /** * \brief ReEvaluate all open cases whose pool guid matches the argument * * \param poolGUID Only reevaluate cases for this pool * \param event Try to consume this event with the casefile */ static void ReEvaluateByGuid(DevdCtl::Guid poolGUID, const ZfsEvent &event); /** * \brief Create or return an existing active CaseFile for the * specified vdev. * * \param vdev The vdev object for which to find/create a CaseFile. * * \return A reference to a valid CaseFile object. */ static CaseFile &Create(Vdev &vdev); /** * \brief Deserialize all serialized CaseFile objects found in * the file system. */ static void DeSerialize(); /** + * \brief returns true if there are no CaseFiles + */ + static bool Empty(); + + /** * \brief Emit syslog data on all active CaseFile%%s in the system. */ static void LogAll(); /** * \brief Destroy the in-core cache of CaseFile data. * * This routine does not disturb the on disk, serialized, CaseFile * data. */ static void PurgeAll(); DevdCtl::Guid PoolGUID() const; DevdCtl::Guid VdevGUID() const; vdev_state VdevState() const; const string &PoolGUIDString() const; const string &VdevGUIDString() const; const string &PhysicalPath() const; /** * \brief Attempt to resolve this CaseFile using the disk * resource at the given device/physical path/vdev object * tuple. * * \param devPath The devfs path for the disk resource. * \param physPath The physical path information reported by * the disk resource. * \param vdev If the disk contains ZFS label information, * a pointer to the disk label's vdev object * data. Otherwise NULL. * * \return True if this event was consumed by this CaseFile. */ bool ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev); /** * \brief Update this CaseFile in light of the provided ZfsEvent. * * Must be virtual so it can be overridden in the unit tests * * \param event The ZfsEvent to evaluate. * * \return True if this event was consumed by this CaseFile. */ virtual bool ReEvaluate(const ZfsEvent &event); /** * \brief Register an itimer callout for the given event, if necessary */ virtual void RegisterCallout(const DevdCtl::Event &event); /** * \brief Close a case if it is no longer relevant. * * This method deals with cases tracking soft errors. Soft errors * will be discarded should a remove event occur within a short period * of the soft errors being reported. We also discard the events * if the vdev is marked degraded or failed. * * \return True if the case is closed. False otherwise. */ bool CloseIfSolved(); /** * \brief Emit data about this CaseFile via syslog(3). */ void Log(); /** * \brief Whether we should degrade this vdev */ bool ShouldDegrade() const; /** * \brief Whether we should fault this vdev */ bool ShouldFault() const; protected: enum { /** * The number of soft errors on a vdev required * to transition a vdev from healthy to degraded * status. */ ZFS_DEGRADE_IO_COUNT = 50 }; static CalloutFunc_t OnGracePeriodEnded; /** * \brief scandir(3) filter function used to find files containing * serialized CaseFile data. * * \param dirEntry Directory entry for the file to filter. * * \return Non-zero for a file to include in the selection, * otherwise 0. */ static int DeSerializeSelector(const struct dirent *dirEntry); /** * \brief Given the name of a file containing serialized events from a * CaseFile object, create/update an in-core CaseFile object * representing the serialized data. * * \param fileName The name of a file containing serialized events * from a CaseFile object. */ static void DeSerializeFile(const char *fileName); /** Constructor. */ CaseFile(const Vdev &vdev); /** * Destructor. * Must be virtual so it can be subclassed in the unit tests */ virtual ~CaseFile(); /** * \brief Reload state for the vdev associated with this CaseFile. * * \return True if the refresh was successful. False if the system * has no record of the pool or vdev for this CaseFile. */ virtual bool RefreshVdevState(); /** * \brief Free all events in the m_events list. */ void PurgeEvents(); /** * \brief Free all events in the m_tentativeEvents list. */ void PurgeTentativeEvents(); /** * \brief Commit to file system storage. */ void Serialize(); /** * \brief Retrieve event data from a serialization stream. * * \param caseStream The serializtion stream to parse. */ void DeSerialize(std::ifstream &caseStream); /** * \brief Serializes the supplied event list and writes it to fd * * \param prefix If not NULL, this prefix will be prepended to * every event in the file. */ void SerializeEvList(const DevdCtl::EventList events, int fd, const char* prefix=NULL) const; /** * \brief Unconditionally close a CaseFile. */ virtual void Close(); /** * \brief Callout callback invoked when the remove timer grace * period expires. * * If no remove events are received prior to the grace period * firing, then any tentative events are promoted and counted * against the health of the vdev. */ void OnGracePeriodEnded(); /** * \brief Attempt to activate a spare on this case's pool. * * Call this whenever a pool becomes degraded. It will look for any * spare devices and activate one to replace the casefile's vdev. It * will _not_ close the casefile; that should only happen when the * missing drive is replaced or the user promotes the spare. * * \return True if a spare was activated */ bool ActivateSpare(); /** * \brief replace a pool's vdev with another * * \param vdev_type The type of the new vdev. Usually either * VDEV_TYPE_DISK or VDEV_TYPE_FILE * \param path The file system path to the new vdev * \param isspare Whether the new vdev is a spare * * \return true iff the replacement was successful */ bool Replace(const char* vdev_type, const char* path, bool isspare); /** * \brief Which vdev, if any, is replacing ours. * * \param zhp Pool handle state from the caller context * * \return the vdev that is currently replacing ours, * or NonexistentVdev if there isn't one. */ Vdev BeingReplacedBy(zpool_handle_t *zhp); /** * \brief All CaseFiles being tracked by ZFSD. */ static CaseFileList s_activeCases; /** * \brief The file system path to serialized CaseFile data. */ static const string s_caseFilePath; /** * \brief The time ZFSD waits before promoting a tentative event * into a permanent event. */ static const timeval s_removeGracePeriod; /** * \brief A list of soft error events counted against the health of * a vdev. */ DevdCtl::EventList m_events; /** * \brief A list of soft error events waiting for a grace period * expiration before being counted against the health of * a vdev. */ DevdCtl::EventList m_tentativeEvents; DevdCtl::Guid m_poolGUID; DevdCtl::Guid m_vdevGUID; vdev_state m_vdevState; string m_poolGUIDString; string m_vdevGUIDString; string m_vdevPhysPath; /** * \brief Callout activated when a grace period */ Callout m_tentativeTimer; private: nvlist_t *CaseVdev(zpool_handle_t *zhp) const; }; inline DevdCtl::Guid CaseFile::PoolGUID() const { return (m_poolGUID); } inline DevdCtl::Guid CaseFile::VdevGUID() const { return (m_vdevGUID); } inline vdev_state CaseFile::VdevState() const { return (m_vdevState); } inline const string & CaseFile::PoolGUIDString() const { return (m_poolGUIDString); } inline const string & CaseFile::VdevGUIDString() const { return (m_vdevGUIDString); } inline const string & CaseFile::PhysicalPath() const { return (m_vdevPhysPath); } #endif /* _CASE_FILE_H_ */ Index: stable/11/cddl/usr.sbin/zfsd/zfsd.cc =================================================================== --- stable/11/cddl/usr.sbin/zfsd/zfsd.cc (revision 330732) +++ stable/11/cddl/usr.sbin/zfsd/zfsd.cc (revision 330733) @@ -1,448 +1,447 @@ /*- * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file zfsd.cc * * The ZFS daemon consumes kernel devdctl(4) event data via devd(8)'s * unix domain socket in order to react to system changes that impact * the function of ZFS storage pools. The goal of this daemon is to * provide similar functionality to the Solaris ZFS Diagnostic Engine * (zfs-diagnosis), the Solaris ZFS fault handler (zfs-retire), and * the Solaris ZFS vdev insertion agent (zfs-mod sysevent handler). */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "callout.h" #include "vdev_iterator.h" #include "zfsd_event.h" #include "case_file.h" #include "vdev.h" #include "vdev_iterator.h" #include "zfsd.h" #include "zfsd_exception.h" #include "zpool_list.h" __FBSDID("$FreeBSD$"); /*================================== Macros ==================================*/ #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) /*============================ Namespace Control =============================*/ using DevdCtl::Event; using DevdCtl::EventFactory; using DevdCtl::EventList; /*================================ Global Data ===============================*/ int g_debug = 0; libzfs_handle_t *g_zfsHandle; /*--------------------------------- ZfsDaemon --------------------------------*/ //- ZfsDaemon Static Private Data ---------------------------------------------- ZfsDaemon *ZfsDaemon::s_theZfsDaemon; bool ZfsDaemon::s_logCaseFiles; bool ZfsDaemon::s_terminateEventLoop; char ZfsDaemon::s_pidFilePath[] = "/var/run/zfsd.pid"; pidfh *ZfsDaemon::s_pidFH; int ZfsDaemon::s_signalPipeFD[2]; bool ZfsDaemon::s_systemRescanRequested(false); EventFactory::Record ZfsDaemon::s_registryEntries[] = { - { Event::NOTIFY, "DEVFS", &DevfsEvent::Builder }, { Event::NOTIFY, "GEOM", &GeomEvent::Builder }, { Event::NOTIFY, "ZFS", &ZfsEvent::Builder } }; //- ZfsDaemon Static Public Methods -------------------------------------------- ZfsDaemon & ZfsDaemon::Get() { return (*s_theZfsDaemon); } void ZfsDaemon::WakeEventLoop() { write(s_signalPipeFD[1], "+", 1); } void ZfsDaemon::RequestSystemRescan() { s_systemRescanRequested = true; ZfsDaemon::WakeEventLoop(); } void ZfsDaemon::Run() { ZfsDaemon daemon; while (s_terminateEventLoop == false) { try { daemon.DisconnectFromDevd(); if (daemon.ConnectToDevd() == false) { sleep(30); continue; } daemon.DetectMissedEvents(); daemon.EventLoop(); } catch (const DevdCtl::Exception &exp) { exp.Log(); } } daemon.DisconnectFromDevd(); } //- ZfsDaemon Private Methods -------------------------------------------------- ZfsDaemon::ZfsDaemon() : Consumer(/*defBuilder*/NULL, s_registryEntries, NUM_ELEMENTS(s_registryEntries)) { if (s_theZfsDaemon != NULL) errx(1, "Multiple ZfsDaemon instances created. Exiting"); s_theZfsDaemon = this; if (pipe(s_signalPipeFD) != 0) errx(1, "Unable to allocate signal pipe. Exiting"); if (fcntl(s_signalPipeFD[0], F_SETFL, O_NONBLOCK) == -1) errx(1, "Unable to set pipe as non-blocking. Exiting"); if (fcntl(s_signalPipeFD[1], F_SETFL, O_NONBLOCK) == -1) errx(1, "Unable to set pipe as non-blocking. Exiting"); signal(SIGHUP, ZfsDaemon::RescanSignalHandler); signal(SIGINFO, ZfsDaemon::InfoSignalHandler); signal(SIGINT, ZfsDaemon::QuitSignalHandler); signal(SIGTERM, ZfsDaemon::QuitSignalHandler); signal(SIGUSR1, ZfsDaemon::RescanSignalHandler); g_zfsHandle = libzfs_init(); if (g_zfsHandle == NULL) errx(1, "Unable to initialize ZFS library. Exiting"); Callout::Init(); InitializeSyslog(); OpenPIDFile(); if (g_debug == 0) daemon(0, 0); UpdatePIDFile(); } ZfsDaemon::~ZfsDaemon() { PurgeCaseFiles(); ClosePIDFile(); } void ZfsDaemon::PurgeCaseFiles() { CaseFile::PurgeAll(); } bool ZfsDaemon::VdevAddCaseFile(Vdev &vdev, void *cbArg) { if (vdev.State() != VDEV_STATE_HEALTHY) CaseFile::Create(vdev); return (/*break early*/false); } void ZfsDaemon::BuildCaseFiles() { ZpoolList zpl; ZpoolList::iterator pool; /* Add CaseFiles for vdevs with issues. */ for (pool = zpl.begin(); pool != zpl.end(); pool++) VdevIterator(*pool).Each(VdevAddCaseFile, NULL); /* De-serialize any saved cases. */ CaseFile::DeSerialize(); /* Simulate config_sync events to force CaseFile reevaluation */ for (pool = zpl.begin(); pool != zpl.end(); pool++) { char evString[160]; Event *event; nvlist_t *config; uint64_t poolGUID; const char *poolname; poolname = zpool_get_name(*pool); config = zpool_get_config(*pool, NULL); if (config == NULL) { syslog(LOG_ERR, "ZFSDaemon::BuildCaseFiles: Could not " "find pool config for pool %s", poolname); continue; } if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &poolGUID) != 0) { syslog(LOG_ERR, "ZFSDaemon::BuildCaseFiles: Could not " "find pool guid for pool %s", poolname); continue; } snprintf(evString, 160, "!system=ZFS subsystem=ZFS " "type=misc.fs.zfs.config_sync sub_type=synthesized " "pool_name=%s pool_guid=%" PRIu64 "\n", poolname, poolGUID); event = Event::CreateEvent(GetFactory(), string(evString)); if (event != NULL) { event->Process(); delete event; } } } void ZfsDaemon::RescanSystem() { struct gmesh mesh; struct gclass *mp; struct ggeom *gp; struct gprovider *pp; int result; /* * The devdctl system doesn't replay events for new consumers * of the interface. Emit manufactured DEVFS arrival events * for any devices that already before we started or during * periods where we've lost our connection to devd. */ result = geom_gettree(&mesh); if (result != 0) { syslog(LOG_ERR, "ZfsDaemon::RescanSystem: " "geom_gettree faild with error %d\n", result); return; } const string evStart("!system=DEVFS subsystem=CDEV type=CREATE " "sub_type=synthesized cdev="); LIST_FOREACH(mp, &mesh.lg_class, lg_class) { LIST_FOREACH(gp, &mp->lg_geom, lg_geom) { LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { Event *event; string evString(evStart + pp->lg_name + "\n"); event = Event::CreateEvent(GetFactory(), evString); if (event != NULL) { if (event->Process()) SaveEvent(*event); delete event; } } } } geom_deletetree(&mesh); } void ZfsDaemon::DetectMissedEvents() { do { PurgeCaseFiles(); /* * Discard any events waiting for us. We don't know * if they still apply to the current state of the * system. */ FlushEvents(); BuildCaseFiles(); /* * If the system state has changed during our * interrogation, start over. */ } while (s_terminateEventLoop == false && EventsPending()); RescanSystem(); } void ZfsDaemon::EventLoop() { while (s_terminateEventLoop == false) { struct pollfd fds[2]; int result; if (s_logCaseFiles == true) { EventList::iterator event(m_unconsumedEvents.begin()); s_logCaseFiles = false; CaseFile::LogAll(); while (event != m_unconsumedEvents.end()) (*event++)->Log(LOG_INFO); } Callout::ExpireCallouts(); /* Wait for data. */ fds[0].fd = m_devdSockFD; fds[0].events = POLLIN; fds[0].revents = 0; fds[1].fd = s_signalPipeFD[0]; fds[1].events = POLLIN; fds[1].revents = 0; result = poll(fds, NUM_ELEMENTS(fds), /*timeout*/INFTIM); if (result == -1) { if (errno == EINTR) continue; else err(1, "Polling for devd events failed"); } else if (result == 0) { errx(1, "Unexpected result of 0 from poll. Exiting"); } if ((fds[0].revents & POLLIN) != 0) ProcessEvents(); if ((fds[1].revents & POLLIN) != 0) { static char discardBuf[128]; /* * This pipe exists just to close the signal * race. Its contents are of no interest to * us, but we must ensure that future signals * have space in the pipe to write. */ while (read(s_signalPipeFD[0], discardBuf, sizeof(discardBuf)) > 0) ; } if (s_systemRescanRequested == true) { s_systemRescanRequested = false; syslog(LOG_INFO, "System Rescan request processed."); RescanSystem(); } if ((fds[0].revents & POLLERR) != 0) { syslog(LOG_INFO, "POLLERROR detected on devd socket."); break; } if ((fds[0].revents & POLLHUP) != 0) { syslog(LOG_INFO, "POLLHUP detected on devd socket."); break; } } } //- ZfsDaemon staic Private Methods -------------------------------------------- void ZfsDaemon::InfoSignalHandler(int) { s_logCaseFiles = true; ZfsDaemon::WakeEventLoop(); } void ZfsDaemon::RescanSignalHandler(int) { RequestSystemRescan(); } void ZfsDaemon::QuitSignalHandler(int) { s_terminateEventLoop = true; ZfsDaemon::WakeEventLoop(); } void ZfsDaemon::OpenPIDFile() { pid_t otherPID; s_pidFH = pidfile_open(s_pidFilePath, 0600, &otherPID); if (s_pidFH == NULL) { if (errno == EEXIST) errx(1, "already running as PID %d. Exiting", otherPID); warn("cannot open PID file"); } } void ZfsDaemon::UpdatePIDFile() { if (s_pidFH != NULL) pidfile_write(s_pidFH); } void ZfsDaemon::ClosePIDFile() { if (s_pidFH != NULL) pidfile_remove(s_pidFH); } void ZfsDaemon::InitializeSyslog() { openlog("zfsd", LOG_NDELAY, LOG_DAEMON); } Index: stable/11/cddl/usr.sbin/zfsd/zfsd_event.cc =================================================================== --- stable/11/cddl/usr.sbin/zfsd/zfsd_event.cc (revision 330732) +++ stable/11/cddl/usr.sbin/zfsd/zfsd_event.cc (revision 330733) @@ -1,548 +1,481 @@ /*- * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file zfsd_event.cc */ #include #include #include #include #include #include /* * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with * C++ flush methods */ #undef flush #include #include #include #include #include #include #include #include #include #include "callout.h" #include "vdev_iterator.h" #include "zfsd_event.h" #include "case_file.h" #include "vdev.h" #include "zfsd.h" #include "zfsd_exception.h" #include "zpool_list.h" __FBSDID("$FreeBSD$"); /*============================ Namespace Control =============================*/ using DevdCtl::Event; using DevdCtl::Guid; using DevdCtl::NVPairMap; using std::stringstream; /*=========================== Class Implementations ==========================*/ -/*-------------------------------- DevfsEvent --------------------------------*/ +/*-------------------------------- GeomEvent --------------------------------*/ -//- DevfsEvent Static Public Methods ------------------------------------------- +//- GeomEvent Static Public Methods ------------------------------------------- Event * -DevfsEvent::Builder(Event::Type type, - NVPairMap &nvPairs, - const string &eventString) +GeomEvent::Builder(Event::Type type, + NVPairMap &nvPairs, + const string &eventString) { - return (new DevfsEvent(type, nvPairs, eventString)); + return (new GeomEvent(type, nvPairs, eventString)); } -//- DevfsEvent Static Protected Methods ---------------------------------------- -nvlist_t * -DevfsEvent::ReadLabel(int devFd, bool &inUse, bool °raded) -{ - pool_state_t poolState; - char *poolName; - boolean_t b_inuse; - int nlabels; - - inUse = false; - degraded = false; - poolName = NULL; - if (zpool_in_use(g_zfsHandle, devFd, &poolState, - &poolName, &b_inuse) == 0) { - nvlist_t *devLabel = NULL; - - inUse = b_inuse == B_TRUE; - if (poolName != NULL) - free(poolName); - - nlabels = zpool_read_all_labels(devFd, &devLabel); - /* - * If we find a disk with fewer than the maximum number of - * labels, it might be the whole disk of a partitioned disk - * where ZFS resides on a partition. In that case, we should do - * nothing and wait for the partition to appear. Or, the disk - * might be damaged. In that case, zfsd should do nothing and - * wait for the sysadmin to decide. - */ - if (nlabels != VDEV_LABELS || devLabel == NULL) { - nvlist_free(devLabel); - return (NULL); - } - - try { - Vdev vdev(devLabel); - degraded = vdev.State() != VDEV_STATE_HEALTHY; - return (devLabel); - } catch (ZfsdException &exp) { - string devName = fdevname(devFd); - string devPath = _PATH_DEV + devName; - string context("DevfsEvent::ReadLabel: " - + devPath + ": "); - - exp.GetString().insert(0, context); - exp.Log(); - nvlist_free(devLabel); - } - } - return (NULL); -} - -bool -DevfsEvent::OnlineByLabel(const string &devPath, const string& physPath, - nvlist_t *devConfig) -{ - try { - /* - * A device with ZFS label information has been - * inserted. If it matches a device for which we - * have a case, see if we can solve that case. - */ - syslog(LOG_INFO, "Interrogating VDEV label for %s\n", - devPath.c_str()); - Vdev vdev(devConfig); - CaseFile *caseFile(CaseFile::Find(vdev.PoolGUID(), - vdev.GUID())); - if (caseFile != NULL) - return (caseFile->ReEvaluate(devPath, physPath, &vdev)); - - } catch (ZfsdException &exp) { - string context("DevfsEvent::OnlineByLabel: " + devPath + ": "); - - exp.GetString().insert(0, context); - exp.Log(); - } - return (false); -} - -//- DevfsEvent Virtual Public Methods ------------------------------------------ +//- GeomEvent Virtual Public Methods ------------------------------------------ Event * -DevfsEvent::DeepCopy() const +GeomEvent::DeepCopy() const { - return (new DevfsEvent(*this)); + return (new GeomEvent(*this)); } - + bool -DevfsEvent::Process() const +GeomEvent::Process() const { /* - * We are only concerned with newly discovered - * devices that can be ZFS vdevs. + * We only use GEOM events to repair damaged pools. So return early if + * there are no damaged pools */ - if (Value("type") != "CREATE" || !IsDiskDev()) + if (CaseFile::Empty()) return (false); + /* + * We are only concerned with arrivals and physical path changes, + * because those can be used to satisfy online and autoreplace + * operations + */ + if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE") + return (false); + /* Log the event since it is of interest. */ Log(LOG_INFO); string devPath; if (!DevPath(devPath)) return (false); int devFd(open(devPath.c_str(), O_RDONLY)); if (devFd == -1) return (false); bool inUse; bool degraded; nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded)); string physPath; - bool havePhysPath(PhysicalPath(physPath)); + bool havePhysPath(PhysicalPath(physPath)); string devName; DevName(devName); close(devFd); if (inUse && devLabel != NULL) { OnlineByLabel(devPath, physPath, devLabel); } else if (degraded) { syslog(LOG_INFO, "%s is marked degraded. Ignoring " "as a replace by physical path candidate.\n", devName.c_str()); - } else if (havePhysPath && IsWholeDev()) { - /* + } else if (havePhysPath) { + /* * TODO: attempt to resolve events using every casefile * that matches this physpath */ CaseFile *caseFile(CaseFile::Find(physPath)); if (caseFile != NULL) { syslog(LOG_INFO, "Found CaseFile(%s:%s:%s) - ReEvaluating\n", caseFile->PoolGUIDString().c_str(), caseFile->VdevGUIDString().c_str(), zpool_state_to_name(caseFile->VdevState(), VDEV_AUX_NONE)); caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL); } } - if (devLabel != NULL) - nvlist_free(devLabel); return (false); } -//- DevfsEvent Protected Methods ----------------------------------------------- -DevfsEvent::DevfsEvent(Event::Type type, NVPairMap &nvpairs, +//- GeomEvent Protected Methods ----------------------------------------------- +GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) - : DevdCtl::DevfsEvent(type, nvpairs, eventString) + : DevdCtl::GeomEvent(type, nvpairs, eventString) { } -DevfsEvent::DevfsEvent(const DevfsEvent &src) - : DevdCtl::DevfsEvent::DevfsEvent(src) +GeomEvent::GeomEvent(const GeomEvent &src) + : DevdCtl::GeomEvent::GeomEvent(src) { } -/*-------------------------------- GeomEvent --------------------------------*/ - -//- GeomEvent Static Public Methods ------------------------------------------- -Event * -GeomEvent::Builder(Event::Type type, - NVPairMap &nvPairs, - const string &eventString) +nvlist_t * +GeomEvent::ReadLabel(int devFd, bool &inUse, bool °raded) { - return (new GeomEvent(type, nvPairs, eventString)); -} + pool_state_t poolState; + char *poolName; + boolean_t b_inuse; + int nlabels; -//- GeomEvent Virtual Public Methods ------------------------------------------ -Event * -GeomEvent::DeepCopy() const -{ - return (new GeomEvent(*this)); -} - -bool -GeomEvent::Process() const -{ - /* - * We are only concerned with physical path changes, because those can - * be used to satisfy autoreplace operations - */ - if (Value("type") != "GEOM::physpath" || !IsDiskDev()) - return (false); + inUse = false; + degraded = false; + poolName = NULL; + if (zpool_in_use(g_zfsHandle, devFd, &poolState, + &poolName, &b_inuse) == 0) { + nvlist_t *devLabel = NULL; - /* Log the event since it is of interest. */ - Log(LOG_INFO); + inUse = b_inuse == B_TRUE; + if (poolName != NULL) + free(poolName); - string devPath; - if (!DevPath(devPath)) - return (false); + nlabels = zpool_read_all_labels(devFd, &devLabel); + /* + * If we find a disk with fewer than the maximum number of + * labels, it might be the whole disk of a partitioned disk + * where ZFS resides on a partition. In that case, we should do + * nothing and wait for the partition to appear. Or, the disk + * might be damaged. In that case, zfsd should do nothing and + * wait for the sysadmin to decide. + */ + if (nlabels != VDEV_LABELS || devLabel == NULL) { + nvlist_free(devLabel); + return (NULL); + } - string physPath; - bool havePhysPath(PhysicalPath(physPath)); + try { + Vdev vdev(devLabel); + degraded = vdev.State() != VDEV_STATE_HEALTHY; + return (devLabel); + } catch (ZfsdException &exp) { + string devName = fdevname(devFd); + string devPath = _PATH_DEV + devName; + string context("GeomEvent::ReadLabel: " + + devPath + ": "); - string devName; - DevName(devName); - - if (havePhysPath) { - /* - * TODO: attempt to resolve events using every casefile - * that matches this physpath - */ - CaseFile *caseFile(CaseFile::Find(physPath)); - if (caseFile != NULL) { - syslog(LOG_INFO, - "Found CaseFile(%s:%s:%s) - ReEvaluating\n", - caseFile->PoolGUIDString().c_str(), - caseFile->VdevGUIDString().c_str(), - zpool_state_to_name(caseFile->VdevState(), - VDEV_AUX_NONE)); - caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL); + exp.GetString().insert(0, context); + exp.Log(); + nvlist_free(devLabel); } } - return (false); + return (NULL); } -//- GeomEvent Protected Methods ----------------------------------------------- -GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs, - const string &eventString) - : DevdCtl::GeomEvent(type, nvpairs, eventString) +bool +GeomEvent::OnlineByLabel(const string &devPath, const string& physPath, + nvlist_t *devConfig) { -} + try { + /* + * A device with ZFS label information has been + * inserted. If it matches a device for which we + * have a case, see if we can solve that case. + */ + syslog(LOG_INFO, "Interrogating VDEV label for %s\n", + devPath.c_str()); + Vdev vdev(devConfig); + CaseFile *caseFile(CaseFile::Find(vdev.PoolGUID(), + vdev.GUID())); + if (caseFile != NULL) + return (caseFile->ReEvaluate(devPath, physPath, &vdev)); -GeomEvent::GeomEvent(const GeomEvent &src) - : DevdCtl::GeomEvent::GeomEvent(src) -{ + } catch (ZfsdException &exp) { + string context("GeomEvent::OnlineByLabel: " + devPath + ": "); + + exp.GetString().insert(0, context); + exp.Log(); + } + return (false); } /*--------------------------------- ZfsEvent ---------------------------------*/ //- ZfsEvent Static Public Methods --------------------------------------------- DevdCtl::Event * ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new ZfsEvent(type, nvpairs, eventString)); } //- ZfsEvent Virtual Public Methods -------------------------------------------- Event * ZfsEvent::DeepCopy() const { return (new ZfsEvent(*this)); } bool ZfsEvent::Process() const { string logstr(""); if (!Contains("class") && !Contains("type")) { syslog(LOG_ERR, "ZfsEvent::Process: Missing class or type data."); return (false); } /* On config syncs, replay any queued events first. */ if (Value("type").find("misc.fs.zfs.config_sync") == 0) { /* * Even if saved events are unconsumed the second time * around, drop them. Any events that still can't be * consumed are probably referring to vdevs or pools that * no longer exist. */ ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true); CaseFile::ReEvaluateByGuid(PoolGUID(), *this); } if (Value("type").find("misc.fs.zfs.") == 0) { /* Configuration changes, resilver events, etc. */ ProcessPoolEvent(); return (false); } if (!Contains("pool_guid") || !Contains("vdev_guid")) { /* Only currently interested in Vdev related events. */ return (false); } CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); if (caseFile != NULL) { Log(LOG_INFO); syslog(LOG_INFO, "Evaluating existing case file\n"); caseFile->ReEvaluate(*this); return (false); } /* Skip events that can't be handled. */ Guid poolGUID(PoolGUID()); /* If there are no replicas for a pool, then it's not manageable. */ if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) { stringstream msg; msg << "No replicas available for pool " << poolGUID; msg << ", ignoring"; Log(LOG_INFO); syslog(LOG_INFO, "%s", msg.str().c_str()); return (false); } /* * Create a case file for this vdev, and have it * evaluate the event. */ ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (zpl.empty()) { stringstream msg; int priority = LOG_INFO; msg << "ZfsEvent::Process: Event for unknown pool "; msg << poolGUID << " "; msg << "queued"; Log(LOG_INFO); syslog(priority, "%s", msg.str().c_str()); return (true); } nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID()); if (vdevConfig == NULL) { stringstream msg; int priority = LOG_INFO; msg << "ZfsEvent::Process: Event for unknown vdev "; msg << VdevGUID() << " "; msg << "queued"; Log(LOG_INFO); syslog(priority, "%s", msg.str().c_str()); return (true); } Vdev vdev(zpl.front(), vdevConfig); caseFile = &CaseFile::Create(vdev); if (caseFile->ReEvaluate(*this) == false) { stringstream msg; int priority = LOG_INFO; msg << "ZfsEvent::Process: Unconsumed event for vdev("; msg << zpool_get_name(zpl.front()) << ","; msg << vdev.GUID() << ") "; msg << "queued"; Log(LOG_INFO); syslog(priority, "%s", msg.str().c_str()); return (true); } return (false); } //- ZfsEvent Protected Methods ------------------------------------------------- ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : DevdCtl::ZfsEvent(type, nvpairs, eventString) { } ZfsEvent::ZfsEvent(const ZfsEvent &src) : DevdCtl::ZfsEvent(src) { } /* * Sometimes the kernel won't detach a spare when it is no longer needed. This * can happen for example if a drive is removed, then either the pool is * exported or the machine is powered off, then the drive is reinserted, then * the machine is powered on or the pool is imported. ZFSD must detach these * spares itself. */ void ZfsEvent::CleanupSpares() const { Guid poolGUID(PoolGUID()); ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); if (!zpl.empty()) { zpool_handle_t* hdl; hdl = zpl.front(); VdevIterator(hdl).Each(TryDetach, (void*)hdl); } } void ZfsEvent::ProcessPoolEvent() const { bool degradedDevice(false); /* The pool is destroyed. Discard any open cases */ if (Value("type") == "misc.fs.zfs.pool_destroy") { Log(LOG_INFO); CaseFile::ReEvaluateByGuid(PoolGUID(), *this); return; } CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); if (caseFile != NULL) { if (caseFile->VdevState() != VDEV_STATE_UNKNOWN && caseFile->VdevState() < VDEV_STATE_HEALTHY) degradedDevice = true; Log(LOG_INFO); caseFile->ReEvaluate(*this); } else if (Value("type") == "misc.fs.zfs.resilver_finish") { /* * It's possible to get a resilver_finish event with no * corresponding casefile. For example, if a damaged pool were * exported, repaired, then reimported. */ Log(LOG_INFO); CleanupSpares(); } if (Value("type") == "misc.fs.zfs.vdev_remove" && degradedDevice == false) { /* See if any other cases can make use of this device. */ Log(LOG_INFO); ZfsDaemon::RequestSystemRescan(); } } bool ZfsEvent::TryDetach(Vdev &vdev, void *cbArg) { /* * Outline: * if this device is a spare, and its parent includes one healthy, * non-spare child, then detach this device. */ zpool_handle_t *hdl(static_cast(cbArg)); if (vdev.IsSpare()) { std::list siblings; std::list::iterator siblings_it; boolean_t cleanup = B_FALSE; Vdev parent = vdev.Parent(); siblings = parent.Children(); /* Determine whether the parent should be cleaned up */ for (siblings_it = siblings.begin(); siblings_it != siblings.end(); siblings_it++) { Vdev sibling = *siblings_it; if (!sibling.IsSpare() && sibling.State() == VDEV_STATE_HEALTHY) { cleanup = B_TRUE; break; } } if (cleanup) { syslog(LOG_INFO, "Detaching spare vdev %s from pool %s", vdev.Path().c_str(), zpool_get_name(hdl)); zpool_vdev_detach(hdl, vdev.Path().c_str()); } } /* Always return false, because there may be other spares to detach */ return (false); } Index: stable/11/cddl/usr.sbin/zfsd/zfsd_event.h =================================================================== --- stable/11/cddl/usr.sbin/zfsd/zfsd_event.h (revision 330732) +++ stable/11/cddl/usr.sbin/zfsd/zfsd_event.h (revision 330733) @@ -1,168 +1,144 @@ /*- * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * * $FreeBSD$ */ /** * \file dev_ctl_event.h * * \brief Class hierarchy used to express events received via * the devdctl API. * * Header requirements: * #include * #include * #include * * #include * #include */ #ifndef _ZFSD_EVENT_H_ #define _ZFSD_EVENT_H_ /*============================ Namespace Control =============================*/ using std::string; /*=========================== Forward Declarations ===========================*/ struct zpool_handle; typedef struct zpool_handle zpool_handle_t; struct nvlist; typedef struct nvlist nvlist_t; -/*============================= Class Definitions ============================*/ -/*-------------------------------- DevfsEvent --------------------------------*/ -class DevfsEvent : public DevdCtl::DevfsEvent -{ -public: - /** Specialized DevdCtlEvent object factory for Devfs events. */ - static BuildMethod Builder; - - virtual DevdCtl::Event *DeepCopy() const; - - /** - * Interpret and perform any actions necessary to - * consume the event. - * \return True if this event should be queued for later reevaluation - */ - virtual bool Process() const; - -protected: - /** - * \brief Read and return label information for a device. - * - * \param devFd The device from which to read ZFS label information. - * \param inUse The device is part of an active or potentially - * active configuration. - * \param degraded The device label indicates the vdev is not healthy. - * - * \return If label information is available, an nvlist describing - * the vdev configuraiton found on the device specified by - * devFd. Otherwise NULL. - */ - static nvlist_t *ReadLabel(int devFd, bool &inUse, bool °raded); - - /** - * Attempt to match the ZFS labeled device at devPath with an active - * CaseFile for a missing vdev. If a CaseFile is found, attempt - * to re-integrate the device with its pool. - * - * \param devPath The devfs path to the potential leaf vdev. - * \param physPath The physical path string reported by the device - * at devPath. - * \param devConfig The ZFS label information found on the device - * at devPath. - * - * \return true if the event that caused the online action can - * be considered consumed. - */ - static bool OnlineByLabel(const string &devPath, - const string& physPath, - nvlist_t *devConfig); - - /** DeepCopy Constructor. */ - DevfsEvent(const DevfsEvent &src); - - /** Constructor */ - DevfsEvent(Type, DevdCtl::NVPairMap &, const string &); -}; - /*--------------------------------- ZfsEvent ---------------------------------*/ class ZfsEvent : public DevdCtl::ZfsEvent { public: /** Specialized DevdCtlEvent object factory for ZFS events. */ static BuildMethod Builder; virtual DevdCtl::Event *DeepCopy() const; /** * Interpret and perform any actions necessary to * consume the event. * \return True if this event should be queued for later reevaluation */ virtual bool Process() const; protected: /** DeepCopy Constructor. */ ZfsEvent(const ZfsEvent &src); /** Constructor */ ZfsEvent(Type, DevdCtl::NVPairMap &, const string &); /** * Detach any spares that are no longer needed, but were not * automatically detached by the kernel */ virtual void CleanupSpares() const; virtual void ProcessPoolEvent() const; static VdevCallback_t TryDetach; }; class GeomEvent : public DevdCtl::GeomEvent { public: static BuildMethod Builder; virtual DevdCtl::Event *DeepCopy() const; virtual bool Process() const; protected: /** DeepCopy Constructor. */ GeomEvent(const GeomEvent &src); /** Constructor */ GeomEvent(Type, DevdCtl::NVPairMap &, const string &); + + /** + * Attempt to match the ZFS labeled device at devPath with an active + * CaseFile for a missing vdev. If a CaseFile is found, attempt + * to re-integrate the device with its pool. + * + * \param devPath The devfs path to the potential leaf vdev. + * \param physPath The physical path string reported by the device + * at devPath. + * \param devConfig The ZFS label information found on the device + * at devPath. + * + * \return true if the event that caused the online action can + * be considered consumed. + */ + static bool OnlineByLabel(const string &devPath, + const string& physPath, + nvlist_t *devConfig); + + /** + * \brief Read and return label information for a device. + * + * \param devFd The device from which to read ZFS label information. + * \param inUse The device is part of an active or potentially + * active configuration. + * \param degraded The device label indicates the vdev is not healthy. + * + * \return If label information is available, an nvlist describing + * the vdev configuraiton found on the device specified by + * devFd. Otherwise NULL. + */ + static nvlist_t *ReadLabel(int devFd, bool &inUse, bool °raded); + }; #endif /*_ZFSD_EVENT_H_ */ Index: stable/11/etc/mtree/BSD.tests.dist =================================================================== --- stable/11/etc/mtree/BSD.tests.dist (revision 330732) +++ stable/11/etc/mtree/BSD.tests.dist (revision 330733) @@ -1,746 +1,748 @@ # $FreeBSD$ # # Please see the file src/etc/mtree/README before making changes to this file. # /set type=dir uname=root gname=wheel mode=0755 . bin cat .. chflags .. chmod .. date .. dd .. echo .. expr .. ln .. ls .. mkdir .. mv .. pax .. pkill .. pwait .. rcp .. rmdir .. sh builtins .. errors .. execution .. expansion .. parameters .. parser .. set-e .. .. sleep .. test .. .. cddl lib .. sbin .. usr.bin ctfconvert .. .. usr.sbin dtrace common aggs .. arithmetic .. arrays .. assocs .. begin .. bitfields .. buffering .. builtinvar .. cg .. clauses .. cpc .. decls .. drops .. dtraceUtil .. end .. enum .. error .. exit .. fbtprovider .. funcs .. grammar .. include .. inline .. io .. ip .. java_api .. json .. lexer .. llquantize .. mdb .. mib .. misc .. multiaggs .. offsetof .. operators .. pid .. plockstat .. pointers .. pragma .. predicates .. preprocessor .. print .. printa .. printf .. privs .. probes .. proc .. profile-n .. providers .. raise .. rates .. safety .. scalars .. sched .. scripting .. sdt .. sizeof .. speculation .. stability .. stack .. stackdepth .. stop .. strlen .. strtoll .. struct .. syscall .. sysevent .. tick-n .. trace .. tracemem .. translators .. typedef .. types .. uctf .. union .. usdt .. ustack .. vars .. version .. .. .. zfsd .. .. .. etc rc.d .. .. games .. gnu lib .. usr.bin diff .. .. .. lib atf libatf-c detail .. .. libatf-c++ detail .. .. test-programs .. .. libarchive .. libc c063 .. db .. gen execve .. posix_spawn .. .. hash data .. .. iconv .. inet .. locale .. net getaddrinfo data .. .. .. nss .. regex data .. .. resolv .. rpc .. ssp .. setjmp .. stdio .. stdlib .. string .. sys .. time .. tls dso .. .. termios .. ttyio .. .. libcam .. libcasper services cap_dns .. cap_grp .. cap_pwd .. cap_sysctl .. .. .. libcrypt .. libdevdctl .. libkvm .. libmp .. libnv .. libpam .. libproc .. librt .. libsbuf .. libthr dlopen .. .. libutil .. libxo .. msun .. .. libexec atf atf-check .. atf-sh .. .. rtld-elf .. .. sbin dhclient .. devd .. growfs .. ifconfig .. mdconfig .. .. secure lib .. libexec .. usr.bin .. usr.sbin .. .. share examples tests atf .. plain .. tap .. .. .. zoneinfo .. .. sys acl .. aio .. fifo .. file .. fs tmpfs .. .. geom class concat .. eli .. gate .. gpt .. mirror .. nop .. + part + .. raid3 .. shsec .. stripe .. uzip etalon .. .. .. .. kern acct .. execve .. pipe .. .. kqueue libkqueue .. .. mac bsdextended .. portacl .. .. mqueue .. netinet .. opencrypto .. pjdfstest chflags .. chmod .. chown .. ftruncate .. granular .. link .. mkdir .. mkfifo .. mknod .. open .. rename .. rmdir .. symlink .. truncate .. unlink .. .. posixshm .. sys .. vfs .. vm .. .. usr.bin apply .. basename .. bmake archives fmt_44bsd .. fmt_44bsd_mod .. fmt_oldbsd .. .. basic t0 .. t1 .. t2 .. t3 .. .. execution ellipsis .. empty .. joberr .. plus .. .. shell builtin .. meta .. path .. path_select .. replace .. select .. .. suffixes basic .. src_wild1 .. src_wild2 .. .. syntax directive-t0 .. enl .. funny-targets .. semi .. .. sysmk t0 2 1 .. .. mk .. .. t1 2 1 .. .. mk .. .. t2 2 1 .. .. mk .. .. .. variables modifier_M .. modifier_t .. opt_V .. t0 .. .. .. bsdcat .. calendar .. cmp .. compress .. cpio .. col .. comm .. cut .. dirname .. du .. file2c .. fold .. getconf .. grep .. gzip .. head .. hexdump .. ident .. join .. jot .. lastcomm .. limits .. m4 .. mkimg .. ncal .. opensm .. pr .. printf .. procstat .. rs .. sdiff .. sed regress.multitest.out .. .. soelim .. stat .. tail .. tar .. timeout .. tr .. truncate .. units .. uudecode .. uuencode .. uniq .. xargs .. xinstall .. xo .. yacc yacc .. .. .. usr.sbin chown .. etcupdate .. extattr .. fstyp .. makefs .. newsyslog .. nmtree .. pw .. rpcbind .. sa .. .. .. # vim: set expandtab ts=4 sw=4: Index: stable/11/lib/libdevdctl/event.cc =================================================================== --- stable/11/lib/libdevdctl/event.cc (revision 330732) +++ stable/11/lib/libdevdctl/event.cc (revision 330733) @@ -1,602 +1,605 @@ /*- * Copyright (c) 2011, 2012, 2013, 2016 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) */ /** * \file event.cc * * Implementation of the class hierarchy used to express events * received via the devdctl API. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "guid.h" #include "event.h" #include "event_factory.h" #include "exception.h" __FBSDID("$FreeBSD$"); /*================================== Macros ==================================*/ #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) /*============================ Namespace Control =============================*/ using std::cout; using std::endl; using std::string; using std::stringstream; namespace DevdCtl { /*=========================== Class Implementations ==========================*/ /*----------------------------------- Event ----------------------------------*/ //- Event Static Protected Data ------------------------------------------------ const string Event::s_theEmptyString; Event::EventTypeRecord Event::s_typeTable[] = { { Event::NOTIFY, "Notify" }, { Event::NOMATCH, "No Driver Match" }, { Event::ATTACH, "Attach" }, { Event::DETACH, "Detach" } }; //- Event Static Public Methods ------------------------------------------------ Event * Event::Builder(Event::Type type, NVPairMap &nvPairs, const string &eventString) { return (new Event(type, nvPairs, eventString)); } Event * Event::CreateEvent(const EventFactory &factory, const string &eventString) { NVPairMap &nvpairs(*new NVPairMap); Type type(static_cast(eventString[0])); try { ParseEventString(type, eventString, nvpairs); } catch (const ParseException &exp) { if (exp.GetType() == ParseException::INVALID_FORMAT) exp.Log(); return (NULL); } /* * Allow entries in our table for events with no system specified. * These entries should specify the string "none". */ NVPairMap::iterator system_item(nvpairs.find("system")); if (system_item == nvpairs.end()) nvpairs["system"] = "none"; return (factory.Build(type, nvpairs, eventString)); } bool Event::DevName(std::string &name) const { return (false); } /* TODO: simplify this function with C++-11 methods */ bool Event::IsDiskDev() const { const int numDrivers = 2; static const char *diskDevNames[numDrivers] = { "da", "ada" }; const char **dName; string devName; if (! DevName(devName)) return false; size_t find_start = devName.rfind('/'); if (find_start == string::npos) { find_start = 0; } else { /* Just after the last '/'. */ find_start++; } for (dName = &diskDevNames[0]; dName <= &diskDevNames[numDrivers - 1]; dName++) { size_t loc(devName.find(*dName, find_start)); if (loc == find_start) { size_t prefixLen(strlen(*dName)); if (devName.length() - find_start >= prefixLen && isdigit(devName[find_start + prefixLen])) return (true); } } return (false); } const char * Event::TypeToString(Event::Type type) { EventTypeRecord *rec(s_typeTable); EventTypeRecord *lastRec(s_typeTable + NUM_ELEMENTS(s_typeTable) - 1); for (; rec <= lastRec; rec++) { if (rec->m_type == type) return (rec->m_typeName); } return ("Unknown"); } //- Event Public Methods ------------------------------------------------------- const string & Event::Value(const string &varName) const { NVPairMap::const_iterator item(m_nvPairs.find(varName)); if (item == m_nvPairs.end()) return (s_theEmptyString); return (item->second); } bool Event::Contains(const string &varName) const { return (m_nvPairs.find(varName) != m_nvPairs.end()); } string Event::ToString() const { stringstream result; NVPairMap::const_iterator devName(m_nvPairs.find("device-name")); if (devName != m_nvPairs.end()) result << devName->second << ": "; NVPairMap::const_iterator systemName(m_nvPairs.find("system")); if (systemName != m_nvPairs.end() && systemName->second != "none") result << systemName->second << ": "; result << TypeToString(GetType()) << ' '; for (NVPairMap::const_iterator curVar = m_nvPairs.begin(); curVar != m_nvPairs.end(); curVar++) { if (curVar == devName || curVar == systemName) continue; result << ' ' << curVar->first << "=" << curVar->second; } result << endl; return (result.str()); } void Event::Print() const { cout << ToString() << std::flush; } void Event::Log(int priority) const { syslog(priority, "%s", ToString().c_str()); } //- Event Virtual Public Methods ----------------------------------------------- Event::~Event() { delete &m_nvPairs; } Event * Event::DeepCopy() const { return (new Event(*this)); } bool Event::Process() const { return (false); } timeval Event::GetTimestamp() const { timeval tv_timestamp; struct tm tm_timestamp; if (!Contains("timestamp")) { throw Exception("Event contains no timestamp: %s", m_eventString.c_str()); } strptime(Value(string("timestamp")).c_str(), "%s", &tm_timestamp); tv_timestamp.tv_sec = mktime(&tm_timestamp); tv_timestamp.tv_usec = 0; return (tv_timestamp); } bool Event::DevPath(std::string &path) const { string devName; if (!DevName(devName)) return (false); string devPath(_PATH_DEV + devName); int devFd(open(devPath.c_str(), O_RDONLY)); if (devFd == -1) return (false); /* Normalize the device name in case the DEVFS event is for a link. */ devName = fdevname(devFd); path = _PATH_DEV + devName; close(devFd); return (true); } bool Event::PhysicalPath(std::string &path) const { string devPath; if (!DevPath(devPath)) return (false); int devFd(open(devPath.c_str(), O_RDONLY)); if (devFd == -1) return (false); char physPath[MAXPATHLEN]; physPath[0] = '\0'; bool result(ioctl(devFd, DIOCGPHYSPATH, physPath) == 0); close(devFd); if (result) path = physPath; return (result); } //- Event Protected Methods ---------------------------------------------------- Event::Event(Type type, NVPairMap &map, const string &eventString) : m_type(type), m_nvPairs(map), m_eventString(eventString) { } Event::Event(const Event &src) : m_type(src.m_type), m_nvPairs(*new NVPairMap(src.m_nvPairs)), m_eventString(src.m_eventString) { } void Event::ParseEventString(Event::Type type, const string &eventString, NVPairMap& nvpairs) { size_t start; size_t end; switch (type) { case ATTACH: case DETACH: /* * \ * at \ * on * * Handle all data that doesn't conform to the * "name=value" format, and let the generic parser * below handle the rest. * * Type is a single char. Skip it. */ start = 1; end = eventString.find_first_of(" \t\n", start); if (end == string::npos) throw ParseException(ParseException::INVALID_FORMAT, eventString, start); nvpairs["device-name"] = eventString.substr(start, end - start); start = eventString.find(" on ", end); if (end == string::npos) throw ParseException(ParseException::INVALID_FORMAT, eventString, start); start += 4; end = eventString.find_first_of(" \t\n", start); nvpairs["parent"] = eventString.substr(start, end); break; case NOTIFY: break; case NOMATCH: throw ParseException(ParseException::DISCARDED_EVENT_TYPE, eventString); default: throw ParseException(ParseException::UNKNOWN_EVENT_TYPE, eventString); } /* Process common "key=value" format. */ for (start = 1; start < eventString.length(); start = end + 1) { /* Find the '=' in the middle of the key/value pair. */ end = eventString.find('=', start); if (end == string::npos) break; /* * Find the start of the key by backing up until * we hit whitespace or '!' (event type "notice"). * Due to the devdctl format, all key/value pair must * start with one of these two characters. */ start = eventString.find_last_of("! \t\n", end); if (start == string::npos) throw ParseException(ParseException::INVALID_FORMAT, eventString, end); start++; string key(eventString.substr(start, end - start)); /* * Walk forward from the '=' until either we exhaust * the buffer or we hit whitespace. */ start = end + 1; if (start >= eventString.length()) throw ParseException(ParseException::INVALID_FORMAT, eventString, end); end = eventString.find_first_of(" \t\n", start); if (end == string::npos) end = eventString.length() - 1; string value(eventString.substr(start, end - start)); nvpairs[key] = value; } } void Event::TimestampEventString(std::string &eventString) { if (eventString.size() > 0) { /* * Add a timestamp as the final field of the event if it is * not already present. */ if (eventString.find("timestamp=") == string::npos) { const size_t bufsize = 32; // Long enough for a 64-bit int timeval now; char timebuf[bufsize]; size_t eventEnd(eventString.find_last_not_of('\n') + 1); if (gettimeofday(&now, NULL) != 0) err(1, "gettimeofday"); snprintf(timebuf, bufsize, " timestamp=%" PRId64, (int64_t) now.tv_sec); eventString.insert(eventEnd, timebuf); } } } /*-------------------------------- DevfsEvent --------------------------------*/ //- DevfsEvent Static Public Methods ------------------------------------------- Event * DevfsEvent::Builder(Event::Type type, NVPairMap &nvPairs, const string &eventString) { return (new DevfsEvent(type, nvPairs, eventString)); } //- DevfsEvent Static Protected Methods ---------------------------------------- bool DevfsEvent::IsWholeDev(const string &devName) { string::const_iterator i(devName.begin()); size_t start = devName.rfind('/'); if (start == string::npos) { start = 0; } else { /* Just after the last '/'. */ start++; } i += start; /* alpha prefix followed only by digits. */ for (; i < devName.end() && !isdigit(*i); i++) ; if (i == devName.end()) return (false); for (; i < devName.end() && isdigit(*i); i++) ; return (i == devName.end()); } //- DevfsEvent Virtual Public Methods ------------------------------------------ Event * DevfsEvent::DeepCopy() const { return (new DevfsEvent(*this)); } bool DevfsEvent::Process() const { return (true); } //- DevfsEvent Public Methods -------------------------------------------------- bool DevfsEvent::IsWholeDev() const { string devName; return (DevName(devName) && IsDiskDev() && IsWholeDev(devName)); } bool DevfsEvent::DevName(std::string &name) const { if (Value("subsystem") != "CDEV") return (false); name = Value("cdev"); return (!name.empty()); } //- DevfsEvent Protected Methods ----------------------------------------------- DevfsEvent::DevfsEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : Event(type, nvpairs, eventString) { } DevfsEvent::DevfsEvent(const DevfsEvent &src) : Event(src) { } /*--------------------------------- GeomEvent --------------------------------*/ //- GeomEvent Static Public Methods -------------------------------------------- Event * GeomEvent::Builder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new GeomEvent(type, nvpairs, eventString)); } //- GeomEvent Virtual Public Methods ------------------------------------------- Event * GeomEvent::DeepCopy() const { return (new GeomEvent(*this)); } bool GeomEvent::DevName(std::string &name) const { - name = Value("devname"); + if (Value("subsystem") == "disk") + name = Value("devname"); + else + name = Value("cdev"); return (!name.empty()); } //- GeomEvent Protected Methods ------------------------------------------------ GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : Event(type, nvpairs, eventString), m_devname(Value("devname")) { } GeomEvent::GeomEvent(const GeomEvent &src) : Event(src), m_devname(src.m_devname) { } /*--------------------------------- ZfsEvent ---------------------------------*/ //- ZfsEvent Static Public Methods --------------------------------------------- Event * ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new ZfsEvent(type, nvpairs, eventString)); } //- ZfsEvent Virtual Public Methods -------------------------------------------- Event * ZfsEvent::DeepCopy() const { return (new ZfsEvent(*this)); } bool ZfsEvent::DevName(std::string &name) const { return (false); } //- ZfsEvent Protected Methods ------------------------------------------------- ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs, const string &eventString) : Event(type, nvpairs, eventString), m_poolGUID(Guid(Value("pool_guid"))), m_vdevGUID(Guid(Value("vdev_guid"))) { } ZfsEvent::ZfsEvent(const ZfsEvent &src) : Event(src), m_poolGUID(src.m_poolGUID), m_vdevGUID(src.m_vdevGUID) { } } // namespace DevdCtl Index: stable/11/sys/geom/eli/g_eli.c =================================================================== --- stable/11/sys/geom/eli/g_eli.c (revision 330732) +++ stable/11/sys/geom/eli/g_eli.c (revision 330733) @@ -1,1320 +1,1333 @@ /*- * Copyright (c) 2005-2011 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_eli, "GEOM crypto module"); MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW, 0, "GEOM_ELI stuff"); static int g_eli_version = G_ELI_VERSION; SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0, "GELI version"); int g_eli_debug = 0; SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0, "Debug level"); static u_int g_eli_tries = 3; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0, "Number of tries for entering the passphrase"); static u_int g_eli_visible_passphrase = GETS_NOECHO; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN, &g_eli_visible_passphrase, 0, "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)"); u_int g_eli_overwrites = G_ELI_OVERWRITES; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites, 0, "Number of times on-disk keys should be overwritten when destroying them"); static u_int g_eli_threads = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0, "Number of threads doing crypto work"); u_int g_eli_batch = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0, "Use crypto operations batching"); /* * Passphrase cached during boot, in order to be more user-friendly if * there are multiple providers using the same passphrase. */ static char cached_passphrase[256]; static u_int g_eli_boot_passcache = 1; TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache); SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD, &g_eli_boot_passcache, 0, "Passphrases are cached during boot process for possible reuse"); static void fetch_loader_passphrase(void * dummy) { char * env_passphrase; KASSERT(dynamic_kenv, ("need dynamic kenv")); if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) { /* Extract passphrase from the environment. */ strlcpy(cached_passphrase, env_passphrase, sizeof(cached_passphrase)); freeenv(env_passphrase); /* Wipe the passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); } } SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY, fetch_loader_passphrase, NULL); static void zero_boot_passcache(void) { explicit_bzero(cached_passphrase, sizeof(cached_passphrase)); } static void zero_geli_intake_keys(void) { struct keybuf *keybuf; int i; if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, clear all GELI keys. */ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { explicit_bzero(keybuf->kb_ents[i].ke_data, sizeof(keybuf->kb_ents[i].ke_data)); keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE; } } } } static void zero_intake_passcache(void *dummy) { zero_boot_passcache(); zero_geli_intake_keys(); } EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0); static eventhandler_tag g_eli_pre_sync = NULL; static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_eli_init(struct g_class *mp); static void g_eli_fini(struct g_class *mp); static g_taste_t g_eli_taste; static g_dumpconf_t g_eli_dumpconf; struct g_class g_eli_class = { .name = G_ELI_CLASS_NAME, .version = G_VERSION, .ctlreq = g_eli_config, .taste = g_eli_taste, .destroy_geom = g_eli_destroy_geom, .init = g_eli_init, .fini = g_eli_fini }; /* * Code paths: * BIO_READ: * g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ /* * EAGAIN from crypto(9) means, that we were probably balanced to another crypto * accelerator or something like this. * The function updates the SID and rerun the operation. */ int g_eli_crypto_rerun(struct cryptop *crp) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; bp = (struct bio *)crp->crp_opaque; sc = bp->bio_to->geom->softc; LIST_FOREACH(wr, &sc->sc_workers, w_next) { if (wr->w_number == bp->bio_pflags) break; } KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags)); G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %ju -> %ju).", bp->bio_cmd == BIO_READ ? "READ" : "WRITE", (uintmax_t)wr->w_sid, (uintmax_t)crp->crp_sid); wr->w_sid = crp->crp_sid; crp->crp_etype = 0; error = crypto_dispatch(crp); if (error == 0) return (0); G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error); crp->crp_etype = error; return (error); } +static void +g_eli_getattr_done(struct bio *bp) +{ + if (bp->bio_error == 0 && + !strcmp(bp->bio_attribute, "GEOM::physpath")) { + strlcat(bp->bio_data, "/eli", bp->bio_length); + } + g_std_done(bp); +} + /* * The function is called afer reading encrypted data from the provider. * * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver */ void g_eli_read_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; sc = pbp->bio_to->geom->softc; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; if (pbp->bio_driver2 != NULL) { free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; } g_io_deliver(pbp, pbp->bio_error); atomic_subtract_int(&sc->sc_inflight, 1); return; } mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, pbp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } /* * The function is called after we encrypt and write data. * * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver */ void g_eli_write_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; } else pbp->bio_completed = pbp->bio_length; /* * Write is finished, send it up. */ sc = pbp->bio_to->geom->softc; g_io_deliver(pbp, pbp->bio_error); atomic_subtract_int(&sc->sc_inflight, 1); } /* * This function should never be called, but GEOM made as it set ->orphan() * method for every geom. */ static void g_eli_orphan_spoil_assert(struct g_consumer *cp) { panic("Function %s() called for %s.", __func__, cp->geom->name); } static void g_eli_orphan(struct g_consumer *cp) { struct g_eli_softc *sc; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; g_eli_destroy(sc, TRUE); } /* * BIO_READ: * G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ static void g_eli_start(struct bio *bp) { struct g_eli_softc *sc; struct g_consumer *cp; struct bio *cbp; sc = bp->bio_to->geom->softc; KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_ELI_LOGREQ(2, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_GETATTR: case BIO_FLUSH: case BIO_ZONE: break; case BIO_DELETE: /* * If the user hasn't set the NODELETE flag, we just pass * it down the stack and let the layers beneath us do (or * not) whatever they do with it. If they have, we * reject it. A possible extension would be an * additional flag to take it as a hint to shred the data * with [multiple?] overwrites. */ if (!(sc->sc_flags & G_ELI_FLAG_NODELETE)) break; default: g_io_deliver(bp, EOPNOTSUPP); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } bp->bio_driver1 = cbp; bp->bio_pflags = G_ELI_NEW_BIO; switch (bp->bio_cmd) { case BIO_READ: if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) { g_eli_crypto_read(sc, bp, 0); break; } /* FALLTHROUGH */ case BIO_WRITE: mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); break; case BIO_GETATTR: case BIO_FLUSH: case BIO_DELETE: case BIO_ZONE: - cbp->bio_done = g_std_done; + if (bp->bio_cmd == BIO_GETATTR) + cbp->bio_done = g_eli_getattr_done; + else + cbp->bio_done = g_std_done; cp = LIST_FIRST(&sc->sc_geom->consumer); cbp->bio_to = cp->provider; G_ELI_LOGREQ(2, cbp, "Sending request."); g_io_request(cbp, cp); break; } } static int g_eli_newsession(struct g_eli_worker *wr) { struct g_eli_softc *sc; struct cryptoini crie, cria; int error; sc = wr->w_softc; bzero(&crie, sizeof(crie)); crie.cri_alg = sc->sc_ealgo; crie.cri_klen = sc->sc_ekeylen; if (sc->sc_ealgo == CRYPTO_AES_XTS) crie.cri_klen <<= 1; if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) { crie.cri_key = g_eli_key_hold(sc, 0, LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize); } else { crie.cri_key = sc->sc_ekey; } if (sc->sc_flags & G_ELI_FLAG_AUTH) { bzero(&cria, sizeof(cria)); cria.cri_alg = sc->sc_aalgo; cria.cri_klen = sc->sc_akeylen; cria.cri_key = sc->sc_akey; crie.cri_next = &cria; } switch (sc->sc_crypto) { case G_ELI_CRYPTO_SW: error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_SOFTWARE); break; case G_ELI_CRYPTO_HW: error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_HARDWARE); break; case G_ELI_CRYPTO_UNKNOWN: error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_HARDWARE); if (error == 0) { mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) sc->sc_crypto = G_ELI_CRYPTO_HW; mtx_unlock(&sc->sc_queue_mtx); } else { error = crypto_newsession(&wr->w_sid, &crie, CRYPTOCAP_F_SOFTWARE); mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) sc->sc_crypto = G_ELI_CRYPTO_SW; mtx_unlock(&sc->sc_queue_mtx); } break; default: panic("%s: invalid condition", __func__); } if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) g_eli_key_drop(sc, crie.cri_key); return (error); } static void g_eli_freesession(struct g_eli_worker *wr) { crypto_freesession(wr->w_sid); } static void g_eli_cancel(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) { KASSERT(bp->bio_pflags == G_ELI_NEW_BIO, ("Not new bio when canceling (bp=%p).", bp)); g_io_deliver(bp, ENXIO); } } static struct bio * g_eli_takefirst(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) return (bioq_takefirst(&sc->sc_queue)); /* * Device suspended, so we skip new I/O requests. */ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_pflags != G_ELI_NEW_BIO) break; } if (bp != NULL) bioq_remove(&sc->sc_queue, bp); return (bp); } /* * This is the main function for kernel worker thread when we don't have * hardware acceleration and we have to do cryptography in software. * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM * threads with crypto work. */ static void g_eli_worker(void *arg) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; wr = arg; sc = wr->w_softc; #ifdef EARLY_AP_STARTUP MPASS(!sc->sc_cpubind || smp_started); #elif defined(SMP) /* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */ if (sc->sc_cpubind) { while (!smp_started) tsleep(wr, 0, "geli:smp", hz / 4); } #endif thread_lock(curthread); sched_prio(curthread, PUSER); if (sc->sc_cpubind) sched_bind(curthread, wr->w_number % mp_ncpus); thread_unlock(curthread); G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm); for (;;) { mtx_lock(&sc->sc_queue_mtx); again: bp = g_eli_takefirst(sc); if (bp == NULL) { if (sc->sc_flags & G_ELI_FLAG_DESTROY) { g_eli_cancel(sc); LIST_REMOVE(wr, w_next); g_eli_freesession(wr); free(wr, M_ELI); G_ELI_DEBUG(1, "Thread %s exiting.", curthread->td_proc->p_comm); wakeup(&sc->sc_workers); mtx_unlock(&sc->sc_queue_mtx); kproc_exit(0); } while (sc->sc_flags & G_ELI_FLAG_SUSPEND) { if (sc->sc_inflight > 0) { G_ELI_DEBUG(0, "inflight=%d", sc->sc_inflight); /* * We still have inflight BIOs, so * sleep and retry. */ msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:inf", hz / 5); goto again; } /* * Suspend requested, mark the worker as * suspended and go to sleep. */ if (wr->w_active) { g_eli_freesession(wr); wr->w_active = FALSE; } wakeup(&sc->sc_workers); msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:suspend", 0); if (!wr->w_active && !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) { error = g_eli_newsession(wr); KASSERT(error == 0, ("g_eli_newsession() failed on resume (error=%d)", error)); wr->w_active = TRUE; } goto again; } msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0); continue; } if (bp->bio_pflags == G_ELI_NEW_BIO) atomic_add_int(&sc->sc_inflight, 1); mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_pflags == G_ELI_NEW_BIO) { bp->bio_pflags = 0; if (sc->sc_flags & G_ELI_FLAG_AUTH) { if (bp->bio_cmd == BIO_READ) g_eli_auth_read(sc, bp); else g_eli_auth_run(wr, bp); } else { if (bp->bio_cmd == BIO_READ) g_eli_crypto_read(sc, bp, 1); else g_eli_crypto_run(wr, bp); } } else { if (sc->sc_flags & G_ELI_FLAG_AUTH) g_eli_auth_run(wr, bp); else g_eli_crypto_run(wr, bp); } } } int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp, struct g_eli_metadata *md) { struct g_geom *gp; struct g_consumer *cp; u_char *buf = NULL; int error; g_topology_assert(); gp = g_new_geomf(mp, "eli:taste"); gp->start = g_eli_start; gp->access = g_std_access; /* * g_eli_read_metadata() is always called from the event thread. * Our geom is created and destroyed in the same event, so there * could be no orphan nor spoil event in the meantime. */ gp->orphan = g_eli_orphan_spoil_assert; gp->spoiled = g_eli_orphan_spoil_assert; cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error != 0) goto end; error = g_access(cp, 1, 0, 0); if (error != 0) goto end; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); if (buf == NULL) goto end; error = eli_metadata_decode(buf, md); if (error != 0) goto end; /* Metadata was read and decoded successfully. */ end: if (buf != NULL) g_free(buf); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, 0, 0); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); return (error); } /* * The function is called when we had last close on provider and user requested * to close it when this situation occur. */ static void g_eli_last_close(void *arg, int flags __unused) { struct g_geom *gp; char gpname[64]; int error; g_topology_assert(); gp = arg; strlcpy(gpname, gp->name, sizeof(gpname)); error = g_eli_destroy(gp->softc, TRUE); KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).", gpname, error)); G_ELI_DEBUG(0, "Detached %s on last close.", gpname); } int g_eli_access(struct g_provider *pp, int dr, int dw, int de) { struct g_eli_softc *sc; struct g_geom *gp; gp = pp->geom; sc = gp->softc; if (dw > 0) { if (sc->sc_flags & G_ELI_FLAG_RO) { /* Deny write attempts. */ return (EROFS); } /* Someone is opening us for write, we need to remember that. */ sc->sc_flags |= G_ELI_FLAG_WOPEN; return (0); } /* Is this the last close? */ if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0) return (0); /* * Automatically detach on last close if requested. */ if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) || (sc->sc_flags & G_ELI_FLAG_WOPEN)) { g_post_event(g_eli_last_close, gp, M_WAITOK, NULL); } return (0); } static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif } struct g_geom * g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, const struct g_eli_metadata *md, const u_char *mkey, int nkey) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; u_int i, threads; int error; G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX); gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX); sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO); gp->start = g_eli_start; /* * Spoiling can happen even though we have the provider open * exclusively, e.g. through media change events. */ gp->spoiled = g_eli_orphan; gp->orphan = g_eli_orphan; gp->dumpconf = g_eli_dumpconf; /* * If detach-on-last-close feature is not enabled and we don't operate * on read-only provider, we can simply use g_std_access(). */ if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO)) gp->access = g_eli_access; else gp->access = g_std_access; eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize); sc->sc_nkey = nkey; gp->softc = sc; sc->sc_geom = gp; bioq_init(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF); mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF); pp = NULL; cp = g_new_consumer(gp); error = g_attach(cp, bpp); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot attach to %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).", bpp->name, error); } goto failed; } /* * Keep provider open all the time, so we can run critical tasks, * like Master Keys deletion, without wondering if we can open * provider or not. * We don't open provider for writing only when user requested read-only * access. */ if (sc->sc_flags & G_ELI_FLAG_RO) error = g_access(cp, 1, 0, 1); else error = g_access(cp, 1, 1, 1); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot access %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot access %s (error=%d).", bpp->name, error); } goto failed; } /* * Remember the keys in our softc structure. */ g_eli_mkey_propagate(sc, mkey); LIST_INIT(&sc->sc_workers); threads = g_eli_threads; if (threads == 0) threads = mp_ncpus; sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus); for (i = 0; i < threads; i++) { if (g_eli_cpu_is_disabled(i)) { G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.", bpp->name, i); continue; } wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO); wr->w_softc = sc; wr->w_number = i; wr->w_active = TRUE; error = g_eli_newsession(wr); if (error != 0) { free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } goto failed; } error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0, "g_eli[%u] %s", i, bpp->name); if (error != 0) { g_eli_freesession(wr); free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } goto failed; } LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next); } /* * Create decrypted provider. */ pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX); pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; g_error_provider(pp, 0); G_ELI_DEBUG(0, "Device %s created.", pp->name); G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo), sc->sc_ekeylen); if (sc->sc_flags & G_ELI_FLAG_AUTH) G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo)); G_ELI_DEBUG(0, " Crypto: %s", sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware"); return (gp); failed: mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); /* * Wait for kernel threads self destruction. */ while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, -1, -1); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); return (NULL); } int g_eli_destroy(struct g_eli_softc *sc, boolean_t force) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (sc == NULL) return (ENXIO); gp = sc->sc_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_ELI_DEBUG(1, "Device %s is still open, so it " "cannot be definitely removed.", pp->name); sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; g_wither_provider(pp, ENXIO); return (EBUSY); } else { G_ELI_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); gp->softc = NULL; g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) G_ELI_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom_close(gp, ENXIO); return (0); } static int g_eli_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_eli_softc *sc; sc = gp->softc; return (g_eli_destroy(sc, FALSE)); } static int g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider) { u_char *keyfile, *data; char *file, name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL && i == 0) { /* * If there is only one keyfile, allow simpler name. */ snprintf(name, sizeof(name), "%s:geli_keyfile", provider); keyfile = preload_search_by_type(name); } if (keyfile == NULL) return (i); /* Return number of loaded keyfiles. */ data = preload_fetch_addr(keyfile); if (data == NULL) { G_ELI_DEBUG(0, "Cannot find key file data for %s.", name); return (0); } size = preload_fetch_size(keyfile); if (size == 0) { G_ELI_DEBUG(0, "Cannot find key file size for %s.", name); return (0); } file = preload_search_info(keyfile, MODINFO_NAME); if (file == NULL) { G_ELI_DEBUG(0, "Cannot find key file name for %s.", name); return (0); } G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file, provider, name); g_eli_crypto_hmac_update(ctx, data, size); } } static void g_eli_keyfiles_clear(const char *provider) { u_char *keyfile, *data; char name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL) return; data = preload_fetch_addr(keyfile); size = preload_fetch_size(keyfile); if (data != NULL && size != 0) bzero(data, size); } } /* * Tasting is only made on boot. * We detect providers which should be attached before root is mounted. */ static struct g_geom * g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_eli_metadata md; struct g_geom *gp; struct hmac_ctx ctx; char passphrase[256]; u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN]; u_int i, nkey, nkeyfiles, tries, showpass; int error; struct keybuf *keybuf; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); if (root_mounted() || g_eli_tries == 0) return (NULL); G_ELI_DEBUG(3, "Tasting %s.", pp->name); error = g_eli_read_metadata(mp, pp, &md); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_ELI_MAGIC) != 0) return (NULL); if (md.md_version > G_ELI_VERSION) { printf("geom_eli.ko module is too old to handle %s.\n", pp->name); return (NULL); } if (md.md_provsize != pp->mediasize) return (NULL); /* Should we attach it on boot? */ if (!(md.md_flags & G_ELI_FLAG_BOOT)) return (NULL); if (md.md_keys == 0x00) { G_ELI_DEBUG(0, "No valid keys on %s.", pp->name); return (NULL); } if (md.md_iterations == -1) { /* If there is no passphrase, we try only once. */ tries = 1; } else { /* Ask for the passphrase no more than g_eli_tries times. */ tries = g_eli_tries; } if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, try all GELI keys. */ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { memcpy(key, keybuf->kb_ents[i].ke_data, sizeof(key)); if (g_eli_mkey_decrypt(&md, key, mkey, &nkey) == 0 ) { explicit_bzero(key, sizeof(key)); goto have_key; } } } } for (i = 0; i <= tries; i++) { g_eli_crypto_hmac_init(&ctx, NULL, 0); /* * Load all key files. */ nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name); if (nkeyfiles == 0 && md.md_iterations == -1) { /* * No key files and no passphrase, something is * definitely wrong here. * geli(8) doesn't allow for such situation, so assume * that there was really no passphrase and in that case * key files are no properly defined in loader.conf. */ G_ELI_DEBUG(0, "Found no key files in loader.conf for %s.", pp->name); return (NULL); } /* Ask for the passphrase if defined. */ if (md.md_iterations >= 0) { /* Try first with cached passphrase. */ if (i == 0) { if (!g_eli_boot_passcache) continue; memcpy(passphrase, cached_passphrase, sizeof(passphrase)); } else { printf("Enter passphrase for %s: ", pp->name); showpass = g_eli_visible_passphrase; if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0) showpass = GETS_ECHOPASS; cngets(passphrase, sizeof(passphrase), showpass); memcpy(cached_passphrase, passphrase, sizeof(passphrase)); } } /* * Prepare Derived-Key from the user passphrase. */ if (md.md_iterations == 0) { g_eli_crypto_hmac_update(&ctx, md.md_salt, sizeof(md.md_salt)); g_eli_crypto_hmac_update(&ctx, passphrase, strlen(passphrase)); explicit_bzero(passphrase, sizeof(passphrase)); } else if (md.md_iterations > 0) { u_char dkey[G_ELI_USERKEYLEN]; pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt, sizeof(md.md_salt), passphrase, md.md_iterations); bzero(passphrase, sizeof(passphrase)); g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey)); explicit_bzero(dkey, sizeof(dkey)); } g_eli_crypto_hmac_final(&ctx, key, 0); /* * Decrypt Master-Key. */ error = g_eli_mkey_decrypt(&md, key, mkey, &nkey); bzero(key, sizeof(key)); if (error == -1) { if (i == tries) { G_ELI_DEBUG(0, "Wrong key for %s. No tries left.", pp->name); g_eli_keyfiles_clear(pp->name); return (NULL); } if (i > 0) { G_ELI_DEBUG(0, "Wrong key for %s. Tries left: %u.", pp->name, tries - i); } /* Try again. */ continue; } else if (error > 0) { G_ELI_DEBUG(0, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); g_eli_keyfiles_clear(pp->name); return (NULL); } g_eli_keyfiles_clear(pp->name); G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); break; } have_key: /* * We have correct key, let's attach provider. */ gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey); bzero(mkey, sizeof(mkey)); bzero(&md, sizeof(md)); if (gp == NULL) { G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name, G_ELI_SUFFIX); return (NULL); } return (gp); } static void g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_eli_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL || cp != NULL) return; /* Nothing here. */ sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_total); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_allocated); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_printf(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if (sc->sc_flags & (flag)) { \ if (!first) \ sbuf_printf(sb, ", "); \ else \ first = 0; \ sbuf_printf(sb, name); \ } \ } while (0) ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND"); ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY"); ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER"); ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME"); ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT"); ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH"); ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH"); ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH"); ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN"); ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY"); ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY"); ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE"); ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT"); ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS"); #undef ADD_FLAG } sbuf_printf(sb, "\n"); if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) { sbuf_printf(sb, "%s%u\n", indent, sc->sc_nkey); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_version); sbuf_printf(sb, "%s", indent); switch (sc->sc_crypto) { case G_ELI_CRYPTO_HW: sbuf_printf(sb, "hardware"); break; case G_ELI_CRYPTO_SW: sbuf_printf(sb, "software"); break; default: sbuf_printf(sb, "UNKNOWN"); break; } sbuf_printf(sb, "\n"); if (sc->sc_flags & G_ELI_FLAG_AUTH) { sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_aalgo)); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_ekeylen); sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_ealgo)); sbuf_printf(sb, "%s%s\n", indent, (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE"); } static void g_eli_shutdown_pre_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp; struct g_eli_softc *sc; int error; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { sc = gp->softc; if (sc == NULL) continue; pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name)); if (pp->acr + pp->acw + pp->ace == 0) error = g_eli_destroy(sc, TRUE); else { sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; } } g_topology_unlock(); } static void g_eli_init(struct g_class *mp) { g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); if (g_eli_pre_sync == NULL) G_ELI_DEBUG(0, "Warning! Cannot register shutdown event."); } static void g_eli_fini(struct g_class *mp) { if (g_eli_pre_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync); } DECLARE_GEOM_CLASS(g_eli_class, g_eli); MODULE_DEPEND(g_eli, crypto, 1, 1, 1); Index: stable/11/sys/geom/part/g_part.c =================================================================== --- stable/11/sys/geom/part/g_part.c (revision 330732) +++ stable/11/sys/geom/part/g_part.c (revision 330733) @@ -1,2345 +1,2379 @@ /*- * Copyright (c) 2002, 2005-2009 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" #ifndef _PATH_DEV #define _PATH_DEV "/dev/" #endif static kobj_method_t g_part_null_methods[] = { { 0, 0 } }; static struct g_part_scheme g_part_null_scheme = { "(none)", g_part_null_methods, sizeof(struct g_part_table), }; TAILQ_HEAD(, g_part_scheme) g_part_schemes = TAILQ_HEAD_INITIALIZER(g_part_schemes); struct g_part_alias_list { const char *lexeme; enum g_part_alias alias; } g_part_alias_list[G_PART_ALIAS_COUNT] = { { "apple-boot", G_PART_ALIAS_APPLE_BOOT }, { "apple-core-storage", G_PART_ALIAS_APPLE_CORE_STORAGE }, { "apple-hfs", G_PART_ALIAS_APPLE_HFS }, { "apple-label", G_PART_ALIAS_APPLE_LABEL }, { "apple-raid", G_PART_ALIAS_APPLE_RAID }, { "apple-raid-offline", G_PART_ALIAS_APPLE_RAID_OFFLINE }, { "apple-tv-recovery", G_PART_ALIAS_APPLE_TV_RECOVERY }, { "apple-ufs", G_PART_ALIAS_APPLE_UFS }, { "bios-boot", G_PART_ALIAS_BIOS_BOOT }, { "chromeos-firmware", G_PART_ALIAS_CHROMEOS_FIRMWARE }, { "chromeos-kernel", G_PART_ALIAS_CHROMEOS_KERNEL }, { "chromeos-reserved", G_PART_ALIAS_CHROMEOS_RESERVED }, { "chromeos-root", G_PART_ALIAS_CHROMEOS_ROOT }, { "dragonfly-ccd", G_PART_ALIAS_DFBSD_CCD }, { "dragonfly-hammer", G_PART_ALIAS_DFBSD_HAMMER }, { "dragonfly-hammer2", G_PART_ALIAS_DFBSD_HAMMER2 }, { "dragonfly-label32", G_PART_ALIAS_DFBSD }, { "dragonfly-label64", G_PART_ALIAS_DFBSD64 }, { "dragonfly-legacy", G_PART_ALIAS_DFBSD_LEGACY }, { "dragonfly-swap", G_PART_ALIAS_DFBSD_SWAP }, { "dragonfly-ufs", G_PART_ALIAS_DFBSD_UFS }, { "dragonfly-vinum", G_PART_ALIAS_DFBSD_VINUM }, { "ebr", G_PART_ALIAS_EBR }, { "efi", G_PART_ALIAS_EFI }, { "fat16", G_PART_ALIAS_MS_FAT16 }, { "fat32", G_PART_ALIAS_MS_FAT32 }, { "freebsd", G_PART_ALIAS_FREEBSD }, { "freebsd-boot", G_PART_ALIAS_FREEBSD_BOOT }, { "freebsd-nandfs", G_PART_ALIAS_FREEBSD_NANDFS }, { "freebsd-swap", G_PART_ALIAS_FREEBSD_SWAP }, { "freebsd-ufs", G_PART_ALIAS_FREEBSD_UFS }, { "freebsd-vinum", G_PART_ALIAS_FREEBSD_VINUM }, { "freebsd-zfs", G_PART_ALIAS_FREEBSD_ZFS }, { "linux-data", G_PART_ALIAS_LINUX_DATA }, { "linux-lvm", G_PART_ALIAS_LINUX_LVM }, { "linux-raid", G_PART_ALIAS_LINUX_RAID }, { "linux-swap", G_PART_ALIAS_LINUX_SWAP }, { "mbr", G_PART_ALIAS_MBR }, { "ms-basic-data", G_PART_ALIAS_MS_BASIC_DATA }, { "ms-ldm-data", G_PART_ALIAS_MS_LDM_DATA }, { "ms-ldm-metadata", G_PART_ALIAS_MS_LDM_METADATA }, { "ms-recovery", G_PART_ALIAS_MS_RECOVERY }, { "ms-reserved", G_PART_ALIAS_MS_RESERVED }, { "ms-spaces", G_PART_ALIAS_MS_SPACES }, { "netbsd-ccd", G_PART_ALIAS_NETBSD_CCD }, { "netbsd-cgd", G_PART_ALIAS_NETBSD_CGD }, { "netbsd-ffs", G_PART_ALIAS_NETBSD_FFS }, { "netbsd-lfs", G_PART_ALIAS_NETBSD_LFS }, { "netbsd-raid", G_PART_ALIAS_NETBSD_RAID }, { "netbsd-swap", G_PART_ALIAS_NETBSD_SWAP }, { "ntfs", G_PART_ALIAS_MS_NTFS }, { "openbsd-data", G_PART_ALIAS_OPENBSD_DATA }, { "prep-boot", G_PART_ALIAS_PREP_BOOT }, { "vmware-reserved", G_PART_ALIAS_VMRESERVED }, { "vmware-vmfs", G_PART_ALIAS_VMFS }, { "vmware-vmkdiag", G_PART_ALIAS_VMKDIAG }, { "vmware-vsanhdr", G_PART_ALIAS_VMVSANHDR }, }; SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, part, CTLFLAG_RW, 0, "GEOM_PART stuff"); static u_int check_integrity = 1; SYSCTL_UINT(_kern_geom_part, OID_AUTO, check_integrity, CTLFLAG_RWTUN, &check_integrity, 1, "Enable integrity checking"); /* * The GEOM partitioning class. */ static g_ctl_req_t g_part_ctlreq; static g_ctl_destroy_geom_t g_part_destroy_geom; static g_fini_t g_part_fini; static g_init_t g_part_init; static g_taste_t g_part_taste; static g_access_t g_part_access; static g_dumpconf_t g_part_dumpconf; static g_orphan_t g_part_orphan; static g_spoiled_t g_part_spoiled; static g_start_t g_part_start; static g_resize_t g_part_resize; static g_ioctl_t g_part_ioctl; static struct g_class g_part_class = { .name = "PART", .version = G_VERSION, /* Class methods. */ .ctlreq = g_part_ctlreq, .destroy_geom = g_part_destroy_geom, .fini = g_part_fini, .init = g_part_init, .taste = g_part_taste, /* Geom methods. */ .access = g_part_access, .dumpconf = g_part_dumpconf, .orphan = g_part_orphan, .spoiled = g_part_spoiled, .start = g_part_start, .resize = g_part_resize, .ioctl = g_part_ioctl, }; DECLARE_GEOM_CLASS(g_part_class, g_part); MODULE_VERSION(g_part, 0); /* * Support functions. */ static void g_part_wither(struct g_geom *, int); const char * g_part_alias_name(enum g_part_alias alias) { int i; for (i = 0; i < G_PART_ALIAS_COUNT; i++) { if (g_part_alias_list[i].alias != alias) continue; return (g_part_alias_list[i].lexeme); } return (NULL); } void g_part_geometry_heads(off_t blocks, u_int sectors, off_t *bestchs, u_int *bestheads) { static u_int candidate_heads[] = { 1, 2, 16, 32, 64, 128, 255, 0 }; off_t chs, cylinders; u_int heads; int idx; *bestchs = 0; *bestheads = 0; for (idx = 0; candidate_heads[idx] != 0; idx++) { heads = candidate_heads[idx]; cylinders = blocks / heads / sectors; if (cylinders < heads || cylinders < sectors) break; if (cylinders > 1023) continue; chs = cylinders * heads * sectors; if (chs > *bestchs || (chs == *bestchs && *bestheads == 1)) { *bestchs = chs; *bestheads = heads; } } } static void g_part_geometry(struct g_part_table *table, struct g_consumer *cp, off_t blocks) { static u_int candidate_sectors[] = { 1, 9, 17, 33, 63, 0 }; off_t chs, bestchs; u_int heads, sectors; int idx; if (g_getattr("GEOM::fwsectors", cp, §ors) != 0 || sectors == 0 || g_getattr("GEOM::fwheads", cp, &heads) != 0 || heads == 0) { table->gpt_fixgeom = 0; table->gpt_heads = 0; table->gpt_sectors = 0; bestchs = 0; for (idx = 0; candidate_sectors[idx] != 0; idx++) { sectors = candidate_sectors[idx]; g_part_geometry_heads(blocks, sectors, &chs, &heads); if (chs == 0) continue; /* * Prefer a geometry with sectors > 1, but only if * it doesn't bump down the number of heads to 1. */ if (chs > bestchs || (chs == bestchs && heads > 1 && table->gpt_sectors == 1)) { bestchs = chs; table->gpt_heads = heads; table->gpt_sectors = sectors; } } /* * If we didn't find a geometry at all, then the disk is * too big. This means we can use the maximum number of * heads and sectors. */ if (bestchs == 0) { table->gpt_heads = 255; table->gpt_sectors = 63; } } else { table->gpt_fixgeom = 1; table->gpt_heads = heads; table->gpt_sectors = sectors; } } +static void +g_part_get_physpath_done(struct bio *bp) +{ + struct g_geom *gp; + struct g_part_entry *entry; + struct g_part_table *table; + struct g_provider *pp; + struct bio *pbp; + + pbp = bp->bio_parent; + pp = pbp->bio_to; + gp = pp->geom; + table = gp->softc; + entry = pp->private; + + if (bp->bio_error == 0) { + char *end; + size_t len, remainder; + len = strlcat(bp->bio_data, "/", bp->bio_length); + if (len < bp->bio_length) { + end = bp->bio_data + len; + remainder = bp->bio_length - len; + G_PART_NAME(table, entry, end, remainder); + } + } + g_std_done(bp); +} + + #define DPRINTF(...) if (bootverbose) { \ printf("GEOM_PART: " __VA_ARGS__); \ } static int g_part_check_integrity(struct g_part_table *table, struct g_consumer *cp) { struct g_part_entry *e1, *e2; struct g_provider *pp; off_t offset; int failed; failed = 0; pp = cp->provider; if (table->gpt_last < table->gpt_first) { DPRINTF("last LBA is below first LBA: %jd < %jd\n", (intmax_t)table->gpt_last, (intmax_t)table->gpt_first); failed++; } if (table->gpt_last > pp->mediasize / pp->sectorsize - 1) { DPRINTF("last LBA extends beyond mediasize: " "%jd > %jd\n", (intmax_t)table->gpt_last, (intmax_t)pp->mediasize / pp->sectorsize - 1); failed++; } LIST_FOREACH(e1, &table->gpt_entry, gpe_entry) { if (e1->gpe_deleted || e1->gpe_internal) continue; if (e1->gpe_start < table->gpt_first) { DPRINTF("partition %d has start offset below first " "LBA: %jd < %jd\n", e1->gpe_index, (intmax_t)e1->gpe_start, (intmax_t)table->gpt_first); failed++; } if (e1->gpe_start > table->gpt_last) { DPRINTF("partition %d has start offset beyond last " "LBA: %jd > %jd\n", e1->gpe_index, (intmax_t)e1->gpe_start, (intmax_t)table->gpt_last); failed++; } if (e1->gpe_end < e1->gpe_start) { DPRINTF("partition %d has end offset below start " "offset: %jd < %jd\n", e1->gpe_index, (intmax_t)e1->gpe_end, (intmax_t)e1->gpe_start); failed++; } if (e1->gpe_end > table->gpt_last) { DPRINTF("partition %d has end offset beyond last " "LBA: %jd > %jd\n", e1->gpe_index, (intmax_t)e1->gpe_end, (intmax_t)table->gpt_last); failed++; } if (pp->stripesize > 0) { offset = e1->gpe_start * pp->sectorsize; if (e1->gpe_offset > offset) offset = e1->gpe_offset; if ((offset + pp->stripeoffset) % pp->stripesize) { DPRINTF("partition %d on (%s, %s) is not " "aligned on %u bytes\n", e1->gpe_index, pp->name, table->gpt_scheme->name, pp->stripesize); /* Don't treat this as a critical failure */ } } e2 = e1; while ((e2 = LIST_NEXT(e2, gpe_entry)) != NULL) { if (e2->gpe_deleted || e2->gpe_internal) continue; if (e1->gpe_start >= e2->gpe_start && e1->gpe_start <= e2->gpe_end) { DPRINTF("partition %d has start offset inside " "partition %d: start[%d] %jd >= start[%d] " "%jd <= end[%d] %jd\n", e1->gpe_index, e2->gpe_index, e2->gpe_index, (intmax_t)e2->gpe_start, e1->gpe_index, (intmax_t)e1->gpe_start, e2->gpe_index, (intmax_t)e2->gpe_end); failed++; } if (e1->gpe_end >= e2->gpe_start && e1->gpe_end <= e2->gpe_end) { DPRINTF("partition %d has end offset inside " "partition %d: start[%d] %jd >= end[%d] " "%jd <= end[%d] %jd\n", e1->gpe_index, e2->gpe_index, e2->gpe_index, (intmax_t)e2->gpe_start, e1->gpe_index, (intmax_t)e1->gpe_end, e2->gpe_index, (intmax_t)e2->gpe_end); failed++; } if (e1->gpe_start < e2->gpe_start && e1->gpe_end > e2->gpe_end) { DPRINTF("partition %d contains partition %d: " "start[%d] %jd > start[%d] %jd, end[%d] " "%jd < end[%d] %jd\n", e1->gpe_index, e2->gpe_index, e1->gpe_index, (intmax_t)e1->gpe_start, e2->gpe_index, (intmax_t)e2->gpe_start, e2->gpe_index, (intmax_t)e2->gpe_end, e1->gpe_index, (intmax_t)e1->gpe_end); failed++; } } } if (failed != 0) { printf("GEOM_PART: integrity check failed (%s, %s)\n", pp->name, table->gpt_scheme->name); if (check_integrity != 0) return (EINVAL); table->gpt_corrupt = 1; } return (0); } #undef DPRINTF struct g_part_entry * g_part_new_entry(struct g_part_table *table, int index, quad_t start, quad_t end) { struct g_part_entry *entry, *last; last = NULL; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_index == index) break; if (entry->gpe_index > index) { entry = NULL; break; } last = entry; } if (entry == NULL) { entry = g_malloc(table->gpt_scheme->gps_entrysz, M_WAITOK | M_ZERO); entry->gpe_index = index; if (last == NULL) LIST_INSERT_HEAD(&table->gpt_entry, entry, gpe_entry); else LIST_INSERT_AFTER(last, entry, gpe_entry); } else entry->gpe_offset = 0; entry->gpe_start = start; entry->gpe_end = end; return (entry); } static void g_part_new_provider(struct g_geom *gp, struct g_part_table *table, struct g_part_entry *entry) { struct g_consumer *cp; struct g_provider *pp; struct sbuf *sb; off_t offset; cp = LIST_FIRST(&gp->consumer); pp = cp->provider; offset = entry->gpe_start * pp->sectorsize; if (entry->gpe_offset < offset) entry->gpe_offset = offset; if (entry->gpe_pp == NULL) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_finish(sb); entry->gpe_pp = g_new_providerf(gp, "%s", sbuf_data(sb)); sbuf_delete(sb); entry->gpe_pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; entry->gpe_pp->private = entry; /* Close the circle. */ } entry->gpe_pp->index = entry->gpe_index - 1; /* index is 1-based. */ entry->gpe_pp->mediasize = (entry->gpe_end - entry->gpe_start + 1) * pp->sectorsize; entry->gpe_pp->mediasize -= entry->gpe_offset - offset; entry->gpe_pp->sectorsize = pp->sectorsize; entry->gpe_pp->stripesize = pp->stripesize; entry->gpe_pp->stripeoffset = pp->stripeoffset + entry->gpe_offset; if (pp->stripesize > 0) entry->gpe_pp->stripeoffset %= pp->stripesize; entry->gpe_pp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED; g_error_provider(entry->gpe_pp, 0); } static struct g_geom* g_part_find_geom(const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &g_part_class.geom, geom) { if ((gp->flags & G_GEOM_WITHER) == 0 && strcmp(name, gp->name) == 0) break; } return (gp); } static int g_part_parm_geom(struct gctl_req *req, const char *name, struct g_geom **v) { struct g_geom *gp; const char *gname; gname = gctl_get_asciiparam(req, name); if (gname == NULL) return (ENOATTR); if (strncmp(gname, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) gname += sizeof(_PATH_DEV) - 1; gp = g_part_find_geom(gname); if (gp == NULL) { gctl_error(req, "%d %s '%s'", EINVAL, name, gname); return (EINVAL); } *v = gp; return (0); } static int g_part_parm_provider(struct gctl_req *req, const char *name, struct g_provider **v) { struct g_provider *pp; const char *pname; pname = gctl_get_asciiparam(req, name); if (pname == NULL) return (ENOATTR); if (strncmp(pname, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) pname += sizeof(_PATH_DEV) - 1; pp = g_provider_by_name(pname); if (pp == NULL) { gctl_error(req, "%d %s '%s'", EINVAL, name, pname); return (EINVAL); } *v = pp; return (0); } static int g_part_parm_quad(struct gctl_req *req, const char *name, quad_t *v) { const char *p; char *x; quad_t q; p = gctl_get_asciiparam(req, name); if (p == NULL) return (ENOATTR); q = strtoq(p, &x, 0); if (*x != '\0' || q < 0) { gctl_error(req, "%d %s '%s'", EINVAL, name, p); return (EINVAL); } *v = q; return (0); } static int g_part_parm_scheme(struct gctl_req *req, const char *name, struct g_part_scheme **v) { struct g_part_scheme *s; const char *p; p = gctl_get_asciiparam(req, name); if (p == NULL) return (ENOATTR); TAILQ_FOREACH(s, &g_part_schemes, scheme_list) { if (s == &g_part_null_scheme) continue; if (!strcasecmp(s->name, p)) break; } if (s == NULL) { gctl_error(req, "%d %s '%s'", EINVAL, name, p); return (EINVAL); } *v = s; return (0); } static int g_part_parm_str(struct gctl_req *req, const char *name, const char **v) { const char *p; p = gctl_get_asciiparam(req, name); if (p == NULL) return (ENOATTR); /* An empty label is always valid. */ if (strcmp(name, "label") != 0 && p[0] == '\0') { gctl_error(req, "%d %s '%s'", EINVAL, name, p); return (EINVAL); } *v = p; return (0); } static int g_part_parm_intmax(struct gctl_req *req, const char *name, u_int *v) { const intmax_t *p; int size; p = gctl_get_param(req, name, &size); if (p == NULL) return (ENOATTR); if (size != sizeof(*p) || *p < 0 || *p > INT_MAX) { gctl_error(req, "%d %s '%jd'", EINVAL, name, *p); return (EINVAL); } *v = (u_int)*p; return (0); } static int g_part_parm_uint32(struct gctl_req *req, const char *name, u_int *v) { const uint32_t *p; int size; p = gctl_get_param(req, name, &size); if (p == NULL) return (ENOATTR); if (size != sizeof(*p) || *p > INT_MAX) { gctl_error(req, "%d %s '%u'", EINVAL, name, (unsigned int)*p); return (EINVAL); } *v = (u_int)*p; return (0); } static int g_part_parm_bootcode(struct gctl_req *req, const char *name, const void **v, unsigned int *s) { const void *p; int size; p = gctl_get_param(req, name, &size); if (p == NULL) return (ENOATTR); *v = p; *s = size; return (0); } static int g_part_probe(struct g_geom *gp, struct g_consumer *cp, int depth) { struct g_part_scheme *iter, *scheme; struct g_part_table *table; int pri, probe; table = gp->softc; scheme = (table != NULL) ? table->gpt_scheme : NULL; pri = (scheme != NULL) ? G_PART_PROBE(table, cp) : INT_MIN; if (pri == 0) goto done; if (pri > 0) { /* error */ scheme = NULL; pri = INT_MIN; } TAILQ_FOREACH(iter, &g_part_schemes, scheme_list) { if (iter == &g_part_null_scheme) continue; table = (void *)kobj_create((kobj_class_t)iter, M_GEOM, M_WAITOK); table->gpt_gp = gp; table->gpt_scheme = iter; table->gpt_depth = depth; probe = G_PART_PROBE(table, cp); if (probe <= 0 && probe > pri) { pri = probe; scheme = iter; if (gp->softc != NULL) kobj_delete((kobj_t)gp->softc, M_GEOM); gp->softc = table; if (pri == 0) goto done; } else kobj_delete((kobj_t)table, M_GEOM); } done: return ((scheme == NULL) ? ENXIO : 0); } /* * Control request functions. */ static int g_part_ctl_add(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_provider *pp; struct g_part_entry *delent, *last, *entry; struct g_part_table *table; struct sbuf *sb; quad_t end; unsigned int index; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); pp = LIST_FIRST(&gp->consumer)->provider; table = gp->softc; end = gpp->gpp_start + gpp->gpp_size - 1; if (gpp->gpp_start < table->gpt_first || gpp->gpp_start > table->gpt_last) { gctl_error(req, "%d start '%jd'", EINVAL, (intmax_t)gpp->gpp_start); return (EINVAL); } if (end < gpp->gpp_start || end > table->gpt_last) { gctl_error(req, "%d size '%jd'", EINVAL, (intmax_t)gpp->gpp_size); return (EINVAL); } if (gpp->gpp_index > table->gpt_entries) { gctl_error(req, "%d index '%d'", EINVAL, gpp->gpp_index); return (EINVAL); } delent = last = NULL; index = (gpp->gpp_index > 0) ? gpp->gpp_index : 1; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted) { if (entry->gpe_index == index) delent = entry; continue; } if (entry->gpe_index == index) index = entry->gpe_index + 1; if (entry->gpe_index < index) last = entry; if (entry->gpe_internal) continue; if (gpp->gpp_start >= entry->gpe_start && gpp->gpp_start <= entry->gpe_end) { gctl_error(req, "%d start '%jd'", ENOSPC, (intmax_t)gpp->gpp_start); return (ENOSPC); } if (end >= entry->gpe_start && end <= entry->gpe_end) { gctl_error(req, "%d end '%jd'", ENOSPC, (intmax_t)end); return (ENOSPC); } if (gpp->gpp_start < entry->gpe_start && end > entry->gpe_end) { gctl_error(req, "%d size '%jd'", ENOSPC, (intmax_t)gpp->gpp_size); return (ENOSPC); } } if (gpp->gpp_index > 0 && index != gpp->gpp_index) { gctl_error(req, "%d index '%d'", EEXIST, gpp->gpp_index); return (EEXIST); } if (index > table->gpt_entries) { gctl_error(req, "%d index '%d'", ENOSPC, index); return (ENOSPC); } entry = (delent == NULL) ? g_malloc(table->gpt_scheme->gps_entrysz, M_WAITOK | M_ZERO) : delent; entry->gpe_index = index; entry->gpe_start = gpp->gpp_start; entry->gpe_end = end; error = G_PART_ADD(table, entry, gpp); if (error) { gctl_error(req, "%d", error); if (delent == NULL) g_free(entry); return (error); } if (delent == NULL) { if (last == NULL) LIST_INSERT_HEAD(&table->gpt_entry, entry, gpe_entry); else LIST_INSERT_AFTER(last, entry, gpe_entry); entry->gpe_created = 1; } else { entry->gpe_deleted = 0; entry->gpe_modified = 1; } g_part_new_provider(gp, table, entry); /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); if (pp->stripesize > 0 && entry->gpe_pp->stripeoffset != 0) sbuf_printf(sb, " added, but partition is not " "aligned on %u bytes\n", pp->stripesize); else sbuf_cat(sb, " added\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_bootcode(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_part_table *table; struct sbuf *sb; int error, sz; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; sz = table->gpt_scheme->gps_bootcodesz; if (sz == 0) { error = ENODEV; goto fail; } if (gpp->gpp_codesize > sz) { error = EFBIG; goto fail; } error = G_PART_BOOTCODE(table, gpp); if (error) goto fail; /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "bootcode written to %s\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); fail: gctl_error(req, "%d", error); return (error); } static int g_part_ctl_commit(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_entry *entry, *tmp; struct g_part_table *table; char *buf; int error, i; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; if (!table->gpt_opened) { gctl_error(req, "%d", EPERM); return (EPERM); } g_topology_unlock(); cp = LIST_FIRST(&gp->consumer); if ((table->gpt_smhead | table->gpt_smtail) != 0) { pp = cp->provider; buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); while (table->gpt_smhead != 0) { i = ffs(table->gpt_smhead) - 1; error = g_write_data(cp, i * pp->sectorsize, buf, pp->sectorsize); if (error) { g_free(buf); goto fail; } table->gpt_smhead &= ~(1 << i); } while (table->gpt_smtail != 0) { i = ffs(table->gpt_smtail) - 1; error = g_write_data(cp, pp->mediasize - (i + 1) * pp->sectorsize, buf, pp->sectorsize); if (error) { g_free(buf); goto fail; } table->gpt_smtail &= ~(1 << i); } g_free(buf); } if (table->gpt_scheme == &g_part_null_scheme) { g_topology_lock(); g_access(cp, -1, -1, -1); g_part_wither(gp, ENXIO); return (0); } error = G_PART_WRITE(table, cp); if (error) goto fail; LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) { if (!entry->gpe_deleted) { /* Notify consumers that provider might be changed. */ if (entry->gpe_modified && ( entry->gpe_pp->acw + entry->gpe_pp->ace + entry->gpe_pp->acr) == 0) g_media_changed(entry->gpe_pp, M_NOWAIT); entry->gpe_created = 0; entry->gpe_modified = 0; continue; } LIST_REMOVE(entry, gpe_entry); g_free(entry); } table->gpt_created = 0; table->gpt_opened = 0; g_topology_lock(); g_access(cp, -1, -1, -1); return (0); fail: g_topology_lock(); gctl_error(req, "%d", error); return (error); } static int g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_scheme *scheme; struct g_part_table *null, *table; struct sbuf *sb; int attr, error; pp = gpp->gpp_provider; scheme = gpp->gpp_scheme; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, pp->name)); g_topology_assert(); /* Check that there isn't already a g_part geom on the provider. */ gp = g_part_find_geom(pp->name); if (gp != NULL) { null = gp->softc; if (null->gpt_scheme != &g_part_null_scheme) { gctl_error(req, "%d geom '%s'", EEXIST, pp->name); return (EEXIST); } } else null = NULL; if ((gpp->gpp_parms & G_PART_PARM_ENTRIES) && (gpp->gpp_entries < scheme->gps_minent || gpp->gpp_entries > scheme->gps_maxent)) { gctl_error(req, "%d entries '%d'", EINVAL, gpp->gpp_entries); return (EINVAL); } if (null == NULL) gp = g_new_geomf(&g_part_class, "%s", pp->name); gp->softc = kobj_create((kobj_class_t)gpp->gpp_scheme, M_GEOM, M_WAITOK); table = gp->softc; table->gpt_gp = gp; table->gpt_scheme = gpp->gpp_scheme; table->gpt_entries = (gpp->gpp_parms & G_PART_PARM_ENTRIES) ? gpp->gpp_entries : scheme->gps_minent; LIST_INIT(&table->gpt_entry); if (null == NULL) { cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 1, 1); if (error != 0) { g_part_wither(gp, error); gctl_error(req, "%d geom '%s'", error, pp->name); return (error); } table->gpt_opened = 1; } else { cp = LIST_FIRST(&gp->consumer); table->gpt_opened = null->gpt_opened; table->gpt_smhead = null->gpt_smhead; table->gpt_smtail = null->gpt_smtail; } g_topology_unlock(); /* Make sure the provider has media. */ if (pp->mediasize == 0 || pp->sectorsize == 0) { error = ENODEV; goto fail; } /* Make sure we can nest and if so, determine our depth. */ error = g_getattr("PART::isleaf", cp, &attr); if (!error && attr) { error = ENODEV; goto fail; } error = g_getattr("PART::depth", cp, &attr); table->gpt_depth = (!error) ? attr + 1 : 0; /* * Synthesize a disk geometry. Some partitioning schemes * depend on it and since some file systems need it even * when the partitition scheme doesn't, we do it here in * scheme-independent code. */ g_part_geometry(table, cp, pp->mediasize / pp->sectorsize); error = G_PART_CREATE(table, gpp); if (error) goto fail; g_topology_lock(); table->gpt_created = 1; if (null != NULL) kobj_delete((kobj_t)null, M_GEOM); /* * Support automatic commit by filling in the gpp_geom * parameter. */ gpp->gpp_parms |= G_PART_PARM_GEOM; gpp->gpp_geom = gp; /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "%s created\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); fail: g_topology_lock(); if (null == NULL) { g_access(cp, -1, -1, -1); g_part_wither(gp, error); } else { kobj_delete((kobj_t)gp->softc, M_GEOM); gp->softc = null; } gctl_error(req, "%d provider", error); return (error); } static int g_part_ctl_delete(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_provider *pp; struct g_part_entry *entry; struct g_part_table *table; struct sbuf *sb; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } pp = entry->gpe_pp; if (pp != NULL) { if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) { gctl_error(req, "%d", EBUSY); return (EBUSY); } pp->private = NULL; entry->gpe_pp = NULL; } if (pp != NULL) g_wither_provider(pp, ENXIO); /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_cat(sb, " deleted\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } if (entry->gpe_created) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } else { entry->gpe_modified = 0; entry->gpe_deleted = 1; } return (0); } static int g_part_ctl_destroy(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_entry *entry, *tmp; struct g_part_table *null, *table; struct sbuf *sb; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; /* Check for busy providers. */ LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (gpp->gpp_force) { pp = entry->gpe_pp; if (pp == NULL) continue; if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) continue; } gctl_error(req, "%d", EBUSY); return (EBUSY); } if (gpp->gpp_force) { /* Destroy all providers. */ LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) { pp = entry->gpe_pp; if (pp != NULL) { pp->private = NULL; g_wither_provider(pp, ENXIO); } LIST_REMOVE(entry, gpe_entry); g_free(entry); } } error = G_PART_DESTROY(table, gpp); if (error) { gctl_error(req, "%d", error); return (error); } gp->softc = kobj_create((kobj_class_t)&g_part_null_scheme, M_GEOM, M_WAITOK); null = gp->softc; null->gpt_gp = gp; null->gpt_scheme = &g_part_null_scheme; LIST_INIT(&null->gpt_entry); cp = LIST_FIRST(&gp->consumer); pp = cp->provider; null->gpt_last = pp->mediasize / pp->sectorsize - 1; null->gpt_depth = table->gpt_depth; null->gpt_opened = table->gpt_opened; null->gpt_smhead = table->gpt_smhead; null->gpt_smtail = table->gpt_smtail; while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } kobj_delete((kobj_t)table, M_GEOM); /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "%s destroyed\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_modify(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct sbuf *sb; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } error = G_PART_MODIFY(table, entry, gpp); if (error) { gctl_error(req, "%d", error); return (error); } if (!entry->gpe_created) entry->gpe_modified = 1; /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_cat(sb, " modified\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_move(struct gctl_req *req, struct g_part_parms *gpp) { gctl_error(req, "%d verb 'move'", ENOSYS); return (ENOSYS); } static int g_part_ctl_recover(struct gctl_req *req, struct g_part_parms *gpp) { struct g_part_table *table; struct g_geom *gp; struct sbuf *sb; int error, recovered; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; error = recovered = 0; if (table->gpt_corrupt) { error = G_PART_RECOVER(table); if (error == 0) error = g_part_check_integrity(table, LIST_FIRST(&gp->consumer)); if (error) { gctl_error(req, "%d recovering '%s' failed", error, gp->name); return (error); } recovered = 1; } /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); if (recovered) sbuf_printf(sb, "%s recovered\n", gp->name); else sbuf_printf(sb, "%s recovering is not needed\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_resize(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_provider *pp; struct g_part_entry *pe, *entry; struct g_part_table *table; struct sbuf *sb; quad_t end; int error; off_t mediasize; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; /* check gpp_index */ LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } /* check gpp_size */ end = entry->gpe_start + gpp->gpp_size - 1; if (gpp->gpp_size < 1 || end > table->gpt_last) { gctl_error(req, "%d size '%jd'", EINVAL, (intmax_t)gpp->gpp_size); return (EINVAL); } LIST_FOREACH(pe, &table->gpt_entry, gpe_entry) { if (pe->gpe_deleted || pe->gpe_internal || pe == entry) continue; if (end >= pe->gpe_start && end <= pe->gpe_end) { gctl_error(req, "%d end '%jd'", ENOSPC, (intmax_t)end); return (ENOSPC); } if (entry->gpe_start < pe->gpe_start && end > pe->gpe_end) { gctl_error(req, "%d size '%jd'", ENOSPC, (intmax_t)gpp->gpp_size); return (ENOSPC); } } pp = entry->gpe_pp; if ((g_debugflags & 16) == 0 && (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)) { if (entry->gpe_end - entry->gpe_start + 1 > gpp->gpp_size) { /* Deny shrinking of an opened partition. */ gctl_error(req, "%d", EBUSY); return (EBUSY); } } error = G_PART_RESIZE(table, entry, gpp); if (error) { gctl_error(req, "%d%s", error, error != EBUSY ? "": " resizing will lead to unexpected shrinking" " due to alignment"); return (error); } if (!entry->gpe_created) entry->gpe_modified = 1; /* update mediasize of changed provider */ mediasize = (entry->gpe_end - entry->gpe_start + 1) * pp->sectorsize; g_resize_provider(pp, mediasize); /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_cat(sb, " resized\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_setunset(struct gctl_req *req, struct g_part_parms *gpp, unsigned int set) { struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct sbuf *sb; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; if (gpp->gpp_parms & G_PART_PARM_INDEX) { LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } } else entry = NULL; error = G_PART_SETUNSET(table, entry, gpp->gpp_attrib, set); if (error) { gctl_error(req, "%d attrib '%s'", error, gpp->gpp_attrib); return (error); } /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "%s %sset on ", gpp->gpp_attrib, (set) ? "" : "un"); if (entry) G_PART_FULLNAME(table, entry, sb, gp->name); else sbuf_cat(sb, gp->name); sbuf_cat(sb, "\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_undo(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_provider *pp; struct g_geom *gp; struct g_part_entry *entry, *tmp; struct g_part_table *table; int error, reprobe; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; if (!table->gpt_opened) { gctl_error(req, "%d", EPERM); return (EPERM); } cp = LIST_FIRST(&gp->consumer); LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) { entry->gpe_modified = 0; if (entry->gpe_created) { pp = entry->gpe_pp; if (pp != NULL) { pp->private = NULL; entry->gpe_pp = NULL; g_wither_provider(pp, ENXIO); } entry->gpe_deleted = 1; } if (entry->gpe_deleted) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } } g_topology_unlock(); reprobe = (table->gpt_scheme == &g_part_null_scheme || table->gpt_created) ? 1 : 0; if (reprobe) { LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_internal) continue; error = EBUSY; goto fail; } while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } error = g_part_probe(gp, cp, table->gpt_depth); if (error) { g_topology_lock(); g_access(cp, -1, -1, -1); g_part_wither(gp, error); return (0); } table = gp->softc; /* * Synthesize a disk geometry. Some partitioning schemes * depend on it and since some file systems need it even * when the partitition scheme doesn't, we do it here in * scheme-independent code. */ pp = cp->provider; g_part_geometry(table, cp, pp->mediasize / pp->sectorsize); } error = G_PART_READ(table, cp); if (error) goto fail; error = g_part_check_integrity(table, cp); if (error) goto fail; g_topology_lock(); LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (!entry->gpe_internal) g_part_new_provider(gp, table, entry); } table->gpt_opened = 0; g_access(cp, -1, -1, -1); return (0); fail: g_topology_lock(); gctl_error(req, "%d", error); return (error); } static void g_part_wither(struct g_geom *gp, int error) { struct g_part_entry *entry; struct g_part_table *table; table = gp->softc; if (table != NULL) { G_PART_DESTROY(table, NULL); while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } if (gp->softc != NULL) { kobj_delete((kobj_t)gp->softc, M_GEOM); gp->softc = NULL; } } g_wither_geom(gp, error); } /* * Class methods. */ static void g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb) { struct g_part_parms gpp; struct g_part_table *table; struct gctl_req_arg *ap; enum g_part_ctl ctlreq; unsigned int i, mparms, oparms, parm; int auto_commit, close_on_error; int error, modifies; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, verb)); g_topology_assert(); ctlreq = G_PART_CTL_NONE; modifies = 1; mparms = 0; oparms = G_PART_PARM_FLAGS | G_PART_PARM_OUTPUT | G_PART_PARM_VERSION; switch (*verb) { case 'a': if (!strcmp(verb, "add")) { ctlreq = G_PART_CTL_ADD; mparms |= G_PART_PARM_GEOM | G_PART_PARM_SIZE | G_PART_PARM_START | G_PART_PARM_TYPE; oparms |= G_PART_PARM_INDEX | G_PART_PARM_LABEL; } break; case 'b': if (!strcmp(verb, "bootcode")) { ctlreq = G_PART_CTL_BOOTCODE; mparms |= G_PART_PARM_GEOM | G_PART_PARM_BOOTCODE; } break; case 'c': if (!strcmp(verb, "commit")) { ctlreq = G_PART_CTL_COMMIT; mparms |= G_PART_PARM_GEOM; modifies = 0; } else if (!strcmp(verb, "create")) { ctlreq = G_PART_CTL_CREATE; mparms |= G_PART_PARM_PROVIDER | G_PART_PARM_SCHEME; oparms |= G_PART_PARM_ENTRIES; } break; case 'd': if (!strcmp(verb, "delete")) { ctlreq = G_PART_CTL_DELETE; mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX; } else if (!strcmp(verb, "destroy")) { ctlreq = G_PART_CTL_DESTROY; mparms |= G_PART_PARM_GEOM; oparms |= G_PART_PARM_FORCE; } break; case 'm': if (!strcmp(verb, "modify")) { ctlreq = G_PART_CTL_MODIFY; mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX; oparms |= G_PART_PARM_LABEL | G_PART_PARM_TYPE; } else if (!strcmp(verb, "move")) { ctlreq = G_PART_CTL_MOVE; mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX; } break; case 'r': if (!strcmp(verb, "recover")) { ctlreq = G_PART_CTL_RECOVER; mparms |= G_PART_PARM_GEOM; } else if (!strcmp(verb, "resize")) { ctlreq = G_PART_CTL_RESIZE; mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX | G_PART_PARM_SIZE; } break; case 's': if (!strcmp(verb, "set")) { ctlreq = G_PART_CTL_SET; mparms |= G_PART_PARM_ATTRIB | G_PART_PARM_GEOM; oparms |= G_PART_PARM_INDEX; } break; case 'u': if (!strcmp(verb, "undo")) { ctlreq = G_PART_CTL_UNDO; mparms |= G_PART_PARM_GEOM; modifies = 0; } else if (!strcmp(verb, "unset")) { ctlreq = G_PART_CTL_UNSET; mparms |= G_PART_PARM_ATTRIB | G_PART_PARM_GEOM; oparms |= G_PART_PARM_INDEX; } break; } if (ctlreq == G_PART_CTL_NONE) { gctl_error(req, "%d verb '%s'", EINVAL, verb); return; } bzero(&gpp, sizeof(gpp)); for (i = 0; i < req->narg; i++) { ap = &req->arg[i]; parm = 0; switch (ap->name[0]) { case 'a': if (!strcmp(ap->name, "arg0")) { parm = mparms & (G_PART_PARM_GEOM | G_PART_PARM_PROVIDER); } if (!strcmp(ap->name, "attrib")) parm = G_PART_PARM_ATTRIB; break; case 'b': if (!strcmp(ap->name, "bootcode")) parm = G_PART_PARM_BOOTCODE; break; case 'c': if (!strcmp(ap->name, "class")) continue; break; case 'e': if (!strcmp(ap->name, "entries")) parm = G_PART_PARM_ENTRIES; break; case 'f': if (!strcmp(ap->name, "flags")) parm = G_PART_PARM_FLAGS; else if (!strcmp(ap->name, "force")) parm = G_PART_PARM_FORCE; break; case 'i': if (!strcmp(ap->name, "index")) parm = G_PART_PARM_INDEX; break; case 'l': if (!strcmp(ap->name, "label")) parm = G_PART_PARM_LABEL; break; case 'o': if (!strcmp(ap->name, "output")) parm = G_PART_PARM_OUTPUT; break; case 's': if (!strcmp(ap->name, "scheme")) parm = G_PART_PARM_SCHEME; else if (!strcmp(ap->name, "size")) parm = G_PART_PARM_SIZE; else if (!strcmp(ap->name, "start")) parm = G_PART_PARM_START; break; case 't': if (!strcmp(ap->name, "type")) parm = G_PART_PARM_TYPE; break; case 'v': if (!strcmp(ap->name, "verb")) continue; else if (!strcmp(ap->name, "version")) parm = G_PART_PARM_VERSION; break; } if ((parm & (mparms | oparms)) == 0) { gctl_error(req, "%d param '%s'", EINVAL, ap->name); return; } switch (parm) { case G_PART_PARM_ATTRIB: error = g_part_parm_str(req, ap->name, &gpp.gpp_attrib); break; case G_PART_PARM_BOOTCODE: error = g_part_parm_bootcode(req, ap->name, &gpp.gpp_codeptr, &gpp.gpp_codesize); break; case G_PART_PARM_ENTRIES: error = g_part_parm_intmax(req, ap->name, &gpp.gpp_entries); break; case G_PART_PARM_FLAGS: error = g_part_parm_str(req, ap->name, &gpp.gpp_flags); break; case G_PART_PARM_FORCE: error = g_part_parm_uint32(req, ap->name, &gpp.gpp_force); break; case G_PART_PARM_GEOM: error = g_part_parm_geom(req, ap->name, &gpp.gpp_geom); break; case G_PART_PARM_INDEX: error = g_part_parm_intmax(req, ap->name, &gpp.gpp_index); break; case G_PART_PARM_LABEL: error = g_part_parm_str(req, ap->name, &gpp.gpp_label); break; case G_PART_PARM_OUTPUT: error = 0; /* Write-only parameter */ break; case G_PART_PARM_PROVIDER: error = g_part_parm_provider(req, ap->name, &gpp.gpp_provider); break; case G_PART_PARM_SCHEME: error = g_part_parm_scheme(req, ap->name, &gpp.gpp_scheme); break; case G_PART_PARM_SIZE: error = g_part_parm_quad(req, ap->name, &gpp.gpp_size); break; case G_PART_PARM_START: error = g_part_parm_quad(req, ap->name, &gpp.gpp_start); break; case G_PART_PARM_TYPE: error = g_part_parm_str(req, ap->name, &gpp.gpp_type); break; case G_PART_PARM_VERSION: error = g_part_parm_uint32(req, ap->name, &gpp.gpp_version); break; default: error = EDOOFUS; gctl_error(req, "%d %s", error, ap->name); break; } if (error != 0) { if (error == ENOATTR) { gctl_error(req, "%d param '%s'", error, ap->name); } return; } gpp.gpp_parms |= parm; } if ((gpp.gpp_parms & mparms) != mparms) { parm = mparms - (gpp.gpp_parms & mparms); gctl_error(req, "%d param '%x'", ENOATTR, parm); return; } /* Obtain permissions if possible/necessary. */ close_on_error = 0; table = NULL; if (modifies && (gpp.gpp_parms & G_PART_PARM_GEOM)) { table = gpp.gpp_geom->softc; if (table != NULL && table->gpt_corrupt && ctlreq != G_PART_CTL_DESTROY && ctlreq != G_PART_CTL_RECOVER) { gctl_error(req, "%d table '%s' is corrupt", EPERM, gpp.gpp_geom->name); return; } if (table != NULL && !table->gpt_opened) { error = g_access(LIST_FIRST(&gpp.gpp_geom->consumer), 1, 1, 1); if (error) { gctl_error(req, "%d geom '%s'", error, gpp.gpp_geom->name); return; } table->gpt_opened = 1; close_on_error = 1; } } /* Allow the scheme to check or modify the parameters. */ if (table != NULL) { error = G_PART_PRECHECK(table, ctlreq, &gpp); if (error) { gctl_error(req, "%d pre-check failed", error); goto out; } } else error = EDOOFUS; /* Prevent bogus uninit. warning. */ switch (ctlreq) { case G_PART_CTL_NONE: panic("%s", __func__); case G_PART_CTL_ADD: error = g_part_ctl_add(req, &gpp); break; case G_PART_CTL_BOOTCODE: error = g_part_ctl_bootcode(req, &gpp); break; case G_PART_CTL_COMMIT: error = g_part_ctl_commit(req, &gpp); break; case G_PART_CTL_CREATE: error = g_part_ctl_create(req, &gpp); break; case G_PART_CTL_DELETE: error = g_part_ctl_delete(req, &gpp); break; case G_PART_CTL_DESTROY: error = g_part_ctl_destroy(req, &gpp); break; case G_PART_CTL_MODIFY: error = g_part_ctl_modify(req, &gpp); break; case G_PART_CTL_MOVE: error = g_part_ctl_move(req, &gpp); break; case G_PART_CTL_RECOVER: error = g_part_ctl_recover(req, &gpp); break; case G_PART_CTL_RESIZE: error = g_part_ctl_resize(req, &gpp); break; case G_PART_CTL_SET: error = g_part_ctl_setunset(req, &gpp, 1); break; case G_PART_CTL_UNDO: error = g_part_ctl_undo(req, &gpp); break; case G_PART_CTL_UNSET: error = g_part_ctl_setunset(req, &gpp, 0); break; } /* Implement automatic commit. */ if (!error) { auto_commit = (modifies && (gpp.gpp_parms & G_PART_PARM_FLAGS) && strchr(gpp.gpp_flags, 'C') != NULL) ? 1 : 0; if (auto_commit) { KASSERT(gpp.gpp_parms & G_PART_PARM_GEOM, ("%s", __func__)); error = g_part_ctl_commit(req, &gpp); } } out: if (error && close_on_error) { g_access(LIST_FIRST(&gpp.gpp_geom->consumer), -1, -1, -1); table->gpt_opened = 0; } } static int g_part_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, gp->name)); g_topology_assert(); g_part_wither(gp, EINVAL); return (0); } static struct g_geom * g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_consumer *cp; struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct root_hold_token *rht; int attr, depth; int error; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name)); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); /* * Create a GEOM with consumer and hook it up to the provider. * With that we become part of the topology. Optain read access * to the provider. */ gp = g_new_geomf(mp, "%s", pp->name); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); if (error != 0) { if (cp->provider) g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } rht = root_mount_hold(mp->name); g_topology_unlock(); /* * Short-circuit the whole probing galore when there's no * media present. */ if (pp->mediasize == 0 || pp->sectorsize == 0) { error = ENODEV; goto fail; } /* Make sure we can nest and if so, determine our depth. */ error = g_getattr("PART::isleaf", cp, &attr); if (!error && attr) { error = ENODEV; goto fail; } error = g_getattr("PART::depth", cp, &attr); depth = (!error) ? attr + 1 : 0; error = g_part_probe(gp, cp, depth); if (error) goto fail; table = gp->softc; /* * Synthesize a disk geometry. Some partitioning schemes * depend on it and since some file systems need it even * when the partitition scheme doesn't, we do it here in * scheme-independent code. */ g_part_geometry(table, cp, pp->mediasize / pp->sectorsize); error = G_PART_READ(table, cp); if (error) goto fail; error = g_part_check_integrity(table, cp); if (error) goto fail; g_topology_lock(); LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (!entry->gpe_internal) g_part_new_provider(gp, table, entry); } root_mount_rel(rht); g_access(cp, -1, 0, 0); return (gp); fail: g_topology_lock(); root_mount_rel(rht); g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } /* * Geom methods. */ static int g_part_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp; G_PART_TRACE((G_T_ACCESS, "%s(%s,%d,%d,%d)", __func__, pp->name, dr, dw, de)); cp = LIST_FIRST(&pp->geom->consumer); /* We always gain write-exclusive access. */ return (g_access(cp, dr, dw, dw + de)); } static void g_part_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { char buf[64]; struct g_part_entry *entry; struct g_part_table *table; KASSERT(sb != NULL && gp != NULL, ("%s", __func__)); table = gp->softc; if (indent == NULL) { KASSERT(cp == NULL && pp != NULL, ("%s", __func__)); entry = pp->private; if (entry == NULL) return; sbuf_printf(sb, " i %u o %ju ty %s", entry->gpe_index, (uintmax_t)entry->gpe_offset, G_PART_TYPE(table, entry, buf, sizeof(buf))); /* * libdisk compatibility quirk - the scheme dumps the * slicer name and partition type in a way that is * compatible with libdisk. When libdisk is not used * anymore, this should go away. */ G_PART_DUMPCONF(table, entry, sb, indent); } else if (cp != NULL) { /* Consumer configuration. */ KASSERT(pp == NULL, ("%s", __func__)); /* none */ } else if (pp != NULL) { /* Provider configuration. */ entry = pp->private; if (entry == NULL) return; sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)entry->gpe_start); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)entry->gpe_end); sbuf_printf(sb, "%s%u\n", indent, entry->gpe_index); sbuf_printf(sb, "%s%s\n", indent, G_PART_TYPE(table, entry, buf, sizeof(buf))); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)entry->gpe_offset); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)pp->mediasize); G_PART_DUMPCONF(table, entry, sb, indent); } else { /* Geom configuration. */ sbuf_printf(sb, "%s%s\n", indent, table->gpt_scheme->name); sbuf_printf(sb, "%s%u\n", indent, table->gpt_entries); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)table->gpt_first); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)table->gpt_last); sbuf_printf(sb, "%s%u\n", indent, table->gpt_sectors); sbuf_printf(sb, "%s%u\n", indent, table->gpt_heads); sbuf_printf(sb, "%s%s\n", indent, table->gpt_corrupt ? "CORRUPT": "OK"); sbuf_printf(sb, "%s%s\n", indent, table->gpt_opened ? "true": "false"); G_PART_DUMPCONF(table, NULL, sb, indent); } } /*- * This start routine is only called for non-trivial requests, all the * trivial ones are handled autonomously by the slice code. * For requests we handle here, we must call the g_io_deliver() on the * bio, and return non-zero to indicate to the slice code that we did so. * This code executes in the "DOWN" I/O path, this means: * * No sleeping. * * Don't grab the topology lock. * * Don't call biowait, g_getattr(), g_setattr() or g_read_data() */ static int g_part_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { struct g_part_table *table; table = pp->geom->softc; return G_PART_IOCTL(table, pp, cmd, data, fflag, td); } static void g_part_resize(struct g_consumer *cp) { struct g_part_table *table; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name)); g_topology_assert(); table = cp->geom->softc; if (table->gpt_opened == 0) { if (g_access(cp, 1, 1, 1) != 0) return; table->gpt_opened = 1; } if (G_PART_RESIZE(table, NULL, NULL) == 0) printf("GEOM_PART: %s was automatically resized.\n" " Use `gpart commit %s` to save changes or " "`gpart undo %s` to revert them.\n", cp->geom->name, cp->geom->name, cp->geom->name); if (g_part_check_integrity(table, cp) != 0) { g_access(cp, -1, -1, -1); table->gpt_opened = 0; g_part_wither(table->gpt_gp, ENXIO); } } static void g_part_orphan(struct g_consumer *cp) { struct g_provider *pp; struct g_part_table *table; pp = cp->provider; KASSERT(pp != NULL, ("%s", __func__)); G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, pp->name)); g_topology_assert(); KASSERT(pp->error != 0, ("%s", __func__)); table = cp->geom->softc; if (table != NULL && table->gpt_opened) g_access(cp, -1, -1, -1); g_part_wither(cp->geom, pp->error); } static void g_part_spoiled(struct g_consumer *cp) { G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name)); g_topology_assert(); cp->flags |= G_CF_ORPHAN; g_part_wither(cp->geom, ENXIO); } static void g_part_start(struct bio *bp) { struct bio *bp2; struct g_consumer *cp; struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct g_kerneldump *gkd; struct g_provider *pp; + void (*done_func)(struct bio *) = g_std_done; char buf[64]; pp = bp->bio_to; gp = pp->geom; table = gp->softc; cp = LIST_FIRST(&gp->consumer); G_PART_TRACE((G_T_BIO, "%s: cmd=%d, provider=%s", __func__, bp->bio_cmd, pp->name)); entry = pp->private; if (entry == NULL) { g_io_deliver(bp, ENXIO); return; } switch(bp->bio_cmd) { case BIO_DELETE: case BIO_READ: case BIO_WRITE: if (bp->bio_offset >= pp->mediasize) { g_io_deliver(bp, EIO); return; } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } if (bp2->bio_offset + bp2->bio_length > pp->mediasize) bp2->bio_length = pp->mediasize - bp2->bio_offset; bp2->bio_done = g_std_done; bp2->bio_offset += entry->gpe_offset; g_io_request(bp2, cp); return; case BIO_FLUSH: break; case BIO_GETATTR: if (g_handleattr_int(bp, "GEOM::fwheads", table->gpt_heads)) return; if (g_handleattr_int(bp, "GEOM::fwsectors", table->gpt_sectors)) return; if (g_handleattr_int(bp, "PART::isleaf", table->gpt_isleaf)) return; if (g_handleattr_int(bp, "PART::depth", table->gpt_depth)) return; if (g_handleattr_str(bp, "PART::scheme", table->gpt_scheme->name)) return; if (g_handleattr_str(bp, "PART::type", G_PART_TYPE(table, entry, buf, sizeof(buf)))) return; + if (!strcmp("GEOM::physpath", bp->bio_attribute)) { + done_func = g_part_get_physpath_done; + break; + } if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) { /* * Check that the partition is suitable for kernel * dumps. Typically only swap partitions should be * used. If the request comes from the nested scheme * we allow dumping there as well. */ if ((bp->bio_from == NULL || bp->bio_from->geom->class != &g_part_class) && G_PART_DUMPTO(table, entry) == 0) { g_io_deliver(bp, ENODEV); printf("GEOM_PART: Partition '%s' not suitable" " for kernel dumps (wrong type?)\n", pp->name); return; } gkd = (struct g_kerneldump *)bp->bio_data; if (gkd->offset >= pp->mediasize) { g_io_deliver(bp, EIO); return; } if (gkd->offset + gkd->length > pp->mediasize) gkd->length = pp->mediasize - gkd->offset; gkd->offset += entry->gpe_offset; } break; default: g_io_deliver(bp, EOPNOTSUPP); return; } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } - bp2->bio_done = g_std_done; + bp2->bio_done = done_func; g_io_request(bp2, cp); } static void g_part_init(struct g_class *mp) { TAILQ_INSERT_HEAD(&g_part_schemes, &g_part_null_scheme, scheme_list); } static void g_part_fini(struct g_class *mp) { TAILQ_REMOVE(&g_part_schemes, &g_part_null_scheme, scheme_list); } static void g_part_unload_event(void *arg, int flag) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_scheme *scheme; struct g_part_table *table; uintptr_t *xchg; int acc, error; if (flag == EV_CANCEL) return; xchg = arg; error = 0; scheme = (void *)(*xchg); g_topology_assert(); LIST_FOREACH(gp, &g_part_class.geom, geom) { table = gp->softc; if (table->gpt_scheme != scheme) continue; acc = 0; LIST_FOREACH(pp, &gp->provider, provider) acc += pp->acr + pp->acw + pp->ace; LIST_FOREACH(cp, &gp->consumer, consumer) acc += cp->acr + cp->acw + cp->ace; if (!acc) g_part_wither(gp, ENOSYS); else error = EBUSY; } if (!error) TAILQ_REMOVE(&g_part_schemes, scheme, scheme_list); *xchg = error; } int g_part_modevent(module_t mod, int type, struct g_part_scheme *scheme) { struct g_part_scheme *iter; uintptr_t arg; int error; error = 0; switch (type) { case MOD_LOAD: TAILQ_FOREACH(iter, &g_part_schemes, scheme_list) { if (scheme == iter) { printf("GEOM_PART: scheme %s is already " "registered!\n", scheme->name); break; } } if (iter == NULL) { TAILQ_INSERT_TAIL(&g_part_schemes, scheme, scheme_list); g_retaste(&g_part_class); } break; case MOD_UNLOAD: arg = (uintptr_t)scheme; error = g_waitfor_event(g_part_unload_event, &arg, M_WAITOK, NULL); if (error == 0) error = arg; break; default: error = EOPNOTSUPP; break; } return (error); } Index: stable/11/tests/sys/geom/class/Makefile =================================================================== --- stable/11/tests/sys/geom/class/Makefile (revision 330732) +++ stable/11/tests/sys/geom/class/Makefile (revision 330733) @@ -1,21 +1,20 @@ # $FreeBSD$ PACKAGE= tests TESTSDIR= ${TESTSBASE}/sys/geom/class TESTS_SUBDIRS+= concat TESTS_SUBDIRS+= eli TESTS_SUBDIRS+= gate -# XXX: might not work due to geom(4) changes; more investigation's needed -#TESTS_SUBDIRS+= gpt TESTS_SUBDIRS+= mirror TESTS_SUBDIRS+= nop +TESTS_SUBDIRS+= part TESTS_SUBDIRS+= raid3 TESTS_SUBDIRS+= shsec TESTS_SUBDIRS+= stripe TESTS_SUBDIRS+= uzip ${PACKAGE}FILES+= geom_subr.sh .include Index: stable/11/tests/sys/geom/class/eli/Makefile =================================================================== --- stable/11/tests/sys/geom/class/eli/Makefile (revision 330732) +++ stable/11/tests/sys/geom/class/eli/Makefile (revision 330733) @@ -1,42 +1,43 @@ # $FreeBSD$ .PATH: ${SRCTOP}/sys/geom/eli ${SRCTOP}/sys/crypto/sha2 PACKAGE= tests TESTSDIR= ${TESTSBASE}/sys/geom/class/${.CURDIR:T} ATF_TESTS_C= pbkdf2_test ATF_TESTS_SH+= attach_test ATF_TESTS_SH+= configure_test ATF_TESTS_SH+= delkey_test ATF_TESTS_SH+= detach_test ATF_TESTS_SH+= init_test ATF_TESTS_SH+= integrity_test ATF_TESTS_SH+= kill_test +ATF_TESTS_SH+= misc_test ATF_TESTS_SH+= onetime_test ATF_TESTS_SH+= resize_test ATF_TESTS_SH+= setkey_test ${PACKAGE}FILES+= conf.sh .for t in ${TAP_TESTS_SH} TEST_METADATA.$t+= required_user="root" .endfor CFLAGS.pbkdf2_test= -I${SRCTOP}/sys SRCS.pbkdf2_test= \ hmac_test.c \ g_eli_crypto.c \ g_eli_hmac.c \ pkcs5v2.c \ sha512c.c \ sha256c.c LIBADD.pbkdf2_test= crypto testvect.h: python gentestvect.py > ${.TARGET} .include Index: stable/11/tests/sys/geom/class/eli/misc_test.sh =================================================================== --- stable/11/tests/sys/geom/class/eli/misc_test.sh (nonexistent) +++ stable/11/tests/sys/geom/class/eli/misc_test.sh (revision 330733) @@ -0,0 +1,177 @@ +# Copyright (c) 2018 Alan Somers +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ + +atf_test_case preserve_props cleanup +preserve_props_head() +{ + atf_set "descr" "geli should preserve basic GEOM properties" + atf_set "require.user" "root" + atf_set "timeout" 15 +} +preserve_props_body() +{ + . $(atf_get_srcdir)/conf.sh + md=$(attach_md -s1m) + atf_check geli onetime /dev/${md} + md_secsize=$(diskinfo ${md} | cut -wf 2) + md_stripesize=$(diskinfo ${md} | cut -wf 5) + eli_secsize=$(diskinfo ${md}.eli | cut -wf 2) + eli_stripesize=$(diskinfo ${md}.eli | cut -wf 5) + atf_check_equal "$md_secsize" "$eli_secsize" + atf_check_equal "$md_stripesize" "$eli_stripesize" +} +preserve_props_cleanup() +{ + . $(atf_get_srcdir)/conf.sh + geli_test_cleanup +} + +atf_test_case preserve_disk_props cleanup +preserve_disk_props_head() +{ + atf_set "descr" "geli should preserve properties for disks" + atf_set "require.user" "root" + atf_set "require.config" "disks" + atf_set "timeout" 15 +} +preserve_disk_props_body() +{ + . $(atf_get_srcdir)/conf.sh + disks=`atf_config_get disks` + disk=${disks%% *} + if [ -z "$disk" ]; then + atf_skip "Must define disks (see tests(7))" + fi + atf_check geli onetime ${disk} + + disk_ident=$(diskinfo -s ${disk}) + disk_descr=$(diskinfo -v ${disk} | awk '/Disk descr/ {print $1}') + disk_rotrate=$(diskinfo -v ${disk} | awk '/Rotation rate/ {print $1}') + disk_zonemode=$(diskinfo -v ${disk} | awk '/Zone Mode/ {print $1}') + eli_ident=$(diskinfo -s ${disk}.eli) + eli_descr=$(diskinfo -v ${disk}.eli | awk '/Disk descr/ {print $1}') + eli_rotrate=$(diskinfo -v ${disk}.eli | awk '/Rotation/ {print $1}') + eli_zonemode=$(diskinfo -v ${disk}.eli | awk '/Zone Mode/ {print $1}') + atf_check_equal "$disk_ident" "$eli_ident" + atf_check_equal "$disk_descr" "$eli_descr" + atf_check_equal "$disk_rotrate" "$eli_rotrate" + atf_check_equal "$disk_zonemode" "$eli_zonemode" +} +preserve_disk_props_cleanup() +{ + . $(atf_get_srcdir)/conf.sh + disk_cleanup + geli_test_cleanup +} + +atf_test_case physpath cleanup +physpath_head() +{ + atf_set "descr" "geli should append /eli to the underlying device's physical path" + atf_set "require.user" "root" + atf_set "timeout" 15 +} +physpath_body() +{ + . $(atf_get_srcdir)/conf.sh + load_gnop + + md=$(attach_md -s1m) + # If the underlying device has no physical path, then geli should not + # create one. + atf_check -o empty -e ignore diskinfo -p $md + atf_check -s exit:0 geli onetime $md + atf_check -o empty -e ignore diskinfo -p $md.eli + atf_check -s exit:0 geli kill $md + + # If the underlying device does have a physical path, then geli should + # append "/eli" + physpath="some/physical/path" + atf_check gnop create -z $physpath ${md} + atf_check -s exit:0 geli onetime $md.nop + atf_check -o match:"^${physpath}/eli$" diskinfo -p $md.nop.eli +} +physpath_cleanup() +{ + . $(atf_get_srcdir)/conf.sh + + if [ -f "$TEST_MDS_FILE" ]; then + while read md; do + [ -c /dev/${md}.nop.eli ] && \ + geli detach $md.nop.eli 2>/dev/null + [ -c /dev/${md}.nop ] && \ + gnop destroy -f $md.nop 2>/dev/null + [ -c /dev/${md}.eli ] && \ + geli detach $md.eli 2>/dev/null + mdconfig -d -u $md 2>/dev/null + done < $TEST_MDS_FILE + fi + true +} + +atf_init_test_cases() +{ + atf_add_test_case physpath + atf_add_test_case preserve_props + atf_add_test_case preserve_disk_props +} + + +common_cleanup() +{ + + if [ -f "$MD_DEVS" ]; then + while read test_md; do + gnop destroy -f ${test_md}.nop 2>/dev/null + mdconfig -d -u $test_md 2>/dev/null + done < $MD_DEVS + rm $MD_DEVS + fi + + if [ -f "$PLAINFILES" ]; then + while read f; do + rm -f ${f} + done < ${PLAINFILES} + rm ${PLAINFILES} + fi + true +} + +disk_cleanup() +{ + disks=`atf_config_get disks` + disk=${disks%% *} + if [ -n "$disk" ]; then + geli kill ${disk} 2>/dev/null + fi +} + +load_gnop() +{ + if ! kldstat -q -m g_nop; then + geom nop load || atf_skip "could not load module for geom nop" + fi +} Property changes on: stable/11/tests/sys/geom/class/eli/misc_test.sh ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/tests/sys/geom/class/part/Makefile =================================================================== --- stable/11/tests/sys/geom/class/part/Makefile (nonexistent) +++ stable/11/tests/sys/geom/class/part/Makefile (revision 330733) @@ -0,0 +1,10 @@ +# $FreeBSD$ + +PACKAGE= tests + +TESTSDIR= ${TESTSBASE}/sys/geom/class/${.CURDIR:T} + +# TODO: port the perl tests in tools/regression/geom_gpt +ATF_TESTS_SH+= misc + +.include Property changes on: stable/11/tests/sys/geom/class/part/Makefile ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/tests/sys/geom/class/part/misc.sh =================================================================== --- stable/11/tests/sys/geom/class/part/misc.sh (nonexistent) +++ stable/11/tests/sys/geom/class/part/misc.sh (revision 330733) @@ -0,0 +1,187 @@ +# Copyright (c) 2018 Alan Somers +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ + +MD_DEVS="md.devs" + +atf_test_case blank_physpath cleanup +blank_physpath_head() +{ + atf_set "descr" "gpart shouldn't add physical paths to underlying providers that have none" + atf_set "require.user" "root" +} +blank_physpath_body() +{ + load_gnop + load_gpart + md=$(alloc_md) + atf_check -o empty -e ignore diskinfo -p ${md} + atf_check -s exit:0 -o ignore gpart create -s bsd ${md} + atf_check -s exit:0 -o ignore gpart add -t freebsd-ufs ${md} + atf_check -o empty -e ignore diskinfo -p ${md}a +} +blank_physpath_cleanup() +{ + common_cleanup +} + + +atf_test_case bsd_physpath cleanup +bsd_physpath_head() +{ + atf_set "descr" "BSD partitions should append /X to the underlying device's physical path" + atf_set "require.user" "root" +} +bsd_physpath_body() +{ + load_gnop + load_gpart + md=$(alloc_md) + physpath="some/physical/path" + atf_check gnop create -z $physpath /dev/${md} + atf_check -s exit:0 -o ignore gpart create -s bsd ${md}.nop + atf_check -s exit:0 -o ignore gpart add -t freebsd-ufs ${md}.nop + gpart_physpath=$(diskinfo -p ${md}.nopa) + atf_check_equal "${physpath}/a" "$gpart_physpath" +} +bsd_physpath_cleanup() +{ + common_cleanup +} + +atf_test_case gpt_physpath cleanup +gpt_physpath_head() +{ + atf_set "descr" "GPT partitions should append /pX to the underlying device's physical path" + atf_set "require.user" "root" +} +gpt_physpath_body() +{ + load_gnop + load_gpart + md=$(alloc_md) + physpath="some/physical/path" + atf_check gnop create -z $physpath /dev/${md} + atf_check -s exit:0 -o ignore gpart create -s gpt ${md}.nop + atf_check -s exit:0 -o ignore gpart add -t efi ${md}.nop + gpart_physpath=$(diskinfo -p ${md}.nopp1) + atf_check_equal "${physpath}/p1" "$gpart_physpath" +} +gpt_physpath_cleanup() +{ + common_cleanup +} + +atf_test_case mbr_physpath cleanup +mbr_physpath_head() +{ + atf_set "descr" "MBR partitions should append /sX to the underlying device's physical path" + atf_set "require.user" "root" +} +mbr_physpath_body() +{ + load_gnop + load_gpart + md=$(alloc_md) + physpath="some/physical/path" + atf_check gnop create -z $physpath /dev/${md} + atf_check -s exit:0 -o ignore gpart create -s mbr ${md}.nop + atf_check -s exit:0 -o ignore gpart add -t freebsd ${md}.nop + gpart_physpath=$(diskinfo -p ${md}.nops1) + atf_check_equal "${physpath}/s1" "$gpart_physpath" +} +mbr_physpath_cleanup() +{ + common_cleanup +} + +atf_test_case mbr_bsd_physpath cleanup +mbr_bsd_physpath_head() +{ + atf_set "descr" "BSD partitions nested within MBR partitions should append /sX/Y to the underlying device's physical path" + atf_set "require.user" "root" +} +mbr_bsd_physpath_body() +{ + load_gnop + load_gpart + md=$(alloc_md) + physpath="some/physical/path" + atf_check gnop create -z $physpath /dev/${md} + atf_check -s exit:0 -o ignore gpart create -s mbr ${md}.nop + atf_check -s exit:0 -o ignore gpart add -t freebsd ${md}.nop + atf_check -s exit:0 -o ignore gpart create -s bsd ${md}.nops1 + atf_check -s exit:0 -o ignore gpart add -t freebsd-ufs ${md}.nops1 + gpart_physpath=$(diskinfo -p ${md}.nops1a) + atf_check_equal "${physpath}/s1/a" "$gpart_physpath" +} +mbr_bsd_physpath_cleanup() +{ + common_cleanup +} + +atf_init_test_cases() +{ + atf_add_test_case blank_physpath + atf_add_test_case bsd_physpath + atf_add_test_case gpt_physpath + atf_add_test_case mbr_physpath + atf_add_test_case mbr_bsd_physpath +} + +alloc_md() +{ + local md + + md=$(mdconfig -a -t swap -s 1M) || atf_fail "mdconfig -a failed" + echo ${md} >> $MD_DEVS + echo ${md} +} + +common_cleanup() +{ + if [ -f "$MD_DEVS" ]; then + while read test_md; do + gnop destroy -f ${test_md}.nop 2>/dev/null + mdconfig -d -u $test_md 2>/dev/null + done < $MD_DEVS + rm $MD_DEVS + fi + true +} + +load_gpart() +{ + if ! kldstat -q -m g_part; then + geom part load || atf_skip "could not load module for geom part" + fi +} + +load_gnop() +{ + if ! kldstat -q -m g_nop; then + geom nop load || atf_skip "could not load module for geom nop" + fi +} Property changes on: stable/11/tests/sys/geom/class/part/misc.sh ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11 =================================================================== --- stable/11 (revision 330732) +++ stable/11 (revision 330733) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r329273,329275,329277,329284,329344