diff --git a/sys/geom/bde/g_bde.c b/sys/geom/bde/g_bde.c index 3d2de334e4bb..c9ef244bac81 100644 --- a/sys/geom/bde/g_bde.c +++ b/sys/geom/bde/g_bde.c @@ -1,299 +1,298 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define BDE_CLASS_NAME "BDE" FEATURE(geom_bde, "GEOM-based Disk Encryption"); static void g_bde_start(struct bio *bp) { switch (bp->bio_cmd) { case BIO_DELETE: case BIO_READ: case BIO_WRITE: g_bde_start1(bp); break; case BIO_GETATTR: g_io_deliver(bp, EOPNOTSUPP); break; default: g_io_deliver(bp, EOPNOTSUPP); return; } return; } static void g_bde_orphan(struct g_consumer *cp) { struct g_geom *gp; struct g_provider *pp; struct g_bde_softc *sc; g_trace(G_T_TOPOLOGY, "g_bde_orphan(%p/%s)", cp, cp->provider->name); g_topology_assert(); gp = cp->geom; sc = gp->softc; gp->flags |= G_GEOM_WITHER; LIST_FOREACH(pp, &gp->provider, provider) g_wither_provider(pp, ENXIO); explicit_bzero(sc, sizeof(struct g_bde_softc)); /* destroy evidence */ return; } static int g_bde_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0) { de++; dr++; } /* ... 
and let go of it on last close */ if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1) { de--; dr--; } return (g_access(cp, dr, dw, de)); } static void g_bde_create_geom(struct gctl_req *req, struct g_class *mp, struct g_provider *pp) { struct g_geom *gp; struct g_consumer *cp; struct g_bde_key *kp; int error, i; u_int sectorsize; off_t mediasize; struct g_bde_softc *sc; void *pass; void *key; g_trace(G_T_TOPOLOGY, "g_bde_create_geom(%s, %s)", mp->name, pp->name); g_topology_assert(); gp = NULL; gp = g_new_geomf(mp, "%s.bde", pp->name); cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); g_destroy_geom(gp); gctl_error(req, "could not attach consumer"); return; } error = g_access(cp, 1, 1, 1); if (error) { g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); gctl_error(req, "could not access consumer"); return; } pass = NULL; key = NULL; do { pass = gctl_get_param(req, "pass", &i); if (pass == NULL || i != SHA512_DIGEST_LENGTH) { gctl_error(req, "No usable key presented"); break; } key = gctl_get_param(req, "key", &i); if (key != NULL && i != 16) { gctl_error(req, "Invalid key presented"); break; } sectorsize = cp->provider->sectorsize; mediasize = cp->provider->mediasize; sc = g_malloc(sizeof(struct g_bde_softc), M_WAITOK | M_ZERO); gp->softc = sc; sc->geom = gp; sc->consumer = cp; error = g_bde_decrypt_lock(sc, pass, key, mediasize, sectorsize, NULL); explicit_bzero(sc->sha2, sizeof sc->sha2); if (error) break; kp = &sc->key; /* Initialize helper-fields */ kp->keys_per_sector = kp->sectorsize / G_BDE_SKEYLEN; kp->zone_cont = kp->keys_per_sector * kp->sectorsize; kp->zone_width = kp->zone_cont + kp->sectorsize; kp->media_width = kp->sectorN - kp->sector0 - G_BDE_MAXKEYS * kp->sectorsize; /* Our external parameters */ sc->zone_cont = kp->zone_cont; sc->mediasize = g_bde_max_sector(kp); sc->sectorsize = kp->sectorsize; TAILQ_INIT(&sc->freelist); TAILQ_INIT(&sc->worklist); mtx_init(&sc->worklist_mutex, 
"g_bde_worklist", NULL, MTX_DEF); /* XXX: error check */ kproc_create(g_bde_worker, gp, &sc->thread, 0, 0, "g_bde %s", gp->name); pp = g_new_providerf(gp, "%s", gp->name); pp->stripesize = kp->zone_cont; pp->stripeoffset = 0; pp->mediasize = sc->mediasize; pp->sectorsize = sc->sectorsize; g_error_provider(pp, 0); break; } while (0); if (pass != NULL) explicit_bzero(pass, SHA512_DIGEST_LENGTH); if (key != NULL) explicit_bzero(key, 16); if (error == 0) return; g_access(cp, -1, -1, -1); g_detach(cp); g_destroy_consumer(cp); - if (gp->softc != NULL) - g_free(gp->softc); + g_free(gp->softc); g_destroy_geom(gp); switch (error) { case ENOENT: gctl_error(req, "Lock was destroyed"); break; case ESRCH: gctl_error(req, "Lock was nuked"); break; case EINVAL: gctl_error(req, "Could not open lock"); break; case ENOTDIR: gctl_error(req, "Lock not found"); break; default: gctl_error(req, "Could not open lock (%d)", error); break; } return; } static int g_bde_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_consumer *cp; struct g_provider *pp; struct g_bde_softc *sc; g_trace(G_T_TOPOLOGY, "g_bde_destroy_geom(%s, %s)", mp->name, gp->name); g_topology_assert(); /* * Orderly detachment. 
*/ KASSERT(gp != NULL, ("NULL geom")); pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("NULL provider")); if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) return (EBUSY); sc = gp->softc; cp = LIST_FIRST(&gp->consumer); KASSERT(cp != NULL, ("NULL consumer")); sc->dead = 1; wakeup(sc); g_access(cp, -1, -1, -1); g_detach(cp); g_destroy_consumer(cp); while (sc->dead != 2 && !LIST_EMPTY(&pp->consumers)) tsleep(sc, PRIBIO, "g_bdedie", hz); mtx_destroy(&sc->worklist_mutex); explicit_bzero(&sc->key, sizeof sc->key); g_free(sc); g_wither_geom(gp, ENXIO); return (0); } static void g_bde_ctlreq(struct gctl_req *req, struct g_class *mp, char const *verb) { struct g_geom *gp; struct g_provider *pp; if (!strcmp(verb, "create geom")) { pp = gctl_get_provider(req, "provider"); if (pp != NULL) g_bde_create_geom(req, mp, pp); } else if (!strcmp(verb, "destroy geom")) { gp = gctl_get_geom(req, mp, "geom"); if (gp != NULL) g_bde_destroy_geom(req, mp, gp); } else { gctl_error(req, "unknown verb"); } } static struct g_class g_bde_class = { .name = BDE_CLASS_NAME, .version = G_VERSION, .destroy_geom = g_bde_destroy_geom, .ctlreq = g_bde_ctlreq, .start = g_bde_start, .orphan = g_bde_orphan, .access = g_bde_access, .spoiled = g_std_spoiled, }; DECLARE_GEOM_CLASS(g_bde_class, g_bde); MODULE_VERSION(geom_bde, 0); diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 2493fd31c7de..2dc93b1d9956 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -1,1468 +1,1467 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2019 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_eli, "GEOM crypto module"); MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_ELI stuff"); static int g_eli_version = G_ELI_VERSION; SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0, "GELI version"); int g_eli_debug = 0; SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0, "Debug level"); static u_int g_eli_tries = 3; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0, "Number of tries for entering the passphrase"); static u_int g_eli_visible_passphrase = GETS_NOECHO; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN, 
&g_eli_visible_passphrase, 0, "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)"); u_int g_eli_overwrites = G_ELI_OVERWRITES; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites, 0, "Number of times on-disk keys should be overwritten when destroying them"); static u_int g_eli_threads = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0, "Number of threads doing crypto work"); u_int g_eli_batch = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0, "Use crypto operations batching"); static bool g_eli_unmapped_io = true; SYSCTL_BOOL(_kern_geom_eli, OID_AUTO, unmapped_io, CTLFLAG_RDTUN, &g_eli_unmapped_io, 0, "Enable support for unmapped I/O"); /* * Passphrase cached during boot, in order to be more user-friendly if * there are multiple providers using the same passphrase. */ static char cached_passphrase[256]; static u_int g_eli_boot_passcache = 1; TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache); SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD, &g_eli_boot_passcache, 0, "Passphrases are cached during boot process for possible reuse"); static void fetch_loader_passphrase(void * dummy) { char * env_passphrase; KASSERT(dynamic_kenv, ("need dynamic kenv")); if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) { /* Extract passphrase from the environment. */ strlcpy(cached_passphrase, env_passphrase, sizeof(cached_passphrase)); freeenv(env_passphrase); /* Wipe the passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); } } SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY, fetch_loader_passphrase, NULL); static void zero_boot_passcache(void) { explicit_bzero(cached_passphrase, sizeof(cached_passphrase)); } static void zero_geli_intake_keys(void) { struct keybuf *keybuf; int i; if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, clear all GELI keys. 
*/ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { explicit_bzero(keybuf->kb_ents[i].ke_data, sizeof(keybuf->kb_ents[i].ke_data)); keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE; } } } } static void zero_intake_passcache(void *dummy) { zero_boot_passcache(); zero_geli_intake_keys(); } EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0); static eventhandler_tag g_eli_pre_sync = NULL; static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp, off_t offset, struct g_eli_metadata *md); static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_eli_init(struct g_class *mp); static void g_eli_fini(struct g_class *mp); static g_taste_t g_eli_taste; static g_dumpconf_t g_eli_dumpconf; struct g_class g_eli_class = { .name = G_ELI_CLASS_NAME, .version = G_VERSION, .ctlreq = g_eli_config, .taste = g_eli_taste, .destroy_geom = g_eli_destroy_geom, .init = g_eli_init, .fini = g_eli_fini }; /* * Code paths: * BIO_READ: * g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ /* * EAGAIN from crypto(9) means, that we were probably balanced to another crypto * accelerator or something like this. * The function updates the SID and rerun the operation. */ int g_eli_crypto_rerun(struct cryptop *crp) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; bp = (struct bio *)crp->crp_opaque; sc = bp->bio_to->geom->softc; LIST_FOREACH(wr, &sc->sc_workers, w_next) { if (wr->w_number == bp->bio_pflags) break; } KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags)); G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %p -> %p).", bp->bio_cmd == BIO_READ ? 
"READ" : "WRITE", wr->w_sid, crp->crp_session); wr->w_sid = crp->crp_session; crp->crp_etype = 0; error = crypto_dispatch(crp); if (error == 0) return (0); G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error); crp->crp_etype = error; return (error); } static void g_eli_getattr_done(struct bio *bp) { if (bp->bio_error == 0 && !strcmp(bp->bio_attribute, "GEOM::physpath")) { strlcat(bp->bio_data, "/eli", bp->bio_length); } g_std_done(bp); } /* * The function is called afer reading encrypted data from the provider. * * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver */ void g_eli_read_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; sc = pbp->bio_to->geom->softc; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; if (pbp->bio_driver2 != NULL) { free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; } g_io_deliver(pbp, pbp->bio_error); if (sc != NULL) atomic_subtract_int(&sc->sc_inflight, 1); return; } mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, pbp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } /* * The function is called after we encrypt and write data. * * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver */ void g_eli_write_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? 
*/ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; } else pbp->bio_completed = pbp->bio_length; /* * Write is finished, send it up. */ sc = pbp->bio_to->geom->softc; g_io_deliver(pbp, pbp->bio_error); if (sc != NULL) atomic_subtract_int(&sc->sc_inflight, 1); } /* * This function should never be called, but GEOM made as it set ->orphan() * method for every geom. */ static void g_eli_orphan_spoil_assert(struct g_consumer *cp) { panic("Function %s() called for %s.", __func__, cp->geom->name); } static void g_eli_orphan(struct g_consumer *cp) { struct g_eli_softc *sc; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; g_eli_destroy(sc, TRUE); } static void g_eli_resize(struct g_consumer *cp) { struct g_eli_softc *sc; struct g_provider *epp, *pp; off_t oldsize; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; if ((sc->sc_flags & G_ELI_FLAG_AUTORESIZE) == 0) { G_ELI_DEBUG(0, "Autoresize is turned off, old size: %jd.", (intmax_t)sc->sc_provsize); return; } pp = cp->provider; if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) { struct g_eli_metadata md; u_char *sector; int error; sector = NULL; error = g_eli_read_metadata_offset(cp->geom->class, pp, sc->sc_provsize - pp->sectorsize, &md); if (error != 0) { G_ELI_DEBUG(0, "Cannot read metadata from %s (error=%d).", pp->name, error); goto iofail; } md.md_provsize = pp->mediasize; sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); eli_metadata_encode(&md, sector); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { G_ELI_DEBUG(0, "Cannot store metadata on %s (error=%d).", pp->name, error); goto iofail; } explicit_bzero(sector, pp->sectorsize); error = g_write_data(cp, sc->sc_provsize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { 
G_ELI_DEBUG(0, "Cannot clear old metadata from %s (error=%d).", pp->name, error); goto iofail; } iofail: explicit_bzero(&md, sizeof(md)); zfree(sector, M_ELI); } oldsize = sc->sc_mediasize; sc->sc_mediasize = eli_mediasize(sc, pp->mediasize, pp->sectorsize); g_eli_key_resize(sc); sc->sc_provsize = pp->mediasize; epp = LIST_FIRST(&sc->sc_geom->provider); g_resize_provider(epp, sc->sc_mediasize); G_ELI_DEBUG(0, "Device %s size changed from %jd to %jd.", epp->name, (intmax_t)oldsize, (intmax_t)sc->sc_mediasize); } /* * BIO_READ: * G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ static void g_eli_start(struct bio *bp) { struct g_eli_softc *sc; struct g_consumer *cp; struct bio *cbp; sc = bp->bio_to->geom->softc; KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_ELI_LOGREQ(2, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_GETATTR: case BIO_FLUSH: case BIO_ZONE: case BIO_SPEEDUP: break; case BIO_DELETE: /* * If the user hasn't set the NODELETE flag, we just pass * it down the stack and let the layers beneath us do (or * not) whatever they do with it. If they have, we * reject it. A possible extension would be an * additional flag to take it as a hint to shred the data * with [multiple?] overwrites. 
*/ if (!(sc->sc_flags & G_ELI_FLAG_NODELETE)) break; default: g_io_deliver(bp, EOPNOTSUPP); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } bp->bio_driver1 = cbp; bp->bio_pflags = G_ELI_NEW_BIO; switch (bp->bio_cmd) { case BIO_READ: if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) { g_eli_crypto_read(sc, bp, 0); break; } /* FALLTHROUGH */ case BIO_WRITE: mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); break; case BIO_GETATTR: case BIO_FLUSH: case BIO_DELETE: case BIO_SPEEDUP: case BIO_ZONE: if (bp->bio_cmd == BIO_GETATTR) cbp->bio_done = g_eli_getattr_done; else cbp->bio_done = g_std_done; cp = LIST_FIRST(&sc->sc_geom->consumer); cbp->bio_to = cp->provider; G_ELI_LOGREQ(2, cbp, "Sending request."); g_io_request(cbp, cp); break; } } static int g_eli_newsession(struct g_eli_worker *wr) { struct g_eli_softc *sc; struct crypto_session_params csp; uint32_t caps; int error, new_crypto; void *key; sc = wr->w_softc; memset(&csp, 0, sizeof(csp)); csp.csp_mode = CSP_MODE_CIPHER; csp.csp_cipher_alg = sc->sc_ealgo; csp.csp_ivlen = g_eli_ivlen(sc->sc_ealgo); csp.csp_cipher_klen = sc->sc_ekeylen / 8; if (sc->sc_ealgo == CRYPTO_AES_XTS) csp.csp_cipher_klen <<= 1; if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) { key = g_eli_key_hold(sc, 0, LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize); csp.csp_cipher_key = key; } else { key = NULL; csp.csp_cipher_key = sc->sc_ekey; } if (sc->sc_flags & G_ELI_FLAG_AUTH) { csp.csp_mode = CSP_MODE_ETA; csp.csp_auth_alg = sc->sc_aalgo; csp.csp_auth_klen = G_ELI_AUTH_SECKEYLEN; } switch (sc->sc_crypto) { case G_ELI_CRYPTO_SW_ACCEL: case G_ELI_CRYPTO_SW: error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_SOFTWARE); break; case G_ELI_CRYPTO_HW: error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_HARDWARE); break; case G_ELI_CRYPTO_UNKNOWN: error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE); if (error == 0) 
{ caps = crypto_ses2caps(wr->w_sid); if (caps & CRYPTOCAP_F_HARDWARE) new_crypto = G_ELI_CRYPTO_HW; else if (caps & CRYPTOCAP_F_ACCEL_SOFTWARE) new_crypto = G_ELI_CRYPTO_SW_ACCEL; else new_crypto = G_ELI_CRYPTO_SW; mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) sc->sc_crypto = new_crypto; mtx_unlock(&sc->sc_queue_mtx); } break; default: panic("%s: invalid condition", __func__); } if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) { if (error) g_eli_key_drop(sc, key); else wr->w_first_key = key; } return (error); } static void g_eli_freesession(struct g_eli_worker *wr) { struct g_eli_softc *sc; crypto_freesession(wr->w_sid); if (wr->w_first_key != NULL) { sc = wr->w_softc; g_eli_key_drop(sc, wr->w_first_key); wr->w_first_key = NULL; } } static void g_eli_cancel(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) { KASSERT(bp->bio_pflags == G_ELI_NEW_BIO, ("Not new bio when canceling (bp=%p).", bp)); g_io_deliver(bp, ENXIO); } } static struct bio * g_eli_takefirst(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) return (bioq_takefirst(&sc->sc_queue)); /* * Device suspended, so we skip new I/O requests. */ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_pflags != G_ELI_NEW_BIO) break; } if (bp != NULL) bioq_remove(&sc->sc_queue, bp); return (bp); } /* * This is the main function for kernel worker thread when we don't have * hardware acceleration and we have to do cryptography in software. * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM * threads with crypto work. */ static void g_eli_worker(void *arg) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; wr = arg; sc = wr->w_softc; #ifdef EARLY_AP_STARTUP MPASS(!sc->sc_cpubind || smp_started); #elif defined(SMP) /* Before sched_bind() to a CPU, wait for all CPUs to go on-line. 
*/ if (sc->sc_cpubind) { while (!smp_started) tsleep(wr, 0, "geli:smp", hz / 4); } #endif thread_lock(curthread); sched_prio(curthread, PUSER); if (sc->sc_cpubind) sched_bind(curthread, wr->w_number % mp_ncpus); thread_unlock(curthread); G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm); for (;;) { mtx_lock(&sc->sc_queue_mtx); again: bp = g_eli_takefirst(sc); if (bp == NULL) { if (sc->sc_flags & G_ELI_FLAG_DESTROY) { g_eli_cancel(sc); LIST_REMOVE(wr, w_next); g_eli_freesession(wr); free(wr, M_ELI); G_ELI_DEBUG(1, "Thread %s exiting.", curthread->td_proc->p_comm); wakeup(&sc->sc_workers); mtx_unlock(&sc->sc_queue_mtx); kproc_exit(0); } while (sc->sc_flags & G_ELI_FLAG_SUSPEND) { if (sc->sc_inflight > 0) { G_ELI_DEBUG(0, "inflight=%d", sc->sc_inflight); /* * We still have inflight BIOs, so * sleep and retry. */ msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:inf", hz / 5); goto again; } /* * Suspend requested, mark the worker as * suspended and go to sleep. */ if (wr->w_active) { g_eli_freesession(wr); wr->w_active = FALSE; } wakeup(&sc->sc_workers); msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:suspend", 0); if (!wr->w_active && !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) { error = g_eli_newsession(wr); KASSERT(error == 0, ("g_eli_newsession() failed on resume (error=%d)", error)); wr->w_active = TRUE; } goto again; } msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0); continue; } if (bp->bio_pflags == G_ELI_NEW_BIO) atomic_add_int(&sc->sc_inflight, 1); mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_pflags == G_ELI_NEW_BIO) { bp->bio_pflags = 0; if (sc->sc_flags & G_ELI_FLAG_AUTH) { if (bp->bio_cmd == BIO_READ) g_eli_auth_read(sc, bp); else g_eli_auth_run(wr, bp); } else { if (bp->bio_cmd == BIO_READ) g_eli_crypto_read(sc, bp, 1); else g_eli_crypto_run(wr, bp); } } else { if (sc->sc_flags & G_ELI_FLAG_AUTH) g_eli_auth_run(wr, bp); else g_eli_crypto_run(wr, bp); } } } static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp, off_t offset, struct 
g_eli_metadata *md) { struct g_geom *gp; struct g_consumer *cp; u_char *buf = NULL; int error; g_topology_assert(); gp = g_new_geomf(mp, "eli:taste"); gp->start = g_eli_start; gp->access = g_std_access; /* * g_eli_read_metadata() is always called from the event thread. * Our geom is created and destroyed in the same event, so there * could be no orphan nor spoil event in the meantime. */ gp->orphan = g_eli_orphan_spoil_assert; gp->spoiled = g_eli_orphan_spoil_assert; cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) goto end; error = g_access(cp, 1, 0, 0); if (error != 0) goto end; g_topology_unlock(); buf = g_read_data(cp, offset, pp->sectorsize, &error); g_topology_lock(); if (buf == NULL) goto end; error = eli_metadata_decode(buf, md); if (error != 0) goto end; /* Metadata was read and decoded successfully. */ end: - if (buf != NULL) - g_free(buf); + g_free(buf); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, 0, 0); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); return (error); } int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp, struct g_eli_metadata *md) { return (g_eli_read_metadata_offset(mp, pp, pp->mediasize - pp->sectorsize, md)); } /* * The function is called when we had last close on provider and user requested * to close it when this situation occur. */ static void g_eli_last_close(void *arg, int flags __unused) { struct g_geom *gp; char gpname[64]; int error; g_topology_assert(); gp = arg; strlcpy(gpname, gp->name, sizeof(gpname)); error = g_eli_destroy(gp->softc, TRUE); KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).", gpname, error)); G_ELI_DEBUG(0, "Detached %s on last close.", gpname); } int g_eli_access(struct g_provider *pp, int dr, int dw, int de) { struct g_eli_softc *sc; struct g_geom *gp; gp = pp->geom; sc = gp->softc; if (dw > 0) { if (sc->sc_flags & G_ELI_FLAG_RO) { /* Deny write attempts. 
*/ return (EROFS); } /* Someone is opening us for write, we need to remember that. */ sc->sc_flags |= G_ELI_FLAG_WOPEN; return (0); } /* Is this the last close? */ if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0) return (0); /* * Automatically detach on last close if requested. */ if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) || (sc->sc_flags & G_ELI_FLAG_WOPEN)) { g_post_event(g_eli_last_close, gp, M_WAITOK, NULL); } return (0); } static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif } struct g_geom * g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, const struct g_eli_metadata *md, const u_char *mkey, int nkey) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; struct g_geom_alias *gap; u_int i, threads; int dcw, error; G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX); KASSERT(eli_metadata_crypto_supported(md), ("%s: unsupported crypto for %s", __func__, bpp->name)); gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX); sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO); gp->start = g_eli_start; /* * Spoiling can happen even though we have the provider open * exclusively, e.g. through media change events. */ gp->spoiled = g_eli_orphan; gp->orphan = g_eli_orphan; gp->resize = g_eli_resize; gp->dumpconf = g_eli_dumpconf; /* * If detach-on-last-close feature is not enabled and we don't operate * on read-only provider, we can simply use g_std_access(). 
*/ if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO)) gp->access = g_eli_access; else gp->access = g_std_access; eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize); sc->sc_nkey = nkey; gp->softc = sc; sc->sc_geom = gp; bioq_init(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF); mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF); pp = NULL; cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, bpp); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot attach to %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).", bpp->name, error); } goto failed; } /* * Keep provider open all the time, so we can run critical tasks, * like Master Keys deletion, without wondering if we can open * provider or not. * We don't open provider for writing only when user requested read-only * access. */ dcw = (sc->sc_flags & G_ELI_FLAG_RO) ? 0 : 1; error = g_access(cp, 1, dcw, 1); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot access %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot access %s (error=%d).", bpp->name, error); } goto failed; } /* * Remember the keys in our softc structure. 
*/ g_eli_mkey_propagate(sc, mkey); LIST_INIT(&sc->sc_workers); threads = g_eli_threads; if (threads == 0) threads = mp_ncpus; sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus); for (i = 0; i < threads; i++) { if (g_eli_cpu_is_disabled(i)) { G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.", bpp->name, i); continue; } wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO); wr->w_softc = sc; wr->w_number = i; wr->w_active = TRUE; error = g_eli_newsession(wr); if (error != 0) { free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } goto failed; } error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0, "g_eli[%u] %s", i, bpp->name); if (error != 0) { g_eli_freesession(wr); free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } goto failed; } LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next); } /* * Create decrypted provider. */ pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX); pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (g_eli_unmapped_io && CRYPTO_HAS_VMPAGE) { /* * On DMAP architectures we can use unmapped I/O. But don't * use it with data integrity verification. That code hasn't * been written yet. 
*/ if ((sc->sc_flags & G_ELI_FLAG_AUTH) == 0) pp->flags |= G_PF_ACCEPT_UNMAPPED; } pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; LIST_FOREACH(gap, &bpp->aliases, ga_next) g_provider_add_alias(pp, "%s%s", gap->ga_alias, G_ELI_SUFFIX); g_error_provider(pp, 0); G_ELI_DEBUG(0, "Device %s created.", pp->name); G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo), sc->sc_ekeylen); if (sc->sc_flags & G_ELI_FLAG_AUTH) G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo)); G_ELI_DEBUG(0, " Crypto: %s", sc->sc_crypto == G_ELI_CRYPTO_SW_ACCEL ? "accelerated software" : sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware"); return (gp); failed: mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); /* * Wait for kernel threads self destruction. */ while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, -dcw, -1); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); g_eli_key_destroy(sc); zfree(sc, M_ELI); return (NULL); } int g_eli_destroy(struct g_eli_softc *sc, boolean_t force) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (sc == NULL) return (ENXIO); gp = sc->sc_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_ELI_DEBUG(1, "Device %s is still open, so it " "cannot be definitely removed.", pp->name); sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; g_wither_provider(pp, ENXIO); return (EBUSY); } else { G_ELI_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); gp->softc 
= NULL; g_eli_key_destroy(sc); zfree(sc, M_ELI); G_ELI_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom_close(gp, ENXIO); return (0); } static int g_eli_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_eli_softc *sc; sc = gp->softc; return (g_eli_destroy(sc, FALSE)); } static int g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider) { u_char *keyfile, *data; char *file, name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL && i == 0) { /* * If there is only one keyfile, allow simpler name. */ snprintf(name, sizeof(name), "%s:geli_keyfile", provider); keyfile = preload_search_by_type(name); } if (keyfile == NULL) return (i); /* Return number of loaded keyfiles. */ data = preload_fetch_addr(keyfile); if (data == NULL) { G_ELI_DEBUG(0, "Cannot find key file data for %s.", name); return (0); } size = preload_fetch_size(keyfile); if (size == 0) { G_ELI_DEBUG(0, "Cannot find key file size for %s.", name); return (0); } file = preload_search_info(keyfile, MODINFO_NAME); if (file == NULL) { G_ELI_DEBUG(0, "Cannot find key file name for %s.", name); return (0); } G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file, provider, name); g_eli_crypto_hmac_update(ctx, data, size); } } static void g_eli_keyfiles_clear(const char *provider) { u_char *keyfile, *data; char name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL) return; data = preload_fetch_addr(keyfile); size = preload_fetch_size(keyfile); if (data != NULL && size != 0) explicit_bzero(data, size); } } /* * Tasting is only made on boot. * We detect providers which should be attached before root is mounted. 
*/ static struct g_geom * g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_eli_metadata md; struct g_geom *gp; struct hmac_ctx ctx; char passphrase[256]; u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN]; u_int i, nkey, nkeyfiles, tries, showpass; int error; struct keybuf *keybuf; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); if (root_mounted() || g_eli_tries == 0) return (NULL); G_ELI_DEBUG(3, "Tasting %s.", pp->name); error = g_eli_read_metadata(mp, pp, &md); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_ELI_MAGIC) != 0) return (NULL); if (md.md_version > G_ELI_VERSION) { printf("geom_eli.ko module is too old to handle %s.\n", pp->name); return (NULL); } if (md.md_provsize != pp->mediasize) return (NULL); /* Should we attach it on boot? */ if (!(md.md_flags & G_ELI_FLAG_BOOT) && !(md.md_flags & G_ELI_FLAG_GELIBOOT)) return (NULL); if (md.md_keys == 0x00) { G_ELI_DEBUG(0, "No valid keys on %s.", pp->name); return (NULL); } if (!eli_metadata_crypto_supported(&md)) { G_ELI_DEBUG(0, "%s uses invalid or unsupported algorithms\n", pp->name); return (NULL); } if (md.md_iterations == -1) { /* If there is no passphrase, we try only once. */ tries = 1; } else { /* Ask for the passphrase no more than g_eli_tries times. */ tries = g_eli_tries; } if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, try all GELI keys. */ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { memcpy(key, keybuf->kb_ents[i].ke_data, sizeof(key)); if (g_eli_mkey_decrypt_any(&md, key, mkey, &nkey) == 0 ) { explicit_bzero(key, sizeof(key)); goto have_key; } } } } for (i = 0; i <= tries; i++) { g_eli_crypto_hmac_init(&ctx, NULL, 0); /* * Load all key files. */ nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name); if (nkeyfiles == 0 && md.md_iterations == -1) { /* * No key files and no passphrase, something is * definitely wrong here. 
* geli(8) doesn't allow for such situation, so assume * that there was really no passphrase and in that case * key files are no properly defined in loader.conf. */ G_ELI_DEBUG(0, "Found no key files in loader.conf for %s.", pp->name); return (NULL); } /* Ask for the passphrase if defined. */ if (md.md_iterations >= 0) { /* Try first with cached passphrase. */ if (i == 0) { if (!g_eli_boot_passcache) continue; memcpy(passphrase, cached_passphrase, sizeof(passphrase)); } else { printf("Enter passphrase for %s: ", pp->name); showpass = g_eli_visible_passphrase; if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0) showpass = GETS_ECHOPASS; cngets(passphrase, sizeof(passphrase), showpass); memcpy(cached_passphrase, passphrase, sizeof(passphrase)); } } /* * Prepare Derived-Key from the user passphrase. */ if (md.md_iterations == 0) { g_eli_crypto_hmac_update(&ctx, md.md_salt, sizeof(md.md_salt)); g_eli_crypto_hmac_update(&ctx, passphrase, strlen(passphrase)); explicit_bzero(passphrase, sizeof(passphrase)); } else if (md.md_iterations > 0) { u_char dkey[G_ELI_USERKEYLEN]; pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt, sizeof(md.md_salt), passphrase, md.md_iterations); explicit_bzero(passphrase, sizeof(passphrase)); g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey)); explicit_bzero(dkey, sizeof(dkey)); } g_eli_crypto_hmac_final(&ctx, key, 0); /* * Decrypt Master-Key. */ error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey); explicit_bzero(key, sizeof(key)); if (error == -1) { if (i == tries) { G_ELI_DEBUG(0, "Wrong key for %s. No tries left.", pp->name); g_eli_keyfiles_clear(pp->name); return (NULL); } if (i > 0) { G_ELI_DEBUG(0, "Wrong key for %s. Tries left: %u.", pp->name, tries - i); } /* Try again. 
*/ continue; } else if (error > 0) { G_ELI_DEBUG(0, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); g_eli_keyfiles_clear(pp->name); return (NULL); } g_eli_keyfiles_clear(pp->name); G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); break; } have_key: /* * We have correct key, let's attach provider. */ gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey); explicit_bzero(mkey, sizeof(mkey)); explicit_bzero(&md, sizeof(md)); if (gp == NULL) { G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name, G_ELI_SUFFIX); return (NULL); } return (gp); } static void g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_eli_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL || cp != NULL) return; /* Nothing here. */ sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_total); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_allocated); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_cat(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if (sc->sc_flags & (flag)) { \ if (!first) \ sbuf_cat(sb, ", "); \ else \ first = 0; \ sbuf_cat(sb, name); \ } \ } while (0) ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND"); ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY"); ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER"); ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME"); ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT"); ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH"); ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH"); ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH"); ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN"); ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY"); ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY"); ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE"); ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT"); ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS"); ADD_FLAG(G_ELI_FLAG_AUTORESIZE, "AUTORESIZE"); #undef ADD_FLAG } sbuf_cat(sb, "\n"); if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) { sbuf_printf(sb, 
"%s%u\n", indent, sc->sc_nkey); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_version); sbuf_printf(sb, "%s", indent); switch (sc->sc_crypto) { case G_ELI_CRYPTO_HW: sbuf_cat(sb, "hardware"); break; case G_ELI_CRYPTO_SW: sbuf_cat(sb, "software"); break; case G_ELI_CRYPTO_SW_ACCEL: sbuf_cat(sb, "accelerated software"); break; default: sbuf_cat(sb, "UNKNOWN"); break; } sbuf_cat(sb, "\n"); if (sc->sc_flags & G_ELI_FLAG_AUTH) { sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_aalgo)); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_ekeylen); sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_ealgo)); sbuf_printf(sb, "%s%s\n", indent, (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE"); } static void g_eli_shutdown_pre_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp; struct g_eli_softc *sc; int error; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { sc = gp->softc; if (sc == NULL) continue; pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name)); if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0 || SCHEDULER_STOPPED()) { sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; } else { error = g_eli_destroy(sc, TRUE); } } g_topology_unlock(); } static void g_eli_init(struct g_class *mp) { g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); if (g_eli_pre_sync == NULL) G_ELI_DEBUG(0, "Warning! 
Cannot register shutdown event."); } static void g_eli_fini(struct g_class *mp) { if (g_eli_pre_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync); } DECLARE_GEOM_CLASS(g_eli_class, g_eli); MODULE_DEPEND(g_eli, crypto, 1, 1, 1); MODULE_VERSION(geom_eli, 0); diff --git a/sys/geom/geom_ccd.c b/sys/geom/geom_ccd.c index 7f4dd3ca11ff..8e3859f0ce2f 100644 --- a/sys/geom/geom_ccd.c +++ b/sys/geom/geom_ccd.c @@ -1,936 +1,935 @@ /*- * SPDX-License-Identifier: (BSD-2-Clause-NetBSD AND BSD-3-Clause) * * Copyright (c) 2003 Poul-Henning Kamp. * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ /*- * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: cd.c 1.6 90/11/28$ * * @(#)cd.c 8.2 (Berkeley) 11/16/93 */ /* * Dynamic configuration and disklabel support by: * Jason R. Thorpe * Numerical Aerodynamic Simulation Facility * Mail Stop 258-6 * NASA Ames Research Center * Moffett Field, CA 94035 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* * Number of blocks to untouched in front of a component partition. * This is to avoid violating its disklabel area when it starts at the * beginning of the slice. */ #if !defined(CCD_OFFSET) #define CCD_OFFSET 16 #endif /* sc_flags */ #define CCDF_UNIFORM 0x02 /* use LCCD of sizes for uniform interleave */ #define CCDF_MIRROR 0x04 /* use mirroring */ #define CCDF_NO_OFFSET 0x08 /* do not leave space in front */ #define CCDF_LINUX 0x10 /* use Linux compatibility mode */ /* Mask of user-settable ccd flags. */ #define CCDF_USERMASK (CCDF_UNIFORM|CCDF_MIRROR) /* * Interleave description table. * Computed at boot time to speed irregular-interleave lookups. * The idea is that we interleave in "groups". First we interleave * evenly over all component disks up to the size of the smallest * component (the first group), then we interleave evenly over all * remaining disks up to the size of the next-smallest (second group), * and so on. * * Each table entry describes the interleave characteristics of one * of these groups. 
For example if a concatenated disk consisted of * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at * DEV_BSIZE (1), the table would have three entries: * * ndisk startblk startoff dev * 3 0 0 0, 1, 2 * 2 9 3 0, 2 * 1 13 5 2 * 0 - - - * * which says that the first nine blocks (0-8) are interleaved over * 3 disks (0, 1, 2) starting at block offset 0 on any component disk, * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting * at component block 3, and the remaining blocks (13-14) are on disk * 2 starting at offset 5. */ struct ccdiinfo { int ii_ndisk; /* # of disks range is interleaved over */ daddr_t ii_startblk; /* starting scaled block # for range */ daddr_t ii_startoff; /* starting component offset (block #) */ int *ii_index; /* ordered list of components in range */ }; /* * Component info table. * Describes a single component of a concatenated disk. */ struct ccdcinfo { daddr_t ci_size; /* size */ struct g_provider *ci_provider; /* provider */ struct g_consumer *ci_consumer; /* consumer */ }; /* * A concatenated disk is described by this structure. */ struct ccd_s { LIST_ENTRY(ccd_s) list; int sc_unit; /* logical unit number */ int sc_flags; /* flags */ daddr_t sc_size; /* size of ccd */ int sc_ileave; /* interleave */ u_int sc_ndisks; /* number of components */ struct ccdcinfo *sc_cinfo; /* component info */ struct ccdiinfo *sc_itable; /* interleave table */ u_int32_t sc_secsize; /* # bytes per sector */ int sc_pick; /* side of mirror picked */ daddr_t sc_blk[2]; /* mirror localization */ u_int32_t sc_offset; /* actual offset used */ }; static g_start_t g_ccd_start; static void ccdiodone(struct bio *bp); static void ccdinterleave(struct ccd_s *); static int ccdinit(struct gctl_req *req, struct ccd_s *); static int ccdbuffer(struct bio **ret, struct ccd_s *, struct bio *, daddr_t, caddr_t, long); static void g_ccd_orphan(struct g_consumer *cp) { /* * XXX: We don't do anything here. 
It is not obvious * XXX: what DTRT would be, so we do what the previous * XXX: code did: ignore it and let the user cope. */ } static int g_ccd_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp1, *cp2; int error; de += dr; de += dw; gp = pp->geom; error = ENXIO; LIST_FOREACH(cp1, &gp->consumer, consumer) { error = g_access(cp1, dr, dw, de); if (error) { LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) break; g_access(cp2, -dr, -dw, -de); } break; } } return (error); } /* * Free the softc and its substructures. */ static void g_ccd_freesc(struct ccd_s *sc) { struct ccdiinfo *ii; g_free(sc->sc_cinfo); if (sc->sc_itable != NULL) { for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++) - if (ii->ii_index != NULL) - g_free(ii->ii_index); + g_free(ii->ii_index); g_free(sc->sc_itable); } g_free(sc); } static int ccdinit(struct gctl_req *req, struct ccd_s *cs) { struct ccdcinfo *ci; daddr_t size; int ix; daddr_t minsize; int maxsecsize; off_t mediasize; u_int sectorsize; cs->sc_size = 0; maxsecsize = 0; minsize = 0; if (cs->sc_flags & CCDF_LINUX) { cs->sc_offset = 0; cs->sc_ileave *= 2; if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2) gctl_error(req, "Mirror mode for Linux raids is " "only supported with 2 devices"); } else { if (cs->sc_flags & CCDF_NO_OFFSET) cs->sc_offset = 0; else cs->sc_offset = CCD_OFFSET; } for (ix = 0; ix < cs->sc_ndisks; ix++) { ci = &cs->sc_cinfo[ix]; mediasize = ci->ci_provider->mediasize; sectorsize = ci->ci_provider->sectorsize; if (sectorsize > maxsecsize) maxsecsize = sectorsize; size = mediasize / DEV_BSIZE - cs->sc_offset; /* Truncate to interleave boundary */ if (cs->sc_ileave > 1) size -= size % cs->sc_ileave; if (size == 0) { gctl_error(req, "Component %s has effective size zero", ci->ci_provider->name); return(ENODEV); } if (minsize == 0 || size < minsize) minsize = size; ci->ci_size = size; cs->sc_size += size; } /* * Don't allow the interleave to be smaller than * the biggest 
component sector. */ if ((cs->sc_ileave > 0) && (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { gctl_error(req, "Interleave to small for sector size"); return(EINVAL); } /* * If uniform interleave is desired set all sizes to that of * the smallest component. This will guarantee that a single * interleave table is generated. * * Lost space must be taken into account when calculating the * overall size. Half the space is lost when CCDF_MIRROR is * specified. */ if (cs->sc_flags & CCDF_UNIFORM) { for (ix = 0; ix < cs->sc_ndisks; ix++) { ci = &cs->sc_cinfo[ix]; ci->ci_size = minsize; } cs->sc_size = cs->sc_ndisks * minsize; } if (cs->sc_flags & CCDF_MIRROR) { /* * Check to see if an even number of components * have been specified. The interleave must also * be non-zero in order for us to be able to * guarantee the topology. */ if (cs->sc_ndisks % 2) { gctl_error(req, "Mirroring requires an even number of disks"); return(EINVAL); } if (cs->sc_ileave == 0) { gctl_error(req, "An interleave must be specified when mirroring"); return(EINVAL); } cs->sc_size = (cs->sc_ndisks/2) * minsize; } /* * Construct the interleave table. */ ccdinterleave(cs); /* * Create pseudo-geometry based on 1MB cylinders. It's * pretty close. */ cs->sc_secsize = maxsecsize; return (0); } static void ccdinterleave(struct ccd_s *cs) { struct ccdcinfo *ci, *smallci; struct ccdiinfo *ii; daddr_t bn, lbn; int ix; daddr_t size; /* * Allocate an interleave table. The worst case occurs when each * of N disks is of a different size, resulting in N interleave * tables. * * Chances are this is too big, but we don't care. */ size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo); cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO); /* * Trivial case: no interleave (actually interleave of disk size). * Each table entry represents a single component in its entirety. * * An interleave of 0 may not be used with a mirror setup. 
*/ if (cs->sc_ileave == 0) { bn = 0; ii = cs->sc_itable; for (ix = 0; ix < cs->sc_ndisks; ix++) { /* Allocate space for ii_index. */ ii->ii_index = g_malloc(sizeof(int), M_WAITOK); ii->ii_ndisk = 1; ii->ii_startblk = bn; ii->ii_startoff = 0; ii->ii_index[0] = ix; bn += cs->sc_cinfo[ix].ci_size; ii++; } ii->ii_ndisk = 0; return; } /* * The following isn't fast or pretty; it doesn't have to be. */ size = 0; bn = lbn = 0; for (ii = cs->sc_itable; ; ii++) { /* * Allocate space for ii_index. We might allocate more then * we use. */ ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks), M_WAITOK); /* * Locate the smallest of the remaining components */ smallci = NULL; for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) { if (ci->ci_size > size && (smallci == NULL || ci->ci_size < smallci->ci_size)) { smallci = ci; } } /* * Nobody left, all done */ if (smallci == NULL) { ii->ii_ndisk = 0; g_free(ii->ii_index); ii->ii_index = NULL; break; } /* * Record starting logical block using an sc_ileave blocksize. */ ii->ii_startblk = bn / cs->sc_ileave; /* * Record starting component block using an sc_ileave * blocksize. This value is relative to the beginning of * a component disk. */ ii->ii_startoff = lbn; /* * Determine how many disks take part in this interleave * and record their indices. */ ix = 0; for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) { if (ci->ci_size >= smallci->ci_size) { ii->ii_index[ix++] = ci - cs->sc_cinfo; } } ii->ii_ndisk = ix; bn += ix * (smallci->ci_size - size); lbn = smallci->ci_size / cs->sc_ileave; size = smallci->ci_size; } } static void g_ccd_start(struct bio *bp) { long bcount, rcount; struct bio *cbp[2]; caddr_t addr; daddr_t bn; int err; struct ccd_s *cs; cs = bp->bio_to->geom->softc; /* * Block all GETATTR requests, we wouldn't know which of our * subdevices we should ship it off to. * XXX: this may not be the right policy. 
*/ if(bp->bio_cmd == BIO_GETATTR) { g_io_deliver(bp, EINVAL); return; } /* * Translate the partition-relative block number to an absolute. */ bn = bp->bio_offset / cs->sc_secsize; /* * Allocate component buffers and fire off the requests */ addr = bp->bio_data; for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) { err = ccdbuffer(cbp, cs, bp, bn, addr, bcount); if (err) { bp->bio_completed += bcount; if (bp->bio_error == 0) bp->bio_error = err; if (bp->bio_completed == bp->bio_length) g_io_deliver(bp, bp->bio_error); return; } rcount = cbp[0]->bio_length; if (cs->sc_flags & CCDF_MIRROR) { /* * Mirroring. Writes go to both disks, reads are * taken from whichever disk seems most appropriate. * * We attempt to localize reads to the disk whos arm * is nearest the read request. We ignore seeks due * to writes when making this determination and we * also try to avoid hogging. */ if (cbp[0]->bio_cmd != BIO_READ) { g_io_request(cbp[0], cbp[0]->bio_from); g_io_request(cbp[1], cbp[1]->bio_from); } else { int pick = cs->sc_pick; daddr_t range = cs->sc_size / 16; if (bn < cs->sc_blk[pick] - range || bn > cs->sc_blk[pick] + range ) { cs->sc_pick = pick = 1 - pick; } cs->sc_blk[pick] = bn + btodb(rcount); g_io_request(cbp[pick], cbp[pick]->bio_from); } } else { /* * Not mirroring */ g_io_request(cbp[0], cbp[0]->bio_from); } bn += btodb(rcount); addr += rcount; } } /* * Build a component buffer header. */ static int ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) { struct ccdcinfo *ci, *ci2 = NULL; struct bio *cbp; daddr_t cbn, cboff; off_t cbc; /* * Determine which component bn falls in. */ cbn = bn; cboff = 0; if (cs->sc_ileave == 0) { /* * Serially concatenated and neither a mirror nor a parity * config. This is a special case. 
*/ daddr_t sblk; sblk = 0; for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) sblk += ci->ci_size; cbn -= sblk; } else { struct ccdiinfo *ii; int ccdisk, off; /* * Calculate cbn, the logical superblock (sc_ileave chunks), * and cboff, a normal block offset (DEV_BSIZE chunks) relative * to cbn. */ cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ /* * Figure out which interleave table to use. */ for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { if (ii->ii_startblk > cbn) break; } ii--; /* * off is the logical superblock relative to the beginning * of this interleave block. */ off = cbn - ii->ii_startblk; /* * We must calculate which disk component to use (ccdisk), * and recalculate cbn to be the superblock relative to * the beginning of the component. This is typically done by * adding 'off' and ii->ii_startoff together. However, 'off' * must typically be divided by the number of components in * this interleave array to be properly convert it from a * CCD-relative logical superblock number to a * component-relative superblock number. */ if (ii->ii_ndisk == 1) { /* * When we have just one disk, it can't be a mirror * or a parity config. */ ccdisk = ii->ii_index[0]; cbn = ii->ii_startoff + off; } else { if (cs->sc_flags & CCDF_MIRROR) { /* * We have forced a uniform mapping, resulting * in a single interleave array. We double * up on the first half of the available * components and our mirror is in the second * half. This only works with a single * interleave array because doubling up * doubles the number of sectors, so there * cannot be another interleave array because * the next interleave array's calculations * would be off. 
*/ int ndisk2 = ii->ii_ndisk / 2; ccdisk = ii->ii_index[off % ndisk2]; cbn = ii->ii_startoff + off / ndisk2; ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; } else { ccdisk = ii->ii_index[off % ii->ii_ndisk]; cbn = ii->ii_startoff + off / ii->ii_ndisk; } } ci = &cs->sc_cinfo[ccdisk]; /* * Convert cbn from a superblock to a normal block so it * can be used to calculate (along with cboff) the normal * block index into this particular disk. */ cbn *= cs->sc_ileave; } /* * Fill in the component buf structure. */ cbp = g_clone_bio(bp); if (cbp == NULL) return (ENOMEM); cbp->bio_done = g_std_done; cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset); cbp->bio_data = addr; if (cs->sc_ileave == 0) cbc = dbtob((off_t)(ci->ci_size - cbn)); else cbc = dbtob((off_t)(cs->sc_ileave - cboff)); cbp->bio_length = (cbc < bcount) ? cbc : bcount; cbp->bio_from = ci->ci_consumer; cb[0] = cbp; if (cs->sc_flags & CCDF_MIRROR) { cbp = g_clone_bio(bp); if (cbp == NULL) return (ENOMEM); cbp->bio_done = cb[0]->bio_done = ccdiodone; cbp->bio_offset = cb[0]->bio_offset; cbp->bio_data = cb[0]->bio_data; cbp->bio_length = cb[0]->bio_length; cbp->bio_from = ci2->ci_consumer; cbp->bio_caller1 = cb[0]; cb[0]->bio_caller1 = cbp; cb[1] = cbp; } return (0); } /* * Called only for mirrored operations. 
*/ static void ccdiodone(struct bio *cbp) { struct bio *mbp, *pbp; mbp = cbp->bio_caller1; pbp = cbp->bio_parent; if (pbp->bio_cmd == BIO_READ) { if (cbp->bio_error == 0) { /* We will not be needing the partner bio */ if (mbp != NULL) { pbp->bio_inbed++; g_destroy_bio(mbp); } g_std_done(cbp); return; } if (mbp != NULL) { /* Try partner the bio instead */ mbp->bio_caller1 = NULL; pbp->bio_inbed++; g_destroy_bio(cbp); g_io_request(mbp, mbp->bio_from); /* * XXX: If this comes back OK, we should actually * try to write the good data on the failed mirror */ return; } g_std_done(cbp); return; } if (mbp != NULL) { mbp->bio_caller1 = NULL; pbp->bio_inbed++; if (cbp->bio_error != 0 && pbp->bio_error == 0) pbp->bio_error = cbp->bio_error; g_destroy_bio(cbp); return; } g_std_done(cbp); } static void g_ccd_create(struct gctl_req *req, struct g_class *mp) { int *unit, *ileave, *nprovider; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; struct ccd_s *sc; struct sbuf *sb; char buf[20]; int i, error; g_topology_assert(); unit = gctl_get_paraml(req, "unit", sizeof (*unit)); if (unit == NULL) { gctl_error(req, "unit parameter not given"); return; } ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave)); if (ileave == NULL) { gctl_error(req, "ileave parameter not given"); return; } nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider)); if (nprovider == NULL) { gctl_error(req, "nprovider parameter not given"); return; } /* Check for duplicate unit */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && sc->sc_unit == *unit) { gctl_error(req, "Unit %d already configured", *unit); return; } } if (*nprovider <= 0) { gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider); return; } /* Check all providers are valid */ for (i = 0; i < *nprovider; i++) { snprintf(buf, sizeof(buf), "provider%d", i); pp = gctl_get_provider(req, buf); if (pp == NULL) return; } gp = g_new_geomf(mp, "ccd%d", *unit); sc = g_malloc(sizeof *sc, M_WAITOK | 
M_ZERO); gp->softc = sc; sc->sc_ndisks = *nprovider; /* Allocate space for the component info. */ sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo), M_WAITOK | M_ZERO); /* Create consumers and attach to all providers */ for (i = 0; i < *nprovider; i++) { snprintf(buf, sizeof(buf), "provider%d", i); pp = gctl_get_provider(req, buf); cp = g_new_consumer(gp); error = g_attach(cp, pp); KASSERT(error == 0, ("attach to %s failed", pp->name)); sc->sc_cinfo[i].ci_consumer = cp; sc->sc_cinfo[i].ci_provider = pp; } sc->sc_unit = *unit; sc->sc_ileave = *ileave; if (gctl_get_param(req, "no_offset", NULL)) sc->sc_flags |= CCDF_NO_OFFSET; if (gctl_get_param(req, "linux", NULL)) sc->sc_flags |= CCDF_LINUX; if (gctl_get_param(req, "uniform", NULL)) sc->sc_flags |= CCDF_UNIFORM; if (gctl_get_param(req, "mirror", NULL)) sc->sc_flags |= CCDF_MIRROR; if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) { printf("%s: disabling mirror, interleave is 0\n", gp->name); sc->sc_flags &= ~(CCDF_MIRROR); } if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) { printf("%s: mirror/parity forces uniform flag\n", gp->name); sc->sc_flags |= CCDF_UNIFORM; } error = ccdinit(req, sc); if (error != 0) { g_ccd_freesc(sc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return; } pp = g_new_providerf(gp, "%s", gp->name); pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize; pp->sectorsize = sc->sc_secsize; g_error_provider(pp, 0); sb = sbuf_new_auto(); sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider); for (i = 0; i < *nprovider; i++) { sbuf_printf(sb, "%s%s", i == 0 ? 
"(" : ", ", sc->sc_cinfo[i].ci_provider->name); } sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE); if (sc->sc_ileave != 0) sbuf_printf(sb, "interleaved at %d blocks\n", sc->sc_ileave); else sbuf_printf(sb, "concatenated\n"); sbuf_finish(sb); gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } static int g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_provider *pp; struct ccd_s *sc; g_topology_assert(); sc = gp->softc; pp = LIST_FIRST(&gp->provider); if (sc == NULL || pp == NULL) return (EBUSY); if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) { gctl_error(req, "%s is open(r%dw%de%d)", gp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } g_ccd_freesc(sc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static void g_ccd_list(struct gctl_req *req, struct g_class *mp) { struct sbuf *sb; struct ccd_s *cs; struct g_geom *gp; int i, unit, *up; up = gctl_get_paraml(req, "unit", sizeof (*up)); if (up == NULL) { gctl_error(req, "unit parameter not given"); return; } unit = *up; sb = sbuf_new_auto(); LIST_FOREACH(gp, &mp->geom, geom) { cs = gp->softc; if (cs == NULL || (unit >= 0 && unit != cs->sc_unit)) continue; sbuf_printf(sb, "ccd%d\t\t%d\t%d\t", cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK); for (i = 0; i < cs->sc_ndisks; ++i) { sbuf_printf(sb, "%s/dev/%s", i == 0 ? 
"" : " ", cs->sc_cinfo[i].ci_provider->name); } sbuf_printf(sb, "\n"); } sbuf_finish(sb); gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } static void g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb) { struct g_geom *gp; g_topology_assert(); if (!strcmp(verb, "create geom")) { g_ccd_create(req, mp); } else if (!strcmp(verb, "destroy geom")) { gp = gctl_get_geom(req, mp, "geom"); if (gp != NULL) g_ccd_destroy_geom(req, mp, gp); } else if (!strcmp(verb, "list")) { g_ccd_list(req, mp); } else { gctl_error(req, "unknown verb"); } } static struct g_class g_ccd_class = { .name = "CCD", .version = G_VERSION, .ctlreq = g_ccd_config, .destroy_geom = g_ccd_destroy_geom, .start = g_ccd_start, .orphan = g_ccd_orphan, .access = g_ccd_access, }; DECLARE_GEOM_CLASS(g_ccd_class, g_ccd); MODULE_VERSION(geom_ccd, 0); diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index e52f8b8cccc2..32974e3bf822 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -1,928 +1,927 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct g_dev_softc { struct mtx sc_mtx; struct cdev *sc_dev; struct cdev *sc_alias; int sc_open; u_int sc_active; struct selinfo sc_selinfo; #define SC_A_DESTROY (1 << 31) #define SC_A_OPEN (1 << 30) #define SC_A_ACTIVE (SC_A_OPEN - 1) }; static d_open_t g_dev_open; static d_close_t g_dev_close; static d_strategy_t g_dev_strategy; static d_ioctl_t g_dev_ioctl; static d_kqfilter_t g_dev_kqfilter; static void gdev_filter_detach(struct knote *kn); static int gdev_filter_vnode(struct knote *kn, long hint); static struct filterops gdev_filterops_vnode = { .f_isfd = 1, .f_detach = gdev_filter_detach, .f_event = gdev_filter_vnode, }; static struct cdevsw g_dev_cdevsw = { .d_version = D_VERSION, .d_open = g_dev_open, .d_close = g_dev_close, .d_read = physread, .d_write = physwrite, .d_ioctl = g_dev_ioctl, .d_strategy = g_dev_strategy, .d_name = "g_dev", .d_flags = 
D_DISK | D_TRACKCLOSE, .d_kqfilter = g_dev_kqfilter, }; static g_init_t g_dev_init; static g_fini_t g_dev_fini; static g_taste_t g_dev_taste; static g_orphan_t g_dev_orphan; static g_attrchanged_t g_dev_attrchanged; static g_resize_t g_dev_resize; static struct g_class g_dev_class = { .name = "DEV", .version = G_VERSION, .init = g_dev_init, .fini = g_dev_fini, .taste = g_dev_taste, .orphan = g_dev_orphan, .attrchanged = g_dev_attrchanged, .resize = g_dev_resize }; /* * We target 262144 (8 x 32768) sectors by default as this significantly * increases the throughput on commonly used SSD's with a marginal * increase in non-interruptible request latency. */ static uint64_t g_dev_del_max_sectors = 262144; SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, dev, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_DEV stuff"); SYSCTL_QUAD(_kern_geom_dev, OID_AUTO, delete_max_sectors, CTLFLAG_RW, &g_dev_del_max_sectors, 0, "Maximum number of sectors in a single " "delete request sent to the provider. Larger requests are chunked " "so they can be interrupted. 
(0 = disable chunking)");

/* Copy of the "dumpdev" kenv value, consumed by init_dumpdev() below. */
static char *dumpdev = NULL;

/*
 * Class init: capture the "dumpdev" environment variable so the first
 * matching provider tasted can be registered as a kernel dump device.
 */
static void
g_dev_init(struct g_class *mp)
{

	dumpdev = kern_getenv("dumpdev");
}

static void
g_dev_fini(struct g_class *mp)
{

	/* Release the kenv copy; it may already have been consumed. */
	freeenv(dumpdev);
	dumpdev = NULL;
}

/*
 * Register 'dev' as a kernel dump device.
 *
 * Queries the provider's GEOM::kerneldump attribute for dump geometry
 * and inserts the resulting dumper into the kernel dump framework.
 * On success the SI_DUMPDEV flag is set on the cdev.
 * Returns 0 or an errno.
 */
static int
g_dev_setdumpdev(struct cdev *dev, struct diocskerneldump_arg *kda)
{
	struct g_kerneldump kd;
	struct g_consumer *cp;
	int error, len;

	MPASS(dev != NULL && kda != NULL);
	MPASS(kda->kda_index != KDA_REMOVE);

	cp = dev->si_drv2;
	len = sizeof(kd);
	memset(&kd, 0, len);
	kd.offset = 0;
	kd.length = OFF_MAX;
	error = g_io_getattr("GEOM::kerneldump", cp, &len, &kd);
	if (error != 0)
		return (error);
	error = dumper_insert(&kd.di, devtoname(dev), kda);
	if (error == 0)
		dev->si_flags |= SI_DUMPDEV;
	return (error);
}

/*
 * If 'dev' matches the saved "dumpdev" kenv value (with or without
 * the /dev/ prefix), register it as a dump device and clear the saved
 * value so only the first match wins.  Returns 0 when the name does
 * not match; otherwise the result of the registration attempt.
 */
static int
init_dumpdev(struct cdev *dev)
{
	struct diocskerneldump_arg kda;
	struct g_consumer *cp;
	const char *devprefix = _PATH_DEV, *devname;
	int error;
	size_t len;

	bzero(&kda, sizeof(kda));
	kda.kda_index = KDA_APPEND;

	if (dumpdev == NULL)
		return (0);

	len = strlen(devprefix);
	devname = devtoname(dev);
	if (strcmp(devname, dumpdev) != 0 &&
	    (strncmp(dumpdev, devprefix, len) != 0 ||
	    strcmp(devname, dumpdev + len) != 0))
		return (0);

	/* Briefly open the consumer for reading around the registration. */
	cp = (struct g_consumer *)dev->si_drv2;
	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		return (error);

	error = g_dev_setdumpdev(dev, &kda);
	if (error == 0) {
		freeenv(dumpdev);
		dumpdev = NULL;
	}

	(void)g_access(cp, -1, 0, 0);
	return (error);
}

/*
 * Deferred destruction of the DEV geom, posted via g_post_event() from
 * g_dev_callback() or g_dev_done() once the cdev is gone and in-flight
 * requests have drained.  Drops any remaining access counts, detaches
 * and frees the consumer, geom, and softc.
 */
static void
g_dev_destroy(void *arg, int flags __unused)
{
	struct g_consumer *cp;
	struct g_geom *gp;
	struct g_dev_softc *sc;
	char buf[SPECNAMELEN + 6];

	g_topology_assert();
	cp = arg;
	gp = cp->geom;
	sc = cp->private;
	g_trace(G_T_TOPOLOGY, "g_dev_destroy(%p(%s))", cp, gp->name);
	snprintf(buf, sizeof(buf), "cdev=%s", gp->name);
	devctl_notify("GEOM", "DEV", "DESTROY", buf);
	knlist_clear(&sc->sc_selinfo.si_note, 0);
	knlist_destroy(&sc->sc_selinfo.si_note);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	mtx_destroy(&sc->sc_mtx);
	g_free(sc);
}

/* Debugging aid: print the names of all DEV geoms on the console. */
void
g_dev_print(void)
{
	struct g_geom *gp;
	char const *p = "";

	LIST_FOREACH(gp, &g_dev_class.geom, geom) {
		printf("%s%s", p, gp->name);
		p = " ";
	}
	printf("\n");
}

/*
 * Refresh the physical-path alias cdev for this consumer.  Queries the
 * GEOM::physpath attribute (briefly opening the consumer for reading);
 * a non-empty result (re)creates the alias device node, while an empty
 * or failed result destroys any existing alias.
 */
static void
g_dev_set_physpath(struct g_consumer *cp)
{
	struct g_dev_softc *sc;
	char *physpath;
	int error, physpath_len;

	if (g_access(cp, 1, 0, 0) != 0)
		return;

	sc = cp->private;
	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	g_access(cp, -1, 0, 0);
	if (error == 0 && strlen(physpath) != 0) {
		struct cdev *dev, *old_alias_dev;
		struct cdev **alias_devp;

		dev = sc->sc_dev;
		old_alias_dev = sc->sc_alias;
		alias_devp = (struct cdev **)&sc->sc_alias;
		make_dev_physpath_alias(MAKEDEV_WAITOK | MAKEDEV_CHECKNAME,
		    alias_devp, dev, old_alias_dev, physpath);
	} else if (sc->sc_alias) {
		destroy_dev((struct cdev *)sc->sc_alias);
		sc->sc_alias = NULL;
	}
	g_free(physpath);
}

/*
 * Send MEDIACHANGE devctl notifications for the primary cdev and, if
 * present, its physical-path alias.
 */
static void
g_dev_set_media(struct g_consumer *cp)
{
	struct g_dev_softc *sc;
	struct cdev *dev;
	char buf[SPECNAMELEN + 6];

	sc = cp->private;
	dev = sc->sc_dev;
	snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name);
	devctl_notify("DEVFS", "CDEV", "MEDIACHANGE", buf);
	devctl_notify("GEOM", "DEV", "MEDIACHANGE", buf);
	dev = sc->sc_alias;
	if (dev != NULL) {
		snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name);
		devctl_notify("DEVFS", "CDEV", "MEDIACHANGE", buf);
		devctl_notify("GEOM", "DEV", "MEDIACHANGE", buf);
	}
}

/* Dispatch the provider attribute changes this class cares about. */
static void
g_dev_attrchanged(struct g_consumer *cp, const char *attr)
{

	if (strcmp(attr, "GEOM::media") == 0) {
		g_dev_set_media(cp);
		return;
	}

	if (strcmp(attr, "GEOM::physpath") == 0) {
		g_dev_set_physpath(cp);
		return;
	}
}

/*
 * Provider resize: wake EVFILT_VNODE/NOTE_ATTRIB kqueue listeners and
 * emit a SIZECHANGE devctl notification.
 */
static void
g_dev_resize(struct g_consumer *cp)
{
	struct g_dev_softc *sc;
	char buf[SPECNAMELEN + 6];

	sc = cp->private;
	KNOTE_UNLOCKED(&sc->sc_selinfo.si_note, NOTE_ATTRIB);

	snprintf(buf, sizeof(buf), "cdev=%s", cp->provider->name);
	devctl_notify("GEOM", "DEV", "SIZECHANGE", buf);
} struct g_provider * g_dev_getprovider(struct cdev *dev) { struct g_consumer *cp; g_topology_assert(); if (dev == NULL) return (NULL); if (dev->si_devsw != &g_dev_cdevsw) return (NULL); cp = dev->si_drv2; return (cp->provider); } static struct g_geom * g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) { struct g_geom *gp; struct g_geom_alias *gap; struct g_consumer *cp; struct g_dev_softc *sc; int error; struct cdev *dev, *adev; char buf[SPECNAMELEN + 6]; struct make_dev_args args; g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); gp = g_new_geomf(mp, "%s", pp->name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF); cp = g_new_consumer(gp); cp->private = sc; cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { printf("%s: g_dev_taste(%s) failed to g_attach, error=%d\n", __func__, pp->name, error); g_destroy_consumer(cp); g_destroy_geom(gp); mtx_destroy(&sc->sc_mtx); g_free(sc); return (NULL); } make_dev_args_init(&args); args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; args.mda_devsw = &g_dev_cdevsw; args.mda_cr = NULL; args.mda_uid = UID_ROOT; args.mda_gid = GID_OPERATOR; args.mda_mode = 0640; args.mda_si_drv1 = sc; args.mda_si_drv2 = cp; error = make_dev_s(&args, &sc->sc_dev, "%s", gp->name); if (error != 0) { printf("%s: make_dev_p() failed (gp->name=%s, error=%d)\n", __func__, gp->name, error); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); mtx_destroy(&sc->sc_mtx); g_free(sc); return (NULL); } dev = sc->sc_dev; dev->si_flags |= SI_UNMAPPED; dev->si_iosize_max = maxphys; knlist_init_mtx(&sc->sc_selinfo.si_note, &sc->sc_mtx); error = init_dumpdev(dev); if (error != 0) printf("%s: init_dumpdev() failed (gp->name=%s, error=%d)\n", __func__, gp->name, error); g_dev_attrchanged(cp, "GEOM::physpath"); snprintf(buf, sizeof(buf), "cdev=%s", gp->name); devctl_notify("GEOM", "DEV", "CREATE", buf); /* * Now 
add all the aliases for this drive */ LIST_FOREACH(gap, &pp->aliases, ga_next) { error = make_dev_alias_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &adev, dev, "%s", gap->ga_alias); if (error) { printf("%s: make_dev_alias_p() failed (name=%s, error=%d)\n", __func__, gap->ga_alias, error); continue; } snprintf(buf, sizeof(buf), "cdev=%s", gap->ga_alias); devctl_notify("GEOM", "DEV", "CREATE", buf); } return (gp); } static int g_dev_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct g_consumer *cp; struct g_dev_softc *sc; int error, r, w, e; cp = dev->si_drv2; g_trace(G_T_ACCESS, "g_dev_open(%s, %d, %d, %p)", cp->geom->name, flags, fmt, td); r = flags & FREAD ? 1 : 0; w = flags & FWRITE ? 1 : 0; #ifdef notyet e = flags & O_EXCL ? 1 : 0; #else e = 0; #endif /* * This happens on attempt to open a device node with O_EXEC. */ if (r + w + e == 0) return (EINVAL); if (w) { /* * When running in very secure mode, do not allow * opens for writing of any disks. */ error = securelevel_ge(td->td_ucred, 2); if (error) return (error); } g_topology_lock(); error = g_access(cp, r, w, e); g_topology_unlock(); if (error == 0) { sc = dev->si_drv1; mtx_lock(&sc->sc_mtx); if (sc->sc_open == 0 && (sc->sc_active & SC_A_ACTIVE) != 0) wakeup(&sc->sc_active); sc->sc_open += r + w + e; if (sc->sc_open == 0) atomic_clear_int(&sc->sc_active, SC_A_OPEN); else atomic_set_int(&sc->sc_active, SC_A_OPEN); mtx_unlock(&sc->sc_mtx); } return (error); } static int g_dev_close(struct cdev *dev, int flags, int fmt, struct thread *td) { struct g_consumer *cp; struct g_dev_softc *sc; int error, r, w, e; cp = dev->si_drv2; g_trace(G_T_ACCESS, "g_dev_close(%s, %d, %d, %p)", cp->geom->name, flags, fmt, td); r = flags & FREAD ? -1 : 0; w = flags & FWRITE ? -1 : 0; #ifdef notyet e = flags & O_EXCL ? -1 : 0; #else e = 0; #endif /* * The vgonel(9) - caused by eg. 
forced unmount of devfs - calls * VOP_CLOSE(9) on devfs vnode without any FREAD or FWRITE flags, * which would result in zero deltas, which in turn would cause * panic in g_access(9). * * Note that we cannot zero the counters (ie. do "r = cp->acr" * etc) instead, because the consumer might be opened in another * devfs instance. */ if (r + w + e == 0) return (EINVAL); sc = dev->si_drv1; mtx_lock(&sc->sc_mtx); sc->sc_open += r + w + e; if (sc->sc_open == 0) atomic_clear_int(&sc->sc_active, SC_A_OPEN); else atomic_set_int(&sc->sc_active, SC_A_OPEN); while (sc->sc_open == 0 && (sc->sc_active & SC_A_ACTIVE) != 0) msleep(&sc->sc_active, &sc->sc_mtx, 0, "g_dev_close", hz / 10); mtx_unlock(&sc->sc_mtx); g_topology_lock(); error = g_access(cp, r, w, e); g_topology_unlock(); return (error); } static int g_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct g_consumer *cp; struct g_provider *pp; off_t offset, length, chunk, odd; int i, error; #ifdef COMPAT_FREEBSD12 struct diocskerneldump_arg kda_copy; #endif cp = dev->si_drv2; pp = cp->provider; /* If consumer or provider is dying, don't disturb. 
*/ if (cp->flags & G_CF_ORPHAN) return (ENXIO); if (pp->error) return (pp->error); error = 0; KASSERT(cp->acr || cp->acw, ("Consumer with zero access count in g_dev_ioctl")); i = IOCPARM_LEN(cmd); switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = pp->sectorsize; if (*(u_int *)data == 0) error = ENOENT; break; case DIOCGMEDIASIZE: *(off_t *)data = pp->mediasize; if (*(off_t *)data == 0) error = ENOENT; break; case DIOCGFWSECTORS: error = g_io_getattr("GEOM::fwsectors", cp, &i, data); if (error == 0 && *(u_int *)data == 0) error = ENOENT; break; case DIOCGFWHEADS: error = g_io_getattr("GEOM::fwheads", cp, &i, data); if (error == 0 && *(u_int *)data == 0) error = ENOENT; break; #ifdef COMPAT_FREEBSD12 case DIOCSKERNELDUMP_FREEBSD12: { struct diocskerneldump_arg_freebsd12 *kda12; gone_in(14, "FreeBSD 12.x ABI compat"); kda12 = (void *)data; memcpy(&kda_copy, kda12, sizeof(kda_copy)); kda_copy.kda_index = (kda12->kda12_enable ? 0 : KDA_REMOVE_ALL); explicit_bzero(kda12, sizeof(*kda12)); /* Kludge to pass kda_copy to kda in fallthrough. 
*/ data = (void *)&kda_copy; } /* FALLTHROUGH */ #endif case DIOCSKERNELDUMP: { struct diocskerneldump_arg *kda; uint8_t *encryptedkey; kda = (struct diocskerneldump_arg *)data; if (kda->kda_index == KDA_REMOVE_ALL || kda->kda_index == KDA_REMOVE_DEV || kda->kda_index == KDA_REMOVE) { error = dumper_remove(devtoname(dev), kda); explicit_bzero(kda, sizeof(*kda)); break; } if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { if (kda->kda_encryptedkeysize == 0 || kda->kda_encryptedkeysize > KERNELDUMP_ENCKEY_MAX_SIZE) { explicit_bzero(kda, sizeof(*kda)); return (EINVAL); } encryptedkey = malloc(kda->kda_encryptedkeysize, M_TEMP, M_WAITOK); error = copyin(kda->kda_encryptedkey, encryptedkey, kda->kda_encryptedkeysize); } else { encryptedkey = NULL; } if (error == 0) { kda->kda_encryptedkey = encryptedkey; error = g_dev_setdumpdev(dev, kda); } zfree(encryptedkey, M_TEMP); explicit_bzero(kda, sizeof(*kda)); break; } case DIOCGFLUSH: error = g_io_flush(cp); break; case DIOCGDELETE: offset = ((off_t *)data)[0]; length = ((off_t *)data)[1]; if ((offset % pp->sectorsize) != 0 || (length % pp->sectorsize) != 0 || length <= 0) { printf("%s: offset=%jd length=%jd\n", __func__, offset, length); error = EINVAL; break; } while (length > 0) { chunk = length; if (g_dev_del_max_sectors != 0 && chunk > g_dev_del_max_sectors * pp->sectorsize) { chunk = g_dev_del_max_sectors * pp->sectorsize; if (pp->stripesize > 0) { odd = (offset + chunk + pp->stripeoffset) % pp->stripesize; if (chunk > odd) chunk -= odd; } } error = g_delete_data(cp, offset, chunk); length -= chunk; offset += chunk; if (error) break; /* * Since the request size can be large, the service * time can be is likewise. We make this ioctl * interruptible by checking for signals for each bio. 
*/ if (SIGPENDING(td)) break; } break; case DIOCGIDENT: error = g_io_getattr("GEOM::ident", cp, &i, data); break; case DIOCGPROVIDERNAME: strlcpy(data, pp->name, i); break; case DIOCGSTRIPESIZE: *(off_t *)data = pp->stripesize; break; case DIOCGSTRIPEOFFSET: *(off_t *)data = pp->stripeoffset; break; case DIOCGPHYSPATH: error = g_io_getattr("GEOM::physpath", cp, &i, data); if (error == 0 && *(char *)data == '\0') error = ENOENT; break; case DIOCGATTR: { struct diocgattr_arg *arg = (struct diocgattr_arg *)data; if (arg->len > sizeof(arg->value)) { error = EINVAL; break; } error = g_io_getattr(arg->name, cp, &arg->len, &arg->value); break; } case DIOCZONECMD: { struct disk_zone_args *zone_args =(struct disk_zone_args *)data; struct disk_zone_rep_entry *new_entries, *old_entries; struct disk_zone_report *rep; size_t alloc_size; old_entries = NULL; new_entries = NULL; rep = NULL; alloc_size = 0; if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES) { rep = &zone_args->zone_params.report; #define MAXENTRIES (maxphys / sizeof(struct disk_zone_rep_entry)) if (rep->entries_allocated > MAXENTRIES) rep->entries_allocated = MAXENTRIES; alloc_size = rep->entries_allocated * sizeof(struct disk_zone_rep_entry); if (alloc_size != 0) new_entries = g_malloc(alloc_size, M_WAITOK | M_ZERO); old_entries = rep->entries; rep->entries = new_entries; } error = g_io_zonecmd(zone_args, cp); if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES && alloc_size != 0 && error == 0) error = copyout(new_entries, old_entries, alloc_size); if (old_entries != NULL && rep != NULL) rep->entries = old_entries; - if (new_entries != NULL) - g_free(new_entries); + g_free(new_entries); break; } default: if (pp->geom->ioctl != NULL) { error = pp->geom->ioctl(pp, cmd, data, fflag, td); } else { error = ENOIOCTL; } } return (error); } static void g_dev_done(struct bio *bp2) { struct g_consumer *cp; struct g_dev_softc *sc; struct bio *bp; int active; cp = bp2->bio_from; sc = cp->private; bp = bp2->bio_parent; 
bp->bio_error = bp2->bio_error; bp->bio_completed = bp2->bio_completed; bp->bio_resid = bp->bio_length - bp2->bio_completed; if (bp2->bio_cmd == BIO_ZONE) bcopy(&bp2->bio_zone, &bp->bio_zone, sizeof(bp->bio_zone)); if (bp2->bio_error != 0) { g_trace(G_T_BIO, "g_dev_done(%p) had error %d", bp2, bp2->bio_error); bp->bio_flags |= BIO_ERROR; } else { g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd", bp2, bp, bp2->bio_resid, (intmax_t)bp2->bio_completed); } g_destroy_bio(bp2); active = atomic_fetchadd_int(&sc->sc_active, -1) - 1; if ((active & SC_A_ACTIVE) == 0) { if ((active & SC_A_OPEN) == 0) wakeup(&sc->sc_active); if (active & SC_A_DESTROY) g_post_event(g_dev_destroy, cp, M_NOWAIT, NULL); } biodone(bp); } static void g_dev_strategy(struct bio *bp) { struct g_consumer *cp; struct bio *bp2; struct cdev *dev; struct g_dev_softc *sc; KASSERT(bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE || bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_ZONE, ("Wrong bio_cmd bio=%p cmd=%d", bp, bp->bio_cmd)); dev = bp->bio_dev; cp = dev->si_drv2; KASSERT(cp->acr || cp->acw, ("Consumer with zero access count in g_dev_strategy")); biotrack(bp, __func__); #ifdef INVARIANTS if ((bp->bio_offset % cp->provider->sectorsize) != 0 || (bp->bio_bcount % cp->provider->sectorsize) != 0) { bp->bio_resid = bp->bio_bcount; biofinish(bp, NULL, EINVAL); return; } #endif sc = dev->si_drv1; KASSERT(sc->sc_open > 0, ("Closed device in g_dev_strategy")); atomic_add_int(&sc->sc_active, 1); for (;;) { /* * XXX: This is not an ideal solution, but I believe it to * XXX: deadlock safely, all things considered. 
	 */
		bp2 = g_clone_bio(bp);
		if (bp2 != NULL)
			break;
		/* Clone allocation failed; back off briefly and retry. */
		pause("gdstrat", hz / 10);
	}
	KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place"));
	bp2->bio_done = g_dev_done;
	g_trace(G_T_BIO,
	    "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d",
	    bp, bp2, (intmax_t)bp->bio_offset, (intmax_t)bp2->bio_length,
	    bp2->bio_data, bp2->bio_cmd);
	g_io_request(bp2, cp);
	KASSERT(cp->acr || cp->acw,
	    ("g_dev_strategy raced with g_dev_close and lost"));
}

/*
 * g_dev_callback()
 *
 * Called by devfs when asynchronous device destruction is completed.
 * - Mark that we have no attached device any more.
 * - If there are no outstanding requests, schedule geom destruction.
 *   Otherwise destruction will be scheduled later by g_dev_done().
 */
static void
g_dev_callback(void *arg)
{
	struct g_consumer *cp;
	struct g_dev_softc *sc;
	int active;

	cp = arg;
	sc = cp->private;
	g_trace(G_T_TOPOLOGY, "g_dev_callback(%p(%s))", cp, cp->geom->name);

	sc->sc_dev = NULL;
	sc->sc_alias = NULL;
	/*
	 * Atomically raise SC_A_DESTROY; whichever side then observes the
	 * active count at zero (here, or g_dev_done() when the last bio
	 * completes) posts the g_dev_destroy event.
	 */
	active = atomic_fetchadd_int(&sc->sc_active, SC_A_DESTROY);
	if ((active & SC_A_ACTIVE) == 0)
		g_post_event(g_dev_destroy, cp, M_WAITOK, NULL);
}

/*
 * g_dev_orphan()
 *
 * Called from below when the provider orphaned us.
 * - Clear any dump settings.
 * - Request asynchronous device destruction to prevent any more requests
 *   from coming in.  The provider is already marked with an error, so
 *   anything which comes in the interim will be returned immediately.
*/ static void g_dev_orphan(struct g_consumer *cp) { struct cdev *dev; struct g_dev_softc *sc; g_topology_assert(); sc = cp->private; dev = sc->sc_dev; g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, cp->geom->name); /* Reset any dump-area set on this device */ if (dev->si_flags & SI_DUMPDEV) { struct diocskerneldump_arg kda; bzero(&kda, sizeof(kda)); kda.kda_index = KDA_REMOVE_DEV; (void)dumper_remove(devtoname(dev), &kda); } /* Destroy the struct cdev *so we get no more requests */ delist_dev(dev); destroy_dev_sched_cb(dev, g_dev_callback, cp); } static void gdev_filter_detach(struct knote *kn) { struct g_dev_softc *sc; sc = kn->kn_hook; knlist_remove(&sc->sc_selinfo.si_note, kn, 0); } static int gdev_filter_vnode(struct knote *kn, long hint) { kn->kn_fflags |= kn->kn_sfflags & hint; return (kn->kn_fflags != 0); } static int g_dev_kqfilter(struct cdev *dev, struct knote *kn) { struct g_dev_softc *sc; sc = dev->si_drv1; if (kn->kn_filter != EVFILT_VNODE) return (EINVAL); /* XXX: extend support for other NOTE_* events */ if (kn->kn_sfflags != NOTE_ATTRIB) return (EINVAL); kn->kn_fop = &gdev_filterops_vnode; kn->kn_hook = sc; knlist_add(&sc->sc_selinfo.si_note, kn, 0); return (0); } DECLARE_GEOM_CLASS(g_dev_class, g_dev); diff --git a/sys/geom/geom_redboot.c b/sys/geom/geom_redboot.c index ffdb64d16274..8f21dc30526e 100644 --- a/sys/geom/geom_redboot.c +++ b/sys/geom/geom_redboot.c @@ -1,347 +1,345 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define REDBOOT_CLASS_NAME "REDBOOT" struct fis_image_desc { uint8_t name[16]; /* null-terminated name */ uint32_t offset; /* offset in flash */ uint32_t addr; /* address in memory */ uint32_t size; /* image size in bytes */ uint32_t entry; /* offset in image for entry point */ uint32_t dsize; /* data size in bytes */ uint8_t pad[256-(16+7*sizeof(uint32_t)+sizeof(void*))]; struct fis_image_desc *next; /* linked list (in memory) */ uint32_t dsum; /* descriptor checksum */ uint32_t fsum; /* checksum over image data */ }; #define FISDIR_NAME "FIS directory" #define REDBCFG_NAME "RedBoot config" #define REDBOOT_NAME "RedBoot" #define REDBOOT_MAXSLICE 64 #define REDBOOT_MAXOFF \ (REDBOOT_MAXSLICE*sizeof(struct fis_image_desc)) struct g_redboot_softc { uint32_t entry[REDBOOT_MAXSLICE]; uint32_t dsize[REDBOOT_MAXSLICE]; uint8_t readonly[REDBOOT_MAXSLICE]; g_access_t *parent_access; }; static void g_redboot_print(int i, struct fis_image_desc *fd) { printf("[%2d] \"%-15.15s\" %08x:%08x", i, fd->name, fd->offset, fd->size); printf(" addr %08x entry %08x\n", fd->addr, fd->entry); printf(" dsize 0x%x dsum 0x%x fsum 0x%x\n", fd->dsize, fd->dsum, fd->fsum); } static int g_redboot_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { return (ENOIOCTL); } static int g_redboot_access(struct g_provider *pp, int dread, int dwrite, int dexcl) { struct g_geom *gp = pp->geom; struct g_slicer *gsp = gp->softc; struct g_redboot_softc *sc = gsp->softc; if (dwrite > 0 && sc->readonly[pp->index]) return (EPERM); return (sc->parent_access(pp, dread, dwrite, dexcl)); } static int g_redboot_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_redboot_softc *sc; struct g_slicer *gsp; int idx; pp = bp->bio_to; idx = pp->index; gp = pp->geom; gsp = gp->softc; sc = gsp->softc; if 
(bp->bio_cmd == BIO_GETATTR) { if (g_handleattr_int(bp, REDBOOT_CLASS_NAME "::entry", sc->entry[idx])) return (1); if (g_handleattr_int(bp, REDBOOT_CLASS_NAME "::dsize", sc->dsize[idx])) return (1); } return (0); } static void g_redboot_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct g_redboot_softc *sc; struct g_slicer *gsp; gsp = gp->softc; sc = gsp->softc; g_slice_dumpconf(sb, indent, gp, cp, pp); if (pp != NULL) { if (indent == NULL) { sbuf_printf(sb, " entry %d", sc->entry[pp->index]); sbuf_printf(sb, " dsize %d", sc->dsize[pp->index]); } else { sbuf_printf(sb, "%s%d\n", indent, sc->entry[pp->index]); sbuf_printf(sb, "%s%d\n", indent, sc->dsize[pp->index]); } } } #include static int nameok(const char name[16]) { int i; /* descriptor names are null-terminated printable ascii */ for (i = 0; i < 15; i++) if (!isprint(name[i])) break; return (name[i] == '\0'); } static struct fis_image_desc * parse_fis_directory(u_char *buf, size_t bufsize, off_t offset, uint32_t offmask) { #define match(a,b) (bcmp(a, b, sizeof(b)-1) == 0) struct fis_image_desc *fd, *efd; struct fis_image_desc *fisdir, *redbcfg; struct fis_image_desc *head, **tail; int i; fd = (struct fis_image_desc *)buf; efd = fd + (bufsize / sizeof(struct fis_image_desc)); #if 0 /* * Find the start of the FIS table. */ while (fd < efd && fd->name[0] != 0xff) fd++; if (fd == efd) return (NULL); if (bootverbose) printf("RedBoot FIS table starts at 0x%jx\n", offset + fd - (struct fis_image_desc *) buf); #endif /* * Scan forward collecting entries in a list. */ fisdir = redbcfg = NULL; *(tail = &head) = NULL; for (i = 0; fd < efd; i++, fd++) { if (fd->name[0] == 0xff) continue; if (match(fd->name, FISDIR_NAME)) fisdir = fd; else if (match(fd->name, REDBCFG_NAME)) redbcfg = fd; if (nameok(fd->name)) { /* * NB: flash address includes platform mapping; * strip it so we have only a flash offset. 
*/ fd->offset &= offmask; if (bootverbose) g_redboot_print(i, fd); *tail = fd; *(tail = &fd->next) = NULL; } } if (fisdir == NULL) { if (bootverbose) printf("No RedBoot FIS table located at %lu\n", (long) offset); return (NULL); } if (redbcfg != NULL && fisdir->offset + fisdir->size == redbcfg->offset) { /* * Merged FIS/RedBoot config directory. */ if (bootverbose) printf("FIS/RedBoot merged at 0x%jx (not yet)\n", offset + fisdir->offset); /* XXX */ } return head; #undef match } static struct g_geom * g_redboot_taste(struct g_class *mp, struct g_provider *pp, int insist) { struct g_geom *gp; struct g_consumer *cp; struct g_redboot_softc *sc; int error, sectorsize, i; struct fis_image_desc *fd, *head; uint32_t offmask; off_t blksize; /* NB: flash block size stored as stripesize */ u_char *buf; off_t offset; const char *value; char *op; offset = 0; if (resource_string_value("redboot", 0, "fisoffset", &value) == 0) { offset = strtouq(value, &op, 0); if (*op != '\0') { offset = 0; } } g_trace(G_T_TOPOLOGY, "redboot_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); if (!strcmp(pp->geom->class->name, REDBOOT_CLASS_NAME)) return (NULL); /* XXX only taste flash providers */ if (strncmp(pp->name, "cfi", 3) && strncmp(pp->name, "flash/spi", 9)) return (NULL); gp = g_slice_new(mp, REDBOOT_MAXSLICE, pp, &cp, &sc, sizeof(*sc), g_redboot_start); if (gp == NULL) return (NULL); /* interpose our access method */ sc->parent_access = gp->access; gp->access = g_redboot_access; sectorsize = cp->provider->sectorsize; blksize = cp->provider->stripesize; if (powerof2(cp->provider->mediasize)) offmask = cp->provider->mediasize-1; else offmask = 0xffffffff; /* XXX */ if (bootverbose) printf("%s: mediasize %ld secsize %d blksize %ju offmask 0x%x\n", __func__, (long) cp->provider->mediasize, sectorsize, (uintmax_t)blksize, offmask); if (sectorsize < sizeof(struct fis_image_desc) || (sectorsize % sizeof(struct fis_image_desc))) return (NULL); g_topology_unlock(); head = NULL; if(offset 
== 0) offset = cp->provider->mediasize - blksize; again: buf = g_read_data(cp, offset, blksize, NULL); if (buf != NULL) head = parse_fis_directory(buf, blksize, offset, offmask); if (head == NULL && offset != 0) { - if (buf != NULL) - g_free(buf); + g_free(buf); offset = 0; /* check the front */ goto again; } g_topology_lock(); if (head == NULL) { - if (buf != NULL) - g_free(buf); + g_free(buf); return NULL; } /* * Craft a slice for each entry. */ for (fd = head, i = 0; fd != NULL; fd = fd->next) { if (fd->name[0] == '\0') continue; error = g_slice_config(gp, i, G_SLICE_CONFIG_SET, fd->offset, fd->size, sectorsize, "redboot/%s", fd->name); if (error) printf("%s: g_slice_config returns %d for \"%s\"\n", __func__, error, fd->name); sc->entry[i] = fd->entry; sc->dsize[i] = fd->dsize; /* disallow writing hard-to-recover entries */ sc->readonly[i] = (strcmp(fd->name, FISDIR_NAME) == 0) || (strcmp(fd->name, REDBOOT_NAME) == 0); i++; } g_free(buf); g_access(cp, -1, 0, 0); if (LIST_EMPTY(&gp->provider)) { g_slice_spoiled(cp); return (NULL); } return (gp); } static struct g_class g_redboot_class = { .name = REDBOOT_CLASS_NAME, .version = G_VERSION, .taste = g_redboot_taste, .dumpconf = g_redboot_dumpconf, .ioctl = g_redboot_ioctl, }; DECLARE_GEOM_CLASS(g_redboot_class, g_redboot); MODULE_VERSION(geom_redboot, 0); diff --git a/sys/geom/geom_slice.c b/sys/geom/geom_slice.c index 397a1fe5e974..ceb6754f9d15 100644 --- a/sys/geom/geom_slice.c +++ b/sys/geom/geom_slice.c @@ -1,563 +1,561 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include static g_access_t g_slice_access; static g_start_t g_slice_start; static struct g_slicer * g_slice_alloc(unsigned nslice, unsigned scsize) { struct g_slicer *gsp; gsp = g_malloc(sizeof *gsp, M_WAITOK | M_ZERO); if (scsize > 0) gsp->softc = g_malloc(scsize, M_WAITOK | M_ZERO); else gsp->softc = NULL; gsp->slices = g_malloc(nslice * sizeof(struct g_slice), M_WAITOK | M_ZERO); gsp->nslice = nslice; return (gsp); } static void g_slice_free(struct g_geom *gp) { struct g_slicer *gsp; gsp = gp->softc; gp->softc = NULL; /* * We can get multiple spoiled events before wither-washer * detaches our consumer, so this can get called multiple * times. */ if (gsp == NULL) return; g_free(gsp->slices); - if (gsp->hotspot != NULL) - g_free(gsp->hotspot); - if (gsp->softc != NULL) - g_free(gsp->softc); + g_free(gsp->hotspot); + g_free(gsp->softc); g_free(gsp); } static int g_slice_access(struct g_provider *pp, int dr, int dw, int de) { int error; u_int u; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp2; struct g_slicer *gsp; struct g_slice *gsl, *gsl2; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); KASSERT (cp != NULL, ("g_slice_access but no consumer")); gsp = gp->softc; if (dr > 0 || dw > 0 || de > 0) { gsl = &gsp->slices[pp->index]; for (u = 0; u < gsp->nslice; u++) { gsl2 = &gsp->slices[u]; if (gsl2->length == 0) continue; if (u == pp->index) continue; if (gsl->offset + gsl->length <= gsl2->offset) continue; if (gsl2->offset + gsl2->length <= gsl->offset) continue; /* overlap */ pp2 = gsl2->provider; if ((pp->acw + dw) > 0 && pp2->ace > 0) return (EPERM); if ((pp->ace + de) > 0 && pp2->acw > 0) return (EPERM); } } /* On first open, grab an extra "exclusive" bit */ if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0) de++; /* ... 
and let go of it on last close */ if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1) de--; error = g_access(cp, dr, dw, de); /* * Free the softc if all providers have been closed and this geom * is being removed. */ if (error == 0 && (gp->flags & G_GEOM_WITHER) != 0 && (cp->acr + cp->acw + cp->ace) == 0) g_slice_free(gp); return (error); } /* * XXX: It should be possible to specify here if we should finish all of the * XXX: bio, or only the non-hot bits. This would get messy if there were * XXX: two hot spots in the same bio, so for now we simply finish off the * XXX: entire bio. Modifying hot data on the way to disk is frowned on * XXX: so making that considerably harder is not a bad idea anyway. */ void g_slice_finish_hot(struct bio *bp) { struct bio *bp2; struct g_geom *gp; struct g_consumer *cp; struct g_slicer *gsp; struct g_slice *gsl; int idx; KASSERT(bp->bio_to != NULL, ("NULL bio_to in g_slice_finish_hot(%p)", bp)); KASSERT(bp->bio_from != NULL, ("NULL bio_from in g_slice_finish_hot(%p)", bp)); gp = bp->bio_to->geom; gsp = gp->softc; cp = LIST_FIRST(&gp->consumer); KASSERT(cp != NULL, ("NULL consumer in g_slice_finish_hot(%p)", bp)); idx = bp->bio_to->index; gsl = &gsp->slices[idx]; bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } if (bp2->bio_offset + bp2->bio_length > gsl->length) bp2->bio_length = gsl->length - bp2->bio_offset; bp2->bio_done = g_std_done; bp2->bio_offset += gsl->offset; g_io_request(bp2, cp); return; } static void g_slice_done(struct bio *bp) { KASSERT(bp->bio_cmd == BIO_GETATTR && strcmp(bp->bio_attribute, "GEOM::ident") == 0, ("bio_cmd=0x%x bio_attribute=%s", bp->bio_cmd, bp->bio_attribute)); if (bp->bio_error == 0 && bp->bio_data[0] != '\0') { char idx[8]; /* Add index to the ident received. 
*/ snprintf(idx, sizeof(idx), "s%d", bp->bio_parent->bio_to->index); if (strlcat(bp->bio_data, idx, bp->bio_length) >= bp->bio_length) { bp->bio_error = EFAULT; } } g_std_done(bp); } static void g_slice_start(struct bio *bp) { struct bio *bp2; struct g_provider *pp; struct g_geom *gp; struct g_consumer *cp; struct g_slicer *gsp; struct g_slice *gsl; struct g_slice_hot *ghp; int idx, error; u_int m_index; off_t t; pp = bp->bio_to; gp = pp->geom; gsp = gp->softc; cp = LIST_FIRST(&gp->consumer); idx = pp->index; gsl = &gsp->slices[idx]; switch(bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: if (bp->bio_offset > gsl->length) { g_io_deliver(bp, EINVAL); /* XXX: EWHAT ? */ return; } /* * Check if we collide with any hot spaces, and call the * method once if so. */ t = bp->bio_offset + gsl->offset; for (m_index = 0; m_index < gsp->nhotspot; m_index++) { ghp = &gsp->hotspot[m_index]; if (t >= ghp->offset + ghp->length) continue; if (t + bp->bio_length <= ghp->offset) continue; switch(bp->bio_cmd) { case BIO_READ: idx = ghp->ract; break; case BIO_WRITE: idx = ghp->wact; break; case BIO_DELETE: idx = ghp->dact; break; } switch(idx) { case G_SLICE_HOT_ALLOW: /* Fall out and continue normal processing */ continue; case G_SLICE_HOT_DENY: g_io_deliver(bp, EROFS); return; case G_SLICE_HOT_START: error = gsp->start(bp); if (error && error != EJUSTRETURN) g_io_deliver(bp, error); return; case G_SLICE_HOT_CALL: error = g_post_event(gsp->hot, bp, M_NOWAIT, gp, NULL); if (error) g_io_deliver(bp, error); return; } break; } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } if (bp2->bio_offset + bp2->bio_length > gsl->length) bp2->bio_length = gsl->length - bp2->bio_offset; bp2->bio_done = g_std_done; bp2->bio_offset += gsl->offset; g_io_request(bp2, cp); return; case BIO_GETATTR: /* Give the real method a chance to override */ if (gsp->start != NULL && gsp->start(bp)) return; if (!strcmp("GEOM::ident", bp->bio_attribute)) { bp2 = 
g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = g_slice_done; g_io_request(bp2, cp); return; } if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) { struct g_kerneldump *gkd; gkd = (struct g_kerneldump *)bp->bio_data; gkd->offset += gsp->slices[idx].offset; if (gkd->length > gsp->slices[idx].length) gkd->length = gsp->slices[idx].length; /* now, pass it on downwards... */ } /* FALLTHROUGH */ case BIO_SPEEDUP: case BIO_FLUSH: bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = g_std_done; g_io_request(bp2, cp); break; default: g_io_deliver(bp, EOPNOTSUPP); return; } } void g_slice_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_slicer *gsp; gsp = gp->softc; if (indent == NULL) { sbuf_printf(sb, " i %u", pp->index); sbuf_printf(sb, " o %ju", (uintmax_t)gsp->slices[pp->index].offset); return; } if (pp != NULL) { sbuf_printf(sb, "%s%u\n", indent, pp->index); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)gsp->slices[pp->index].length); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)gsp->slices[pp->index].length / 512); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)gsp->slices[pp->index].offset); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)gsp->slices[pp->index].offset / 512); } } int g_slice_config(struct g_geom *gp, u_int idx, int how, off_t offset, off_t length, u_int sectorsize, const char *fmt, ...) 
{ struct g_provider *pp, *pp2; struct g_slicer *gsp; struct g_slice *gsl; va_list ap; struct sbuf *sb; int acc; g_trace(G_T_TOPOLOGY, "g_slice_config(%s, %d, %d)", gp->name, idx, how); g_topology_assert(); gsp = gp->softc; if (idx >= gsp->nslice) return(EINVAL); gsl = &gsp->slices[idx]; pp = gsl->provider; if (pp != NULL) acc = pp->acr + pp->acw + pp->ace; else acc = 0; if (acc != 0 && how != G_SLICE_CONFIG_FORCE) { if (length < gsl->length) return(EBUSY); if (offset != gsl->offset) return(EBUSY); } /* XXX: check offset + length <= MEDIASIZE */ if (how == G_SLICE_CONFIG_CHECK) return (0); gsl->length = length; gsl->offset = offset; gsl->sectorsize = sectorsize; if (length == 0) { if (pp == NULL) return (0); if (bootverbose) printf("GEOM: Deconfigure %s\n", pp->name); g_wither_provider(pp, ENXIO); gsl->provider = NULL; gsp->nprovider--; return (0); } if (pp != NULL) { if (bootverbose) printf("GEOM: Reconfigure %s, start %jd length %jd end %jd\n", pp->name, (intmax_t)offset, (intmax_t)length, (intmax_t)(offset + length - 1)); g_resize_provider(pp, gsl->length); return (0); } sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); pp = g_new_providerf(gp, "%s", sbuf_data(sb)); pp2 = LIST_FIRST(&gp->consumer)->provider; pp->stripesize = pp2->stripesize; pp->stripeoffset = pp2->stripeoffset + offset; if (pp->stripesize > 0) pp->stripeoffset %= pp->stripesize; if (gsp->nhotspot == 0) { pp->flags |= pp2->flags & G_PF_ACCEPT_UNMAPPED; pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; } if (0 && bootverbose) printf("GEOM: Configure %s, start %jd length %jd end %jd\n", pp->name, (intmax_t)offset, (intmax_t)length, (intmax_t)(offset + length - 1)); pp->index = idx; pp->mediasize = gsl->length; pp->sectorsize = gsl->sectorsize; gsl->provider = pp; gsp->nprovider++; g_error_provider(pp, 0); sbuf_delete(sb); return(0); } /* * Configure "hotspots". 
A hotspot is a piece of the parent device which * this particular slicer cares about for some reason. Typically because * it contains meta-data used to configure the slicer. * A hotspot is identified by its index number. The offset and length are * relative to the parent device, and the three "?act" fields specify * what action to take on BIO_READ, BIO_DELETE and BIO_WRITE. * * XXX: There may be a race relative to g_slice_start() here, if an existing * XXX: hotspot is changed wile I/O is happening. Should this become a problem * XXX: we can protect the hotspot stuff with a mutex. */ int g_slice_conf_hot(struct g_geom *gp, u_int idx, off_t offset, off_t length, int ract, int dact, int wact) { struct g_slicer *gsp; struct g_slice_hot *gsl, *gsl2; struct g_consumer *cp; struct g_provider *pp; g_trace(G_T_TOPOLOGY, "g_slice_conf_hot(%s, idx: %d, off: %jd, len: %jd)", gp->name, idx, (intmax_t)offset, (intmax_t)length); g_topology_assert(); gsp = gp->softc; /* Deny unmapped I/O and direct dispatch if hotspots are used. 
*/ if (gsp->nhotspot == 0) { LIST_FOREACH(pp, &gp->provider, provider) pp->flags &= ~(G_PF_ACCEPT_UNMAPPED | G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE); LIST_FOREACH(cp, &gp->consumer, consumer) cp->flags &= ~(G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE); } gsl = gsp->hotspot; if(idx >= gsp->nhotspot) { gsl2 = g_malloc((idx + 1) * sizeof *gsl2, M_WAITOK | M_ZERO); if (gsp->hotspot != NULL) bcopy(gsp->hotspot, gsl2, gsp->nhotspot * sizeof *gsl2); gsp->hotspot = gsl2; if (gsp->hotspot != NULL) g_free(gsl); gsl = gsl2; gsp->nhotspot = idx + 1; } gsl[idx].offset = offset; gsl[idx].length = length; KASSERT(!((ract | dact | wact) & G_SLICE_HOT_START) || gsp->start != NULL, ("G_SLICE_HOT_START but no slice->start")); /* XXX: check that we _have_ a start function if HOT_START specified */ gsl[idx].ract = ract; gsl[idx].dact = dact; gsl[idx].wact = wact; return (0); } void g_slice_orphan(struct g_consumer *cp) { struct g_geom *gp; g_topology_assert(); gp = cp->geom; g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, gp->name); g_wither_geom(gp, ENXIO); /* * We can safely free the softc now if there are no accesses, * otherwise g_slice_access() will do that after the last close. 
*/ if ((cp->acr + cp->acw + cp->ace) == 0) g_slice_free(gp); } void g_slice_spoiled(struct g_consumer *cp) { g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, cp->geom->name); cp->flags |= G_CF_ORPHAN; g_slice_orphan(cp); } int g_slice_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { g_slice_spoiled(LIST_FIRST(&gp->consumer)); return (0); } struct g_geom * g_slice_new(struct g_class *mp, u_int slices, struct g_provider *pp, struct g_consumer **cpp, void *extrap, int extra, g_slice_start_t *start) { struct g_geom *gp; struct g_slicer *gsp; struct g_consumer *cp; void **vp; int error; g_topology_assert(); vp = (void **)extrap; gp = g_new_geomf(mp, "%s", pp->name); gsp = g_slice_alloc(slices, extra); gsp->start = start; gp->softc = gsp; gp->start = g_slice_start; gp->access = g_slice_access; gp->orphan = g_slice_orphan; gp->spoiled = g_slice_spoiled; if (gp->dumpconf == NULL) gp->dumpconf = g_slice_dumpconf; if (gp->class->destroy_geom == NULL) gp->class->destroy_geom = g_slice_destroy_geom; cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); if (error) { g_wither_geom(gp, ENXIO); return (NULL); } if (extrap != NULL) *vp = gsp->softc; *cpp = cp; return (gp); } diff --git a/sys/geom/label/g_label_msdosfs.c b/sys/geom/label/g_label_msdosfs.c index 67ac879d62c2..9ee052c7c1ef 100644 --- a/sys/geom/label/g_label_msdosfs.c +++ b/sys/geom/label/g_label_msdosfs.c @@ -1,224 +1,222 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Pawel Jakub Dawidek * Copyright (c) 2006 Tobias Reifenberger * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #define LABEL_NO_NAME "NO NAME " static void g_label_msdosfs_taste(struct g_consumer *cp, char *label, size_t size) { struct g_provider *pp; FAT_BSBPB *pfat_bsbpb; FAT32_BSBPB *pfat32_bsbpb; FAT_DES *pfat_entry; uint8_t *sector0, *sector; g_topology_assert_not(); pp = cp->provider; sector0 = NULL; sector = NULL; bzero(label, size); /* Check if the sector size of the medium is a valid FAT sector size. */ switch(pp->sectorsize) { case 512: case 1024: case 2048: case 4096: break; default: G_LABEL_DEBUG(1, "MSDOSFS: %s: sector size %d not compatible.", pp->name, pp->sectorsize); return; } /* Load 1st sector with boot sector and boot parameter block. */ sector0 = (uint8_t *)g_read_data(cp, 0, pp->sectorsize, NULL); if (sector0 == NULL) return; /* Check for the FAT boot sector signature. 
*/ if (sector0[510] != 0x55 || sector0[511] != 0xaa) { G_LABEL_DEBUG(1, "MSDOSFS: %s: no FAT signature found.", pp->name); goto error; } /* * Test if this is really a FAT volume and determine the FAT type. */ pfat_bsbpb = (FAT_BSBPB *)sector0; pfat32_bsbpb = (FAT32_BSBPB *)sector0; if (UINT16BYTES(pfat_bsbpb->BPB_FATSz16) != 0) { /* * If the BPB_FATSz16 field is not zero and the string "FAT" is * at the right place, this should be a FAT12 or FAT16 volume. */ if (strncmp(pfat_bsbpb->BS_FilSysType, "FAT", 3) != 0) { G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT12/16 volume not valid.", pp->name); goto error; } G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT12/FAT16 volume detected.", pp->name); /* A volume with no name should have "NO NAME " as label. */ if (strncmp(pfat_bsbpb->BS_VolLab, LABEL_NO_NAME, sizeof(pfat_bsbpb->BS_VolLab)) == 0) { G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT12/16 volume has no name.", pp->name); goto error; } strlcpy(label, pfat_bsbpb->BS_VolLab, MIN(size, sizeof(pfat_bsbpb->BS_VolLab) + 1)); } else if (UINT32BYTES(pfat32_bsbpb->BPB_FATSz32) != 0) { uint32_t fat_FirstDataSector, fat_BytesPerSector, offset; /* * If the BPB_FATSz32 field is not zero and the string "FAT" is * at the right place, this should be a FAT32 volume. */ if (strncmp(pfat32_bsbpb->BS_FilSysType, "FAT", 3) != 0) { G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT32 volume not valid.", pp->name); goto error; } G_LABEL_DEBUG(1, "MSDOSFS: %s: FAT32 volume detected.", pp->name); /* * If the volume label is not "NO NAME " we're done. */ if (strncmp(pfat32_bsbpb->BS_VolLab, LABEL_NO_NAME, sizeof(pfat32_bsbpb->BS_VolLab)) != 0) { strlcpy(label, pfat32_bsbpb->BS_VolLab, MIN(size, sizeof(pfat32_bsbpb->BS_VolLab) + 1)); goto endofchecks; } /* * If the volume label "NO NAME " is in the boot sector, the * label of FAT32 volumes may be stored as a special entry in * the root directory. 
*/ fat_FirstDataSector = UINT16BYTES(pfat32_bsbpb->BPB_RsvdSecCnt) + (pfat32_bsbpb->BPB_NumFATs * UINT32BYTES(pfat32_bsbpb->BPB_FATSz32)); fat_BytesPerSector = UINT16BYTES(pfat32_bsbpb->BPB_BytsPerSec); G_LABEL_DEBUG(2, "MSDOSFS: FAT_FirstDataSector=0x%x, FAT_BytesPerSector=%d", fat_FirstDataSector, fat_BytesPerSector); if (fat_BytesPerSector == 0 || fat_BytesPerSector % pp->sectorsize != 0) { G_LABEL_DEBUG(1, "MSDOSFS: %s: corrupted BPB", pp->name); goto error; } for (offset = fat_BytesPerSector * fat_FirstDataSector;; offset += fat_BytesPerSector) { sector = (uint8_t *)g_read_data(cp, offset, fat_BytesPerSector, NULL); if (sector == NULL) goto error; pfat_entry = (FAT_DES *)sector; do { /* No more entries available. */ if (pfat_entry->DIR_Name[0] == 0) { G_LABEL_DEBUG(1, "MSDOSFS: %s: " "FAT32 volume has no name.", pp->name); goto error; } /* Skip empty or long name entries. */ if (pfat_entry->DIR_Name[0] == 0xe5 || (pfat_entry->DIR_Attr & FAT_DES_ATTR_LONG_NAME) == FAT_DES_ATTR_LONG_NAME) { continue; } /* * The name of the entry is the volume label if * ATTR_VOLUME_ID is set. 
*/ if (pfat_entry->DIR_Attr & FAT_DES_ATTR_VOLUME_ID) { strlcpy(label, pfat_entry->DIR_Name, MIN(size, sizeof(pfat_entry->DIR_Name) + 1)); goto endofchecks; } } while((uint8_t *)(++pfat_entry) < (uint8_t *)(sector + fat_BytesPerSector)); g_free(sector); } } else { G_LABEL_DEBUG(1, "MSDOSFS: %s: no FAT volume detected.", pp->name); goto error; } endofchecks: g_label_rtrim(label, size); error: - if (sector0 != NULL) - g_free(sector0); - if (sector != NULL) - g_free(sector); + g_free(sector0); + g_free(sector); } struct g_label_desc g_label_msdosfs = { .ld_taste = g_label_msdosfs_taste, .ld_dirprefix = "msdosfs/", .ld_enabled = 1 }; G_LABEL_INIT(msdosfs, g_label_msdosfs, "Create device nodes for MSDOSFS volumes"); diff --git a/sys/geom/label/g_label_ntfs.c b/sys/geom/label/g_label_ntfs.c index 888096164b09..bcaeebce99f2 100644 --- a/sys/geom/label/g_label_ntfs.c +++ b/sys/geom/label/g_label_ntfs.c @@ -1,190 +1,188 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Takanori Watanabe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #define NTFS_A_VOLUMENAME 0x60 #define NTFS_FILEMAGIC ((uint32_t)(0x454C4946)) #define NTFS_VOLUMEINO 3 struct ntfs_attr { uint32_t a_type; uint32_t reclen; uint8_t a_flag; uint8_t a_namelen; uint8_t a_nameoff; uint8_t reserved1; uint8_t a_compression; uint8_t reserved2; uint16_t a_index; uint16_t a_datalen; uint16_t reserved3; uint16_t a_dataoff; uint16_t a_indexed; } __packed; struct ntfs_filerec { uint32_t fr_hdrmagic; uint16_t fr_hdrfoff; uint16_t fr_hdrfnum; uint8_t reserved[8]; uint16_t fr_seqnum; uint16_t fr_nlink; uint16_t fr_attroff; uint16_t fr_flags; uint32_t fr_size; uint32_t fr_allocated; uint64_t fr_mainrec; uint16_t fr_attrnum; } __packed; struct ntfs_bootfile { uint8_t reserved1[3]; uint8_t bf_sysid[8]; uint16_t bf_bps; uint8_t bf_spc; uint8_t reserved2[7]; uint8_t bf_media; uint8_t reserved3[2]; uint16_t bf_spt; uint16_t bf_heads; uint8_t reserver4[12]; uint64_t bf_spv; uint64_t bf_mftcn; uint64_t bf_mftmirrcn; int8_t bf_mftrecsz; uint32_t bf_ibsz; uint32_t bf_volsn; } __packed; static void g_label_ntfs_taste(struct g_consumer *cp, char *label, size_t size) { struct g_provider *pp; struct ntfs_bootfile *bf; struct ntfs_filerec *fr; struct ntfs_attr *atr; off_t voloff; size_t recoff; char *filerecp; int8_t mftrecsz; char vnchar; int recsize, j; g_topology_assert_not(); label[0] = '\0'; pp = cp->provider; bf = NULL; filerecp = NULL; if (pp->sectorsize < 
sizeof(*bf)) goto done; bf = g_read_data(cp, 0, pp->sectorsize, NULL); if (bf == NULL || strncmp(bf->bf_sysid, "NTFS ", 8) != 0) goto done; mftrecsz = bf->bf_mftrecsz; recsize = (mftrecsz > 0) ? (mftrecsz * bf->bf_bps * bf->bf_spc) : (1 << -mftrecsz); if (recsize <= 0 || recsize > maxphys || recsize % pp->sectorsize != 0) goto done; voloff = bf->bf_mftcn * bf->bf_spc * bf->bf_bps + recsize * NTFS_VOLUMEINO; if (voloff % pp->sectorsize != 0) goto done; filerecp = g_read_data(cp, voloff, recsize, NULL); if (filerecp == NULL) goto done; fr = (struct ntfs_filerec *)filerecp; if (fr->fr_hdrmagic != NTFS_FILEMAGIC) goto done; for (recoff = fr->fr_attroff; recoff <= recsize - 2 * sizeof(uint32_t); recoff += atr->reclen) { atr = (struct ntfs_attr *)(filerecp + recoff); if (atr->a_type == -1) break; if (atr->reclen < sizeof(*atr)) break; if (recsize - recoff < atr->reclen) break; if (atr->a_type == NTFS_A_VOLUMENAME) { if (atr->a_dataoff > atr->reclen || atr->a_datalen > atr->reclen - atr->a_dataoff) break; /* * UNICODE to ASCII. * Should we need to use iconv(9)? */ if (atr->a_datalen >= size * 2 || atr->a_datalen % 2 != 0) break; for (j = 0; j < atr->a_datalen; j++) { vnchar = ((char *)atr)[atr->a_dataoff + j]; if (j & 1) { if (vnchar) { label[0] = 0; goto done; } } else { label[j / 2] = vnchar; } } label[j / 2] = 0; break; } } done: - if (bf != NULL) - g_free(bf); - if (filerecp != NULL) - g_free(filerecp); + g_free(bf); + g_free(filerecp); } struct g_label_desc g_label_ntfs = { .ld_taste = g_label_ntfs_taste, .ld_dirprefix = "ntfs/", .ld_enabled = 1 }; G_LABEL_INIT(ntfs, g_label_ntfs, "Create device nodes for NTFS volumes"); diff --git a/sys/geom/part/g_part_bsd.c b/sys/geom/part/g_part_bsd.c index 2432d1911493..0f23a277ce8e 100644 --- a/sys/geom/part/g_part_bsd.c +++ b/sys/geom/part/g_part_bsd.c @@ -1,542 +1,541 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007 Marcel Moolenaar * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" #define BOOT1_SIZE 512 #define LABEL_SIZE 512 #define BOOT2_OFF (BOOT1_SIZE + LABEL_SIZE) #define BOOT2_SIZE (BBSIZE - BOOT2_OFF) FEATURE(geom_part_bsd, "GEOM partitioning class for BSD disklabels"); struct g_part_bsd_table { struct g_part_table base; u_char *bbarea; uint32_t offset; }; struct g_part_bsd_entry { struct g_part_entry base; struct partition part; }; static int g_part_bsd_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_bsd_bootcode(struct g_part_table *, struct g_part_parms *); static int g_part_bsd_create(struct g_part_table *, struct g_part_parms *); static int g_part_bsd_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_bsd_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_bsd_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_bsd_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_bsd_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_bsd_probe(struct g_part_table *, struct g_consumer *); static int g_part_bsd_read(struct g_part_table *, struct g_consumer *); static const char *g_part_bsd_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_bsd_write(struct g_part_table *, struct g_consumer *); static int g_part_bsd_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static kobj_method_t g_part_bsd_methods[] = { KOBJMETHOD(g_part_add, g_part_bsd_add), KOBJMETHOD(g_part_bootcode, g_part_bsd_bootcode), KOBJMETHOD(g_part_create, g_part_bsd_create), KOBJMETHOD(g_part_destroy, g_part_bsd_destroy), KOBJMETHOD(g_part_dumpconf, g_part_bsd_dumpconf), KOBJMETHOD(g_part_dumpto, 
g_part_bsd_dumpto), KOBJMETHOD(g_part_modify, g_part_bsd_modify), KOBJMETHOD(g_part_resize, g_part_bsd_resize), KOBJMETHOD(g_part_name, g_part_bsd_name), KOBJMETHOD(g_part_probe, g_part_bsd_probe), KOBJMETHOD(g_part_read, g_part_bsd_read), KOBJMETHOD(g_part_type, g_part_bsd_type), KOBJMETHOD(g_part_write, g_part_bsd_write), { 0, 0 } }; static struct g_part_scheme g_part_bsd_scheme = { "BSD", g_part_bsd_methods, sizeof(struct g_part_bsd_table), .gps_entrysz = sizeof(struct g_part_bsd_entry), .gps_minent = 8, .gps_maxent = 20, /* Only 22 entries fit in 512 byte sectors */ .gps_bootcodesz = BBSIZE, }; G_PART_SCHEME_DECLARE(g_part_bsd); MODULE_VERSION(geom_part_bsd, 0); static struct g_part_bsd_alias { uint8_t type; int alias; } bsd_alias_match[] = { { FS_BSDFFS, G_PART_ALIAS_FREEBSD_UFS }, { FS_SWAP, G_PART_ALIAS_FREEBSD_SWAP }, { FS_ZFS, G_PART_ALIAS_FREEBSD_ZFS }, { FS_VINUM, G_PART_ALIAS_FREEBSD_VINUM }, { FS_NANDFS, G_PART_ALIAS_FREEBSD_NANDFS }, { FS_HAMMER, G_PART_ALIAS_DFBSD_HAMMER }, { FS_HAMMER2, G_PART_ALIAS_DFBSD_HAMMER2 }, }; static int bsd_parse_type(const char *type, uint8_t *fstype) { const char *alias; char *endp; long lt; int i; if (type[0] == '!') { lt = strtol(type + 1, &endp, 0); if (type[1] == '\0' || *endp != '\0' || lt <= 0 || lt >= 256) return (EINVAL); *fstype = (u_int)lt; return (0); } for (i = 0; i < nitems(bsd_alias_match); i++) { alias = g_part_alias_name(bsd_alias_match[i].alias); if (strcasecmp(type, alias) == 0) { *fstype = bsd_alias_match[i].type; return (0); } } return (EINVAL); } static int g_part_bsd_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_bsd_entry *entry; struct g_part_bsd_table *table; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); entry = (struct g_part_bsd_entry *)baseentry; table = (struct g_part_bsd_table *)basetable; entry->part.p_size = gpp->gpp_size; entry->part.p_offset = gpp->gpp_start + table->offset; entry->part.p_fsize = 0; 
entry->part.p_frag = 0; entry->part.p_cpg = 0; return (bsd_parse_type(gpp->gpp_type, &entry->part.p_fstype)); } static int g_part_bsd_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_bsd_table *table; const u_char *codeptr; if (gpp->gpp_codesize != BOOT1_SIZE && gpp->gpp_codesize != BBSIZE) return (ENODEV); table = (struct g_part_bsd_table *)basetable; codeptr = gpp->gpp_codeptr; bcopy(codeptr, table->bbarea, BOOT1_SIZE); if (gpp->gpp_codesize == BBSIZE) bcopy(codeptr + BOOT2_OFF, table->bbarea + BOOT2_OFF, BOOT2_SIZE); return (0); } static int g_part_bsd_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_entry *baseentry; struct g_part_bsd_entry *entry; struct g_part_bsd_table *table; u_char *ptr; uint32_t msize, ncyls, secpercyl; pp = gpp->gpp_provider; if (pp->sectorsize < sizeof(struct disklabel)) return (ENOSPC); if (BBSIZE % pp->sectorsize) return (ENOTBLK); msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); secpercyl = basetable->gpt_sectors * basetable->gpt_heads; ncyls = msize / secpercyl; table = (struct g_part_bsd_table *)basetable; table->bbarea = g_malloc(BBSIZE, M_WAITOK | M_ZERO); ptr = table->bbarea + pp->sectorsize; le32enc(ptr + 0, DISKMAGIC); /* d_magic */ le32enc(ptr + 40, pp->sectorsize); /* d_secsize */ le32enc(ptr + 44, basetable->gpt_sectors); /* d_nsectors */ le32enc(ptr + 48, basetable->gpt_heads); /* d_ntracks */ le32enc(ptr + 52, ncyls); /* d_ncylinders */ le32enc(ptr + 56, secpercyl); /* d_secpercyl */ le32enc(ptr + 60, msize); /* d_secperunit */ le16enc(ptr + 72, 3600); /* d_rpm */ le32enc(ptr + 132, DISKMAGIC); /* d_magic2 */ le16enc(ptr + 138, basetable->gpt_entries); /* d_npartitions */ le32enc(ptr + 140, BBSIZE); /* d_bbsize */ basetable->gpt_first = 0; basetable->gpt_last = msize - 1; basetable->gpt_isleaf = 1; baseentry = g_part_new_entry(basetable, RAW_PART + 1, basetable->gpt_first, basetable->gpt_last); baseentry->gpe_internal = 1; 
entry = (struct g_part_bsd_entry *)baseentry; entry->part.p_size = basetable->gpt_last + 1; entry->part.p_offset = table->offset; return (0); } static int g_part_bsd_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_bsd_table *table; table = (struct g_part_bsd_table *)basetable; - if (table->bbarea != NULL) - g_free(table->bbarea); + g_free(table->bbarea); table->bbarea = NULL; /* Wipe the second sector to clear the partitioning. */ basetable->gpt_smhead |= 2; return (0); } static void g_part_bsd_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_bsd_entry *entry; entry = (struct g_part_bsd_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs BSD xt %u", entry->part.p_fstype); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s%u\n", indent, entry->part.p_fstype); } else { /* confxml: scheme information */ } } static int g_part_bsd_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_bsd_entry *entry; /* Allow dumping to a swap partition or an unused partition. */ entry = (struct g_part_bsd_entry *)baseentry; return ((entry->part.p_fstype == FS_UNUSED || entry->part.p_fstype == FS_SWAP) ? 
1 : 0); } static int g_part_bsd_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_bsd_entry *entry; if (gpp->gpp_parms & G_PART_PARM_LABEL) return (EINVAL); entry = (struct g_part_bsd_entry *)baseentry; if (gpp->gpp_parms & G_PART_PARM_TYPE) return (bsd_parse_type(gpp->gpp_type, &entry->part.p_fstype)); return (0); } static void bsd_set_rawsize(struct g_part_table *basetable, struct g_provider *pp) { struct g_part_bsd_table *table; struct g_part_bsd_entry *entry; struct g_part_entry *baseentry; uint32_t msize; table = (struct g_part_bsd_table *)basetable; msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); le32enc(table->bbarea + pp->sectorsize + 60, msize); /* d_secperunit */ basetable->gpt_last = msize - 1; LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_index != RAW_PART + 1) continue; baseentry->gpe_end = basetable->gpt_last; entry = (struct g_part_bsd_entry *)baseentry; entry->part.p_size = msize; return; } } static int g_part_bsd_resize(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_bsd_entry *entry; struct g_provider *pp; if (baseentry == NULL) { pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; bsd_set_rawsize(basetable, pp); return (0); } entry = (struct g_part_bsd_entry *)baseentry; baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1; entry->part.p_size = gpp->gpp_size; return (0); } static const char * g_part_bsd_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { snprintf(buf, bufsz, "%c", 'a' + baseentry->gpe_index - 1); return (buf); } static int g_part_bsd_probe(struct g_part_table *table, struct g_consumer *cp) { struct g_provider *pp; u_char *buf; uint32_t magic1, magic2; int error; pp = cp->provider; /* Sanity-check the provider. 
*/ if (pp->sectorsize < sizeof(struct disklabel) || pp->mediasize < BBSIZE) return (ENOSPC); if (BBSIZE % pp->sectorsize) return (ENOTBLK); /* Check that there's a disklabel. */ buf = g_read_data(cp, pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) return (error); magic1 = le32dec(buf + 0); magic2 = le32dec(buf + 132); g_free(buf); return ((magic1 == DISKMAGIC && magic2 == DISKMAGIC) ? G_PART_PROBE_PRI_HIGH : ENXIO); } static int g_part_bsd_read(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; struct g_part_bsd_table *table; struct g_part_entry *baseentry; struct g_part_bsd_entry *entry; struct partition part; u_char *buf, *p; off_t chs, msize; u_int sectors, heads; int error, index; pp = cp->provider; table = (struct g_part_bsd_table *)basetable; msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); table->bbarea = g_read_data(cp, 0, BBSIZE, &error); if (table->bbarea == NULL) return (error); buf = table->bbarea + pp->sectorsize; if (le32dec(buf + 40) != pp->sectorsize) goto invalid_label; sectors = le32dec(buf + 44); if (sectors < 1 || sectors > 255) goto invalid_label; if (sectors != basetable->gpt_sectors && !basetable->gpt_fixgeom) { g_part_geometry_heads(msize, sectors, &chs, &heads); if (chs != 0) { basetable->gpt_sectors = sectors; basetable->gpt_heads = heads; } } heads = le32dec(buf + 48); if (heads < 1 || heads > 255) goto invalid_label; if (heads != basetable->gpt_heads && !basetable->gpt_fixgeom) basetable->gpt_heads = heads; chs = le32dec(buf + 60); if (chs < 1) goto invalid_label; /* Fix-up a sysinstall bug. 
*/ if (chs > msize) { chs = msize; le32enc(buf + 60, msize); } basetable->gpt_first = 0; basetable->gpt_last = msize - 1; basetable->gpt_isleaf = 1; basetable->gpt_entries = le16dec(buf + 138); if (basetable->gpt_entries < g_part_bsd_scheme.gps_minent || basetable->gpt_entries > g_part_bsd_scheme.gps_maxent) goto invalid_label; table->offset = le32dec(buf + 148 + RAW_PART * 16 + 4); for (index = basetable->gpt_entries - 1; index >= 0; index--) { p = buf + 148 + index * 16; part.p_size = le32dec(p + 0); part.p_offset = le32dec(p + 4); part.p_fsize = le32dec(p + 8); part.p_fstype = p[12]; part.p_frag = p[13]; part.p_cpg = le16dec(p + 14); if (part.p_size == 0) continue; if (part.p_offset < table->offset) continue; if (part.p_offset - table->offset > basetable->gpt_last) goto invalid_label; baseentry = g_part_new_entry(basetable, index + 1, part.p_offset - table->offset, part.p_offset - table->offset + part.p_size - 1); entry = (struct g_part_bsd_entry *)baseentry; entry->part = part; if (index == RAW_PART) baseentry->gpe_internal = 1; } return (0); invalid_label: printf("GEOM: %s: invalid disklabel.\n", pp->name); g_free(table->bbarea); table->bbarea = NULL; return (EINVAL); } static const char * g_part_bsd_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_bsd_entry *entry; int type; entry = (struct g_part_bsd_entry *)baseentry; type = entry->part.p_fstype; if (type == FS_NANDFS) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_NANDFS)); if (type == FS_SWAP) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_SWAP)); if (type == FS_BSDFFS) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_UFS)); if (type == FS_VINUM) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_VINUM)); if (type == FS_ZFS) return (g_part_alias_name(G_PART_ALIAS_FREEBSD_ZFS)); snprintf(buf, bufsz, "!%d", type); return (buf); } static int g_part_bsd_write(struct g_part_table *basetable, struct g_consumer *cp) { struct g_provider *pp; struct 
g_part_entry *baseentry; struct g_part_bsd_entry *entry; struct g_part_bsd_table *table; uint16_t sum; u_char *label, *p, *pe; int error, index; pp = cp->provider; table = (struct g_part_bsd_table *)basetable; baseentry = LIST_FIRST(&basetable->gpt_entry); label = table->bbarea + pp->sectorsize; for (index = 1; index <= basetable->gpt_entries; index++) { p = label + 148 + (index - 1) * 16; entry = (baseentry != NULL && index == baseentry->gpe_index) ? (struct g_part_bsd_entry *)baseentry : NULL; if (entry != NULL && !baseentry->gpe_deleted) { le32enc(p + 0, entry->part.p_size); le32enc(p + 4, entry->part.p_offset); le32enc(p + 8, entry->part.p_fsize); p[12] = entry->part.p_fstype; p[13] = entry->part.p_frag; le16enc(p + 14, entry->part.p_cpg); } else bzero(p, 16); if (entry != NULL) baseentry = LIST_NEXT(baseentry, gpe_entry); } /* Calculate checksum. */ le16enc(label + 136, 0); pe = label + 148 + basetable->gpt_entries * 16; sum = 0; for (p = label; p < pe; p += 2) sum ^= le16dec(p); le16enc(label + 136, sum); error = g_write_data(cp, 0, table->bbarea, BBSIZE); return (error); } diff --git a/sys/geom/part/g_part_gpt.c b/sys/geom/part/g_part_gpt.c index 775ec20081ea..702474e23cd0 100644 --- a/sys/geom/part/g_part_gpt.c +++ b/sys/geom/part/g_part_gpt.c @@ -1,1469 +1,1460 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002, 2005-2007, 2011 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_gpt, "GEOM partitioning class for GPT partitions support"); SYSCTL_DECL(_kern_geom_part); static SYSCTL_NODE(_kern_geom_part, OID_AUTO, gpt, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_PART_GPT GUID Partition Table"); static u_int allow_nesting = 0; SYSCTL_UINT(_kern_geom_part_gpt, OID_AUTO, allow_nesting, CTLFLAG_RWTUN, &allow_nesting, 0, "Allow GPT to be nested inside other schemes"); CTASSERT(offsetof(struct gpt_hdr, padding) == 92); CTASSERT(sizeof(struct gpt_ent) == 128); extern u_int geom_part_check_integrity; #define EQUUID(a,b) (memcmp(a, b, sizeof(struct uuid)) == 0) #define MBRSIZE 512 enum gpt_elt { GPT_ELT_PRIHDR, GPT_ELT_PRITBL, GPT_ELT_SECHDR, GPT_ELT_SECTBL, GPT_ELT_COUNT }; enum gpt_state { GPT_STATE_UNKNOWN, /* Not determined. */ GPT_STATE_MISSING, /* No signature found. */ GPT_STATE_CORRUPT, /* Checksum mismatch. */ GPT_STATE_INVALID, /* Nonconformant/invalid. */ GPT_STATE_UNSUPPORTED, /* Not supported. */ GPT_STATE_OK /* Perfectly fine. 
*/ }; struct g_part_gpt_table { struct g_part_table base; u_char mbr[MBRSIZE]; struct gpt_hdr *hdr; quad_t lba[GPT_ELT_COUNT]; enum gpt_state state[GPT_ELT_COUNT]; int bootcamp; }; struct g_part_gpt_entry { struct g_part_entry base; struct gpt_ent ent; }; static void g_gpt_printf_utf16(struct sbuf *, uint16_t *, size_t); static void g_gpt_utf8_to_utf16(const uint8_t *, uint16_t *, size_t); static void g_gpt_set_defaults(struct g_part_table *, struct g_provider *); static int g_part_gpt_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_gpt_bootcode(struct g_part_table *, struct g_part_parms *); static int g_part_gpt_create(struct g_part_table *, struct g_part_parms *); static int g_part_gpt_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_gpt_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_gpt_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_gpt_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_gpt_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_gpt_probe(struct g_part_table *, struct g_consumer *); static int g_part_gpt_read(struct g_part_table *, struct g_consumer *); static int g_part_gpt_setunset(struct g_part_table *table, struct g_part_entry *baseentry, const char *attrib, unsigned int set); static const char *g_part_gpt_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_gpt_write(struct g_part_table *, struct g_consumer *); static int g_part_gpt_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_gpt_recover(struct g_part_table *); static kobj_method_t g_part_gpt_methods[] = { KOBJMETHOD(g_part_add, g_part_gpt_add), KOBJMETHOD(g_part_bootcode, g_part_gpt_bootcode), KOBJMETHOD(g_part_create, g_part_gpt_create), KOBJMETHOD(g_part_destroy, 
g_part_gpt_destroy), KOBJMETHOD(g_part_dumpconf, g_part_gpt_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_gpt_dumpto), KOBJMETHOD(g_part_modify, g_part_gpt_modify), KOBJMETHOD(g_part_resize, g_part_gpt_resize), KOBJMETHOD(g_part_name, g_part_gpt_name), KOBJMETHOD(g_part_probe, g_part_gpt_probe), KOBJMETHOD(g_part_read, g_part_gpt_read), KOBJMETHOD(g_part_recover, g_part_gpt_recover), KOBJMETHOD(g_part_setunset, g_part_gpt_setunset), KOBJMETHOD(g_part_type, g_part_gpt_type), KOBJMETHOD(g_part_write, g_part_gpt_write), { 0, 0 } }; #define MAXENTSIZE 1024 static struct g_part_scheme g_part_gpt_scheme = { "GPT", g_part_gpt_methods, sizeof(struct g_part_gpt_table), .gps_entrysz = sizeof(struct g_part_gpt_entry), .gps_minent = 128, .gps_maxent = 4096, .gps_bootcodesz = MBRSIZE, }; G_PART_SCHEME_DECLARE(g_part_gpt); MODULE_VERSION(geom_part_gpt, 0); static struct uuid gpt_uuid_apple_apfs = GPT_ENT_TYPE_APPLE_APFS; static struct uuid gpt_uuid_apple_boot = GPT_ENT_TYPE_APPLE_BOOT; static struct uuid gpt_uuid_apple_core_storage = GPT_ENT_TYPE_APPLE_CORE_STORAGE; static struct uuid gpt_uuid_apple_hfs = GPT_ENT_TYPE_APPLE_HFS; static struct uuid gpt_uuid_apple_label = GPT_ENT_TYPE_APPLE_LABEL; static struct uuid gpt_uuid_apple_raid = GPT_ENT_TYPE_APPLE_RAID; static struct uuid gpt_uuid_apple_raid_offline = GPT_ENT_TYPE_APPLE_RAID_OFFLINE; static struct uuid gpt_uuid_apple_tv_recovery = GPT_ENT_TYPE_APPLE_TV_RECOVERY; static struct uuid gpt_uuid_apple_ufs = GPT_ENT_TYPE_APPLE_UFS; static struct uuid gpt_uuid_apple_zfs = GPT_ENT_TYPE_APPLE_ZFS; static struct uuid gpt_uuid_bios_boot = GPT_ENT_TYPE_BIOS_BOOT; static struct uuid gpt_uuid_chromeos_firmware = GPT_ENT_TYPE_CHROMEOS_FIRMWARE; static struct uuid gpt_uuid_chromeos_kernel = GPT_ENT_TYPE_CHROMEOS_KERNEL; static struct uuid gpt_uuid_chromeos_reserved = GPT_ENT_TYPE_CHROMEOS_RESERVED; static struct uuid gpt_uuid_chromeos_root = GPT_ENT_TYPE_CHROMEOS_ROOT; static struct uuid gpt_uuid_dfbsd_ccd = GPT_ENT_TYPE_DRAGONFLY_CCD; 
/*
 * Well-known GPT partition type GUIDs (continued from above).  Each is a
 * statically initialized struct uuid expanded from its GPT_ENT_TYPE_*
 * macro.  They are referenced by address from the gpt_uuid_alias_match
 * table below and compared with EQUUID() in gpt_map_type() and
 * gpt_parse_type().
 */
static struct uuid gpt_uuid_dfbsd_hammer = GPT_ENT_TYPE_DRAGONFLY_HAMMER; static struct uuid gpt_uuid_dfbsd_hammer2 = GPT_ENT_TYPE_DRAGONFLY_HAMMER2; static struct uuid gpt_uuid_dfbsd_label32 = GPT_ENT_TYPE_DRAGONFLY_LABEL32; static struct uuid gpt_uuid_dfbsd_label64 = GPT_ENT_TYPE_DRAGONFLY_LABEL64; static struct uuid gpt_uuid_dfbsd_legacy = GPT_ENT_TYPE_DRAGONFLY_LEGACY; static struct uuid gpt_uuid_dfbsd_swap = GPT_ENT_TYPE_DRAGONFLY_SWAP; static struct uuid gpt_uuid_dfbsd_ufs1 = GPT_ENT_TYPE_DRAGONFLY_UFS1; static struct uuid gpt_uuid_dfbsd_vinum = GPT_ENT_TYPE_DRAGONFLY_VINUM; static struct uuid gpt_uuid_efi = GPT_ENT_TYPE_EFI; static struct uuid gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD; static struct uuid gpt_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT; static struct uuid gpt_uuid_freebsd_nandfs = GPT_ENT_TYPE_FREEBSD_NANDFS; static struct uuid gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP; static struct uuid gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS; static struct uuid gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM; static struct uuid gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS; static struct uuid gpt_uuid_hifive_fsbl = GPT_ENT_TYPE_HIFIVE_FSBL; static struct uuid gpt_uuid_hifive_bbl = GPT_ENT_TYPE_HIFIVE_BBL; static struct uuid gpt_uuid_linux_data = GPT_ENT_TYPE_LINUX_DATA; static struct uuid gpt_uuid_linux_lvm = GPT_ENT_TYPE_LINUX_LVM; static struct uuid gpt_uuid_linux_raid = GPT_ENT_TYPE_LINUX_RAID; static struct uuid gpt_uuid_linux_swap = GPT_ENT_TYPE_LINUX_SWAP; static struct uuid gpt_uuid_mbr = GPT_ENT_TYPE_MBR; static struct uuid gpt_uuid_ms_basic_data = GPT_ENT_TYPE_MS_BASIC_DATA; static struct uuid gpt_uuid_ms_ldm_data = GPT_ENT_TYPE_MS_LDM_DATA; static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA; static struct uuid gpt_uuid_ms_recovery = GPT_ENT_TYPE_MS_RECOVERY; static struct uuid gpt_uuid_ms_reserved = GPT_ENT_TYPE_MS_RESERVED; static struct uuid gpt_uuid_ms_spaces = GPT_ENT_TYPE_MS_SPACES; static struct
uuid gpt_uuid_netbsd_ccd = GPT_ENT_TYPE_NETBSD_CCD; static struct uuid gpt_uuid_netbsd_cgd = GPT_ENT_TYPE_NETBSD_CGD; static struct uuid gpt_uuid_netbsd_ffs = GPT_ENT_TYPE_NETBSD_FFS; static struct uuid gpt_uuid_netbsd_lfs = GPT_ENT_TYPE_NETBSD_LFS; static struct uuid gpt_uuid_netbsd_raid = GPT_ENT_TYPE_NETBSD_RAID; static struct uuid gpt_uuid_netbsd_swap = GPT_ENT_TYPE_NETBSD_SWAP; static struct uuid gpt_uuid_openbsd_data = GPT_ENT_TYPE_OPENBSD_DATA; static struct uuid gpt_uuid_prep_boot = GPT_ENT_TYPE_PREP_BOOT; static struct uuid gpt_uuid_solaris_boot = GPT_ENT_TYPE_SOLARIS_BOOT; static struct uuid gpt_uuid_solaris_root = GPT_ENT_TYPE_SOLARIS_ROOT; static struct uuid gpt_uuid_solaris_swap = GPT_ENT_TYPE_SOLARIS_SWAP; static struct uuid gpt_uuid_solaris_backup = GPT_ENT_TYPE_SOLARIS_BACKUP; static struct uuid gpt_uuid_solaris_var = GPT_ENT_TYPE_SOLARIS_VAR; static struct uuid gpt_uuid_solaris_home = GPT_ENT_TYPE_SOLARIS_HOME; static struct uuid gpt_uuid_solaris_altsec = GPT_ENT_TYPE_SOLARIS_ALTSEC; static struct uuid gpt_uuid_solaris_reserved = GPT_ENT_TYPE_SOLARIS_RESERVED; static struct uuid gpt_uuid_unused = GPT_ENT_TYPE_UNUSED; static struct uuid gpt_uuid_vmfs = GPT_ENT_TYPE_VMFS; static struct uuid gpt_uuid_vmkdiag = GPT_ENT_TYPE_VMKDIAG; static struct uuid gpt_uuid_vmreserved = GPT_ENT_TYPE_VMRESERVED; static struct uuid gpt_uuid_vmvsanhdr = GPT_ENT_TYPE_VMVSANHDR;
/*
 * Map each GPT type GUID to its g_part alias identifier and, where one
 * exists, the legacy MBR partition type byte (mbrtype; 0 means "no MBR
 * equivalent").  gpt_parse_type() matches user-supplied alias names
 * against this table, and gpt_map_type() uses mbrtype when synthesizing
 * Boot Camp MBR slices in gpt_update_bootcamp().  The table is
 * NULL-terminated.
 */
static struct g_part_uuid_alias { struct uuid *uuid; int alias; int mbrtype; } gpt_uuid_alias_match[] = { { &gpt_uuid_apple_apfs, G_PART_ALIAS_APPLE_APFS, 0 }, { &gpt_uuid_apple_boot, G_PART_ALIAS_APPLE_BOOT, 0xab }, { &gpt_uuid_apple_core_storage, G_PART_ALIAS_APPLE_CORE_STORAGE, 0 }, { &gpt_uuid_apple_hfs, G_PART_ALIAS_APPLE_HFS, 0xaf }, { &gpt_uuid_apple_label, G_PART_ALIAS_APPLE_LABEL, 0 }, { &gpt_uuid_apple_raid, G_PART_ALIAS_APPLE_RAID, 0 }, { &gpt_uuid_apple_raid_offline, G_PART_ALIAS_APPLE_RAID_OFFLINE, 0 }, { &gpt_uuid_apple_tv_recovery, G_PART_ALIAS_APPLE_TV_RECOVERY, 0 }, {
&gpt_uuid_apple_ufs, G_PART_ALIAS_APPLE_UFS, 0 }, { &gpt_uuid_apple_zfs, G_PART_ALIAS_APPLE_ZFS, 0 }, { &gpt_uuid_bios_boot, G_PART_ALIAS_BIOS_BOOT, 0 }, { &gpt_uuid_chromeos_firmware, G_PART_ALIAS_CHROMEOS_FIRMWARE, 0 }, { &gpt_uuid_chromeos_kernel, G_PART_ALIAS_CHROMEOS_KERNEL, 0 }, { &gpt_uuid_chromeos_reserved, G_PART_ALIAS_CHROMEOS_RESERVED, 0 }, { &gpt_uuid_chromeos_root, G_PART_ALIAS_CHROMEOS_ROOT, 0 }, { &gpt_uuid_dfbsd_ccd, G_PART_ALIAS_DFBSD_CCD, 0 }, { &gpt_uuid_dfbsd_hammer, G_PART_ALIAS_DFBSD_HAMMER, 0 }, { &gpt_uuid_dfbsd_hammer2, G_PART_ALIAS_DFBSD_HAMMER2, 0 }, { &gpt_uuid_dfbsd_label32, G_PART_ALIAS_DFBSD, 0xa5 }, { &gpt_uuid_dfbsd_label64, G_PART_ALIAS_DFBSD64, 0xa5 }, { &gpt_uuid_dfbsd_legacy, G_PART_ALIAS_DFBSD_LEGACY, 0 }, { &gpt_uuid_dfbsd_swap, G_PART_ALIAS_DFBSD_SWAP, 0 }, { &gpt_uuid_dfbsd_ufs1, G_PART_ALIAS_DFBSD_UFS, 0 }, { &gpt_uuid_dfbsd_vinum, G_PART_ALIAS_DFBSD_VINUM, 0 }, { &gpt_uuid_efi, G_PART_ALIAS_EFI, 0xee }, { &gpt_uuid_freebsd, G_PART_ALIAS_FREEBSD, 0xa5 }, { &gpt_uuid_freebsd_boot, G_PART_ALIAS_FREEBSD_BOOT, 0 }, { &gpt_uuid_freebsd_nandfs, G_PART_ALIAS_FREEBSD_NANDFS, 0 }, { &gpt_uuid_freebsd_swap, G_PART_ALIAS_FREEBSD_SWAP, 0 }, { &gpt_uuid_freebsd_ufs, G_PART_ALIAS_FREEBSD_UFS, 0 }, { &gpt_uuid_freebsd_vinum, G_PART_ALIAS_FREEBSD_VINUM, 0 }, { &gpt_uuid_freebsd_zfs, G_PART_ALIAS_FREEBSD_ZFS, 0 }, { &gpt_uuid_hifive_fsbl, G_PART_ALIAS_HIFIVE_FSBL, 0 }, { &gpt_uuid_hifive_bbl, G_PART_ALIAS_HIFIVE_BBL, 0 }, { &gpt_uuid_linux_data, G_PART_ALIAS_LINUX_DATA, 0x0b }, { &gpt_uuid_linux_lvm, G_PART_ALIAS_LINUX_LVM, 0 }, { &gpt_uuid_linux_raid, G_PART_ALIAS_LINUX_RAID, 0 }, { &gpt_uuid_linux_swap, G_PART_ALIAS_LINUX_SWAP, 0 }, { &gpt_uuid_mbr, G_PART_ALIAS_MBR, 0 }, { &gpt_uuid_ms_basic_data, G_PART_ALIAS_MS_BASIC_DATA, 0x0b }, { &gpt_uuid_ms_ldm_data, G_PART_ALIAS_MS_LDM_DATA, 0 }, { &gpt_uuid_ms_ldm_metadata, G_PART_ALIAS_MS_LDM_METADATA, 0 }, { &gpt_uuid_ms_recovery, G_PART_ALIAS_MS_RECOVERY, 0 }, { &gpt_uuid_ms_reserved, 
G_PART_ALIAS_MS_RESERVED, 0 }, { &gpt_uuid_ms_spaces, G_PART_ALIAS_MS_SPACES, 0 }, { &gpt_uuid_netbsd_ccd, G_PART_ALIAS_NETBSD_CCD, 0 }, { &gpt_uuid_netbsd_cgd, G_PART_ALIAS_NETBSD_CGD, 0 }, { &gpt_uuid_netbsd_ffs, G_PART_ALIAS_NETBSD_FFS, 0 }, { &gpt_uuid_netbsd_lfs, G_PART_ALIAS_NETBSD_LFS, 0 }, { &gpt_uuid_netbsd_raid, G_PART_ALIAS_NETBSD_RAID, 0 }, { &gpt_uuid_netbsd_swap, G_PART_ALIAS_NETBSD_SWAP, 0 }, { &gpt_uuid_openbsd_data, G_PART_ALIAS_OPENBSD_DATA, 0 }, { &gpt_uuid_prep_boot, G_PART_ALIAS_PREP_BOOT, 0x41 }, { &gpt_uuid_solaris_boot, G_PART_ALIAS_SOLARIS_BOOT, 0 }, { &gpt_uuid_solaris_root, G_PART_ALIAS_SOLARIS_ROOT, 0 }, { &gpt_uuid_solaris_swap, G_PART_ALIAS_SOLARIS_SWAP, 0 }, { &gpt_uuid_solaris_backup, G_PART_ALIAS_SOLARIS_BACKUP, 0 }, { &gpt_uuid_solaris_var, G_PART_ALIAS_SOLARIS_VAR, 0 }, { &gpt_uuid_solaris_home, G_PART_ALIAS_SOLARIS_HOME, 0 }, { &gpt_uuid_solaris_altsec, G_PART_ALIAS_SOLARIS_ALTSEC, 0 }, { &gpt_uuid_solaris_reserved, G_PART_ALIAS_SOLARIS_RESERVED, 0 }, { &gpt_uuid_vmfs, G_PART_ALIAS_VMFS, 0 }, { &gpt_uuid_vmkdiag, G_PART_ALIAS_VMKDIAG, 0 }, { &gpt_uuid_vmreserved, G_PART_ALIAS_VMRESERVED, 0 }, { &gpt_uuid_vmvsanhdr, G_PART_ALIAS_VMVSANHDR, 0 }, { NULL, 0, 0 } }; static int gpt_write_mbr_entry(u_char *mbr, int idx, int typ, quad_t start, quad_t end) { if (typ == 0 || start > UINT32_MAX || end > UINT32_MAX) return (EINVAL); mbr += DOSPARTOFF + idx * DOSPARTSIZE; mbr[0] = 0; if (start == 1) { /* * Treat the PMBR partition specially to maximize * interoperability with BIOSes. 
*/ mbr[1] = mbr[3] = 0; mbr[2] = 2; } else mbr[1] = mbr[2] = mbr[3] = 0xff; mbr[4] = typ; mbr[5] = mbr[6] = mbr[7] = 0xff; le32enc(mbr + 8, (uint32_t)start); le32enc(mbr + 12, (uint32_t)(end - start + 1)); return (0); } static int gpt_map_type(struct uuid *t) { struct g_part_uuid_alias *uap; for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) { if (EQUUID(t, uap->uuid)) return (uap->mbrtype); } return (0); } static void gpt_create_pmbr(struct g_part_gpt_table *table, struct g_provider *pp) { bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART); gpt_write_mbr_entry(table->mbr, 0, 0xee, 1, MIN(pp->mediasize / pp->sectorsize - 1, UINT32_MAX)); le16enc(table->mbr + DOSMAGICOFFSET, DOSMAGIC); } /* * Under Boot Camp the PMBR partition (type 0xEE) doesn't cover the * whole disk anymore. Rather, it covers the GPT table and the EFI * system partition only. This way the HFS+ partition and any FAT * partitions can be added to the MBR without creating an overlap. */ static int gpt_is_bootcamp(struct g_part_gpt_table *table, const char *provname) { uint8_t *p; p = table->mbr + DOSPARTOFF; if (p[4] != 0xee || le32dec(p + 8) != 1) return (0); p += DOSPARTSIZE; if (p[4] != 0xaf) return (0); printf("GEOM: %s: enabling Boot Camp\n", provname); return (1); } static void gpt_update_bootcamp(struct g_part_table *basetable, struct g_provider *pp) { struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; int bootable, error, index, slices, typ; table = (struct g_part_gpt_table *)basetable; bootable = -1; for (index = 0; index < NDOSPART; index++) { if (table->mbr[DOSPARTOFF + DOSPARTSIZE * index]) bootable = index; } bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART); slices = 0; LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; index = baseentry->gpe_index - 1; if (index >= NDOSPART) continue; entry = (struct g_part_gpt_entry *)baseentry; switch (index) { case 0: /* This must be the EFI 
system partition. */ if (!EQUUID(&entry->ent.ent_type, &gpt_uuid_efi)) goto disable; error = gpt_write_mbr_entry(table->mbr, index, 0xee, 1ull, entry->ent.ent_lba_end); break; case 1: /* This must be the HFS+ partition. */ if (!EQUUID(&entry->ent.ent_type, &gpt_uuid_apple_hfs)) goto disable; error = gpt_write_mbr_entry(table->mbr, index, 0xaf, entry->ent.ent_lba_start, entry->ent.ent_lba_end); break; default: typ = gpt_map_type(&entry->ent.ent_type); error = gpt_write_mbr_entry(table->mbr, index, typ, entry->ent.ent_lba_start, entry->ent.ent_lba_end); break; } if (error) continue; if (index == bootable) table->mbr[DOSPARTOFF + DOSPARTSIZE * index] = 0x80; slices |= 1 << index; } if ((slices & 3) == 3) return; disable: table->bootcamp = 0; gpt_create_pmbr(table, pp); } static struct gpt_hdr * gpt_read_hdr(struct g_part_gpt_table *table, struct g_consumer *cp, enum gpt_elt elt) { struct gpt_hdr *buf, *hdr; struct g_provider *pp; quad_t lba, last; int error; uint32_t crc, sz; pp = cp->provider; last = (pp->mediasize / pp->sectorsize) - 1; table->state[elt] = GPT_STATE_MISSING; /* * If the primary header is valid look for secondary * header in AlternateLBA, otherwise in the last medium's LBA. 
*/ if (elt == GPT_ELT_SECHDR) { if (table->state[GPT_ELT_PRIHDR] != GPT_STATE_OK) table->lba[elt] = last; } else table->lba[elt] = 1; buf = g_read_data(cp, table->lba[elt] * pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) return (NULL); hdr = NULL; if (memcmp(buf->hdr_sig, GPT_HDR_SIG, sizeof(buf->hdr_sig)) != 0) goto fail; table->state[elt] = GPT_STATE_CORRUPT; sz = le32toh(buf->hdr_size); if (sz < 92 || sz > pp->sectorsize) goto fail; hdr = g_malloc(sz, M_WAITOK | M_ZERO); bcopy(buf, hdr, sz); hdr->hdr_size = sz; crc = le32toh(buf->hdr_crc_self); buf->hdr_crc_self = 0; if (crc32(buf, sz) != crc) goto fail; hdr->hdr_crc_self = crc; table->state[elt] = GPT_STATE_INVALID; hdr->hdr_revision = le32toh(buf->hdr_revision); if (hdr->hdr_revision < GPT_HDR_REVISION) goto fail; hdr->hdr_lba_self = le64toh(buf->hdr_lba_self); if (hdr->hdr_lba_self != table->lba[elt]) goto fail; hdr->hdr_lba_alt = le64toh(buf->hdr_lba_alt); if (hdr->hdr_lba_alt == hdr->hdr_lba_self) goto fail; if (hdr->hdr_lba_alt > last && geom_part_check_integrity) goto fail; /* Check the managed area. */ hdr->hdr_lba_start = le64toh(buf->hdr_lba_start); if (hdr->hdr_lba_start < 2 || hdr->hdr_lba_start >= last) goto fail; hdr->hdr_lba_end = le64toh(buf->hdr_lba_end); if (hdr->hdr_lba_end < hdr->hdr_lba_start || hdr->hdr_lba_end >= last) goto fail; /* Check the table location and size of the table. 
*/ hdr->hdr_entries = le32toh(buf->hdr_entries); hdr->hdr_entsz = le32toh(buf->hdr_entsz); if (hdr->hdr_entries == 0 || hdr->hdr_entsz < 128 || (hdr->hdr_entsz & 7) != 0) goto fail; hdr->hdr_lba_table = le64toh(buf->hdr_lba_table); if (hdr->hdr_lba_table < 2 || hdr->hdr_lba_table >= last) goto fail; if (hdr->hdr_lba_table >= hdr->hdr_lba_start && hdr->hdr_lba_table <= hdr->hdr_lba_end) goto fail; lba = hdr->hdr_lba_table + howmany((uint64_t)hdr->hdr_entries * hdr->hdr_entsz, pp->sectorsize) - 1; if (lba >= last) goto fail; if (lba >= hdr->hdr_lba_start && lba <= hdr->hdr_lba_end) goto fail; table->state[elt] = GPT_STATE_OK; le_uuid_dec(&buf->hdr_uuid, &hdr->hdr_uuid); hdr->hdr_crc_table = le32toh(buf->hdr_crc_table); /* save LBA for secondary header */ if (elt == GPT_ELT_PRIHDR) table->lba[GPT_ELT_SECHDR] = hdr->hdr_lba_alt; g_free(buf); return (hdr); fail: - if (hdr != NULL) - g_free(hdr); + g_free(hdr); g_free(buf); return (NULL); } static struct gpt_ent * gpt_read_tbl(struct g_part_gpt_table *table, struct g_consumer *cp, enum gpt_elt elt, struct gpt_hdr *hdr) { struct g_provider *pp; struct gpt_ent *ent, *tbl; char *buf, *p; unsigned int idx, sectors, tblsz, size; int error; if (hdr == NULL) return (NULL); if (hdr->hdr_entries > g_part_gpt_scheme.gps_maxent || hdr->hdr_entsz > MAXENTSIZE) { table->state[elt] = GPT_STATE_UNSUPPORTED; return (NULL); } pp = cp->provider; table->lba[elt] = hdr->hdr_lba_table; table->state[elt] = GPT_STATE_MISSING; tblsz = hdr->hdr_entries * hdr->hdr_entsz; sectors = howmany(tblsz, pp->sectorsize); buf = g_malloc(sectors * pp->sectorsize, M_WAITOK | M_ZERO); for (idx = 0; idx < sectors; idx += maxphys / pp->sectorsize) { size = (sectors - idx > maxphys / pp->sectorsize) ? 
maxphys: (sectors - idx) * pp->sectorsize; p = g_read_data(cp, (table->lba[elt] + idx) * pp->sectorsize, size, &error); if (p == NULL) { g_free(buf); return (NULL); } bcopy(p, buf + idx * pp->sectorsize, size); g_free(p); } table->state[elt] = GPT_STATE_CORRUPT; if (crc32(buf, tblsz) != hdr->hdr_crc_table) { g_free(buf); return (NULL); } table->state[elt] = GPT_STATE_OK; tbl = g_malloc(hdr->hdr_entries * sizeof(struct gpt_ent), M_WAITOK | M_ZERO); for (idx = 0, ent = tbl, p = buf; idx < hdr->hdr_entries; idx++, ent++, p += hdr->hdr_entsz) { le_uuid_dec(p, &ent->ent_type); le_uuid_dec(p + 16, &ent->ent_uuid); ent->ent_lba_start = le64dec(p + 32); ent->ent_lba_end = le64dec(p + 40); ent->ent_attr = le64dec(p + 48); /* Keep UTF-16 in little-endian. */ bcopy(p + 56, ent->ent_name, sizeof(ent->ent_name)); } g_free(buf); return (tbl); } static int gpt_matched_hdrs(struct gpt_hdr *pri, struct gpt_hdr *sec) { if (pri == NULL || sec == NULL) return (0); if (!EQUUID(&pri->hdr_uuid, &sec->hdr_uuid)) return (0); return ((pri->hdr_revision == sec->hdr_revision && pri->hdr_size == sec->hdr_size && pri->hdr_lba_start == sec->hdr_lba_start && pri->hdr_lba_end == sec->hdr_lba_end && pri->hdr_entries == sec->hdr_entries && pri->hdr_entsz == sec->hdr_entsz && pri->hdr_crc_table == sec->hdr_crc_table) ? 
1 : 0); } static int gpt_parse_type(const char *type, struct uuid *uuid) { struct uuid tmp; const char *alias; int error; struct g_part_uuid_alias *uap; if (type[0] == '!') { error = parse_uuid(type + 1, &tmp); if (error) return (error); if (EQUUID(&tmp, &gpt_uuid_unused)) return (EINVAL); *uuid = tmp; return (0); } for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) { alias = g_part_alias_name(uap->alias); if (!strcasecmp(type, alias)) { *uuid = *uap->uuid; return (0); } } return (EINVAL); } static int g_part_gpt_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_gpt_entry *entry; int error; entry = (struct g_part_gpt_entry *)baseentry; error = gpt_parse_type(gpp->gpp_type, &entry->ent.ent_type); if (error) return (error); kern_uuidgen(&entry->ent.ent_uuid, 1); entry->ent.ent_lba_start = baseentry->gpe_start; entry->ent.ent_lba_end = baseentry->gpe_end; if (baseentry->gpe_deleted) { entry->ent.ent_attr = 0; bzero(entry->ent.ent_name, sizeof(entry->ent.ent_name)); } if (gpp->gpp_parms & G_PART_PARM_LABEL) g_gpt_utf8_to_utf16(gpp->gpp_label, entry->ent.ent_name, sizeof(entry->ent.ent_name) / sizeof(entry->ent.ent_name[0])); return (0); } static int g_part_gpt_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_gpt_table *table; size_t codesz; codesz = DOSPARTOFF; table = (struct g_part_gpt_table *)basetable; bzero(table->mbr, codesz); codesz = MIN(codesz, gpp->gpp_codesize); if (codesz > 0) bcopy(gpp->gpp_codeptr, table->mbr, codesz); return (0); } static int g_part_gpt_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_gpt_table *table; size_t tblsz; /* Our depth should be 0 unless nesting was explicitly enabled. 
*/ if (!allow_nesting && basetable->gpt_depth != 0) return (ENXIO); table = (struct g_part_gpt_table *)basetable; pp = gpp->gpp_provider; tblsz = howmany(basetable->gpt_entries * sizeof(struct gpt_ent), pp->sectorsize); if (pp->sectorsize < MBRSIZE || pp->mediasize < (3 + 2 * tblsz + basetable->gpt_entries) * pp->sectorsize) return (ENOSPC); gpt_create_pmbr(table, pp); /* Allocate space for the header */ table->hdr = g_malloc(sizeof(struct gpt_hdr), M_WAITOK | M_ZERO); bcopy(GPT_HDR_SIG, table->hdr->hdr_sig, sizeof(table->hdr->hdr_sig)); table->hdr->hdr_revision = GPT_HDR_REVISION; table->hdr->hdr_size = offsetof(struct gpt_hdr, padding); kern_uuidgen(&table->hdr->hdr_uuid, 1); table->hdr->hdr_entries = basetable->gpt_entries; table->hdr->hdr_entsz = sizeof(struct gpt_ent); g_gpt_set_defaults(basetable, pp); return (0); } static int g_part_gpt_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_gpt_table *table; struct g_provider *pp; table = (struct g_part_gpt_table *)basetable; pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; g_free(table->hdr); table->hdr = NULL; /* * Wipe the first 2 sectors and last one to clear the partitioning. * Wipe sectors only if they have valid metadata. 
*/ if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK) basetable->gpt_smhead |= 3; if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK && table->lba[GPT_ELT_SECHDR] == pp->mediasize / pp->sectorsize - 1) basetable->gpt_smtail |= 1; return (0); } static void g_part_gpt_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_gpt_entry *entry; entry = (struct g_part_gpt_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_cat(sb, " xs GPT xt "); sbuf_printf_uuid(sb, &entry->ent.ent_type); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s\n"); if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTME) sbuf_printf(sb, "%sbootme\n", indent); if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTONCE) { sbuf_printf(sb, "%sbootonce\n", indent); } if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTFAILED) { sbuf_printf(sb, "%sbootfailed\n", indent); } sbuf_printf(sb, "%s", indent); sbuf_printf_uuid(sb, &entry->ent.ent_type); sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s", indent); sbuf_printf_uuid(sb, &entry->ent.ent_uuid); sbuf_cat(sb, "\n"); sbuf_printf(sb, "%s", indent); sbuf_printf(sb, "HD(%d,GPT,", entry->base.gpe_index); sbuf_printf_uuid(sb, &entry->ent.ent_uuid); sbuf_printf(sb, ",%#jx,%#jx)", (intmax_t)entry->base.gpe_start, (intmax_t)(entry->base.gpe_end - entry->base.gpe_start + 1)); sbuf_cat(sb, "\n"); } else { /* confxml: scheme information */ } } static int g_part_gpt_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_gpt_entry *entry; entry = (struct g_part_gpt_entry *)baseentry; return ((EQUUID(&entry->ent.ent_type, &gpt_uuid_freebsd_swap) || EQUUID(&entry->ent.ent_type, &gpt_uuid_linux_swap) || EQUUID(&entry->ent.ent_type, &gpt_uuid_dfbsd_swap)) ? 
	    1 : 0);
}

/*
 * Modify a partition entry: change its type UUID and/or its label,
 * according to which parameters were supplied in gpp_parms.
 */
static int
g_part_gpt_modify(struct g_part_table *basetable,
    struct g_part_entry *baseentry, struct g_part_parms *gpp)
{
	struct g_part_gpt_entry *entry;
	int error;

	entry = (struct g_part_gpt_entry *)baseentry;
	if (gpp->gpp_parms & G_PART_PARM_TYPE) {
		error = gpt_parse_type(gpp->gpp_type, &entry->ent.ent_type);
		if (error)
			return (error);
	}
	if (gpp->gpp_parms & G_PART_PARM_LABEL)
		/* On-disk labels are UTF-16; convert the UTF-8 input. */
		g_gpt_utf8_to_utf16(gpp->gpp_label, entry->ent.ent_name,
		    sizeof(entry->ent.ent_name) /
		    sizeof(entry->ent.ent_name[0]));
	return (0);
}

/*
 * Resize a partition entry in place. A NULL entry means "resize the
 * table itself" (provider size changed), which is handled by recover.
 */
static int
g_part_gpt_resize(struct g_part_table *basetable,
    struct g_part_entry *baseentry, struct g_part_parms *gpp)
{
	struct g_part_gpt_entry *entry;

	if (baseentry == NULL)
		return (g_part_gpt_recover(basetable));

	entry = (struct g_part_gpt_entry *)baseentry;
	/* Keep the generic entry and the GPT on-disk entry in sync. */
	baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1;
	entry->ent.ent_lba_end = baseentry->gpe_end;

	return (0);
}

/*
 * Format the device-name suffix for an entry into buf: 's<index>' for
 * partitions with the FreeBSD type UUID (slice-style naming), 'p<index>'
 * for everything else.
 */
static const char *
g_part_gpt_name(struct g_part_table *table, struct g_part_entry *baseentry,
    char *buf, size_t bufsz)
{
	struct g_part_gpt_entry *entry;
	char c;

	entry = (struct g_part_gpt_entry *)baseentry;
	c = (EQUUID(&entry->ent.ent_type, &gpt_uuid_freebsd)) ? 's' : 'p';
	snprintf(buf, bufsz, "%c%d", c, baseentry->gpe_index);
	return (buf);
}

/*
 * Taste the provider for a GPT. Returns a probe priority on success
 * or an errno-style value when no GPT can be present.
 */
static int
g_part_gpt_probe(struct g_part_table *table, struct g_consumer *cp)
{
	struct g_provider *pp;
	u_char *buf;
	int error, index, pri, res;

	/* Our depth should be 0 unless nesting was explicitly enabled. */
	if (!allow_nesting && table->gpt_depth != 0)
		return (ENXIO);

	pp = cp->provider;

	/*
	 * Sanity-check the provider. Since the first sector on the provider
	 * must be a PMBR and a PMBR is 512 bytes large, the sector size
	 * must be at least 512 bytes. Also, since the theoretical minimum
	 * number of sectors needed by GPT is 6, any medium that has less
	 * than 6 sectors is never going to be able to hold a GPT.
 The
	 * number 6 comes from:
	 *	1 sector for the PMBR
	 *	2 sectors for the GPT headers (each 1 sector)
	 *	2 sectors for the GPT tables (each 1 sector)
	 *	1 sector for an actual partition
	 * It's better to catch this pathological case early than behaving
	 * pathologically later on...
	 */
	if (pp->sectorsize < MBRSIZE || pp->mediasize < 6 * pp->sectorsize)
		return (ENOSPC);

	/*
	 * Check that there's a MBR or a PMBR. If it's a PMBR, we return
	 * as the highest priority on a match, otherwise we assume some
	 * GPT-unaware tool has destroyed the GPT by recreating a MBR and
	 * we really want the MBR scheme to take precedence.
	 */
	buf = g_read_data(cp, 0L, pp->sectorsize, &error);
	if (buf == NULL)
		return (error);
	res = le16dec(buf + DOSMAGICOFFSET);
	pri = G_PART_PROBE_PRI_LOW;
	if (res == DOSMAGIC) {
		/* 0xee in the type field of any slot marks a protective MBR. */
		for (index = 0; index < NDOSPART; index++) {
			if (buf[DOSPARTOFF + DOSPARTSIZE * index + 4] == 0xee)
				pri = G_PART_PROBE_PRI_HIGH;
		}
		g_free(buf);

		/* Check that there's a primary header. */
		buf = g_read_data(cp, pp->sectorsize, pp->sectorsize, &error);
		if (buf == NULL)
			return (error);
		res = memcmp(buf, GPT_HDR_SIG, 8);
		g_free(buf);
		if (res == 0)
			return (pri);
	} else
		g_free(buf);

	/* No primary? Check that there's a secondary. */
	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
	    &error);
	if (buf == NULL)
		return (error);
	res = memcmp(buf, GPT_HDR_SIG, 8);
	g_free(buf);
	return ((res == 0) ? pri : ENXIO);
}

/*
 * Read and validate the GPT metadata (PMBR, both headers and both
 * tables) from the provider and populate the in-core table.
 */
static int
g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp)
{
	struct gpt_hdr *prihdr, *sechdr;
	struct gpt_ent *tbl, *pritbl, *sectbl;
	struct g_provider *pp;
	struct g_part_gpt_table *table;
	struct g_part_gpt_entry *entry;
	u_char *buf;
	uint64_t last;
	int error, index;

	table = (struct g_part_gpt_table *)basetable;
	pp = cp->provider;
	/* LBA of the provider's last sector (secondary header home). */
	last = (pp->mediasize / pp->sectorsize) - 1;

	/* Read the PMBR */
	buf = g_read_data(cp, 0, pp->sectorsize, &error);
	if (buf == NULL)
		return (error);
	bcopy(buf, table->mbr, MBRSIZE);
	g_free(buf);

	/* Read the primary header and table.
*/ prihdr = gpt_read_hdr(table, cp, GPT_ELT_PRIHDR); if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK) { pritbl = gpt_read_tbl(table, cp, GPT_ELT_PRITBL, prihdr); } else { table->state[GPT_ELT_PRITBL] = GPT_STATE_MISSING; pritbl = NULL; } /* Read the secondary header and table. */ sechdr = gpt_read_hdr(table, cp, GPT_ELT_SECHDR); if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK) { sectbl = gpt_read_tbl(table, cp, GPT_ELT_SECTBL, sechdr); } else { table->state[GPT_ELT_SECTBL] = GPT_STATE_MISSING; sectbl = NULL; } /* Fail if we haven't got any good tables at all. */ if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK && table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) { if (table->state[GPT_ELT_PRITBL] == GPT_STATE_UNSUPPORTED && table->state[GPT_ELT_SECTBL] == GPT_STATE_UNSUPPORTED && gpt_matched_hdrs(prihdr, sechdr)) { printf("GEOM: %s: unsupported GPT detected.\n", pp->name); printf( "GEOM: %s: number of GPT entries: %u, entry size: %uB.\n", pp->name, prihdr->hdr_entries, prihdr->hdr_entsz); printf( "GEOM: %s: maximum supported number of GPT entries: %u, entry size: %uB.\n", pp->name, g_part_gpt_scheme.gps_maxent, MAXENTSIZE); printf("GEOM: %s: GPT rejected.\n", pp->name); } else { printf("GEOM: %s: corrupt or invalid GPT detected.\n", pp->name); printf( "GEOM: %s: GPT rejected -- may not be recoverable.\n", pp->name); } - if (prihdr != NULL) - g_free(prihdr); - if (pritbl != NULL) - g_free(pritbl); - if (sechdr != NULL) - g_free(sechdr); - if (sectbl != NULL) - g_free(sectbl); + g_free(prihdr); + g_free(pritbl); + g_free(sechdr); + g_free(sectbl); return (EINVAL); } /* * If both headers are good but they disagree with each other, * then invalidate one. We prefer to keep the primary header, * unless the primary table is corrupt. 
*/ if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK && table->state[GPT_ELT_SECHDR] == GPT_STATE_OK && !gpt_matched_hdrs(prihdr, sechdr)) { if (table->state[GPT_ELT_PRITBL] == GPT_STATE_OK) { table->state[GPT_ELT_SECHDR] = GPT_STATE_INVALID; table->state[GPT_ELT_SECTBL] = GPT_STATE_MISSING; g_free(sechdr); sechdr = NULL; } else { table->state[GPT_ELT_PRIHDR] = GPT_STATE_INVALID; table->state[GPT_ELT_PRITBL] = GPT_STATE_MISSING; g_free(prihdr); prihdr = NULL; } } if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK) { printf("GEOM: %s: the primary GPT table is corrupt or " "invalid.\n", pp->name); printf("GEOM: %s: using the secondary instead -- recovery " "strongly advised.\n", pp->name); table->hdr = sechdr; basetable->gpt_corrupt = 1; - if (prihdr != NULL) - g_free(prihdr); + g_free(prihdr); tbl = sectbl; - if (pritbl != NULL) - g_free(pritbl); + g_free(pritbl); } else { if (table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) { printf("GEOM: %s: the secondary GPT table is corrupt " "or invalid.\n", pp->name); printf("GEOM: %s: using the primary only -- recovery " "suggested.\n", pp->name); basetable->gpt_corrupt = 1; } else if (table->lba[GPT_ELT_SECHDR] != last) { printf( "GEOM: %s: the secondary GPT header is not in " "the last LBA.\n", pp->name); basetable->gpt_corrupt = 1; } table->hdr = prihdr; - if (sechdr != NULL) - g_free(sechdr); + g_free(sechdr); tbl = pritbl; - if (sectbl != NULL) - g_free(sectbl); + g_free(sectbl); } basetable->gpt_first = table->hdr->hdr_lba_start; basetable->gpt_last = table->hdr->hdr_lba_end; basetable->gpt_entries = table->hdr->hdr_entries; for (index = basetable->gpt_entries - 1; index >= 0; index--) { if (EQUUID(&tbl[index].ent_type, &gpt_uuid_unused)) continue; entry = (struct g_part_gpt_entry *)g_part_new_entry( basetable, index + 1, tbl[index].ent_lba_start, tbl[index].ent_lba_end); entry->ent = tbl[index]; } g_free(tbl); /* * Under Mac OS X, the MBR mirrors the first 4 GPT partitions * if (and only if) any FAT32 or FAT16 partitions 
have been * created. This happens irrespective of whether Boot Camp is * used/enabled, though it's generally understood to be done * to support legacy Windows under Boot Camp. We refer to this * mirroring simply as Boot Camp. We try to detect Boot Camp * so that we can update the MBR if and when GPT changes have * been made. Note that we do not enable Boot Camp if not * previously enabled because we can't assume that we're on a * Mac alongside Mac OS X. */ table->bootcamp = gpt_is_bootcamp(table, pp->name); return (0); } static int g_part_gpt_recover(struct g_part_table *basetable) { struct g_part_gpt_table *table; struct g_provider *pp; table = (struct g_part_gpt_table *)basetable; pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; gpt_create_pmbr(table, pp); g_gpt_set_defaults(basetable, pp); basetable->gpt_corrupt = 0; return (0); } static int g_part_gpt_setunset(struct g_part_table *basetable, struct g_part_entry *baseentry, const char *attrib, unsigned int set) { struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; struct g_provider *pp; uint8_t *p; uint64_t attr; int i; table = (struct g_part_gpt_table *)basetable; entry = (struct g_part_gpt_entry *)baseentry; if (strcasecmp(attrib, "active") == 0) { if (table->bootcamp) { /* The active flag must be set on a valid entry. */ if (entry == NULL) return (ENXIO); if (baseentry->gpe_index > NDOSPART) return (EINVAL); for (i = 0; i < NDOSPART; i++) { p = &table->mbr[DOSPARTOFF + i * DOSPARTSIZE]; p[0] = (i == baseentry->gpe_index - 1) ? ((set) ? 0x80 : 0) : 0; } } else { /* The PMBR is marked as active without an entry. */ if (entry != NULL) return (ENXIO); for (i = 0; i < NDOSPART; i++) { p = &table->mbr[DOSPARTOFF + i * DOSPARTSIZE]; p[0] = (p[4] == 0xee) ? ((set) ? 0x80 : 0) : 0; } } return (0); } else if (strcasecmp(attrib, "lenovofix") == 0) { /* * Write the 0xee GPT entry to slot #1 (2nd slot) in the pMBR. 
* This workaround allows Lenovo X220, T420, T520, etc to boot * from GPT Partitions in BIOS mode. */ if (entry != NULL) return (ENXIO); pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART); gpt_write_mbr_entry(table->mbr, ((set) ? 1 : 0), 0xee, 1, MIN(pp->mediasize / pp->sectorsize - 1, UINT32_MAX)); return (0); } if (entry == NULL) return (ENODEV); attr = 0; if (strcasecmp(attrib, "bootme") == 0) { attr |= GPT_ENT_ATTR_BOOTME; } else if (strcasecmp(attrib, "bootonce") == 0) { attr |= GPT_ENT_ATTR_BOOTONCE; if (set) attr |= GPT_ENT_ATTR_BOOTME; } else if (strcasecmp(attrib, "bootfailed") == 0) { /* * It should only be possible to unset BOOTFAILED, but it might * be useful for test purposes to also be able to set it. */ attr |= GPT_ENT_ATTR_BOOTFAILED; } if (attr == 0) return (EINVAL); if (set) attr = entry->ent.ent_attr | attr; else attr = entry->ent.ent_attr & ~attr; if (attr != entry->ent.ent_attr) { entry->ent.ent_attr = attr; if (!baseentry->gpe_created) baseentry->gpe_modified = 1; } return (0); } static const char * g_part_gpt_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_gpt_entry *entry; struct uuid *type; struct g_part_uuid_alias *uap; entry = (struct g_part_gpt_entry *)baseentry; type = &entry->ent.ent_type; for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) if (EQUUID(type, uap->uuid)) return (g_part_alias_name(uap->alias)); buf[0] = '!'; snprintf_uuid(buf + 1, bufsz - 1, type); return (buf); } static int g_part_gpt_write(struct g_part_table *basetable, struct g_consumer *cp) { unsigned char *buf, *bp; struct g_provider *pp; struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; size_t tblsz; uint32_t crc; int error, index; pp = cp->provider; table = (struct g_part_gpt_table *)basetable; tblsz = howmany(table->hdr->hdr_entries * table->hdr->hdr_entsz, pp->sectorsize); /* Reconstruct the 
MBR from the GPT if under Boot Camp. */ if (table->bootcamp) gpt_update_bootcamp(basetable, pp); /* Write the PMBR */ buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); bcopy(table->mbr, buf, MBRSIZE); error = g_write_data(cp, 0, buf, pp->sectorsize); g_free(buf); if (error) return (error); /* Allocate space for the header and entries. */ buf = g_malloc((tblsz + 1) * pp->sectorsize, M_WAITOK | M_ZERO); memcpy(buf, table->hdr->hdr_sig, sizeof(table->hdr->hdr_sig)); le32enc(buf + 8, table->hdr->hdr_revision); le32enc(buf + 12, table->hdr->hdr_size); le64enc(buf + 40, table->hdr->hdr_lba_start); le64enc(buf + 48, table->hdr->hdr_lba_end); le_uuid_enc(buf + 56, &table->hdr->hdr_uuid); le32enc(buf + 80, table->hdr->hdr_entries); le32enc(buf + 84, table->hdr->hdr_entsz); LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; entry = (struct g_part_gpt_entry *)baseentry; index = baseentry->gpe_index - 1; bp = buf + pp->sectorsize + table->hdr->hdr_entsz * index; le_uuid_enc(bp, &entry->ent.ent_type); le_uuid_enc(bp + 16, &entry->ent.ent_uuid); le64enc(bp + 32, entry->ent.ent_lba_start); le64enc(bp + 40, entry->ent.ent_lba_end); le64enc(bp + 48, entry->ent.ent_attr); memcpy(bp + 56, entry->ent.ent_name, sizeof(entry->ent.ent_name)); } crc = crc32(buf + pp->sectorsize, table->hdr->hdr_entries * table->hdr->hdr_entsz); le32enc(buf + 88, crc); /* Write primary meta-data. */ le32enc(buf + 16, 0); /* hdr_crc_self. */ le64enc(buf + 24, table->lba[GPT_ELT_PRIHDR]); /* hdr_lba_self. */ le64enc(buf + 32, table->lba[GPT_ELT_SECHDR]); /* hdr_lba_alt. */ le64enc(buf + 72, table->lba[GPT_ELT_PRITBL]); /* hdr_lba_table. */ crc = crc32(buf, table->hdr->hdr_size); le32enc(buf + 16, crc); for (index = 0; index < tblsz; index += maxphys / pp->sectorsize) { error = g_write_data(cp, (table->lba[GPT_ELT_PRITBL] + index) * pp->sectorsize, buf + (index + 1) * pp->sectorsize, (tblsz - index > maxphys / pp->sectorsize) ? 
maxphys : (tblsz - index) * pp->sectorsize); if (error) goto out; } error = g_write_data(cp, table->lba[GPT_ELT_PRIHDR] * pp->sectorsize, buf, pp->sectorsize); if (error) goto out; /* Write secondary meta-data. */ le32enc(buf + 16, 0); /* hdr_crc_self. */ le64enc(buf + 24, table->lba[GPT_ELT_SECHDR]); /* hdr_lba_self. */ le64enc(buf + 32, table->lba[GPT_ELT_PRIHDR]); /* hdr_lba_alt. */ le64enc(buf + 72, table->lba[GPT_ELT_SECTBL]); /* hdr_lba_table. */ crc = crc32(buf, table->hdr->hdr_size); le32enc(buf + 16, crc); for (index = 0; index < tblsz; index += maxphys / pp->sectorsize) { error = g_write_data(cp, (table->lba[GPT_ELT_SECTBL] + index) * pp->sectorsize, buf + (index + 1) * pp->sectorsize, (tblsz - index > maxphys / pp->sectorsize) ? maxphys : (tblsz - index) * pp->sectorsize); if (error) goto out; } error = g_write_data(cp, table->lba[GPT_ELT_SECHDR] * pp->sectorsize, buf, pp->sectorsize); out: g_free(buf); return (error); } static void g_gpt_set_defaults(struct g_part_table *basetable, struct g_provider *pp) { struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; quad_t start, end, min, max; quad_t lba, last; size_t spb, tblsz; table = (struct g_part_gpt_table *)basetable; last = pp->mediasize / pp->sectorsize - 1; tblsz = howmany(basetable->gpt_entries * sizeof(struct gpt_ent), pp->sectorsize); table->lba[GPT_ELT_PRIHDR] = 1; table->lba[GPT_ELT_PRITBL] = 2; table->lba[GPT_ELT_SECHDR] = last; table->lba[GPT_ELT_SECTBL] = last - tblsz; table->state[GPT_ELT_PRIHDR] = GPT_STATE_OK; table->state[GPT_ELT_PRITBL] = GPT_STATE_OK; table->state[GPT_ELT_SECHDR] = GPT_STATE_OK; table->state[GPT_ELT_SECTBL] = GPT_STATE_OK; max = start = 2 + tblsz; min = end = last - tblsz - 1; LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; entry = (struct g_part_gpt_entry *)baseentry; if (entry->ent.ent_lba_start < min) min = entry->ent.ent_lba_start; if (entry->ent.ent_lba_end > max) max = 
		    entry->ent.ent_lba_end;
	}
	/*
	 * Try to 4K-align the usable area, but only when no existing
	 * entry would fall outside the aligned boundaries.
	 */
	spb = 4096 / pp->sectorsize;
	if (spb > 1) {
		lba = start + ((start % spb) ? spb - start % spb : 0);
		if (lba <= min)
			start = lba;
		lba = end - (end + 1) % spb;
		if (max <= lba)
			end = lba;
	}
	table->hdr->hdr_lba_start = start;
	table->hdr->hdr_lba_end = end;
	basetable->gpt_first = start;
	basetable->gpt_last = end;
}

/*
 * Print a UTF-16 string (e.g. a GPT partition label) into the sbuf as
 * escaped UTF-8, honoring byte-order marks and surrogate pairs.
 * Malformed sequences are emitted as U+FFFD (replacement character).
 */
static void
g_gpt_printf_utf16(struct sbuf *sb, uint16_t *str, size_t len)
{
	u_int bo;
	uint32_t ch;
	uint16_t c;

	bo = LITTLE_ENDIAN;	/* GPT is little-endian */
	while (len > 0 && *str != 0) {
		ch = (bo == BIG_ENDIAN) ? be16toh(*str) : le16toh(*str);
		str++, len--;
		if ((ch & 0xf800) == 0xd800) {
			/* Surrogate: try to combine with the next unit. */
			if (len > 0) {
				c = (bo == BIG_ENDIAN) ? be16toh(*str) :
				    le16toh(*str);
				str++, len--;
			} else
				c = 0xfffd;
			if ((ch & 0x400) == 0 && (c & 0xfc00) == 0xdc00) {
				/* Valid high+low surrogate pair. */
				ch = ((ch & 0x3ff) << 10) + (c & 0x3ff);
				ch += 0x10000;
			} else
				ch = 0xfffd;
		} else if (ch == 0xfffe) { /* BOM (U+FEFF) swapped. */
			bo = (bo == BIG_ENDIAN) ? LITTLE_ENDIAN : BIG_ENDIAN;
			continue;
		} else if (ch == 0xfeff) /* BOM (U+FEFF) unswapped. */
			continue;

		/* Write the Unicode character in UTF-8 */
		if (ch < 0x80)
			g_conf_printf_escaped(sb, "%c", ch);
		else if (ch < 0x800)
			g_conf_printf_escaped(sb, "%c%c", 0xc0 | (ch >> 6),
			    0x80 | (ch & 0x3f));
		else if (ch < 0x10000)
			g_conf_printf_escaped(sb, "%c%c%c", 0xe0 | (ch >> 12),
			    0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f));
		else if (ch < 0x200000)
			g_conf_printf_escaped(sb, "%c%c%c%c", 0xf0 |
			    (ch >> 18), 0x80 | ((ch >> 12) & 0x3f),
			    0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f));
	}
}

/*
 * Convert a NUL-terminated UTF-8 string into a little-endian UTF-16
 * buffer of at most s16len units (the whole buffer is zeroed first).
 * Invalid or truncated input sequences become U+FFFD.
 */
static void
g_gpt_utf8_to_utf16(const uint8_t *s8, uint16_t *s16, size_t s16len)
{
	size_t s16idx, s8idx;
	uint32_t utfchar;
	unsigned int c, utfbytes;

	s8idx = s16idx = 0;
	utfchar = 0;
	utfbytes = 0;
	bzero(s16, s16len << 1);
	while (s8[s8idx] != 0 && s16idx < s16len) {
		c = s8[s8idx++];
		if ((c & 0xc0) != 0x80) {
			/* Initial characters. */
			if (utfbytes != 0) {
				/* Incomplete encoding of previous char.
*/ s16[s16idx++] = htole16(0xfffd); } if ((c & 0xf8) == 0xf0) { utfchar = c & 0x07; utfbytes = 3; } else if ((c & 0xf0) == 0xe0) { utfchar = c & 0x0f; utfbytes = 2; } else if ((c & 0xe0) == 0xc0) { utfchar = c & 0x1f; utfbytes = 1; } else { utfchar = c & 0x7f; utfbytes = 0; } } else { /* Followup characters. */ if (utfbytes > 0) { utfchar = (utfchar << 6) + (c & 0x3f); utfbytes--; } else if (utfbytes == 0) utfbytes = ~0; } /* * Write the complete Unicode character as UTF-16 when we * have all the UTF-8 charactars collected. */ if (utfbytes == 0) { /* * If we need to write 2 UTF-16 characters, but * we only have room for 1, then we truncate the * string by writing a 0 instead. */ if (utfchar >= 0x10000 && s16idx < s16len - 1) { s16[s16idx++] = htole16(0xd800 | ((utfchar >> 10) - 0x40)); s16[s16idx++] = htole16(0xdc00 | (utfchar & 0x3ff)); } else s16[s16idx++] = (utfchar >= 0x10000) ? 0 : htole16(utfchar); } } /* * If our input string was truncated, append an invalid encoding * character to the output string. */ if (utfbytes != 0 && s16idx < s16len) s16[s16idx++] = htole16(0xfffd); } diff --git a/sys/geom/vinum/geom_vinum_create.c b/sys/geom/vinum/geom_vinum_create.c index 036ce82c45e8..42caa112e8fc 100644 --- a/sys/geom/vinum/geom_vinum_create.c +++ b/sys/geom/vinum/geom_vinum_create.c @@ -1,613 +1,612 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #define DEFAULT_STRIPESIZE 262144 /* * Create a new drive object, either by user request, during taste of the drive * itself, or because it was referenced by a subdisk during taste. */ int gv_create_drive(struct gv_softc *sc, struct gv_drive *d) { struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp, *cp2; struct gv_drive *d2; struct gv_hdr *hdr; struct gv_freelist *fl; KASSERT(d != NULL, ("gv_create_drive: NULL d")); gp = sc->geom; pp = NULL; cp = cp2 = NULL; /* The drive already has a consumer if it was tasted before. 
*/
	if (d->consumer != NULL) {
		/* Tasted before: reuse the existing consumer. */
		cp = d->consumer;
		cp->private = d;
		pp = cp->provider;
	} else if (!(d->flags & GV_DRIVE_REFERENCED)) {
		/* Fresh drive: attach a new consumer to the named provider. */
		if (gv_find_drive(sc, d->name) != NULL) {
			G_VINUM_DEBUG(0, "drive '%s' already exists", d->name);
			g_free(d);
			return (GV_ERR_CREATE);
		}
		if (gv_find_drive_device(sc, d->device) != NULL) {
			/*
			 * NOTE(review): unlike the other error paths in this
			 * function, this one does not g_free(d) -- verify
			 * whether the caller owns d here; possible leak.
			 */
			G_VINUM_DEBUG(0, "provider '%s' already in use by "
			    "gvinum", d->device);
			return (GV_ERR_CREATE);
		}
		pp = g_provider_by_name(d->device);
		if (pp == NULL) {
			G_VINUM_DEBUG(0, "create '%s': device '%s' disappeared",
			    d->name, d->device);
			g_free(d);
			return (GV_ERR_CREATE);
		}
		g_topology_lock();
		cp = g_new_consumer(gp);
		if (g_attach(cp, pp) != 0) {
			g_destroy_consumer(cp);
			g_topology_unlock();
			G_VINUM_DEBUG(0, "create drive '%s': unable to attach",
			    d->name);
			g_free(d);
			return (GV_ERR_CREATE);
		}
		g_topology_unlock();
		d->consumer = cp;
		cp->private = d;
	}

	/*
	 * If this was just a "referenced" drive, we're almost finished, but
	 * insert this drive not on the head of the drives list, as
	 * gv_drive_is_newer() expects a "real" drive from LIST_FIRST().
	 */
	if (d->flags & GV_DRIVE_REFERENCED) {
		snprintf(d->device, sizeof(d->device), "???");
		d2 = LIST_FIRST(&sc->drives);
		if (d2 == NULL)
			LIST_INSERT_HEAD(&sc->drives, d, drive);
		else
			LIST_INSERT_AFTER(d2, d, drive);
		return (0);
	}

	/*
	 * Update access counts of the new drive to those of an already
	 * existing drive.
*/ LIST_FOREACH(d2, &sc->drives, drive) { if ((d == d2) || (d2->consumer == NULL)) continue; cp2 = d2->consumer; g_topology_lock(); if ((cp2->acr || cp2->acw || cp2->ace) && (g_access(cp, cp2->acr, cp2->acw, cp2->ace) != 0)) { g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); G_VINUM_DEBUG(0, "create drive '%s': unable to update " "access counts", d->name); - if (d->hdr != NULL) - g_free(d->hdr); + g_free(d->hdr); g_free(d); return (GV_ERR_CREATE); } g_topology_unlock(); break; } d->size = pp->mediasize - GV_DATA_START; d->avail = d->size; d->vinumconf = sc; LIST_INIT(&d->subdisks); LIST_INIT(&d->freelist); /* The header might have been set during taste. */ if (d->hdr == NULL) { hdr = g_malloc(sizeof(*hdr), M_WAITOK | M_ZERO); hdr->magic = GV_MAGIC; hdr->config_length = GV_CFG_LEN; getcredhostname(NULL, hdr->label.sysname, GV_HOSTNAME_LEN); strlcpy(hdr->label.name, d->name, sizeof(hdr->label.name)); microtime(&hdr->label.date_of_birth); d->hdr = hdr; } /* We also need a freelist entry. */ fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO); fl->offset = GV_DATA_START; fl->size = d->avail; LIST_INSERT_HEAD(&d->freelist, fl, freelist); d->freelist_entries = 1; if (gv_find_drive(sc, d->name) == NULL) LIST_INSERT_HEAD(&sc->drives, d, drive); gv_set_drive_state(d, GV_DRIVE_UP, 0); return (0); } int gv_create_volume(struct gv_softc *sc, struct gv_volume *v) { KASSERT(v != NULL, ("gv_create_volume: NULL v")); v->vinumconf = sc; v->flags |= GV_VOL_NEWBORN; LIST_INIT(&v->plexes); LIST_INSERT_HEAD(&sc->volumes, v, volume); v->wqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(v->wqueue); return (0); } int gv_create_plex(struct gv_softc *sc, struct gv_plex *p) { struct gv_volume *v; KASSERT(p != NULL, ("gv_create_plex: NULL p")); /* Find the volume this plex should be attached to. 
*/
	v = gv_find_vol(sc, p->volume);
	if (v == NULL) {
		G_VINUM_DEBUG(0, "create plex '%s': volume '%s' not found",
		    p->name, p->volume);
		g_free(p);
		return (GV_ERR_CREATE);
	}
	/* Plexes attached to a pre-existing volume are flagged as added. */
	if (!(v->flags & GV_VOL_NEWBORN))
		p->flags |= GV_PLEX_ADDED;
	p->vol_sc = v;
	v->plexcount++;
	p->vinumconf = sc;
	p->synced = 0;
	p->flags |= GV_PLEX_NEWBORN;
	LIST_INSERT_HEAD(&v->plexes, p, in_volume);
	LIST_INIT(&p->subdisks);
	TAILQ_INIT(&p->packets);
	LIST_INSERT_HEAD(&sc->plexes, p, plex);
	/* Per-plex BIO queues: bqueue/wqueue/rqueue. */
	p->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
	bioq_init(p->bqueue);
	p->wqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
	bioq_init(p->wqueue);
	p->rqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
	bioq_init(p->rqueue);
	return (0);
}

/*
 * Create a new subdisk object and attach it to its drive and plex.
 * On failure the subdisk is freed and GV_ERR_CREATE is returned.
 */
int
gv_create_sd(struct gv_softc *sc, struct gv_sd *s)
{
	struct gv_plex *p;
	struct gv_drive *d;

	KASSERT(s != NULL, ("gv_create_sd: NULL s"));

	/* Find the drive where this subdisk should be put on. */
	d = gv_find_drive(sc, s->drive);
	if (d == NULL) {
		/*
		 * It's possible that the subdisk references a drive that
		 * doesn't exist yet (during the taste process), so create a
		 * practically empty "referenced" drive.
		 */
		if (s->flags & GV_SD_TASTED) {
			d = g_malloc(sizeof(struct gv_drive),
			    M_WAITOK | M_ZERO);
			d->flags |= GV_DRIVE_REFERENCED;
			strlcpy(d->name, s->drive, sizeof(d->name));
			gv_create_drive(sc, d);
		} else {
			G_VINUM_DEBUG(0, "create sd '%s': drive '%s' not found",
			    s->name, s->drive);
			g_free(s);
			return (GV_ERR_CREATE);
		}
	}

	/* Find the plex where this subdisk belongs to. */
	p = gv_find_plex(sc, s->plex);
	if (p == NULL) {
		G_VINUM_DEBUG(0, "create sd '%s': plex '%s' not found",
		    s->name, s->plex);
		g_free(s);
		return (GV_ERR_CREATE);
	}

	/*
	 * First we give the subdisk to the drive, to handle autosized
	 * values ...
	 */
	if (gv_sd_to_drive(s, d) != 0) {
		g_free(s);
		return (GV_ERR_CREATE);
	}

	/*
	 * Then, we give the subdisk to the plex; we check if the
	 * given values are correct and maybe adjust them.
*/ if (gv_sd_to_plex(s, p) != 0) { G_VINUM_DEBUG(0, "unable to give sd '%s' to plex '%s'", s->name, p->name); if (s->drive_sc && !(s->drive_sc->flags & GV_DRIVE_REFERENCED)) LIST_REMOVE(s, from_drive); gv_free_sd(s); g_free(s); /* * If this subdisk can't be created, we won't create * the attached plex either, if it is also a new one. */ if (!(p->flags & GV_PLEX_NEWBORN)) return (GV_ERR_CREATE); gv_rm_plex(sc, p); return (GV_ERR_CREATE); } s->flags |= GV_SD_NEWBORN; s->vinumconf = sc; LIST_INSERT_HEAD(&sc->subdisks, s, sd); return (0); } /* * Create a concatenated volume from specified drives or drivegroups. */ void gv_concat(struct g_geom *gp, struct gctl_req *req) { struct gv_drive *d; struct gv_sd *s; struct gv_volume *v; struct gv_plex *p; struct gv_softc *sc; char *drive, buf[30], *vol; int *drives, dcount; sc = gp->softc; dcount = 0; vol = gctl_get_param(req, "name", NULL); if (vol == NULL) { gctl_error(req, "volume name not given"); return; } drives = gctl_get_paraml(req, "drives", sizeof(*drives)); if (drives == NULL) { gctl_error(req, "drive names not given"); return; } /* First we create the volume. */ v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); strlcpy(v->name, vol, sizeof(v->name)); v->state = GV_VOL_UP; gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); /* Then we create the plex. 
*/ p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, v->plexcount); strlcpy(p->volume, v->name, sizeof(p->volume)); p->org = GV_PLEX_CONCAT; p->stripesize = 0; gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); /* Drives are first (right now) priority */ for (dcount = 0; dcount < *drives; dcount++) { snprintf(buf, sizeof(buf), "drive%d", dcount); drive = gctl_get_param(req, buf, NULL); d = gv_find_drive(sc, drive); if (d == NULL) { gctl_error(req, "No such drive '%s'", drive); continue; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); snprintf(s->name, sizeof(s->name), "%s.s%d", p->name, dcount); strlcpy(s->plex, p->name, sizeof(s->plex)); strlcpy(s->drive, drive, sizeof(s->drive)); s->plex_offset = -1; s->drive_offset = -1; s->size = -1; gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); } gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } /* * Create a mirrored volume from specified drives or drivegroups. */ void gv_mirror(struct g_geom *gp, struct gctl_req *req) { struct gv_drive *d; struct gv_sd *s; struct gv_volume *v; struct gv_plex *p; struct gv_softc *sc; char *drive, buf[30], *vol; int *drives, *flags, dcount, pcount, scount; sc = gp->softc; dcount = 0; scount = 0; pcount = 0; vol = gctl_get_param(req, "name", NULL); if (vol == NULL) { gctl_error(req, "volume name not given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); drives = gctl_get_paraml(req, "drives", sizeof(*drives)); if (drives == NULL) { gctl_error(req, "drive names not given"); return; } /* We must have an even number of drives. */ if (*drives % 2 != 0) { gctl_error(req, "mirror organization must have an even number " "of drives"); return; } if (*flags & GV_FLAG_S && *drives < 4) { gctl_error(req, "must have at least 4 drives for striped plex"); return; } /* First we create the volume. 
*/ v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); strlcpy(v->name, vol, sizeof(v->name)); v->state = GV_VOL_UP; gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); /* Then we create the plexes. */ for (pcount = 0; pcount < 2; pcount++) { p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, pcount); strlcpy(p->volume, v->name, sizeof(p->volume)); if (*flags & GV_FLAG_S) { p->org = GV_PLEX_STRIPED; p->stripesize = DEFAULT_STRIPESIZE; } else { p->org = GV_PLEX_CONCAT; p->stripesize = -1; } gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); /* * We just gives each even drive to plex one, and each odd to * plex two. */ scount = 0; for (dcount = pcount; dcount < *drives; dcount += 2) { snprintf(buf, sizeof(buf), "drive%d", dcount); drive = gctl_get_param(req, buf, NULL); d = gv_find_drive(sc, drive); if (d == NULL) { gctl_error(req, "No such drive '%s', aborting", drive); scount++; break; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); snprintf(s->name, sizeof(s->name), "%s.s%d", p->name, scount); strlcpy(s->plex, p->name, sizeof(s->plex)); strlcpy(s->drive, drive, sizeof(s->drive)); s->plex_offset = -1; s->drive_offset = -1; s->size = -1; gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); scount++; } } gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } void gv_raid5(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_drive *d; struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; int *drives, *flags, dcount; char *vol, *drive, buf[30]; off_t *stripesize; sc = gp->softc; vol = gctl_get_param(req, "name", NULL); if (vol == NULL) { gctl_error(req, "volume name not given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); drives = gctl_get_paraml(req, "drives", sizeof(*drives)); stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "no stripesize given"); 
return; } if (drives == NULL) { gctl_error(req, "drive names not given"); return; } /* We must have at least three drives. */ if (*drives < 3) { gctl_error(req, "must have at least three drives for this " "plex organisation"); return; } /* First we create the volume. */ v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); strlcpy(v->name, vol, sizeof(v->name)); v->state = GV_VOL_UP; gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); /* Then we create the plex. */ p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, v->plexcount); strlcpy(p->volume, v->name, sizeof(p->volume)); p->org = GV_PLEX_RAID5; p->stripesize = *stripesize; gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); /* Create subdisks on drives. */ for (dcount = 0; dcount < *drives; dcount++) { snprintf(buf, sizeof(buf), "drive%d", dcount); drive = gctl_get_param(req, buf, NULL); d = gv_find_drive(sc, drive); if (d == NULL) { gctl_error(req, "No such drive '%s'", drive); continue; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); snprintf(s->name, sizeof(s->name), "%s.s%d", p->name, dcount); strlcpy(s->plex, p->name, sizeof(s->plex)); strlcpy(s->drive, drive, sizeof(s->drive)); s->plex_offset = -1; s->drive_offset = -1; s->size = -1; gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); } gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } /* * Create a striped volume from specified drives or drivegroups. 
*/ void gv_stripe(struct g_geom *gp, struct gctl_req *req) { struct gv_drive *d; struct gv_sd *s; struct gv_volume *v; struct gv_plex *p; struct gv_softc *sc; char *drive, buf[30], *vol; int *drives, *flags, dcount, pcount; sc = gp->softc; dcount = 0; pcount = 0; vol = gctl_get_param(req, "name", NULL); if (vol == NULL) { gctl_error(req, "volume name not given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); drives = gctl_get_paraml(req, "drives", sizeof(*drives)); if (drives == NULL) { gctl_error(req, "drive names not given"); return; } /* We must have at least two drives. */ if (*drives < 2) { gctl_error(req, "must have at least 2 drives"); return; } /* First we create the volume. */ v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); strlcpy(v->name, vol, sizeof(v->name)); v->state = GV_VOL_UP; gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); /* Then we create the plex. */ p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, v->plexcount); strlcpy(p->volume, v->name, sizeof(p->volume)); p->org = GV_PLEX_STRIPED; p->stripesize = 262144; gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); /* Create subdisks on drives. 
*/ for (dcount = 0; dcount < *drives; dcount++) { snprintf(buf, sizeof(buf), "drive%d", dcount); drive = gctl_get_param(req, buf, NULL); d = gv_find_drive(sc, drive); if (d == NULL) { gctl_error(req, "No such drive '%s'", drive); continue; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); snprintf(s->name, sizeof(s->name), "%s.s%d", p->name, dcount); strlcpy(s->plex, p->name, sizeof(s->plex)); strlcpy(s->drive, drive, sizeof(s->drive)); s->plex_offset = -1; s->drive_offset = -1; s->size = -1; gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); } gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } diff --git a/sys/geom/vinum/geom_vinum_events.c b/sys/geom/vinum/geom_vinum_events.c index 78aa0adab8cf..74b9bbe4de9d 100644 --- a/sys/geom/vinum/geom_vinum_events.c +++ b/sys/geom/vinum/geom_vinum_events.c @@ -1,277 +1,276 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007 Lukas Ertl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include void gv_post_event(struct gv_softc *sc, int event, void *arg1, void *arg2, intmax_t arg3, intmax_t arg4) { struct gv_event *ev; ev = g_malloc(sizeof(*ev), M_WAITOK | M_ZERO); ev->type = event; ev->arg1 = arg1; ev->arg2 = arg2; ev->arg3 = arg3; ev->arg4 = arg4; mtx_lock(&sc->equeue_mtx); TAILQ_INSERT_TAIL(&sc->equeue, ev, events); wakeup(sc); mtx_unlock(&sc->equeue_mtx); } void gv_worker_exit(struct gv_softc *sc) { struct gv_event *ev; ev = g_malloc(sizeof(*ev), M_WAITOK | M_ZERO); ev->type = GV_EVENT_THREAD_EXIT; mtx_lock(&sc->equeue_mtx); TAILQ_INSERT_TAIL(&sc->equeue, ev, events); wakeup(sc); msleep(sc->worker, &sc->equeue_mtx, PDROP, "gv_wor", 0); } struct gv_event * gv_get_event(struct gv_softc *sc) { struct gv_event *ev; KASSERT(sc != NULL, ("NULL sc")); mtx_lock(&sc->equeue_mtx); ev = TAILQ_FIRST(&sc->equeue); mtx_unlock(&sc->equeue_mtx); return (ev); } void gv_remove_event(struct gv_softc *sc, struct gv_event *ev) { KASSERT(sc != NULL, ("NULL sc")); KASSERT(ev != NULL, ("NULL ev")); mtx_lock(&sc->equeue_mtx); TAILQ_REMOVE(&sc->equeue, ev, events); mtx_unlock(&sc->equeue_mtx); } void gv_drive_tasted(struct gv_softc *sc, struct g_provider *pp) { struct g_geom *gp; struct g_consumer *cp; struct gv_hdr *hdr; struct gv_drive *d; char *buf; int error; hdr = NULL; buf = NULL; G_VINUM_DEBUG(2, "tasted drive on '%s'", pp->name); if 
((GV_CFG_OFFSET % pp->sectorsize) != 0 || (GV_CFG_LEN % pp->sectorsize) != 0) { G_VINUM_DEBUG(0, "provider %s has unsupported sectorsize.", pp->name); return; } gp = sc->geom; g_topology_lock(); cp = g_new_consumer(gp); if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); g_topology_unlock(); G_VINUM_DEBUG(0, "failed to attach to provider on taste event"); return; } if (g_access(cp, 1, 0, 0) != 0) { g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); G_VINUM_DEBUG(0, "failed to access consumer on taste event"); return; } g_topology_unlock(); hdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); /* Read header and on-disk configuration. */ error = gv_read_header(cp, hdr); if (error) { G_VINUM_DEBUG(0, "failed to read header during taste"); goto failed; } /* * Setup the drive before we parse the on-disk configuration, so that * we already know about the drive then. */ d = gv_find_drive(sc, hdr->label.name); if (d == NULL) { d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); strlcpy(d->name, hdr->label.name, sizeof(d->name)); strlcpy(d->device, pp->name, sizeof(d->device)); } else if (d->flags & GV_DRIVE_REFERENCED) { strlcpy(d->device, pp->name, sizeof(d->device)); d->flags &= ~GV_DRIVE_REFERENCED; } else { G_VINUM_DEBUG(2, "drive '%s' is already known", d->name); goto failed; } /* Add the consumer and header to the new drive. */ d->consumer = cp; d->hdr = hdr; gv_create_drive(sc, d); buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL); if (buf == NULL) { G_VINUM_DEBUG(0, "failed to read config during taste"); goto failed; } gv_parse_config(sc, buf, d); g_free(buf); g_topology_lock(); g_access(cp, -1, 0, 0); g_topology_unlock(); gv_setup_objects(sc); gv_set_drive_state(d, GV_DRIVE_UP, 0); return; failed: - if (hdr != NULL) - g_free(hdr); + g_free(hdr); g_topology_lock(); g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); g_topology_unlock(); } /* * Count completed BIOs and handle orphanization when all are done. 
 */
void
gv_drive_done(struct gv_drive *d)
{
	/* The counter must never have gone negative. */
	KASSERT(d->active >= 0, ("Negative number of BIOs (%d)", d->active));
	if (--d->active == 0 && (d->flags & GV_DRIVE_ORPHANED)) {
		/*
		 * Last outstanding BIO just completed; now it is safe to
		 * run the deferred drive-lost handling via the event queue.
		 */
		d->flags &= ~GV_DRIVE_ORPHANED;
		gv_post_event(d->vinumconf, GV_EVENT_DRIVE_LOST, d, NULL, 0, 0);
	}
}

/*
 * When losing a drive (e.g. hardware failure), we cut down the consumer
 * attached to the underlying device and bring the drive itself to a
 * "referenced" state so that normal tasting could bring it up cleanly if it
 * possibly arrives again.
 */
void
gv_drive_lost(struct gv_softc *sc, struct gv_drive *d)
{
	struct g_consumer *cp;
	struct gv_drive *d2;
	struct gv_sd *s, *s2;
	struct gv_freelist *fl, *fl2;

	gv_set_drive_state(d, GV_DRIVE_DOWN,
	    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);

	cp = d->consumer;
	if (cp != NULL) {
		if (d->active > 0) {
			/*
			 * I/O is still in flight: defer the teardown.  The
			 * final gv_drive_done() re-posts GV_EVENT_DRIVE_LOST
			 * once the ORPHANED flag is seen.
			 */
			G_VINUM_DEBUG(2, "dead drive '%s' has still active "
			    "requests, unable to detach consumer", d->name);
			d->flags |= GV_DRIVE_ORPHANED;
			return;
		}
		g_topology_lock();
		/* Drop any access counts before detaching the consumer. */
		if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
	}
	/* Release the drive's free-extent bookkeeping. */
	LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
		LIST_REMOVE(fl, freelist);
		g_free(fl);
	}
	/* Reset the drive to an empty, merely-referenced shell. */
	d->consumer = NULL;
	g_free(d->hdr);
	d->hdr = NULL;
	d->flags |= GV_DRIVE_REFERENCED;
	snprintf(d->device, sizeof(d->device), "???");
	d->size = 0;
	d->avail = 0;
	d->freelist_entries = 0;
	d->sdcount = 0;
	/* Put the subdisk in tasted mode, and remove from drive list. */
	LIST_FOREACH_SAFE(s, &d->subdisks, from_drive, s2) {
		LIST_REMOVE(s, from_drive);
		s->flags |= GV_SD_TASTED;
	}
	/*
	 * Don't forget that gv_is_newer wants a "real" drive at the beginning
	 * of the list, so, just to be safe, we shuffle around.
*/ LIST_REMOVE(d, drive); d2 = LIST_FIRST(&sc->drives); if (d2 == NULL) LIST_INSERT_HEAD(&sc->drives, d, drive); else LIST_INSERT_AFTER(d2, d, drive); gv_save_config(sc); } diff --git a/sys/geom/vinum/geom_vinum_plex.c b/sys/geom/vinum/geom_vinum_plex.c index a7b1e1e5a8bd..3e5b2e3d51a3 100644 --- a/sys/geom/vinum/geom_vinum_plex.c +++ b/sys/geom/vinum/geom_vinum_plex.c @@ -1,1051 +1,1050 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include static int gv_check_parity(struct gv_plex *, struct bio *, struct gv_raid5_packet *); static int gv_normal_parity(struct gv_plex *, struct bio *, struct gv_raid5_packet *); static void gv_plex_flush(struct gv_plex *); static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *, int *, int); static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t, off_t, caddr_t); static void gv_post_bio(struct gv_softc *, struct bio *); void gv_plex_start(struct gv_plex *p, struct bio *bp) { struct bio *cbp; struct gv_sd *s; struct gv_raid5_packet *wp; caddr_t addr; off_t bcount, boff, len; bcount = bp->bio_length; addr = bp->bio_data; boff = bp->bio_offset; /* Walk over the whole length of the request, we might split it up. */ while (bcount > 0) { wp = NULL; /* * RAID5 plexes need special treatment, as a single request * might involve several read/write sub-requests. */ if (p->org == GV_PLEX_RAID5) { wp = gv_raid5_start(p, bp, addr, boff, bcount); if (wp == NULL) return; len = wp->length; if (TAILQ_EMPTY(&wp->bits)) g_free(wp); else if (wp->lockbase != -1) TAILQ_INSERT_TAIL(&p->packets, wp, list); /* * Requests to concatenated and striped plexes go straight * through. */ } else { len = gv_plex_normal_request(p, bp, boff, bcount, addr); } if (len < 0) return; bcount -= len; addr += len; boff += len; } /* * Fire off all sub-requests. We get the correct consumer (== drive) * to send each request to via the subdisk that was stored in * cbp->bio_caller1. */ cbp = bioq_takefirst(p->bqueue); while (cbp != NULL) { /* * RAID5 sub-requests need to come in correct order, otherwise * we trip over the parity, as it might be overwritten by * another sub-request. We abuse cbp->bio_caller2 to mark * potential overlap situations. */ if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) { /* Park the bio on the waiting queue. 
*/ cbp->bio_pflags |= GV_BIO_ONHOLD; bioq_disksort(p->wqueue, cbp); } else { s = cbp->bio_caller1; g_io_request(cbp, s->drive_sc->consumer); } cbp = bioq_takefirst(p->bqueue); } } static int gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off, off_t *real_len, int *sdno, int growing) { struct gv_sd *s; int i, sdcount; off_t len_left, stripeend, stripeno, stripestart; switch (p->org) { case GV_PLEX_CONCAT: /* * Find the subdisk where this request starts. The subdisks in * this list must be ordered by plex_offset. */ i = 0; LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->plex_offset <= boff && s->plex_offset + s->size > boff) { *sdno = i; break; } i++; } if (s == NULL || s->drive_sc == NULL) return (GV_ERR_NOTFOUND); /* Calculate corresponding offsets on disk. */ *real_off = boff - s->plex_offset; len_left = s->size - (*real_off); KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); *real_len = (bcount > len_left) ? len_left : bcount; break; case GV_PLEX_STRIPED: /* The number of the stripe where the request starts. */ stripeno = boff / p->stripesize; KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0")); /* Take growing subdisks into account when calculating. */ sdcount = gv_sdcount(p, (boff >= p->synced)); if (!(boff + bcount <= p->synced) && (p->flags & GV_PLEX_GROWING) && !growing) return (GV_ERR_ISBUSY); *sdno = stripeno % sdcount; KASSERT(sdno >= 0, ("gv_plex_offset: sdno < 0")); stripestart = (stripeno / sdcount) * p->stripesize; KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0")); stripeend = stripestart + p->stripesize; *real_off = boff - (stripeno * p->stripesize) + stripestart; len_left = stripeend - *real_off; KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); *real_len = (bcount <= len_left) ? bcount : len_left; break; default: return (GV_ERR_PLEXORG); } return (0); } /* * Prepare a normal plex request. 
*/ static int gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff, off_t bcount, caddr_t addr) { struct gv_sd *s; struct bio *cbp; off_t real_len, real_off; int i, err, sdno; s = NULL; sdno = -1; real_len = real_off = 0; err = ENXIO; if (p == NULL || LIST_EMPTY(&p->subdisks)) goto bad; err = gv_plex_offset(p, boff, bcount, &real_off, &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW)); /* If the request was blocked, put it into wait. */ if (err == GV_ERR_ISBUSY) { bioq_disksort(p->rqueue, bp); return (-1); /* "Fail", and delay request. */ } if (err) { err = ENXIO; goto bad; } err = ENXIO; /* Find the right subdisk. */ i = 0; LIST_FOREACH(s, &p->subdisks, in_plex) { if (i == sdno) break; i++; } /* Subdisk not found. */ if (s == NULL || s->drive_sc == NULL) goto bad; /* Now check if we can handle the request on this subdisk. */ switch (s->state) { case GV_SD_UP: /* If the subdisk is up, just continue. */ break; case GV_SD_DOWN: if (bp->bio_pflags & GV_BIO_INTERNAL) G_VINUM_DEBUG(0, "subdisk must be in the stale state in" " order to perform administrative requests"); goto bad; case GV_SD_STALE: if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) { G_VINUM_DEBUG(0, "subdisk stale, unable to perform " "regular requests"); goto bad; } G_VINUM_DEBUG(1, "sd %s is initializing", s->name); gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE); break; case GV_SD_INITIALIZING: if (bp->bio_cmd == BIO_READ) goto bad; break; default: /* All other subdisk states mean it's not accessible. */ goto bad; } /* Clone the bio and adjust the offsets and sizes. */ cbp = g_clone_bio(bp); if (cbp == NULL) { err = ENOMEM; goto bad; } cbp->bio_offset = real_off + s->drive_offset; cbp->bio_length = real_len; cbp->bio_data = addr; cbp->bio_done = gv_done; cbp->bio_caller1 = s; s->drive_sc->active++; /* Store the sub-requests now and let others issue them. 
*/ bioq_insert_tail(p->bqueue, cbp); return (real_len); bad: G_VINUM_LOGREQ(0, bp, "plex request failed."); /* Building the sub-request failed. If internal BIO, do not deliver. */ if (bp->bio_pflags & GV_BIO_INTERNAL) { if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); g_destroy_bio(bp); p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING | GV_PLEX_GROWING); return (-1); } g_io_deliver(bp, err); return (-1); } /* * Handle a completed request to a striped or concatenated plex. */ void gv_plex_normal_done(struct gv_plex *p, struct bio *bp) { struct bio *pbp; pbp = bp->bio_parent; if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { /* Just set it to length since multiple plexes will * screw things up. */ pbp->bio_completed = pbp->bio_length; if (pbp->bio_pflags & GV_BIO_SYNCREQ) gv_sync_complete(p, pbp); else if (pbp->bio_pflags & GV_BIO_GROW) gv_grow_complete(p, pbp); else g_io_deliver(pbp, pbp->bio_error); } } /* * Handle a completed request to a RAID-5 plex. */ void gv_plex_raid5_done(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; struct bio *cbp, *pbp; struct gv_bioq *bq, *bq2; struct gv_raid5_packet *wp; off_t completed; int i; completed = 0; sc = p->vinumconf; wp = bp->bio_caller2; switch (bp->bio_parent->bio_cmd) { case BIO_READ: if (wp == NULL) { completed = bp->bio_completed; break; } TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { if (bq->bp != bp) continue; TAILQ_REMOVE(&wp->bits, bq, queue); g_free(bq); for (i = 0; i < wp->length; i++) wp->data[i] ^= bp->bio_data[i]; break; } if (TAILQ_EMPTY(&wp->bits)) { completed = wp->length; if (wp->lockbase != -1) { TAILQ_REMOVE(&p->packets, wp, list); /* Bring the waiting bios back into the game. */ pbp = bioq_takefirst(p->wqueue); while (pbp != NULL) { gv_post_bio(sc, pbp); pbp = bioq_takefirst(p->wqueue); } } g_free(wp); } break; case BIO_WRITE: /* XXX can this ever happen? 
*/ if (wp == NULL) { completed = bp->bio_completed; break; } /* Check if we need to handle parity data. */ TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { if (bq->bp != bp) continue; TAILQ_REMOVE(&wp->bits, bq, queue); g_free(bq); cbp = wp->parity; if (cbp != NULL) { for (i = 0; i < wp->length; i++) cbp->bio_data[i] ^= bp->bio_data[i]; } break; } /* Handle parity data. */ if (TAILQ_EMPTY(&wp->bits)) { if (bp->bio_parent->bio_pflags & GV_BIO_CHECK) i = gv_check_parity(p, bp, wp); else i = gv_normal_parity(p, bp, wp); /* All of our sub-requests have finished. */ if (i) { completed = wp->length; TAILQ_REMOVE(&p->packets, wp, list); /* Bring the waiting bios back into the game. */ pbp = bioq_takefirst(p->wqueue); while (pbp != NULL) { gv_post_bio(sc, pbp); pbp = bioq_takefirst(p->wqueue); } g_free(wp); } } break; } pbp = bp->bio_parent; if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; pbp->bio_completed += completed; /* When the original request is finished, we deliver it. */ pbp->bio_inbed++; if (pbp->bio_inbed == pbp->bio_children) { /* Hand it over for checking or delivery. */ if (pbp->bio_cmd == BIO_WRITE && (pbp->bio_pflags & GV_BIO_CHECK)) { gv_parity_complete(p, pbp); } else if (pbp->bio_cmd == BIO_WRITE && (pbp->bio_pflags & GV_BIO_REBUILD)) { gv_rebuild_complete(p, pbp); } else if (pbp->bio_pflags & GV_BIO_INIT) { gv_init_complete(p, pbp); } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) { gv_sync_complete(p, pbp); } else if (pbp->bio_pflags & GV_BIO_GROW) { gv_grow_complete(p, pbp); } else { g_io_deliver(pbp, pbp->bio_error); } } /* Clean up what we allocated. 
 */
	/*
	 * NOTE(review): sub-request buffers carry GV_BIO_MALLOC in
	 * bio_cflags here (parent bios elsewhere in this file use
	 * bio_pflags) — presumably set by the RAID-5 request builder;
	 * verify against gv_raid5_start().
	 */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}

/*
 * Parity step for a packet whose parent carries GV_BIO_CHECK: either
 * re-issue the pending BIO held in the packet, or compare the two parity
 * buffers byte-by-byte (flagging EAGAIN on mismatch, and rewriting the
 * parity if GV_BIO_PARITY asks for a rebuild).  Returns non-zero when the
 * packet is completely finished.
 */
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *pbp;
	struct gv_sd *s;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		/* A deferred BIO is still pending; send it down first. */
		pbp = wp->waiting;
		wp->waiting = NULL;
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;
	} else if (wp->parity != NULL) {
		pbp = wp->parity;
		wp->parity = NULL;
		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != pbp->bio_data[i]) {
				err = 1;
				break;
			}
		}
		/* The parity is not correct... */
		if (err) {
			bp->bio_parent->bio_error = EAGAIN;
			/* ... but we rebuild it. */
			if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
				s = pbp->bio_caller1;
				g_io_request(pbp, s->drive_sc->consumer);
				finished = 0;
			}
		}
		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(pbp);
		}
	}

	return (finished);
}

/*
 * Regular write-path parity handling: XOR the waiting data BIO into the
 * parity buffer and issue the outstanding BIOs to their drives.  Returns
 * non-zero when nothing remains to be issued for this packet.
 */
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	struct gv_sd *s;
	int finished, i;

	finished = 1;

	if (wp->waiting != NULL) {
		/* Fold the waiting data into the parity, then issue it. */
		pbp = wp->waiting;
		wp->waiting = NULL;
		cbp = wp->parity;
		for (i = 0; i < wp->length; i++)
			cbp->bio_data[i] ^= pbp->bio_data[i];
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;
	} else if (wp->parity != NULL) {
		/* Data is done; write out the finished parity block. */
		cbp = wp->parity;
		wp->parity = NULL;
		s = cbp->bio_caller1;
		g_io_request(cbp, s->drive_sc->consumer);
		finished = 0;
	}

	return (finished);
}

/* Flush the queue with delayed requests.
*/ static void gv_plex_flush(struct gv_plex *p) { struct gv_softc *sc; struct bio *bp; sc = p->vinumconf; bp = bioq_takefirst(p->rqueue); while (bp != NULL) { gv_plex_start(p, bp); bp = bioq_takefirst(p->rqueue); } } static void gv_post_bio(struct gv_softc *sc, struct bio *bp) { KASSERT(sc != NULL, ("NULL sc")); KASSERT(bp != NULL, ("NULL bp")); mtx_lock(&sc->bqueue_mtx); bioq_disksort(sc->bqueue_down, bp); wakeup(sc); mtx_unlock(&sc->bqueue_mtx); } int gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset, off_t length, int type, caddr_t data) { struct gv_softc *sc; struct bio *bp; KASSERT(from != NULL, ("NULL from")); KASSERT(to != NULL, ("NULL to")); sc = from->vinumconf; KASSERT(sc != NULL, ("NULL sc")); bp = g_new_bio(); if (bp == NULL) { G_VINUM_DEBUG(0, "sync from '%s' failed at offset " " %jd; out of memory", from->name, offset); return (ENOMEM); } bp->bio_length = length; bp->bio_done = NULL; bp->bio_pflags |= GV_BIO_SYNCREQ; bp->bio_offset = offset; bp->bio_caller1 = from; bp->bio_caller2 = to; bp->bio_cmd = type; if (data == NULL) data = g_malloc(length, M_WAITOK); bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */ bp->bio_data = data; /* Send down next. */ gv_post_bio(sc, bp); //gv_plex_start(from, bp); return (0); } /* * Handle a finished plex sync bio. */ int gv_sync_complete(struct gv_plex *to, struct bio *bp) { struct gv_plex *from, *p; struct gv_sd *s; struct gv_volume *v; struct gv_softc *sc; off_t offset; int err; g_topology_assert_not(); err = 0; KASSERT(to != NULL, ("NULL to")); KASSERT(bp != NULL, ("NULL bp")); from = bp->bio_caller2; KASSERT(from != NULL, ("NULL from")); v = to->vol_sc; KASSERT(v != NULL, ("NULL v")); sc = v->vinumconf; KASSERT(sc != NULL, ("NULL sc")); /* If it was a read, write it. */ if (bp->bio_cmd == BIO_READ) { err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length, BIO_WRITE, bp->bio_data); /* If it was a write, read the next one. 
*/ } else if (bp->bio_cmd == BIO_WRITE) { if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); to->synced += bp->bio_length; /* If we're finished, clean up. */ if (bp->bio_offset + bp->bio_length >= from->size) { G_VINUM_DEBUG(1, "syncing of %s from %s completed", to->name, from->name); /* Update our state. */ LIST_FOREACH(s, &to->subdisks, in_plex) gv_set_sd_state(s, GV_SD_UP, 0); gv_update_plex_state(to); to->flags &= ~GV_PLEX_SYNCING; to->synced = 0; gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); } else { offset = bp->bio_offset + bp->bio_length; err = gv_sync_request(from, to, offset, MIN(bp->bio_length, from->size - offset), BIO_READ, NULL); } } g_destroy_bio(bp); /* Clean up if there was an error. */ if (err) { to->flags &= ~GV_PLEX_SYNCING; G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err); } /* Check if all plexes are synced, and lower refcounts. */ g_topology_lock(); LIST_FOREACH(p, &v->plexes, in_volume) { if (p->flags & GV_PLEX_SYNCING) { g_topology_unlock(); return (-1); } } /* If we came here, all plexes are synced, and we're free. */ gv_access(v->provider, -1, -1, 0); g_topology_unlock(); G_VINUM_DEBUG(1, "plex sync completed"); gv_volume_flush(v); return (0); } /* * Create a new bio struct for the next grow request. 
*/ int gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type, caddr_t data) { struct gv_softc *sc; struct bio *bp; KASSERT(p != NULL, ("gv_grow_request: NULL p")); sc = p->vinumconf; KASSERT(sc != NULL, ("gv_grow_request: NULL sc")); bp = g_new_bio(); if (bp == NULL) { G_VINUM_DEBUG(0, "grow of %s failed creating bio: " "out of memory", p->name); return (ENOMEM); } bp->bio_cmd = type; bp->bio_done = NULL; bp->bio_error = 0; bp->bio_caller1 = p; bp->bio_offset = offset; bp->bio_length = length; bp->bio_pflags |= GV_BIO_GROW; if (data == NULL) data = g_malloc(length, M_WAITOK); bp->bio_pflags |= GV_BIO_MALLOC; bp->bio_data = data; gv_post_bio(sc, bp); //gv_plex_start(p, bp); return (0); } /* * Finish handling of a bio to a growing plex. */ void gv_grow_complete(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; struct gv_sd *s; struct gv_volume *v; off_t origsize, offset; int sdcount, err; v = p->vol_sc; KASSERT(v != NULL, ("gv_grow_complete: NULL v")); sc = v->vinumconf; KASSERT(sc != NULL, ("gv_grow_complete: NULL sc")); err = 0; /* If it was a read, write it. */ if (bp->bio_cmd == BIO_READ) { p->synced += bp->bio_length; err = gv_grow_request(p, bp->bio_offset, bp->bio_length, BIO_WRITE, bp->bio_data); /* If it was a write, read next. */ } else if (bp->bio_cmd == BIO_WRITE) { if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); /* Find the real size of the plex. 
*/ sdcount = gv_sdcount(p, 1); s = LIST_FIRST(&p->subdisks); KASSERT(s != NULL, ("NULL s")); origsize = (s->size * (sdcount - 1)); if (bp->bio_offset + bp->bio_length >= origsize) { G_VINUM_DEBUG(1, "growing of %s completed", p->name); p->flags &= ~GV_PLEX_GROWING; LIST_FOREACH(s, &p->subdisks, in_plex) { s->flags &= ~GV_SD_GROW; gv_set_sd_state(s, GV_SD_UP, 0); } p->size = gv_plex_size(p); gv_update_vol_size(v, gv_vol_size(v)); gv_set_plex_state(p, GV_PLEX_UP, 0); g_topology_lock(); gv_access(v->provider, -1, -1, 0); g_topology_unlock(); p->synced = 0; gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); /* Issue delayed requests. */ gv_plex_flush(p); } else { offset = bp->bio_offset + bp->bio_length; err = gv_grow_request(p, offset, MIN(bp->bio_length, origsize - offset), BIO_READ, NULL); } } g_destroy_bio(bp); if (err) { p->flags &= ~GV_PLEX_GROWING; G_VINUM_DEBUG(0, "error growing plex: error code %d", err); } } /* * Create an initialization BIO and send it off to the consumer. Assume that * we're given initialization data as parameter. */ void gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length) { struct gv_drive *d; struct g_consumer *cp; struct bio *bp, *cbp; KASSERT(s != NULL, ("gv_init_request: NULL s")); d = s->drive_sc; KASSERT(d != NULL, ("gv_init_request: NULL d")); cp = d->consumer; KASSERT(cp != NULL, ("gv_init_request: NULL cp")); bp = g_new_bio(); if (bp == NULL) { G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" " (drive offset %jd); out of memory", s->name, (intmax_t)s->initialized, (intmax_t)start); return; /* XXX: Error codes. */ } bp->bio_cmd = BIO_WRITE; bp->bio_data = data; bp->bio_done = NULL; bp->bio_error = 0; bp->bio_length = length; bp->bio_pflags |= GV_BIO_INIT; bp->bio_offset = start; bp->bio_caller1 = s; /* Then ofcourse, we have to clone it. 
*/ cbp = g_clone_bio(bp); if (cbp == NULL) { G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" " (drive offset %jd); out of memory", s->name, (intmax_t)s->initialized, (intmax_t)start); return; /* XXX: Error codes. */ } cbp->bio_done = gv_done; cbp->bio_caller1 = s; d->active++; /* Send it off to the consumer. */ g_io_request(cbp, cp); } /* * Handle a finished initialization BIO. */ void gv_init_complete(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; struct gv_drive *d; struct g_consumer *cp; struct gv_sd *s; off_t start, length; caddr_t data; int error; s = bp->bio_caller1; start = bp->bio_offset; length = bp->bio_length; error = bp->bio_error; data = bp->bio_data; KASSERT(s != NULL, ("gv_init_complete: NULL s")); d = s->drive_sc; KASSERT(d != NULL, ("gv_init_complete: NULL d")); cp = d->consumer; KASSERT(cp != NULL, ("gv_init_complete: NULL cp")); sc = p->vinumconf; KASSERT(sc != NULL, ("gv_init_complete: NULL sc")); g_destroy_bio(bp); /* * First we need to find out if it was okay, and abort if it's not. * Then we need to free previous buffers, find out the correct subdisk, * as well as getting the correct starting point and length of the BIO. */ if (start >= s->drive_offset + s->size) { /* Free the data we initialized. */ - if (data != NULL) - g_free(data); + g_free(data); g_topology_assert_not(); g_topology_lock(); g_access(cp, 0, -1, 0); g_topology_unlock(); if (error) { gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); } else { gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG); s->initialized = 0; gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); G_VINUM_DEBUG(1, "subdisk '%s' init: finished " "successfully", s->name); } return; } s->initialized += length; start += length; gv_init_request(s, start, data, length); } /* * Create a new bio struct for the next parity rebuild. Used both by internal * rebuild of degraded plexes as well as user initiated rebuilds/checks. 
*/ void gv_parity_request(struct gv_plex *p, int flags, off_t offset) { struct gv_softc *sc; struct bio *bp; KASSERT(p != NULL, ("gv_parity_request: NULL p")); sc = p->vinumconf; KASSERT(sc != NULL, ("gv_parity_request: NULL sc")); bp = g_new_bio(); if (bp == NULL) { G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: " "out of memory", p->name); return; } bp->bio_cmd = BIO_WRITE; bp->bio_done = NULL; bp->bio_error = 0; bp->bio_length = p->stripesize; bp->bio_caller1 = p; /* * Check if it's a rebuild of a degraded plex or a user request of * parity rebuild. */ if (flags & GV_BIO_REBUILD) bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK); else if (flags & GV_BIO_CHECK) bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO); else { G_VINUM_DEBUG(0, "invalid flags given in rebuild"); return; } bp->bio_pflags = flags; bp->bio_pflags |= GV_BIO_MALLOC; /* We still have more parity to build. */ bp->bio_offset = offset; gv_post_bio(sc, bp); //gv_plex_start(p, bp); /* Send it down to the plex. */ } /* * Handle a finished parity write. */ void gv_parity_complete(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; int error, flags; error = bp->bio_error; flags = bp->bio_pflags; flags &= ~GV_BIO_MALLOC; sc = p->vinumconf; KASSERT(sc != NULL, ("gv_parity_complete: NULL sc")); /* Clean up what we allocated. */ if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); g_destroy_bio(bp); if (error == EAGAIN) { G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx", (intmax_t)p->synced); } /* Any error is fatal, except EAGAIN when we're rebuilding. */ if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) { /* Make sure we don't have the lock. */ g_topology_assert_not(); g_topology_lock(); gv_access(p->vol_sc->provider, -1, -1, 0); g_topology_unlock(); G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx " "errno %d", p->name, (intmax_t)p->synced, error); return; } else { p->synced += p->stripesize; } if (p->synced >= p->size) { /* Make sure we don't have the lock. 
*/ g_topology_assert_not(); g_topology_lock(); gv_access(p->vol_sc->provider, -1, -1, 0); g_topology_unlock(); /* We're finished. */ G_VINUM_DEBUG(1, "parity operation on %s finished", p->name); p->synced = 0; gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); return; } /* Send down next. It will determine if we need to itself. */ gv_parity_request(p, flags, p->synced); } /* * Handle a finished plex rebuild bio. */ void gv_rebuild_complete(struct gv_plex *p, struct bio *bp) { struct gv_softc *sc; struct gv_sd *s; int error, flags; off_t offset; error = bp->bio_error; flags = bp->bio_pflags; offset = bp->bio_offset; flags &= ~GV_BIO_MALLOC; sc = p->vinumconf; KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc")); /* Clean up what we allocated. */ if (bp->bio_pflags & GV_BIO_MALLOC) g_free(bp->bio_data); g_destroy_bio(bp); if (error) { g_topology_assert_not(); g_topology_lock(); gv_access(p->vol_sc->provider, -1, -1, 0); g_topology_unlock(); G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d", p->name, (intmax_t)offset, error); p->flags &= ~GV_PLEX_REBUILDING; p->synced = 0; gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ return; } offset += (p->stripesize * (gv_sdcount(p, 1) - 1)); if (offset >= p->size) { /* We're finished. */ g_topology_assert_not(); g_topology_lock(); gv_access(p->vol_sc->provider, -1, -1, 0); g_topology_unlock(); G_VINUM_DEBUG(1, "rebuild of %s finished", p->name); gv_save_config(p->vinumconf); p->flags &= ~GV_PLEX_REBUILDING; p->synced = 0; /* Try to up all subdisks. */ LIST_FOREACH(s, &p->subdisks, in_plex) gv_update_sd_state(s); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ return; } /* Send down next. It will determine if we need to itself. */ gv_parity_request(p, flags, offset); }