Index: head/sys/geom/eli/g_eli.c =================================================================== --- head/sys/geom/eli/g_eli.c (revision 361014) +++ head/sys/geom/eli/g_eli.c (revision 361015) @@ -1,1446 +1,1449 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2019 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_eli, "GEOM crypto module"); MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_ELI stuff"); static int g_eli_version = G_ELI_VERSION; SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0, "GELI version"); int g_eli_debug = 0; SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0, "Debug level"); static u_int g_eli_tries = 3; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0, "Number of tries for entering the passphrase"); static u_int g_eli_visible_passphrase = GETS_NOECHO; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN, &g_eli_visible_passphrase, 0, "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)"); u_int g_eli_overwrites = G_ELI_OVERWRITES; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites, 0, "Number of times on-disk keys should be overwritten when destroying them"); static u_int g_eli_threads = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0, "Number of threads doing crypto work"); u_int g_eli_batch = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0, "Use crypto operations batching"); /* * Passphrase cached during boot, in order to be more user-friendly if * there are multiple providers using the same passphrase. 
*/ static char cached_passphrase[256]; static u_int g_eli_boot_passcache = 1; TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache); SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD, &g_eli_boot_passcache, 0, "Passphrases are cached during boot process for possible reuse"); static void fetch_loader_passphrase(void * dummy) { char * env_passphrase; KASSERT(dynamic_kenv, ("need dynamic kenv")); if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) { /* Extract passphrase from the environment. */ strlcpy(cached_passphrase, env_passphrase, sizeof(cached_passphrase)); freeenv(env_passphrase); /* Wipe the passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); } } SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY, fetch_loader_passphrase, NULL); static void zero_boot_passcache(void) { explicit_bzero(cached_passphrase, sizeof(cached_passphrase)); } static void zero_geli_intake_keys(void) { struct keybuf *keybuf; int i; if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, clear all GELI keys. 
*/ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { explicit_bzero(keybuf->kb_ents[i].ke_data, sizeof(keybuf->kb_ents[i].ke_data)); keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE; } } } } static void zero_intake_passcache(void *dummy) { zero_boot_passcache(); zero_geli_intake_keys(); } EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0); static eventhandler_tag g_eli_pre_sync = NULL; static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp, off_t offset, struct g_eli_metadata *md); static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_eli_init(struct g_class *mp); static void g_eli_fini(struct g_class *mp); static g_taste_t g_eli_taste; static g_dumpconf_t g_eli_dumpconf; struct g_class g_eli_class = { .name = G_ELI_CLASS_NAME, .version = G_VERSION, .ctlreq = g_eli_config, .taste = g_eli_taste, .destroy_geom = g_eli_destroy_geom, .init = g_eli_init, .fini = g_eli_fini }; /* * Code paths: * BIO_READ: * g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ /* * EAGAIN from crypto(9) means, that we were probably balanced to another crypto * accelerator or something like this. * The function updates the SID and rerun the operation. */ int g_eli_crypto_rerun(struct cryptop *crp) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; bp = (struct bio *)crp->crp_opaque; sc = bp->bio_to->geom->softc; LIST_FOREACH(wr, &sc->sc_workers, w_next) { if (wr->w_number == bp->bio_pflags) break; } KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags)); G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %p -> %p).", bp->bio_cmd == BIO_READ ? 
"READ" : "WRITE", wr->w_sid, crp->crp_session); wr->w_sid = crp->crp_session; crp->crp_etype = 0; error = crypto_dispatch(crp); if (error == 0) return (0); G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error); crp->crp_etype = error; return (error); } static void g_eli_getattr_done(struct bio *bp) { if (bp->bio_error == 0 && !strcmp(bp->bio_attribute, "GEOM::physpath")) { strlcat(bp->bio_data, "/eli", bp->bio_length); } g_std_done(bp); } /* * The function is called afer reading encrypted data from the provider. * * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver */ void g_eli_read_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; sc = pbp->bio_to->geom->softc; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; if (pbp->bio_driver2 != NULL) { free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; } g_io_deliver(pbp, pbp->bio_error); if (sc != NULL) atomic_subtract_int(&sc->sc_inflight, 1); return; } mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, pbp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } /* * The function is called after we encrypt and write data. * * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver */ void g_eli_write_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? 
*/ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; } else pbp->bio_completed = pbp->bio_length; /* * Write is finished, send it up. */ sc = pbp->bio_to->geom->softc; g_io_deliver(pbp, pbp->bio_error); if (sc != NULL) atomic_subtract_int(&sc->sc_inflight, 1); } /* * This function should never be called, but GEOM made as it set ->orphan() * method for every geom. */ static void g_eli_orphan_spoil_assert(struct g_consumer *cp) { panic("Function %s() called for %s.", __func__, cp->geom->name); } static void g_eli_orphan(struct g_consumer *cp) { struct g_eli_softc *sc; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; g_eli_destroy(sc, TRUE); } static void g_eli_resize(struct g_consumer *cp) { struct g_eli_softc *sc; struct g_provider *epp, *pp; off_t oldsize; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; if ((sc->sc_flags & G_ELI_FLAG_AUTORESIZE) == 0) { G_ELI_DEBUG(0, "Autoresize is turned off, old size: %jd.", (intmax_t)sc->sc_provsize); return; } pp = cp->provider; if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) { struct g_eli_metadata md; u_char *sector; int error; sector = NULL; error = g_eli_read_metadata_offset(cp->geom->class, pp, sc->sc_provsize - pp->sectorsize, &md); if (error != 0) { G_ELI_DEBUG(0, "Cannot read metadata from %s (error=%d).", pp->name, error); goto iofail; } md.md_provsize = pp->mediasize; sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); eli_metadata_encode(&md, sector); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { G_ELI_DEBUG(0, "Cannot store metadata on %s (error=%d).", pp->name, error); goto iofail; } explicit_bzero(sector, pp->sectorsize); error = g_write_data(cp, sc->sc_provsize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { 
G_ELI_DEBUG(0, "Cannot clear old metadata from %s (error=%d).", pp->name, error); goto iofail; } iofail: explicit_bzero(&md, sizeof(md)); if (sector != NULL) { explicit_bzero(sector, pp->sectorsize); free(sector, M_ELI); } } oldsize = sc->sc_mediasize; sc->sc_mediasize = eli_mediasize(sc, pp->mediasize, pp->sectorsize); g_eli_key_resize(sc); sc->sc_provsize = pp->mediasize; epp = LIST_FIRST(&sc->sc_geom->provider); g_resize_provider(epp, sc->sc_mediasize); G_ELI_DEBUG(0, "Device %s size changed from %jd to %jd.", epp->name, (intmax_t)oldsize, (intmax_t)sc->sc_mediasize); } /* * BIO_READ: * G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ static void g_eli_start(struct bio *bp) { struct g_eli_softc *sc; struct g_consumer *cp; struct bio *cbp; sc = bp->bio_to->geom->softc; KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_ELI_LOGREQ(2, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_GETATTR: case BIO_FLUSH: case BIO_ZONE: case BIO_SPEEDUP: break; case BIO_DELETE: /* * If the user hasn't set the NODELETE flag, we just pass * it down the stack and let the layers beneath us do (or * not) whatever they do with it. If they have, we * reject it. A possible extension would be an * additional flag to take it as a hint to shred the data * with [multiple?] overwrites. 
*/ if (!(sc->sc_flags & G_ELI_FLAG_NODELETE)) break; default: g_io_deliver(bp, EOPNOTSUPP); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } bp->bio_driver1 = cbp; bp->bio_pflags = G_ELI_NEW_BIO; switch (bp->bio_cmd) { case BIO_READ: if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) { g_eli_crypto_read(sc, bp, 0); break; } /* FALLTHROUGH */ case BIO_WRITE: mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); break; case BIO_GETATTR: case BIO_FLUSH: case BIO_DELETE: case BIO_SPEEDUP: case BIO_ZONE: if (bp->bio_cmd == BIO_GETATTR) cbp->bio_done = g_eli_getattr_done; else cbp->bio_done = g_std_done; cp = LIST_FIRST(&sc->sc_geom->consumer); cbp->bio_to = cp->provider; G_ELI_LOGREQ(2, cbp, "Sending request."); g_io_request(cbp, cp); break; } } static int g_eli_newsession(struct g_eli_worker *wr) { struct g_eli_softc *sc; struct crypto_session_params csp; int error; void *key; sc = wr->w_softc; memset(&csp, 0, sizeof(csp)); csp.csp_mode = CSP_MODE_CIPHER; csp.csp_cipher_alg = sc->sc_ealgo; csp.csp_ivlen = g_eli_ivlen(sc->sc_ealgo); csp.csp_cipher_klen = sc->sc_ekeylen / 8; if (sc->sc_ealgo == CRYPTO_AES_XTS) csp.csp_cipher_klen <<= 1; if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) { key = g_eli_key_hold(sc, 0, LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize); csp.csp_cipher_key = key; } else { key = NULL; csp.csp_cipher_key = sc->sc_ekey; } if (sc->sc_flags & G_ELI_FLAG_AUTH) { csp.csp_mode = CSP_MODE_ETA; csp.csp_auth_alg = sc->sc_aalgo; csp.csp_auth_klen = G_ELI_AUTH_SECKEYLEN; } switch (sc->sc_crypto) { case G_ELI_CRYPTO_SW: error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_SOFTWARE); break; case G_ELI_CRYPTO_HW: error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_HARDWARE); break; case G_ELI_CRYPTO_UNKNOWN: error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_HARDWARE); if (error == 0) { mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) 
sc->sc_crypto = G_ELI_CRYPTO_HW; mtx_unlock(&sc->sc_queue_mtx); } else { error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_SOFTWARE); mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) sc->sc_crypto = G_ELI_CRYPTO_SW; mtx_unlock(&sc->sc_queue_mtx); } break; default: panic("%s: invalid condition", __func__); } if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) { if (error) g_eli_key_drop(sc, key); else wr->w_first_key = key; } return (error); } static void g_eli_freesession(struct g_eli_worker *wr) { struct g_eli_softc *sc; crypto_freesession(wr->w_sid); if (wr->w_first_key != NULL) { sc = wr->w_softc; g_eli_key_drop(sc, wr->w_first_key); wr->w_first_key = NULL; } } static void g_eli_cancel(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) { KASSERT(bp->bio_pflags == G_ELI_NEW_BIO, ("Not new bio when canceling (bp=%p).", bp)); g_io_deliver(bp, ENXIO); } } static struct bio * g_eli_takefirst(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) return (bioq_takefirst(&sc->sc_queue)); /* * Device suspended, so we skip new I/O requests. */ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_pflags != G_ELI_NEW_BIO) break; } if (bp != NULL) bioq_remove(&sc->sc_queue, bp); return (bp); } /* * This is the main function for kernel worker thread when we don't have * hardware acceleration and we have to do cryptography in software. * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM * threads with crypto work. */ static void g_eli_worker(void *arg) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; wr = arg; sc = wr->w_softc; #ifdef EARLY_AP_STARTUP MPASS(!sc->sc_cpubind || smp_started); #elif defined(SMP) /* Before sched_bind() to a CPU, wait for all CPUs to go on-line. 
*/ if (sc->sc_cpubind) { while (!smp_started) tsleep(wr, 0, "geli:smp", hz / 4); } #endif thread_lock(curthread); sched_prio(curthread, PUSER); if (sc->sc_cpubind) sched_bind(curthread, wr->w_number % mp_ncpus); thread_unlock(curthread); G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm); for (;;) { mtx_lock(&sc->sc_queue_mtx); again: bp = g_eli_takefirst(sc); if (bp == NULL) { if (sc->sc_flags & G_ELI_FLAG_DESTROY) { g_eli_cancel(sc); LIST_REMOVE(wr, w_next); g_eli_freesession(wr); free(wr, M_ELI); G_ELI_DEBUG(1, "Thread %s exiting.", curthread->td_proc->p_comm); wakeup(&sc->sc_workers); mtx_unlock(&sc->sc_queue_mtx); kproc_exit(0); } while (sc->sc_flags & G_ELI_FLAG_SUSPEND) { if (sc->sc_inflight > 0) { G_ELI_DEBUG(0, "inflight=%d", sc->sc_inflight); /* * We still have inflight BIOs, so * sleep and retry. */ msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:inf", hz / 5); goto again; } /* * Suspend requested, mark the worker as * suspended and go to sleep. */ if (wr->w_active) { g_eli_freesession(wr); wr->w_active = FALSE; } wakeup(&sc->sc_workers); msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:suspend", 0); if (!wr->w_active && !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) { error = g_eli_newsession(wr); KASSERT(error == 0, ("g_eli_newsession() failed on resume (error=%d)", error)); wr->w_active = TRUE; } goto again; } msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0); continue; } if (bp->bio_pflags == G_ELI_NEW_BIO) atomic_add_int(&sc->sc_inflight, 1); mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_pflags == G_ELI_NEW_BIO) { bp->bio_pflags = 0; if (sc->sc_flags & G_ELI_FLAG_AUTH) { if (bp->bio_cmd == BIO_READ) g_eli_auth_read(sc, bp); else g_eli_auth_run(wr, bp); } else { if (bp->bio_cmd == BIO_READ) g_eli_crypto_read(sc, bp, 1); else g_eli_crypto_run(wr, bp); } } else { if (sc->sc_flags & G_ELI_FLAG_AUTH) g_eli_auth_run(wr, bp); else g_eli_crypto_run(wr, bp); } } } static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp, off_t offset, struct 
g_eli_metadata *md) { struct g_geom *gp; struct g_consumer *cp; u_char *buf = NULL; int error; g_topology_assert(); gp = g_new_geomf(mp, "eli:taste"); gp->start = g_eli_start; gp->access = g_std_access; /* * g_eli_read_metadata() is always called from the event thread. * Our geom is created and destroyed in the same event, so there * could be no orphan nor spoil event in the meantime. */ gp->orphan = g_eli_orphan_spoil_assert; gp->spoiled = g_eli_orphan_spoil_assert; cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error != 0) goto end; error = g_access(cp, 1, 0, 0); if (error != 0) goto end; g_topology_unlock(); buf = g_read_data(cp, offset, pp->sectorsize, &error); g_topology_lock(); if (buf == NULL) goto end; error = eli_metadata_decode(buf, md); if (error != 0) goto end; /* Metadata was read and decoded successfully. */ end: if (buf != NULL) g_free(buf); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, 0, 0); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); return (error); } int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp, struct g_eli_metadata *md) { return (g_eli_read_metadata_offset(mp, pp, pp->mediasize - pp->sectorsize, md)); } /* * The function is called when we had last close on provider and user requested * to close it when this situation occur. */ static void g_eli_last_close(void *arg, int flags __unused) { struct g_geom *gp; char gpname[64]; int error; g_topology_assert(); gp = arg; strlcpy(gpname, gp->name, sizeof(gpname)); error = g_eli_destroy(gp->softc, TRUE); KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).", gpname, error)); G_ELI_DEBUG(0, "Detached %s on last close.", gpname); } int g_eli_access(struct g_provider *pp, int dr, int dw, int de) { struct g_eli_softc *sc; struct g_geom *gp; gp = pp->geom; sc = gp->softc; if (dw > 0) { if (sc->sc_flags & G_ELI_FLAG_RO) { /* Deny write attempts. */ return (EROFS); } /* Someone is opening us for write, we need to remember that. 
*/ sc->sc_flags |= G_ELI_FLAG_WOPEN; return (0); } /* Is this the last close? */ if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0) return (0); /* * Automatically detach on last close if requested. */ if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) || (sc->sc_flags & G_ELI_FLAG_WOPEN)) { g_post_event(g_eli_last_close, gp, M_WAITOK, NULL); } return (0); } static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif } struct g_geom * g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, const struct g_eli_metadata *md, const u_char *mkey, int nkey) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; + struct g_geom_alias *gap; u_int i, threads; int dcw, error; G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX); KASSERT(eli_metadata_crypto_supported(md), ("%s: unsupported crypto for %s", __func__, bpp->name)); gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX); sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO); gp->start = g_eli_start; /* * Spoiling can happen even though we have the provider open * exclusively, e.g. through media change events. */ gp->spoiled = g_eli_orphan; gp->orphan = g_eli_orphan; gp->resize = g_eli_resize; gp->dumpconf = g_eli_dumpconf; /* * If detach-on-last-close feature is not enabled and we don't operate * on read-only provider, we can simply use g_std_access(). 
*/ if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO)) gp->access = g_eli_access; else gp->access = g_std_access; eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize); sc->sc_nkey = nkey; gp->softc = sc; sc->sc_geom = gp; bioq_init(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF); mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF); pp = NULL; cp = g_new_consumer(gp); error = g_attach(cp, bpp); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot attach to %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).", bpp->name, error); } goto failed; } /* * Keep provider open all the time, so we can run critical tasks, * like Master Keys deletion, without wondering if we can open * provider or not. * We don't open provider for writing only when user requested read-only * access. */ dcw = (sc->sc_flags & G_ELI_FLAG_RO) ? 0 : 1; error = g_access(cp, 1, dcw, 1); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot access %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot access %s (error=%d).", bpp->name, error); } goto failed; } /* * Remember the keys in our softc structure. 
*/ g_eli_mkey_propagate(sc, mkey); LIST_INIT(&sc->sc_workers); threads = g_eli_threads; if (threads == 0) threads = mp_ncpus; sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus); for (i = 0; i < threads; i++) { if (g_eli_cpu_is_disabled(i)) { G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.", bpp->name, i); continue; } wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO); wr->w_softc = sc; wr->w_number = i; wr->w_active = TRUE; error = g_eli_newsession(wr); if (error != 0) { free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } goto failed; } error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0, "g_eli[%u] %s", i, bpp->name); if (error != 0) { g_eli_freesession(wr); free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } goto failed; } LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next); } /* * Create decrypted provider. */ pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX); pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; + LIST_FOREACH(gap, &bpp->aliases, ga_next) + g_provider_add_alias(pp, "%s%s", gap->ga_alias, G_ELI_SUFFIX); g_error_provider(pp, 0); G_ELI_DEBUG(0, "Device %s created.", pp->name); G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo), sc->sc_ekeylen); if (sc->sc_flags & G_ELI_FLAG_AUTH) G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo)); G_ELI_DEBUG(0, " Crypto: %s", sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware"); return (gp); failed: mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); /* * Wait for kernel threads self destruction. 
*/ while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, -dcw, -1); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); return (NULL); } int g_eli_destroy(struct g_eli_softc *sc, boolean_t force) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (sc == NULL) return (ENXIO); gp = sc->sc_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_ELI_DEBUG(1, "Device %s is still open, so it " "cannot be definitely removed.", pp->name); sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; g_wither_provider(pp, ENXIO); return (EBUSY); } else { G_ELI_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); gp->softc = NULL; g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); G_ELI_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom_close(gp, ENXIO); return (0); } static int g_eli_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_eli_softc *sc; sc = gp->softc; return (g_eli_destroy(sc, FALSE)); } static int g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider) { u_char *keyfile, *data; char *file, name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL && i == 0) { /* * If there is only one keyfile, allow simpler name. 
*/ snprintf(name, sizeof(name), "%s:geli_keyfile", provider); keyfile = preload_search_by_type(name); } if (keyfile == NULL) return (i); /* Return number of loaded keyfiles. */ data = preload_fetch_addr(keyfile); if (data == NULL) { G_ELI_DEBUG(0, "Cannot find key file data for %s.", name); return (0); } size = preload_fetch_size(keyfile); if (size == 0) { G_ELI_DEBUG(0, "Cannot find key file size for %s.", name); return (0); } file = preload_search_info(keyfile, MODINFO_NAME); if (file == NULL) { G_ELI_DEBUG(0, "Cannot find key file name for %s.", name); return (0); } G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file, provider, name); g_eli_crypto_hmac_update(ctx, data, size); } } static void g_eli_keyfiles_clear(const char *provider) { u_char *keyfile, *data; char name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL) return; data = preload_fetch_addr(keyfile); size = preload_fetch_size(keyfile); if (data != NULL && size != 0) bzero(data, size); } } /* * Tasting is only made on boot. * We detect providers which should be attached before root is mounted. 
*/ static struct g_geom * g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_eli_metadata md; struct g_geom *gp; struct hmac_ctx ctx; char passphrase[256]; u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN]; u_int i, nkey, nkeyfiles, tries, showpass; int error; struct keybuf *keybuf; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); if (root_mounted() || g_eli_tries == 0) return (NULL); G_ELI_DEBUG(3, "Tasting %s.", pp->name); error = g_eli_read_metadata(mp, pp, &md); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_ELI_MAGIC) != 0) return (NULL); if (md.md_version > G_ELI_VERSION) { printf("geom_eli.ko module is too old to handle %s.\n", pp->name); return (NULL); } if (md.md_provsize != pp->mediasize) return (NULL); /* Should we attach it on boot? */ if (!(md.md_flags & G_ELI_FLAG_BOOT) && !(md.md_flags & G_ELI_FLAG_GELIBOOT)) return (NULL); if (md.md_keys == 0x00) { G_ELI_DEBUG(0, "No valid keys on %s.", pp->name); return (NULL); } if (!eli_metadata_crypto_supported(&md)) { G_ELI_DEBUG(0, "%s uses invalid or unsupported algorithms\n", pp->name); return (NULL); } if (md.md_iterations == -1) { /* If there is no passphrase, we try only once. */ tries = 1; } else { /* Ask for the passphrase no more than g_eli_tries times. */ tries = g_eli_tries; } if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, try all GELI keys. */ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { memcpy(key, keybuf->kb_ents[i].ke_data, sizeof(key)); if (g_eli_mkey_decrypt_any(&md, key, mkey, &nkey) == 0 ) { explicit_bzero(key, sizeof(key)); goto have_key; } } } } for (i = 0; i <= tries; i++) { g_eli_crypto_hmac_init(&ctx, NULL, 0); /* * Load all key files. */ nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name); if (nkeyfiles == 0 && md.md_iterations == -1) { /* * No key files and no passphrase, something is * definitely wrong here. 
* geli(8) doesn't allow for such situation, so assume * that there was really no passphrase and in that case * key files are no properly defined in loader.conf. */ G_ELI_DEBUG(0, "Found no key files in loader.conf for %s.", pp->name); return (NULL); } /* Ask for the passphrase if defined. */ if (md.md_iterations >= 0) { /* Try first with cached passphrase. */ if (i == 0) { if (!g_eli_boot_passcache) continue; memcpy(passphrase, cached_passphrase, sizeof(passphrase)); } else { printf("Enter passphrase for %s: ", pp->name); showpass = g_eli_visible_passphrase; if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0) showpass = GETS_ECHOPASS; cngets(passphrase, sizeof(passphrase), showpass); memcpy(cached_passphrase, passphrase, sizeof(passphrase)); } } /* * Prepare Derived-Key from the user passphrase. */ if (md.md_iterations == 0) { g_eli_crypto_hmac_update(&ctx, md.md_salt, sizeof(md.md_salt)); g_eli_crypto_hmac_update(&ctx, passphrase, strlen(passphrase)); explicit_bzero(passphrase, sizeof(passphrase)); } else if (md.md_iterations > 0) { u_char dkey[G_ELI_USERKEYLEN]; pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt, sizeof(md.md_salt), passphrase, md.md_iterations); bzero(passphrase, sizeof(passphrase)); g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey)); explicit_bzero(dkey, sizeof(dkey)); } g_eli_crypto_hmac_final(&ctx, key, 0); /* * Decrypt Master-Key. */ error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey); bzero(key, sizeof(key)); if (error == -1) { if (i == tries) { G_ELI_DEBUG(0, "Wrong key for %s. No tries left.", pp->name); g_eli_keyfiles_clear(pp->name); return (NULL); } if (i > 0) { G_ELI_DEBUG(0, "Wrong key for %s. Tries left: %u.", pp->name, tries - i); } /* Try again. 
*/ continue; } else if (error > 0) { G_ELI_DEBUG(0, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); g_eli_keyfiles_clear(pp->name); return (NULL); } g_eli_keyfiles_clear(pp->name); G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); break; } have_key: /* * We have correct key, let's attach provider. */ gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey); bzero(mkey, sizeof(mkey)); bzero(&md, sizeof(md)); if (gp == NULL) { G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name, G_ELI_SUFFIX); return (NULL); } return (gp); } static void g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_eli_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL || cp != NULL) return; /* Nothing here. */ sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_total); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_allocated); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_cat(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if (sc->sc_flags & (flag)) { \ if (!first) \ sbuf_cat(sb, ", "); \ else \ first = 0; \ sbuf_cat(sb, name); \ } \ } while (0) ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND"); ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY"); ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER"); ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME"); ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT"); ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH"); ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH"); ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH"); ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN"); ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY"); ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY"); ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE"); ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT"); ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS"); ADD_FLAG(G_ELI_FLAG_AUTORESIZE, "AUTORESIZE"); #undef ADD_FLAG } sbuf_cat(sb, "\n"); if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) { sbuf_printf(sb, "%s%u\n", indent, 
sc->sc_nkey); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_version); sbuf_printf(sb, "%s", indent); switch (sc->sc_crypto) { case G_ELI_CRYPTO_HW: sbuf_cat(sb, "hardware"); break; case G_ELI_CRYPTO_SW: sbuf_cat(sb, "software"); break; default: sbuf_cat(sb, "UNKNOWN"); break; } sbuf_cat(sb, "\n"); if (sc->sc_flags & G_ELI_FLAG_AUTH) { sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_aalgo)); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_ekeylen); sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_ealgo)); sbuf_printf(sb, "%s%s\n", indent, (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE"); } static void g_eli_shutdown_pre_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp; struct g_eli_softc *sc; int error; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { sc = gp->softc; if (sc == NULL) continue; pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name)); if (pp->acr + pp->acw + pp->ace == 0) error = g_eli_destroy(sc, TRUE); else { sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; } } g_topology_unlock(); } static void g_eli_init(struct g_class *mp) { g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); if (g_eli_pre_sync == NULL) G_ELI_DEBUG(0, "Warning! Cannot register shutdown event."); } static void g_eli_fini(struct g_class *mp) { if (g_eli_pre_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync); } DECLARE_GEOM_CLASS(g_eli_class, g_eli); MODULE_DEPEND(g_eli, crypto, 1, 1, 1); MODULE_VERSION(geom_eli, 0); Index: head/sys/geom/geom.h =================================================================== --- head/sys/geom/geom.h (revision 361014) +++ head/sys/geom/geom.h (revision 361015) @@ -1,435 +1,436 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. 
* All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _GEOM_GEOM_H_ #define _GEOM_GEOM_H_ #include #include #include #include #include #include #include struct g_class; struct g_geom; struct g_consumer; struct g_provider; struct g_stat; struct thread; struct bio; struct sbuf; struct gctl_req; struct g_configargs; struct disk_zone_args; typedef int g_config_t (struct g_configargs *ca); typedef void g_ctl_req_t (struct gctl_req *, struct g_class *cp, char const *verb); typedef int g_ctl_create_geom_t (struct gctl_req *, struct g_class *cp, struct g_provider *pp); typedef int g_ctl_destroy_geom_t (struct gctl_req *, struct g_class *cp, struct g_geom *gp); typedef int g_ctl_config_geom_t (struct gctl_req *, struct g_geom *gp, const char *verb); typedef void g_init_t (struct g_class *mp); typedef void g_fini_t (struct g_class *mp); typedef struct g_geom * g_taste_t (struct g_class *, struct g_provider *, int flags); typedef int g_ioctl_t(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td); #define G_TF_NORMAL 0 #define G_TF_INSIST 1 #define G_TF_TRANSPARENT 2 typedef int g_access_t (struct g_provider *, int, int, int); /* XXX: not sure about the thread arg */ typedef void g_orphan_t (struct g_consumer *); typedef void g_start_t (struct bio *); typedef void g_spoiled_t (struct g_consumer *); typedef void g_attrchanged_t (struct g_consumer *, const char *attr); typedef void g_provgone_t (struct g_provider *); typedef void g_dumpconf_t (struct sbuf *, const char *indent, struct g_geom *, struct g_consumer *, struct g_provider *); typedef void g_resize_t(struct g_consumer *cp); /* * The g_class structure describes a transformation class. In other words * all BSD disklabel handlers share one g_class, all MBR handlers share * one common g_class and so on. * Certain operations are instantiated on the class, most notably the * taste and config_geom functions. 
*/ struct g_class { const char *name; u_int version; u_int spare0; g_taste_t *taste; g_config_t *config; g_ctl_req_t *ctlreq; g_init_t *init; g_fini_t *fini; g_ctl_destroy_geom_t *destroy_geom; /* * Default values for geom methods */ g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare1; void *spare2; /* * The remaining elements are private */ LIST_ENTRY(g_class) class; LIST_HEAD(,g_geom) geom; }; -/* - * The g_geom_alias is a list node for aliases for the geom name - * for device node creation. - */ -struct g_geom_alias { - LIST_ENTRY(g_geom_alias) ga_next; - const char *ga_alias; -}; - #define G_VERSION_00 0x19950323 #define G_VERSION_01 0x20041207 /* add fflag to g_ioctl_t */ #define G_VERSION G_VERSION_01 /* * The g_geom is an instance of a g_class. */ struct g_geom { char *name; struct g_class *class; LIST_ENTRY(g_geom) geom; LIST_HEAD(,g_consumer) consumer; LIST_HEAD(,g_provider) provider; TAILQ_ENTRY(g_geom) geoms; /* XXX: better name */ int rank; g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare0; void *spare1; void *softc; unsigned flags; #define G_GEOM_WITHER 0x01 #define G_GEOM_VOLATILE_BIO 0x02 #define G_GEOM_IN_ACCESS 0x04 #define G_GEOM_ACCESS_WAIT 0x08 - LIST_HEAD(,g_geom_alias) aliases; }; /* * The g_bioq is a queue of struct bio's. * XXX: possibly collection point for statistics. * XXX: should (possibly) be collapsed with sys/bio.h::bio_queue_head. */ struct g_bioq { TAILQ_HEAD(, bio) bio_queue; struct mtx bio_queue_lock; int bio_queue_length; }; /* * A g_consumer is an attachment point for a g_provider. One g_consumer * can only be attached to one g_provider, but multiple g_consumers * can be attached to one g_provider. 
*/ struct g_consumer { struct g_geom *geom; LIST_ENTRY(g_consumer) consumer; struct g_provider *provider; LIST_ENTRY(g_consumer) consumers; /* XXX: better name */ int acr, acw, ace; int flags; #define G_CF_SPOILED 0x1 #define G_CF_ORPHAN 0x4 #define G_CF_DIRECT_SEND 0x10 #define G_CF_DIRECT_RECEIVE 0x20 struct devstat *stat; u_int nstart, nend; /* Two fields for the implementing class to use */ void *private; u_int index; }; /* + * The g_geom_alias is a list node for aliases for the provider name for device + * node creation. + */ +struct g_geom_alias { + LIST_ENTRY(g_geom_alias) ga_next; + const char *ga_alias; +}; + +/* * A g_provider is a "logical disk". */ struct g_provider { char *name; LIST_ENTRY(g_provider) provider; struct g_geom *geom; LIST_HEAD(,g_consumer) consumers; int acr, acw, ace; int error; TAILQ_ENTRY(g_provider) orphan; off_t mediasize; u_int sectorsize; off_t stripesize; off_t stripeoffset; struct devstat *stat; u_int spare1; u_int spare2; u_int flags; #define G_PF_WITHER 0x2 #define G_PF_ORPHAN 0x4 #define G_PF_ACCEPT_UNMAPPED 0x8 #define G_PF_DIRECT_SEND 0x10 #define G_PF_DIRECT_RECEIVE 0x20 + LIST_HEAD(,g_geom_alias) aliases; /* Two fields for the implementing class to use */ void *private; u_int index; }; /* BIO_GETATTR("GEOM::setstate") argument values. */ #define G_STATE_FAILED 0 #define G_STATE_REBUILD 1 #define G_STATE_RESYNC 2 #define G_STATE_ACTIVE 3 /* geom_dev.c */ struct cdev; void g_dev_print(void); void g_dev_physpath_changed(void); struct g_provider *g_dev_getprovider(struct cdev *dev); /* geom_dump.c */ void (g_trace)(int level, const char *, ...) __printflike(2, 3); #define G_T_TOPOLOGY 0x01 #define G_T_BIO 0x02 #define G_T_ACCESS 0x04 extern int g_debugflags; #define G_F_FOOTSHOOTING 0x10 #define G_F_DISKIOCTL 0x40 #define G_F_CTLDUMP 0x80 #define g_trace(level, fmt, ...) 
do { \ if (__predict_false(g_debugflags & (level))) \ (g_trace)(level, fmt, ## __VA_ARGS__); \ } while (0) /* geom_event.c */ typedef void g_event_t(void *, int flag); #define EV_CANCEL 1 int g_post_event(g_event_t *func, void *arg, int flag, ...); int g_waitfor_event(g_event_t *func, void *arg, int flag, ...); void g_cancel_event(void *ref); int g_attr_changed(struct g_provider *pp, const char *attr, int flag); int g_media_changed(struct g_provider *pp, int flag); int g_media_gone(struct g_provider *pp, int flag); void g_orphan_provider(struct g_provider *pp, int error); void g_waitidlelock(void); /* geom_subr.c */ int g_access(struct g_consumer *cp, int nread, int nwrite, int nexcl); int g_attach(struct g_consumer *cp, struct g_provider *pp); int g_compare_names(const char *namea, const char *nameb); void g_destroy_consumer(struct g_consumer *cp); void g_destroy_geom(struct g_geom *pp); void g_destroy_provider(struct g_provider *pp); void g_detach(struct g_consumer *cp); void g_error_provider(struct g_provider *pp, int error); struct g_provider *g_provider_by_name(char const *arg); -void g_geom_add_alias(struct g_geom *gp, const char *alias); int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len); #define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof *(v)) int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len); int g_handleattr_int(struct bio *bp, const char *attribute, int val); int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val); int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val); int g_handleattr_str(struct bio *bp, const char *attribute, const char *str); struct g_consumer * g_new_consumer(struct g_geom *gp); struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) __printflike(2, 3); struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) 
+ __printflike(2, 3); +void g_provider_add_alias(struct g_provider *pp, const char *fmt, ...) __printflike(2, 3); void g_resize_provider(struct g_provider *pp, off_t size); int g_retaste(struct g_class *mp); void g_spoil(struct g_provider *pp, struct g_consumer *cp); int g_std_access(struct g_provider *pp, int dr, int dw, int de); void g_std_done(struct bio *bp); void g_std_spoiled(struct g_consumer *cp); void g_wither_geom(struct g_geom *gp, int error); void g_wither_geom_close(struct g_geom *gp, int error); void g_wither_provider(struct g_provider *pp, int error); #if defined(DIAGNOSTIC) || defined(DDB) int g_valid_obj(void const *ptr); #endif #ifdef DIAGNOSTIC #define G_VALID_CLASS(foo) \ KASSERT(g_valid_obj(foo) == 1, ("%p is not a g_class", foo)) #define G_VALID_GEOM(foo) \ KASSERT(g_valid_obj(foo) == 2, ("%p is not a g_geom", foo)) #define G_VALID_CONSUMER(foo) \ KASSERT(g_valid_obj(foo) == 3, ("%p is not a g_consumer", foo)) #define G_VALID_PROVIDER(foo) \ KASSERT(g_valid_obj(foo) == 4, ("%p is not a g_provider", foo)) #else #define G_VALID_CLASS(foo) do { } while (0) #define G_VALID_GEOM(foo) do { } while (0) #define G_VALID_CONSUMER(foo) do { } while (0) #define G_VALID_PROVIDER(foo) do { } while (0) #endif int g_modevent(module_t, int, void *); /* geom_io.c */ struct bio * g_clone_bio(struct bio *); struct bio * g_duplicate_bio(struct bio *); void g_destroy_bio(struct bio *); void g_io_deliver(struct bio *bp, int error); int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr); int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp); int g_io_flush(struct g_consumer *cp); int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp); void g_io_request(struct bio *bp, struct g_consumer *cp); struct bio *g_new_bio(void); struct bio *g_alloc_bio(void); void g_reset_bio(struct bio *); void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error); int g_write_data(struct 
g_consumer *cp, off_t offset, void *ptr, off_t length); int g_delete_data(struct g_consumer *cp, off_t offset, off_t length); void g_format_bio(struct sbuf *, const struct bio *bp); void g_print_bio(const char *prefix, const struct bio *bp, const char *fmtsuffix, ...) __printflike(3, 4); int g_use_g_read_data(void *, off_t, void **, int); int g_use_g_write_data(void *, off_t, void *, int); /* geom_kern.c / geom_kernsim.c */ #ifdef _KERNEL extern struct sx topology_lock; struct g_kerneldump { off_t offset; off_t length; struct dumperinfo di; }; MALLOC_DECLARE(M_GEOM); static __inline void * g_malloc(int size, int flags) { void *p; p = malloc(size, M_GEOM, flags); return (p); } static __inline void g_free(void *ptr) { #ifdef DIAGNOSTIC if (sx_xlocked(&topology_lock)) { KASSERT(g_valid_obj(ptr) == 0, ("g_free(%p) of live object, type %d", ptr, g_valid_obj(ptr))); } #endif free(ptr, M_GEOM); } #define g_topology_lock() \ do { \ sx_xlock(&topology_lock); \ } while (0) #define g_topology_try_lock() sx_try_xlock(&topology_lock) #define g_topology_unlock() \ do { \ sx_xunlock(&topology_lock); \ } while (0) #define g_topology_locked() sx_xlocked(&topology_lock) #define g_topology_assert() \ do { \ sx_assert(&topology_lock, SX_XLOCKED); \ } while (0) #define g_topology_assert_not() \ do { \ sx_assert(&topology_lock, SX_UNLOCKED); \ } while (0) #define g_topology_sleep(chan, timo) \ sx_sleep(chan, &topology_lock, 0, "gtopol", timo) #define DECLARE_GEOM_CLASS(class, name) \ static moduledata_t name##_mod = { \ #name, g_modevent, &class \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); int g_is_geom_thread(struct thread *td); #endif /* _KERNEL */ /* geom_ctl.c */ int gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len); void gctl_set_param_err(struct gctl_req *req, const char *param, void const *ptr, int len); void *gctl_get_param(struct gctl_req *req, const char *param, int *len); char const *gctl_get_asciiparam(struct 
gctl_req *req, const char *param); void *gctl_get_paraml(struct gctl_req *req, const char *param, int len); void *gctl_get_paraml_opt(struct gctl_req *req, const char *param, int len); int gctl_error(struct gctl_req *req, const char *fmt, ...) __printflike(2, 3); struct g_class *gctl_get_class(struct gctl_req *req, char const *arg); struct g_geom *gctl_get_geom(struct gctl_req *req, struct g_class *mpr, char const *arg); struct g_provider *gctl_get_provider(struct gctl_req *req, char const *arg); #endif /* _GEOM_GEOM_H_ */ Index: head/sys/geom/geom_dev.c =================================================================== --- head/sys/geom/geom_dev.c (revision 361014) +++ head/sys/geom/geom_dev.c (revision 361015) @@ -1,899 +1,899 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct g_dev_softc { struct mtx sc_mtx; struct cdev *sc_dev; struct cdev *sc_alias; int sc_open; u_int sc_active; #define SC_A_DESTROY (1 << 31) #define SC_A_OPEN (1 << 30) #define SC_A_ACTIVE (SC_A_OPEN - 1) }; static d_open_t g_dev_open; static d_close_t g_dev_close; static d_strategy_t g_dev_strategy; static d_ioctl_t g_dev_ioctl; static struct cdevsw g_dev_cdevsw = { .d_version = D_VERSION, .d_open = g_dev_open, .d_close = g_dev_close, .d_read = physread, .d_write = physwrite, .d_ioctl = g_dev_ioctl, .d_strategy = g_dev_strategy, .d_name = "g_dev", .d_flags = D_DISK | D_TRACKCLOSE, }; static g_init_t g_dev_init; static g_fini_t g_dev_fini; static g_taste_t g_dev_taste; static g_orphan_t g_dev_orphan; static g_attrchanged_t g_dev_attrchanged; static g_resize_t g_dev_resize; static struct g_class g_dev_class = { .name = "DEV", .version = G_VERSION, .init = g_dev_init, .fini = g_dev_fini, .taste = g_dev_taste, .orphan = g_dev_orphan, .attrchanged = g_dev_attrchanged, .resize = g_dev_resize }; /* * We target 
262144 (8 x 32768) sectors by default as this significantly
 * increases the throughput on commonly used SSD's with a marginal
 * increase in non-interruptible request latency.
 */
static uint64_t g_dev_del_max_sectors = 262144;
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, dev, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_DEV stuff");
SYSCTL_QUAD(_kern_geom_dev, OID_AUTO, delete_max_sectors, CTLFLAG_RW,
    &g_dev_del_max_sectors, 0, "Maximum number of sectors in a single "
    "delete request sent to the provider. Larger requests are chunked "
    "so they can be interrupted. (0 = disable chunking)");

/*
 * Name of the requested dump device, taken from the kernel environment in
 * g_dev_init().  It is released and reset to NULL by g_dev_fini() and by
 * init_dumpdev() once a matching device has been configured.
 */
static char *dumpdev = NULL;

/* Class init method: fetch the dumpdev kernel environment variable. */
static void
g_dev_init(struct g_class *mp)
{

	dumpdev = kern_getenv("dumpdev");
}

/* Class fini method: release the string obtained in g_dev_init(). */
static void
g_dev_fini(struct g_class *mp)
{

	freeenv(dumpdev);
	dumpdev = NULL;
}

/*
 * g_dev_setdumpdev()
 *
 * Configure dev as a kernel dump device as described by kda.
 * The consumer stored in dev->si_drv2 is asked for the GEOM::kerneldump
 * attribute covering offset 0 .. OFF_MAX; the dumperinfo returned in kd.di
 * is then handed to dumper_insert() together with the device name.
 * On success the cdev is flagged SI_DUMPDEV.
 *
 * Callers must not pass KDA_REMOVE as kda_index (asserted with MPASS).
 * Returns 0 on success, otherwise the error from g_io_getattr() or
 * dumper_insert().
 */
static int
g_dev_setdumpdev(struct cdev *dev, struct diocskerneldump_arg *kda)
{
	struct g_kerneldump kd;
	struct g_consumer *cp;
	int error, len;

	MPASS(dev != NULL && kda != NULL);
	MPASS(kda->kda_index != KDA_REMOVE);

	cp = dev->si_drv2;
	len = sizeof(kd);
	memset(&kd, 0, len);
	kd.offset = 0;
	kd.length = OFF_MAX;
	error = g_io_getattr("GEOM::kerneldump", cp, &len, &kd);
	if (error != 0)
		return (error);

	error = dumper_insert(&kd.di, devtoname(dev), kda);
	if (error == 0)
		dev->si_flags |= SI_DUMPDEV;

	return (error);
}

/*
 * init_dumpdev()
 *
 * If dev is the device named by the dumpdev variable, set it up as a dump
 * device (appended with KDA_APPEND).  The name matches either verbatim or
 * with a leading /dev/ prefix on the dumpdev string.
 *
 * Returns 0 when dumpdev is unset or does not name this device; otherwise
 * the error from g_access() or g_dev_setdumpdev().  Read access on the
 * consumer is held only for the duration of the setup call.  After a
 * successful setup dumpdev is released so later devices are not matched.
 */
static int
init_dumpdev(struct cdev *dev)
{
	struct diocskerneldump_arg kda;
	struct g_consumer *cp;
	const char *devprefix = "/dev/", *devname;
	int error;
	size_t len;

	bzero(&kda, sizeof(kda));
	kda.kda_index = KDA_APPEND;

	if (dumpdev == NULL)
		return (0);

	len = strlen(devprefix);
	devname = devtoname(dev);
	/* Reject unless the names match, with or without the prefix. */
	if (strcmp(devname, dumpdev) != 0 &&
	    (strncmp(dumpdev, devprefix, len) != 0 ||
	    strcmp(devname, dumpdev + len) != 0))
		return (0);

	cp = (struct g_consumer *)dev->si_drv2;
	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		return (error);

	error = g_dev_setdumpdev(dev, &kda);
	if (error == 0) {
		freeenv(dumpdev);
		dumpdev = NULL;
	}

	/* Drop the temporary read reference taken above. */
	(void)g_access(cp, -1, 0, 0);

	return (error);
}

static void
g_dev_destroy(void *arg, int flags __unused) { struct g_consumer *cp; struct g_geom *gp; struct g_dev_softc *sc; char buf[SPECNAMELEN + 6]; g_topology_assert(); cp = arg; gp = cp->geom; sc = cp->private; g_trace(G_T_TOPOLOGY, "g_dev_destroy(%p(%s))", cp, gp->name); snprintf(buf, sizeof(buf), "cdev=%s", gp->name); devctl_notify_f("GEOM", "DEV", "DESTROY", buf, M_WAITOK); if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); mtx_destroy(&sc->sc_mtx); g_free(sc); } void g_dev_print(void) { struct g_geom *gp; char const *p = ""; LIST_FOREACH(gp, &g_dev_class.geom, geom) { printf("%s%s", p, gp->name); p = " "; } printf("\n"); } static void g_dev_set_physpath(struct g_consumer *cp) { struct g_dev_softc *sc; char *physpath; int error, physpath_len; if (g_access(cp, 1, 0, 0) != 0) return; sc = cp->private; physpath_len = MAXPATHLEN; physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); g_access(cp, -1, 0, 0); if (error == 0 && strlen(physpath) != 0) { struct cdev *dev, *old_alias_dev; struct cdev **alias_devp; dev = sc->sc_dev; old_alias_dev = sc->sc_alias; alias_devp = (struct cdev **)&sc->sc_alias; make_dev_physpath_alias(MAKEDEV_WAITOK, alias_devp, dev, old_alias_dev, physpath); } else if (sc->sc_alias) { destroy_dev((struct cdev *)sc->sc_alias); sc->sc_alias = NULL; } g_free(physpath); } static void g_dev_set_media(struct g_consumer *cp) { struct g_dev_softc *sc; struct cdev *dev; char buf[SPECNAMELEN + 6]; sc = cp->private; dev = sc->sc_dev; snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name); devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK); devctl_notify_f("GEOM", "DEV", "MEDIACHANGE", buf, M_WAITOK); dev = sc->sc_alias; if (dev != NULL) { snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name); devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK); devctl_notify_f("GEOM", "DEV", 
"MEDIACHANGE", buf, M_WAITOK); } } static void g_dev_attrchanged(struct g_consumer *cp, const char *attr) { if (strcmp(attr, "GEOM::media") == 0) { g_dev_set_media(cp); return; } if (strcmp(attr, "GEOM::physpath") == 0) { g_dev_set_physpath(cp); return; } } static void g_dev_resize(struct g_consumer *cp) { char buf[SPECNAMELEN + 6]; snprintf(buf, sizeof(buf), "cdev=%s", cp->provider->name); devctl_notify_f("GEOM", "DEV", "SIZECHANGE", buf, M_WAITOK); } struct g_provider * g_dev_getprovider(struct cdev *dev) { struct g_consumer *cp; g_topology_assert(); if (dev == NULL) return (NULL); if (dev->si_devsw != &g_dev_cdevsw) return (NULL); cp = dev->si_drv2; return (cp->provider); } static struct g_geom * g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) { struct g_geom *gp; struct g_geom_alias *gap; struct g_consumer *cp; struct g_dev_softc *sc; int error; struct cdev *dev, *adev; char buf[SPECNAMELEN + 6]; struct make_dev_args args; g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); gp = g_new_geomf(mp, "%s", pp->name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF); cp = g_new_consumer(gp); cp->private = sc; cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); KASSERT(error == 0, ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error)); make_dev_args_init(&args); args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; args.mda_devsw = &g_dev_cdevsw; args.mda_cr = NULL; args.mda_uid = UID_ROOT; args.mda_gid = GID_OPERATOR; args.mda_mode = 0640; args.mda_si_drv1 = sc; args.mda_si_drv2 = cp; error = make_dev_s(&args, &sc->sc_dev, "%s", gp->name); if (error != 0) { printf("%s: make_dev_p() failed (gp->name=%s, error=%d)\n", __func__, gp->name, error); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); mtx_destroy(&sc->sc_mtx); g_free(sc); return (NULL); } dev = sc->sc_dev; dev->si_flags |= SI_UNMAPPED; dev->si_iosize_max = MAXPHYS; 
error = init_dumpdev(dev);
	/* A dump device setup failure is logged but does not fail the taste. */
	if (error != 0)
		printf("%s: init_dumpdev() failed (gp->name=%s, error=%d)\n",
		    __func__, gp->name, error);

	g_dev_attrchanged(cp, "GEOM::physpath");
	snprintf(buf, sizeof(buf), "cdev=%s", gp->name);
	devctl_notify_f("GEOM", "DEV", "CREATE", buf, M_WAITOK);
	/*
	 * Now add all the aliases for this drive.
	 * (Diff hunk below: the alias list is now read from the provider
	 * instead of from the provider's geom.)
	 */
-	LIST_FOREACH(gap, &pp->geom->aliases, ga_next) {
+	LIST_FOREACH(gap, &pp->aliases, ga_next) {
		error = make_dev_alias_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &adev, dev,
		    "%s", gap->ga_alias);
		/* A failed alias is reported and skipped; the rest still run. */
		if (error) {
			printf("%s: make_dev_alias_p() failed (name=%s, error=%d)\n",
			    __func__, gap->ga_alias, error);
			continue;
		}
		snprintf(buf, sizeof(buf), "cdev=%s", gap->ga_alias);
		devctl_notify_f("GEOM", "DEV", "CREATE", buf, M_WAITOK);
	}

	return (gp);
}

/*
 * g_dev_open()
 *
 * d_open method.  Translates the open flags into access-count deltas
 * (r for FREAD, w for FWRITE; e stays 0 unless "notyet" is defined) and
 * requests them from GEOM under the topology lock.
 *
 * Returns EINVAL when no delta results, the securelevel_ge() error for
 * write opens refused at securelevel 2 and above, or the g_access() result.
 * On success the per-device open count and the SC_A_OPEN bit of sc_active
 * are updated under sc_mtx.
 */
static int
g_dev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct g_consumer *cp;
	struct g_dev_softc *sc;
	int error, r, w, e;

	cp = dev->si_drv2;
	g_trace(G_T_ACCESS, "g_dev_open(%s, %d, %d, %p)",
	    cp->geom->name, flags, fmt, td);

	r = flags & FREAD ? 1 : 0;
	w = flags & FWRITE ? 1 : 0;
#ifdef notyet
	e = flags & O_EXCL ? 1 : 0;
#else
	e = 0;
#endif

	/*
	 * This happens on attempt to open a device node with O_EXEC.
	 */
	if (r + w + e == 0)
		return (EINVAL);

	if (w) {
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disks.
		 */
		error = securelevel_ge(td->td_ucred, 2);
		if (error)
			return (error);
	}
	g_topology_lock();
	error = g_access(cp, r, w, e);
	g_topology_unlock();
	if (error == 0) {
		sc = dev->si_drv1;
		mtx_lock(&sc->sc_mtx);
		/*
		 * First open while requests are still counted in sc_active:
		 * wake any thread sleeping on sc_active (g_dev_close() sleeps
		 * there waiting for outstanding I/O).
		 */
		if (sc->sc_open == 0 && (sc->sc_active & SC_A_ACTIVE) != 0)
			wakeup(&sc->sc_active);
		sc->sc_open += r + w + e;
		/* Mirror "is anything open" into the SC_A_OPEN bit. */
		if (sc->sc_open == 0)
			atomic_clear_int(&sc->sc_active, SC_A_OPEN);
		else
			atomic_set_int(&sc->sc_active, SC_A_OPEN);
		mtx_unlock(&sc->sc_mtx);
	}
	return (error);
}

/*
 * g_dev_close()
 *
 * d_close method; the counterpart of g_dev_open().  Computes negative
 * access deltas from the flags, updates the open count and SC_A_OPEN bit
 * under sc_mtx and, when this was the last open, sleeps in hz / 10 steps
 * until the in-flight request count in sc_active drains (or until a new
 * open shows up).  Only then are the access counts released via g_access().
 *
 * Returns EINVAL when the flags yield no delta, otherwise the g_access()
 * result.
 */
static int
g_dev_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct g_consumer *cp;
	struct g_dev_softc *sc;
	int error, r, w, e;

	cp = dev->si_drv2;
	g_trace(G_T_ACCESS, "g_dev_close(%s, %d, %d, %p)",
	    cp->geom->name, flags, fmt, td);

	r = flags & FREAD ? -1 : 0;
	w = flags & FWRITE ? -1 : 0;
#ifdef notyet
	e = flags & O_EXCL ? -1 : 0;
#else
	e = 0;
#endif

	/*
	 * The vgonel(9) - caused by eg. forced unmount of devfs - calls
	 * VOP_CLOSE(9) on devfs vnode without any FREAD or FWRITE flags,
	 * which would result in zero deltas, which in turn would cause
	 * panic in g_access(9).
	 *
	 * Note that we cannot zero the counters (ie. do "r = cp->acr"
	 * etc) instead, because the consumer might be opened in another
	 * devfs instance.
	 */
	if (r + w + e == 0)
		return (EINVAL);

	sc = dev->si_drv1;
	mtx_lock(&sc->sc_mtx);
	sc->sc_open += r + w + e;
	if (sc->sc_open == 0)
		atomic_clear_int(&sc->sc_active, SC_A_OPEN);
	else
		atomic_set_int(&sc->sc_active, SC_A_OPEN);
	/* Last close: wait for outstanding requests before giving up access. */
	while (sc->sc_open == 0 && (sc->sc_active & SC_A_ACTIVE) != 0)
		msleep(&sc->sc_active, &sc->sc_mtx, 0, "g_dev_close", hz / 10);
	mtx_unlock(&sc->sc_mtx);
	g_topology_lock();
	error = g_access(cp, r, w, e);
	g_topology_unlock();
	return (error);
}

/*
 * g_dev_ioctl()
 *
 * d_ioctl method.  Requests are refused up front with ENXIO when the
 * consumer is orphaned and with pp->error when the provider carries an
 * error.  The disk ioctls handled in the switch are answered from provider
 * fields or through GEOM attribute queries; any other command is forwarded
 * to the ioctl method of the provider's geom, or fails with ENOIOCTL when
 * there is none.
 *
 * i holds IOCPARM_LEN(cmd) and is passed as the buffer length to the
 * attribute queries that write into data.
 */
static int
g_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
{
	struct g_consumer *cp;
	struct g_provider *pp;
	off_t offset, length, chunk, odd;
	int i, error;
#ifdef COMPAT_FREEBSD12
	struct diocskerneldump_arg kda_copy;
#endif

	cp = dev->si_drv2;
	pp = cp->provider;

	/* If consumer or provider is dying, don't disturb. */
	if (cp->flags & G_CF_ORPHAN)
		return (ENXIO);
	if (pp->error)
		return (pp->error);

	error = 0;
	KASSERT(cp->acr || cp->acw,
	    ("Consumer with zero access count in g_dev_ioctl"));

	i = IOCPARM_LEN(cmd);
	switch (cmd) {
	/* A zero sector size, media size, etc. is reported as ENOENT. */
	case DIOCGSECTORSIZE:
		*(u_int *)data = pp->sectorsize;
		if (*(u_int *)data == 0)
			error = ENOENT;
		break;
	case DIOCGMEDIASIZE:
		*(off_t *)data = pp->mediasize;
		if (*(off_t *)data == 0)
			error = ENOENT;
		break;
	case DIOCGFWSECTORS:
		error = g_io_getattr("GEOM::fwsectors", cp, &i, data);
		if (error == 0 && *(u_int *)data == 0)
			error = ENOENT;
		break;
	case DIOCGFWHEADS:
		error = g_io_getattr("GEOM::fwheads", cp, &i, data);
		if (error == 0 && *(u_int *)data == 0)
			error = ENOENT;
		break;
	case DIOCGFRONTSTUFF:
		error = g_io_getattr("GEOM::frontstuff", cp, &i, data);
		break;
#ifdef COMPAT_FREEBSD11
	/* Old ABI: data is a u_int, non-zero enables, zero removes all. */
	case DIOCSKERNELDUMP_FREEBSD11:
	    {
		struct diocskerneldump_arg kda;

		gone_in(13, "FreeBSD 11.x ABI compat");

		bzero(&kda, sizeof(kda));
		kda.kda_encryption = KERNELDUMP_ENC_NONE;
		kda.kda_index = (*(u_int *)data ?
0 : KDA_REMOVE_ALL);
		if (kda.kda_index == KDA_REMOVE_ALL)
			error = dumper_remove(devtoname(dev), &kda);
		else
			error = g_dev_setdumpdev(dev, &kda);
		break;
	    }
#endif
#ifdef COMPAT_FREEBSD12
	/*
	 * FreeBSD 12 ABI: convert the old argument into kda_copy, wipe the
	 * caller's buffer and continue in the current DIOCSKERNELDUMP case.
	 */
	case DIOCSKERNELDUMP_FREEBSD12:
	    {
		struct diocskerneldump_arg_freebsd12 *kda12;

		gone_in(14, "FreeBSD 12.x ABI compat");

		kda12 = (void *)data;
		memcpy(&kda_copy, kda12, sizeof(kda_copy));
		kda_copy.kda_index = (kda12->kda12_enable ?
		    0 : KDA_REMOVE_ALL);

		explicit_bzero(kda12, sizeof(*kda12));
		/* Kludge to pass kda_copy to kda in fallthrough. */
		data = (void *)&kda_copy;
	    }
	    /* FALLTHROUGH */
#endif
	/*
	 * Configure or remove a kernel dump device.  The argument is wiped
	 * with explicit_bzero() on every exit path because it can carry
	 * encryption key material.
	 */
	case DIOCSKERNELDUMP:
	    {
		struct diocskerneldump_arg *kda;
		uint8_t *encryptedkey;

		kda = (struct diocskerneldump_arg *)data;
		/* Any of the removal indices: just drop the dumper(s). */
		if (kda->kda_index == KDA_REMOVE_ALL ||
		    kda->kda_index == KDA_REMOVE_DEV ||
		    kda->kda_index == KDA_REMOVE) {
			error = dumper_remove(devtoname(dev), kda);
			explicit_bzero(kda, sizeof(*kda));
			break;
		}

		if (kda->kda_encryption != KERNELDUMP_ENC_NONE) {
			/* Bound the key size before allocating and copying. */
			if (kda->kda_encryptedkeysize == 0 ||
			    kda->kda_encryptedkeysize >
			    KERNELDUMP_ENCKEY_MAX_SIZE) {
				explicit_bzero(kda, sizeof(*kda));
				return (EINVAL);
			}
			/* Pull the key in through copyin() into a kernel buffer. */
			encryptedkey = malloc(kda->kda_encryptedkeysize, M_TEMP,
			    M_WAITOK);
			error = copyin(kda->kda_encryptedkey, encryptedkey,
			    kda->kda_encryptedkeysize);
		} else {
			encryptedkey = NULL;
		}
		if (error == 0) {
			/* Hand the kernel copy of the key to the dump setup. */
			kda->kda_encryptedkey = encryptedkey;
			error = g_dev_setdumpdev(dev, kda);
		}
		if (encryptedkey != NULL) {
			explicit_bzero(encryptedkey, kda->kda_encryptedkeysize);
			free(encryptedkey, M_TEMP);
		}
		explicit_bzero(kda, sizeof(*kda));
		break;
	    }
	case DIOCGFLUSH:
		error = g_io_flush(cp);
		break;
	/*
	 * Delete a byte range: data points at two off_t values, the offset
	 * followed by the length.  Both must be sector aligned and the
	 * length must be positive.
	 *
	 * NOTE(review): sectorsize is used as a divisor here, while
	 * DIOCGSECTORSIZE above treats 0 as a possible value - confirm this
	 * case cannot be reached with a zero sector size.
	 */
	case DIOCGDELETE:
		offset = ((off_t *)data)[0];
		length = ((off_t *)data)[1];
		if ((offset % pp->sectorsize) != 0 ||
		    (length % pp->sectorsize) != 0 || length <= 0) {
			printf("%s: offset=%jd length=%jd\n", __func__, offset,
			    length);
			error = EINVAL;
			break;
		}
		if ((pp->mediasize > 0) && (offset >= pp->mediasize)) {
			/*
			 * Catch out-of-bounds requests here. The problem is
			 * that due to historical GEOM I/O implementation
			 * peculiarities, g_delete_data() would always return
			 * success for requests starting just the next byte
			 * after the provider's media boundary. The check is
			 * conditional on a non-zero media size, since a zero
			 * media size would (most likely) cause ENXIO instead.
			 */
			error = EIO;
			break;
		}
		while (length > 0) {
			chunk = length;
			/*
			 * Cap each request at g_dev_del_max_sectors sectors
			 * (0 disables the cap).  When the provider reports a
			 * stripe size, shorten a capped chunk so that it ends
			 * on a stripe boundary.
			 */
			if (g_dev_del_max_sectors != 0 &&
			    chunk > g_dev_del_max_sectors * pp->sectorsize) {
				chunk = g_dev_del_max_sectors * pp->sectorsize;
				if (pp->stripesize > 0) {
					odd = (offset + chunk +
					    pp->stripeoffset) % pp->stripesize;
					if (chunk > odd)
						chunk -= odd;
				}
			}
			error = g_delete_data(cp, offset, chunk);
			length -= chunk;
			offset += chunk;
			if (error)
				break;
			/*
			 * Since the request size can be large, the service
			 * time can be large as well. We make this ioctl
			 * interruptible by checking for signals for each bio.
			 */
			if (SIGPENDING(td))
				break;
		}
		break;
	case DIOCGIDENT:
		error = g_io_getattr("GEOM::ident", cp, &i, data);
		break;
	case DIOCGPROVIDERNAME:
		strlcpy(data, pp->name, i);
		break;
	case DIOCGSTRIPESIZE:
		*(off_t *)data = pp->stripesize;
		break;
	case DIOCGSTRIPEOFFSET:
		*(off_t *)data = pp->stripeoffset;
		break;
	case DIOCGPHYSPATH:
		error = g_io_getattr("GEOM::physpath", cp, &i, data);
		/* An empty path is reported as ENOENT. */
		if (error == 0 && *(char *)data == '\0')
			error = ENOENT;
		break;
	/* Generic attribute query; len must fit the value buffer. */
	case DIOCGATTR: {
		struct diocgattr_arg *arg = (struct diocgattr_arg *)data;

		if (arg->len > sizeof(arg->value)) {
			error = EINVAL;
			break;
		}
		error = g_io_getattr(arg->name, cp, &arg->len, &arg->value);
		break;
	}
	/*
	 * Zone command.  For DISK_ZONE_REPORT_ZONES the entries pointer from
	 * the request is swapped for a kernel buffer for the duration of the
	 * command; the results are copied out to the original pointer and
	 * the pointer is restored afterwards.
	 */
	case DIOCZONECMD: {
		struct disk_zone_args *zone_args =(struct disk_zone_args *)data;
		struct disk_zone_rep_entry *new_entries, *old_entries;
		struct disk_zone_report *rep;
		size_t alloc_size;

		old_entries = NULL;
		new_entries = NULL;
		rep = NULL;
		alloc_size = 0;

		if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES) {
			rep = &zone_args->zone_params.report;
/* Upper bound on report entries: as many as fit into MAXPHYS bytes. */
#define	MAXENTRIES	(MAXPHYS / sizeof(struct disk_zone_rep_entry))
			if (rep->entries_allocated > MAXENTRIES)
				rep->entries_allocated =
MAXENTRIES; alloc_size = rep->entries_allocated * sizeof(struct disk_zone_rep_entry); if (alloc_size != 0) new_entries = g_malloc(alloc_size, M_WAITOK| M_ZERO); old_entries = rep->entries; rep->entries = new_entries; } error = g_io_zonecmd(zone_args, cp); if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES && alloc_size != 0 && error == 0) error = copyout(new_entries, old_entries, alloc_size); if (old_entries != NULL && rep != NULL) rep->entries = old_entries; if (new_entries != NULL) g_free(new_entries); break; } default: if (pp->geom->ioctl != NULL) { error = pp->geom->ioctl(pp, cmd, data, fflag, td); } else { error = ENOIOCTL; } } return (error); } /* * g_dev_done() * * Completion handler for the bio that g_dev_strategy() cloned. * - Copy the error, completed byte count and, for BIO_ZONE, the zone data * from the clone to the parent bio and compute the parent's bio_resid. * - Destroy the clone and drop one count from sc_active. If no request * is active any more, wake up sleepers on sc_active when SC_A_OPEN is * clear and post g_dev_destroy when SC_A_DESTROY is set. * - Complete the parent with biodone(). */ static void g_dev_done(struct bio *bp2) { struct g_consumer *cp; struct g_dev_softc *sc; struct bio *bp; int active; cp = bp2->bio_from; sc = cp->private; bp = bp2->bio_parent; bp->bio_error = bp2->bio_error; bp->bio_completed = bp2->bio_completed; bp->bio_resid = bp->bio_length - bp2->bio_completed; if (bp2->bio_cmd == BIO_ZONE) bcopy(&bp2->bio_zone, &bp->bio_zone, sizeof(bp->bio_zone)); if (bp2->bio_error != 0) { g_trace(G_T_BIO, "g_dev_done(%p) had error %d", bp2, bp2->bio_error); bp->bio_flags |= BIO_ERROR; } else { g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd", bp2, bp, bp2->bio_resid, (intmax_t)bp2->bio_completed); } g_destroy_bio(bp2); active = atomic_fetchadd_int(&sc->sc_active, -1) - 1; if ((active & SC_A_ACTIVE) == 0) { if ((active & SC_A_OPEN) == 0) wakeup(&sc->sc_active); if (active & SC_A_DESTROY) g_post_event(g_dev_destroy, cp, M_NOWAIT, NULL); } biodone(bp); } /* * g_dev_strategy() * * Pass a bio received on the cdev down to the GEOM consumer. * - Only READ, WRITE, DELETE, FLUSH and ZONE commands are expected. * - Count the request in sc_active. * - Clone the bio, pausing and retrying until g_clone_bio() succeeds. * - Issue the clone with g_dev_done() as its completion handler. */ static void g_dev_strategy(struct bio *bp) { struct g_consumer *cp; struct bio *bp2; struct cdev *dev; struct g_dev_softc *sc; KASSERT(bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE || bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_ZONE, ("Wrong bio_cmd bio=%p cmd=%d", bp, bp->bio_cmd)); dev = bp->bio_dev; cp = dev->si_drv2; KASSERT(cp->acr || cp->acw, ("Consumer with zero access count in 
g_dev_strategy")); biotrack(bp, __func__); #ifdef INVARIANTS /* Fail requests that are not aligned to the provider's sector size. */ if ((bp->bio_offset % cp->provider->sectorsize) != 0 || (bp->bio_bcount % cp->provider->sectorsize) != 0) { bp->bio_resid = bp->bio_bcount; biofinish(bp, NULL, EINVAL); return; } #endif sc = dev->si_drv1; KASSERT(sc->sc_open > 0, ("Closed device in g_dev_strategy")); /* Count the request as active; g_dev_done() drops the count again. */ atomic_add_int(&sc->sc_active, 1); for (;;) { /* * XXX: This is not an ideal solution, but I believe it to * XXX: deadlock safely, all things considered. */ bp2 = g_clone_bio(bp); if (bp2 != NULL) break; pause("gdstrat", hz / 10); } KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place")); bp2->bio_done = g_dev_done; g_trace(G_T_BIO, "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d", bp, bp2, (intmax_t)bp->bio_offset, (intmax_t)bp2->bio_length, bp2->bio_data, bp2->bio_cmd); g_io_request(bp2, cp); KASSERT(cp->acr || cp->acw, ("g_dev_strategy raced with g_dev_close and lost")); } /* * g_dev_callback() * * Called by devfs when asynchronous device destruction is completed. * - Mark that we have no attached device any more. * - If there are no outstanding requests, schedule geom destruction. * Otherwise destruction will be scheduled later by g_dev_done(). */ static void g_dev_callback(void *arg) { struct g_consumer *cp; struct g_dev_softc *sc; int active; cp = arg; sc = cp->private; g_trace(G_T_TOPOLOGY, "g_dev_callback(%p(%s))", cp, cp->geom->name); sc->sc_dev = NULL; sc->sc_alias = NULL; /* Set SC_A_DESTROY and look at the value sc_active had before. */ active = atomic_fetchadd_int(&sc->sc_active, SC_A_DESTROY); if ((active & SC_A_ACTIVE) == 0) g_post_event(g_dev_destroy, cp, M_WAITOK, NULL); } /* * g_dev_orphan() * * Called from below when the provider orphaned us. * - Clear any dump settings. * - Request asynchronous device destruction to prevent any more requests * from coming in. The provider is already marked with an error, so * anything which comes in the interim will be returned immediately. 
*/ static void g_dev_orphan(struct g_consumer *cp) { struct cdev *dev; struct g_dev_softc *sc; g_topology_assert(); sc = cp->private; dev = sc->sc_dev; g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, cp->geom->name); /* Reset any dump-area set on this device */ if (dev->si_flags & SI_DUMPDEV) { struct diocskerneldump_arg kda; bzero(&kda, sizeof(kda)); kda.kda_index = KDA_REMOVE_DEV; (void)dumper_remove(devtoname(dev), &kda); } /* * Destroy the struct cdev so we get no more requests; g_dev_callback() * is registered to run with the consumer as its argument. */ delist_dev(dev); destroy_dev_sched_cb(dev, g_dev_callback, cp); } DECLARE_GEOM_CLASS(g_dev_class, g_dev); Index: head/sys/geom/geom_disk.c =================================================================== --- head/sys/geom/geom_disk.c (revision 361014) +++ head/sys/geom/geom_disk.c (revision 361015) @@ -1,1089 +1,1087 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_geom.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Per-disk state of the DISK class. It is allocated in g_disk_create() * and freed in g_disk_providergone(). */ struct g_disk_softc { struct disk *dp; /* the disk; set to NULL by g_disk_destroy() */ struct devstat *d_devstat; /* copy of dp->d_devstat */ struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; char led[64]; /* LED name, settable through the "led" sysctl */ uint32_t state; /* last state stored by g_disk_setstate() */ struct mtx done_mtx; /* taken in g_disk_done() */ }; static g_access_t g_disk_access; static g_start_t g_disk_start; static g_ioctl_t g_disk_ioctl; static g_dumpconf_t g_disk_dumpconf; static g_provgone_t g_disk_providergone; static int g_disk_sysctl_flags(SYSCTL_HANDLER_ARGS); static struct g_class g_disk_class = { .name = G_DISK_CLASS_NAME, .version = G_VERSION, .start = g_disk_start, .access = g_disk_access, .ioctl = g_disk_ioctl, .providergone = g_disk_providergone, .dumpconf = g_disk_dumpconf, }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_DISK stuff"); DECLARE_GEOM_CLASS(g_disk_class, g_disk); /* * g_disk_access() * * Access method of the DISK class; r, w and e are deltas to the provider's * access counts. * - If the disk is gone, only requests that do not increase any count * succeed; everything else fails with ENXIO. * - On the first open the driver's d_open is called and the provider's * sector, stripe and media sizes are refreshed from the disk. A write * open of a write-protected disk fails with EROFS. * - On the last close the driver's d_close is called and the state and * LED are reset. */ static int g_disk_access(struct g_provider *pp, int r, int w, int e) { struct disk *dp; struct g_disk_softc *sc; int error; g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)", pp->name, r, w, e); 
g_topology_assert(); sc = pp->private; if ((dp = sc->dp) == NULL || dp->d_destroyed) { /* * Allow decreasing access count even if disk is not * available anymore. */ if (r <= 0 && w <= 0 && e <= 0) return (0); return (ENXIO); } /* Turn the deltas into the access counts that would result. */ r += pp->acr; w += pp->acw; e += pp->ace; error = 0; /* First open: no access before, some access after. */ if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { /* * It would be better to defer this decision to d_open if * it was able to take flags. */ if (w > 0 && (dp->d_flags & DISKFLAG_WRITE_PROTECT) != 0) error = EROFS; if (error == 0 && dp->d_open != NULL) error = dp->d_open(dp); if (bootverbose && error != 0) printf("Opened disk %s -> %d\n", pp->name, error); if (error != 0) return (error); pp->sectorsize = dp->d_sectorsize; if (dp->d_maxsize == 0) { printf("WARNING: Disk drive %s%d has no d_maxsize\n", dp->d_name, dp->d_unit); dp->d_maxsize = DFLTPHYS; } if (dp->d_delmaxsize == 0) { if (bootverbose && dp->d_flags & DISKFLAG_CANDELETE) { printf("WARNING: Disk drive %s%d has no " "d_delmaxsize\n", dp->d_name, dp->d_unit); } dp->d_delmaxsize = dp->d_maxsize; } pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; dp->d_flags |= DISKFLAG_OPEN; /* * Do not invoke resize event when initial size was zero. * Some disks report their size only after first opening. 
*/ if (pp->mediasize == 0) pp->mediasize = dp->d_mediasize; else g_resize_provider(pp, dp->d_mediasize); } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { /* Last close: some access before, none after. */ if (dp->d_close != NULL) { error = dp->d_close(dp); if (error != 0) printf("Closed disk %s -> %d\n", pp->name, error); } sc->state = G_STATE_ACTIVE; if (sc->led[0] != 0) led_set(sc->led, "0"); dp->d_flags &= ~DISKFLAG_OPEN; } return (error); } /* * Answer a GEOM::kerneldump request: fill in the dumper description in the * request's struct g_kerneldump from the disk (d_dump, sector size, maximum * I/O size) and clamp the dump area to the media size. The request fails * with ENODEV when the driver has no d_dump method. */ static void g_disk_kerneldump(struct bio *bp, struct disk *dp) { struct g_kerneldump *gkd; struct g_geom *gp; gkd = (struct g_kerneldump*)bp->bio_data; gp = bp->bio_to->geom; g_trace(G_T_TOPOLOGY, "g_disk_kerneldump(%s, %jd, %jd)", gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length); if (dp->d_dump == NULL) { g_io_deliver(bp, ENODEV); return; } gkd->di.dumper = dp->d_dump; gkd->di.priv = dp; gkd->di.blocksize = dp->d_sectorsize; gkd->di.maxiosize = dp->d_maxsize; gkd->di.mediaoffset = gkd->offset; if ((gkd->offset + gkd->length) > dp->d_mediasize) gkd->length = dp->d_mediasize - gkd->offset; gkd->di.mediasize = gkd->length; g_io_deliver(bp, 0); } /* * Answer a GEOM::setstate request: copy the new state from the request into * the softc and, if an LED name is configured, give the LED the pattern * that belongs to that state. */ static void g_disk_setstate(struct bio *bp, struct g_disk_softc *sc) { const char *cmd; memcpy(&sc->state, bp->bio_data, sizeof(sc->state)); if (sc->led[0] != 0) { switch (sc->state) { case G_STATE_FAILED: cmd = "1"; break; case G_STATE_REBUILD: cmd = "f5"; break; case G_STATE_RESYNC: cmd = "f1"; break; default: cmd = "0"; break; } led_set(sc->led, cmd); } g_io_deliver(bp, 0); } /* * Completion handler for the bios cloned in g_disk_start(). With done_mtx * held, the first error and the completed byte count are passed on to the * parent, the devstat transaction is ended for read, write, delete, flush * and zone commands, and the child is counted as done. The parent is * delivered once all of its children have completed. */ static void g_disk_done(struct bio *bp) { struct bintime now; struct bio *bp2; struct g_disk_softc *sc; /* See "notes" for why we need a mutex here */ sc = bp->bio_caller1; bp2 = bp->bio_parent; binuptime(&now); mtx_lock(&sc->done_mtx); if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_length - bp->bio_resid; switch (bp->bio_cmd) { case BIO_ZONE: bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone)); /*FALLTHROUGH*/ case BIO_READ: case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: 
devstat_end_transaction_bio_bt(sc->d_devstat, bp, &now); break; default: break; } bp2->bio_inbed++; if (bp2->bio_children == bp2->bio_inbed) { mtx_unlock(&sc->done_mtx); bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed; g_io_deliver(bp2, bp2->bio_error); } else mtx_unlock(&sc->done_mtx); g_destroy_bio(bp); } /* * ioctl method of the DISK class: hand the ioctl to the driver's d_ioctl, * or return ENOIOCTL if the driver has none. */ static int g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td) { struct disk *dp; struct g_disk_softc *sc; sc = pp->private; dp = sc->dp; KASSERT(dp != NULL && !dp->d_destroyed, ("g_disk_ioctl(%lx) on destroyed disk %s", cmd, pp->name)); if (dp->d_ioctl == NULL) return (ENOIOCTL); return (dp->d_ioctl(dp, cmd, data, fflag, td)); } /* * Size limit that applies to this bio: d_delmaxsize for BIO_DELETE, * d_maxsize for everything else. */ static off_t g_disk_maxsize(struct disk *dp, struct bio *bp) { if (bp->bio_cmd == BIO_DELETE) return (dp->d_delmaxsize); return (dp->d_maxsize); } /* * Page budget for one request: the size limit in pages plus one * (presumably to allow for data that does not start on a page boundary). */ static int g_disk_maxsegs(struct disk *dp, struct bio *bp) { return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1); } /* * Advance a bio by off bytes: move bio_offset forward, shrink bio_length * and step the data description (vlist segments, unmapped page array or * mapped data pointer) by the same amount. */ static void g_disk_advance(struct disk *dp, struct bio *bp, off_t off) { bp->bio_offset += off; bp->bio_length -= off; if ((bp->bio_flags & BIO_VLIST) != 0) { bus_dma_segment_t *seg, *end; seg = (bus_dma_segment_t *)bp->bio_data; end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n; off += bp->bio_ma_offset; while (off >= seg->ds_len) { KASSERT((seg != end), ("vlist request runs off the end")); off -= seg->ds_len; seg++; } bp->bio_ma_offset = off; bp->bio_ma_n = end - seg; bp->bio_data = (void *)seg; } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma += off / PAGE_SIZE; bp->bio_ma_offset += off; bp->bio_ma_offset %= PAGE_SIZE; bp->bio_ma_n -= off / PAGE_SIZE; } else { bp->bio_data += off; } } /* * Take as much of one vlist segment as the remaining length and the page * budget allow. On return *poffset is zero and *plength and *ppages have * been reduced by what this segment used. */ static void g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset, off_t *plength, int *ppages) { uintptr_t seg_page_base; uintptr_t seg_page_end; off_t offset; off_t length; int seg_pages; offset = *poffset; length = *plength; if (length > seg->ds_len - offset) length = seg->ds_len - offset; seg_page_base = trunc_page(seg->ds_addr + 
offset); seg_page_end = round_page(seg->ds_addr + offset + length); seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT; if (seg_pages > *ppages) { /* Out of page budget: cut the length at the last page allowed. */ seg_pages = *ppages; length = (seg_page_base + (seg_pages << PAGE_SHIFT)) - (seg->ds_addr + offset); } *poffset = 0; *plength -= length; *ppages -= seg_pages; } /* * Walk the vlist segments of bp until either its whole length or the page * budget from g_disk_maxsegs() is used up. Returns the number of bytes * that did not fit. If pendseg is not NULL it receives the segment that * follows the last one visited. */ static off_t g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg) { bus_dma_segment_t *seg, *end; off_t residual; off_t offset; int pages; seg = (bus_dma_segment_t *)bp->bio_data; end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n; residual = bp->bio_length; offset = bp->bio_ma_offset; pages = g_disk_maxsegs(dp, bp); while (residual != 0 && pages != 0) { KASSERT((seg != end), ("vlist limit runs off the end")); g_disk_seg_limit(seg, &offset, &residual, &pages); seg++; } if (pendseg != NULL) *pendseg = seg; return (residual); } /* * Trim bp to what the driver accepts in one request: bio_length is capped * at g_disk_maxsize() and, for vlist bios, at the page budget; bio_ma_n is * recomputed for vlist and unmapped bios. Returns true if bio_length was * reduced. */ static bool g_disk_limit(struct disk *dp, struct bio *bp) { bool limited = false; off_t maxsz; maxsz = g_disk_maxsize(dp, bp); /* * XXX: If we have a stripesize we should really use it here. * Care should be taken in the delete case if this is done * as deletes can be very sensitive to size given how they * are processed. 
*/ if (bp->bio_length > maxsz) { bp->bio_length = maxsz; limited = true; } if ((bp->bio_flags & BIO_VLIST) != 0) { bus_dma_segment_t *firstseg, *endseg; off_t residual; firstseg = (bus_dma_segment_t*)bp->bio_data; residual = g_disk_vlist_limit(dp, bp, &endseg); if (residual != 0) { bp->bio_ma_n = endseg - firstseg; bp->bio_length -= residual; limited = true; } } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE); } return (limited); } /* * g_disk_start() * * Start method of the DISK class: dispatch a bio to the disk driver. * - BIO_READ, BIO_WRITE and BIO_DELETE are cloned and, when they exceed * the driver's limits, sent to d_strategy in several pieces. * BIO_DELETE requires DISKFLAG_CANDELETE. * - BIO_GETATTR is answered by the driver's d_getattr or from the fields * of the disk. * - BIO_FLUSH and BIO_ZONE are cloned and sent to d_strategy if the disk * has DISKFLAG_CANFLUSHCACHE or DISKFLAG_CANZONE respectively; * BIO_SPEEDUP is cloned and sent unconditionally. * - Any other command fails with EOPNOTSUPP. */ static void g_disk_start(struct bio *bp) { struct bio *bp2, *bp3; struct disk *dp; struct g_disk_softc *sc; int error; off_t off; biotrack(bp, __func__); sc = bp->bio_to->private; dp = sc->dp; KASSERT(dp != NULL && !dp->d_destroyed, ("g_disk_start(%p) on destroyed disk %s", bp, bp->bio_to->name)); /* * EJUSTRETURN means the bio is completed elsewhere; any other value * is delivered at the end of the function. */ error = EJUSTRETURN; switch(bp->bio_cmd) { case BIO_DELETE: if (!(dp->d_flags & DISKFLAG_CANDELETE)) { error = EOPNOTSUPP; break; } /* fall-through */ case BIO_READ: case BIO_WRITE: KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 || (bp->bio_flags & BIO_UNMAPPED) == 0, ("unmapped bio not supported by disk %s", dp->d_name)); off = 0; bp3 = NULL; bp2 = g_clone_bio(bp); if (bp2 == NULL) { error = ENOMEM; break; } for (;;) { if (g_disk_limit(dp, bp2)) { off += bp2->bio_length; /* * To avoid a race, we need to grab the next bio * before we schedule this one. See "notes". 
*/ bp3 = g_clone_bio(bp); if (bp3 == NULL) bp->bio_error = ENOMEM; } bp2->bio_done = g_disk_done; bp2->bio_caller1 = sc; bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize; bp2->bio_bcount = bp2->bio_length; bp2->bio_disk = dp; devstat_start_transaction_bio(dp->d_devstat, bp2); dp->d_strategy(bp2); /* bp3 is NULL when nothing is left to send or the clone failed. */ if (bp3 == NULL) break; bp2 = bp3; bp3 = NULL; g_disk_advance(dp, bp2, off); } break; case BIO_GETATTR: /* Give the driver a chance to override */ if (dp->d_getattr != NULL) { if (bp->bio_disk == NULL) bp->bio_disk = dp; error = dp->d_getattr(bp); /* * Any result other than -1 is final; with -1 the generic * attributes below are tried. */ if (error != -1) break; error = EJUSTRETURN; } if (g_handleattr_int(bp, "GEOM::candelete", (dp->d_flags & DISKFLAG_CANDELETE) != 0)) break; else if (g_handleattr_int(bp, "GEOM::fwsectors", dp->d_fwsectors)) break; else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads)) break; else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0)) break; else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident)) break; else if (g_handleattr_str(bp, "GEOM::descr", dp->d_descr)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor", dp->d_hba_vendor)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_device", dp->d_hba_device)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor", dp->d_hba_subvendor)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice", dp->d_hba_subdevice)) break; else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump")) g_disk_kerneldump(bp, dp); else if (!strcmp(bp->bio_attribute, "GEOM::setstate")) g_disk_setstate(bp, sc); else if (g_handleattr_uint16_t(bp, "GEOM::rotation_rate", dp->d_rotation_rate)) break; else if (g_handleattr_str(bp, "GEOM::attachment", dp->d_attachment)) break; else error = ENOIOCTL; break; case BIO_FLUSH: g_trace(G_T_BIO, "g_disk_flushcache(%s)", bp->bio_to->name); if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) { error = EOPNOTSUPP; break; } /*FALLTHROUGH*/ case BIO_ZONE: if (bp->bio_cmd == BIO_ZONE) { if (!(dp->d_flags & DISKFLAG_CANZONE)) { error = EOPNOTSUPP; 
break; } g_trace(G_T_BIO, "g_disk_zone(%s)", bp->bio_to->name); } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = g_disk_done; bp2->bio_caller1 = sc; bp2->bio_disk = dp; devstat_start_transaction_bio(dp->d_devstat, bp2); dp->d_strategy(bp2); break; case BIO_SPEEDUP: /* Unlike flush and zone, no devstat transaction is started here. */ bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = g_disk_done; bp2->bio_caller1 = sc; bp2->bio_disk = dp; dp->d_strategy(bp2); break; default: error = EOPNOTSUPP; break; } if (error != EJUSTRETURN) g_io_deliver(bp, error); return; } /* * g_disk_dumpconf() * * dumpconf method of the DISK class. * - With a NULL indent, append the firmware head and sector counts in * the short " hd N sc N" form. * - Otherwise, for a provider, emit one indented line each for the * firmware heads, firmware sectors, rotation rate, ident, lunid and * lunname (the last two only if the driver's d_getattr supplies them) * and the description. */ static void g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct bio *bp; struct disk *dp; struct g_disk_softc *sc; char *buf; int res = 0; sc = gp->softc; if (sc == NULL || (dp = sc->dp) == NULL) return; if (indent == NULL) { sbuf_printf(sb, " hd %u", dp->d_fwheads); sbuf_printf(sb, " sc %u", dp->d_fwsectors); return; } if (pp != NULL) { sbuf_printf(sb, "%s%u\n", indent, dp->d_fwheads); sbuf_printf(sb, "%s%u\n", indent, dp->d_fwsectors); /* * "rotationrate" is a little complicated, because the value * returned by the drive might not be the RPM; 0 and 1 are * special cases, and there's also a valid range. */ sbuf_printf(sb, "%s", indent); if (dp->d_rotation_rate == DISK_RR_UNKNOWN) /* Old drives */ sbuf_cat(sb, "unknown"); /* don't report RPM. */ else if (dp->d_rotation_rate == DISK_RR_NON_ROTATING) sbuf_cat(sb, "0"); else if ((dp->d_rotation_rate >= DISK_RR_MIN) && (dp->d_rotation_rate <= DISK_RR_MAX)) sbuf_printf(sb, "%u", dp->d_rotation_rate); else sbuf_cat(sb, "invalid"); sbuf_cat(sb, "\n"); if (dp->d_getattr != NULL) { /* Ask the driver first; fall back to d_ident if it fails. */ buf = g_malloc(DISK_IDENT_SIZE, M_WAITOK); bp = g_alloc_bio(); bp->bio_disk = dp; bp->bio_attribute = "GEOM::ident"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; res = dp->d_getattr(bp); sbuf_printf(sb, "%s", indent); g_conf_cat_escaped(sb, res == 0 ? 
buf : dp->d_ident); sbuf_cat(sb, "\n"); bp->bio_attribute = "GEOM::lunid"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; if (dp->d_getattr(bp) == 0) { sbuf_printf(sb, "%s", indent); g_conf_cat_escaped(sb, buf); sbuf_cat(sb, "\n"); } bp->bio_attribute = "GEOM::lunname"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; if (dp->d_getattr(bp) == 0) { sbuf_printf(sb, "%s", indent); g_conf_cat_escaped(sb, buf); sbuf_cat(sb, "\n"); } g_destroy_bio(bp); g_free(buf); } else { sbuf_printf(sb, "%s", indent); g_conf_cat_escaped(sb, dp->d_ident); sbuf_cat(sb, "\n"); } sbuf_printf(sb, "%s", indent); g_conf_cat_escaped(sb, dp->d_descr); sbuf_cat(sb, "\n"); } } /* * Event handler posted by disk_resize(): pass the disk's current media * size on to its providers. A provider whose non-zero sector size no * longer matches the disk is withered with ENXIO instead. */ static void g_disk_resize(void *ptr, int flag) { struct disk *dp; struct g_geom *gp; struct g_provider *pp; if (flag == EV_CANCEL) return; g_topology_assert(); dp = ptr; gp = dp->d_geom; if (dp->d_destroyed || gp == NULL) return; LIST_FOREACH(pp, &gp->provider, provider) { if (pp->sectorsize != 0 && pp->sectorsize != dp->d_sectorsize) g_wither_provider(pp, ENXIO); else g_resize_provider(pp, dp->d_mediasize); } } /* * Event handler posted by disk_create(): set up the softc, geom, provider * and sysctl nodes for a new disk. d_goneflag is checked under the disk's * pool mutex both before and after the setup, so that a disk_gone() which * raced with creation is handled. */ static void g_disk_create(void *arg, int flag) { struct g_geom *gp; struct g_provider *pp; struct disk *dp; struct g_disk_softc *sc; struct disk_alias *dap; char tmpstr[80]; if (flag == EV_CANCEL) return; g_topology_assert(); dp = arg; mtx_pool_lock(mtxpool_sleep, dp); dp->d_init_level = DISK_INIT_START; /* * If the disk has already gone away, we can just stop here and * call the user's callback to tell him we've cleaned things up. 
*/ if (dp->d_goneflag != 0) { mtx_pool_unlock(mtxpool_sleep, dp); if (dp->d_gone != NULL) dp->d_gone(dp); return; } mtx_pool_unlock(mtxpool_sleep, dp); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF); sc->dp = dp; sc->d_devstat = dp->d_devstat; gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit); gp->softc = sc; - LIST_FOREACH(dap, &dp->d_aliases, da_next) { - snprintf(tmpstr, sizeof(tmpstr), "%s%d", dap->da_alias, dp->d_unit); - g_geom_add_alias(gp, tmpstr); - } pp = g_new_providerf(gp, "%s", gp->name); /* Aliases are registered on the provider, so this has to follow g_new_providerf(). */ + LIST_FOREACH(dap, &dp->d_aliases, da_next) + g_provider_add_alias(pp, "%s%d", dap->da_alias, dp->d_unit); devstat_remove_entry(pp->stat); pp->stat = NULL; dp->d_devstat->id = pp; pp->mediasize = dp->d_mediasize; pp->sectorsize = dp->d_sectorsize; pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0) pp->flags |= G_PF_ACCEPT_UNMAPPED; if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0) pp->flags |= G_PF_DIRECT_SEND; pp->flags |= G_PF_DIRECT_RECEIVE; if (bootverbose) printf("GEOM: new disk %s\n", gp->name); /* Per-disk sysctl node with the "led" and "flags" entries below it. */ sysctl_ctx_init(&sc->sysctl_ctx); snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, tmpstr); if (sc->sysctl_tree != NULL) { SYSCTL_ADD_STRING(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led", CTLFLAG_RWTUN, sc->led, sizeof(sc->led), "LED name"); SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "flags", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, dp, 0, g_disk_sysctl_flags, "A", "Report disk flags"); } pp->private = sc; dp->d_geom = gp; g_error_provider(pp, 0); mtx_pool_lock(mtxpool_sleep, dp); dp->d_init_level = DISK_INIT_DONE; /* * If the disk has gone away at this stage, start the withering * process for it. 
*/ if (dp->d_goneflag != 0) { mtx_pool_unlock(mtxpool_sleep, dp); g_wither_provider(pp, ENXIO); return; } mtx_pool_unlock(mtxpool_sleep, dp); } /* * We get this callback after all of the consumers have gone away, and just * before the provider is freed. If the disk driver provided a d_gone * callback, let them know that it is okay to free resources -- they won't * be getting any more accesses from GEOM. The softc is torn down here as * well: its sysctl context is freed, its LED reset, its mutex destroyed * and the softc itself freed. */ static void g_disk_providergone(struct g_provider *pp) { struct disk *dp; struct g_disk_softc *sc; sc = (struct g_disk_softc *)pp->private; dp = sc->dp; if (dp != NULL && dp->d_gone != NULL) dp->d_gone(dp); if (sc->sysctl_tree != NULL) { sysctl_ctx_free(&sc->sysctl_ctx); sc->sysctl_tree = NULL; } if (sc->led[0] != 0) { led_set(sc->led, "0"); sc->led[0] = 0; } pp->private = NULL; pp->geom->softc = NULL; mtx_destroy(&sc->done_mtx); g_free(sc); } /* * Event handler posted by disk_destroy(): detach the softc from the disk, * wither the geom with ENXIO, then free the alias list and the struct disk * itself. */ static void g_disk_destroy(void *ptr, int flag) { struct disk *dp; struct g_geom *gp; struct g_disk_softc *sc; struct disk_alias *dap, *daptmp; g_topology_assert(); dp = ptr; gp = dp->d_geom; if (gp != NULL) { sc = gp->softc; if (sc != NULL) sc->dp = NULL; dp->d_geom = NULL; g_wither_geom(gp, ENXIO); } LIST_FOREACH_SAFE(dap, &dp->d_aliases, da_next, daptmp) g_free(dap); g_free(dp); } /* * We only allow printable characters in disk ident, * the rest is converted to 'x' followed by two hex digits. 
*/ static void g_disk_ident_adjust(char *ident, size_t size) { /* tmp holds one character, or 'x' plus two hex digits, and the NUL. */ char *p, tmp[4], newid[DISK_IDENT_SIZE]; newid[0] = '\0'; for (p = ident; *p != '\0'; p++) { if (isprint(*p)) { tmp[0] = *p; tmp[1] = '\0'; } else { snprintf(tmp, sizeof(tmp), "x%02hhx", *(unsigned char *)p); } /* Stop as soon as newid cannot take the whole piece. */ if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid)) break; } bzero(ident, size); strlcpy(ident, newid, size); } /* Allocate a zeroed struct disk with an empty alias list. */ struct disk * disk_alloc(void) { struct disk *dp; dp = g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO); LIST_INIT(&dp->d_aliases); return (dp); } /* * Register a disk with GEOM. A disk whose version differs from * DISK_VERSION, or which has DISKFLAG_RESERVED set (reported as * non-MPSAFE), is ignored with a warning. Otherwise a devstat entry is * created if the driver did not supply one, the ident is sanitized with * g_disk_ident_adjust() and g_disk_create() is posted to do the setup. */ void disk_create(struct disk *dp, int version) { if (version != DISK_VERSION) { printf("WARNING: Attempt to add disk %s%d %s", dp->d_name, dp->d_unit, " using incompatible ABI version of disk(9)\n"); printf("WARNING: Ignoring disk %s%d\n", dp->d_name, dp->d_unit); return; } if (dp->d_flags & DISKFLAG_RESERVED) { printf("WARNING: Attempt to add non-MPSAFE disk %s%d\n", dp->d_name, dp->d_unit); printf("WARNING: Ignoring disk %s%d\n", dp->d_name, dp->d_unit); return; } KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy")); KASSERT(dp->d_name != NULL, ("disk_create need d_name")); KASSERT(*dp->d_name != 0, ("disk_create need d_name")); KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long")); if (dp->d_devstat == NULL) dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit, dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); dp->d_geom = NULL; dp->d_init_level = DISK_INIT_NONE; g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident)); g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL); } /* * Tear down a disk: mark it gone and destroyed, cancel the events posted * with dp as their reference, remove its devstat entry and post * g_disk_destroy() to release the rest. */ void disk_destroy(struct disk *dp) { disk_gone(dp); dp->d_destroyed = 1; g_cancel_event(dp); if (dp->d_devstat != NULL) devstat_remove_entry(dp->d_devstat); g_post_event(g_disk_destroy, dp, M_WAITOK, NULL); } /* * Record an alternative name for the disk. The name is copied into the * same allocation as the list entry, directly behind it. */ void disk_add_alias(struct disk *dp, const char *name) { struct disk_alias *dap; dap = (struct disk_alias *)g_malloc( sizeof(struct disk_alias) + strlen(name) + 1, M_WAITOK); strcpy((char *)(dap + 
1), name); dap->da_alias = (const char *)(dap + 1); LIST_INSERT_HEAD(&dp->d_aliases, dap, da_next); } /* * Note that the disk has gone away. Only the first call has any effect. * If g_disk_create() has completed, the disk's provider is withered with * ENXIO here; otherwise the cleanup is left to g_disk_create(). */ void disk_gone(struct disk *dp) { struct g_geom *gp; struct g_provider *pp; mtx_pool_lock(mtxpool_sleep, dp); /* * Second wither call makes no sense, plus we can not access the list * of providers without topology lock after calling wither once. */ if (dp->d_goneflag != 0) { mtx_pool_unlock(mtxpool_sleep, dp); return; } dp->d_goneflag = 1; /* * If we're still in the process of creating this disk (the * g_disk_create() function is still queued, or is in * progress), the init level will not yet be DISK_INIT_DONE. * * If that is the case, g_disk_create() will see d_goneflag * and take care of cleaning things up. * * If the disk has already been created, we default to * withering the provider as usual below. * * If the caller has not set a d_gone() callback, he will * not be any worse off by returning here, because the geom * has not been fully setup in any case. */ if (dp->d_init_level < DISK_INIT_DONE) { mtx_pool_unlock(mtxpool_sleep, dp); return; } mtx_pool_unlock(mtxpool_sleep, dp); gp = dp->d_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_wither_provider(pp, ENXIO); } } /* * Report an attribute change to every provider of the disk's geom, if it * has one, and send a GEOM/disk devctl notification that names the disk. */ void disk_attr_changed(struct disk *dp, const char *attr, int flag) { struct g_geom *gp; struct g_provider *pp; char devnamebuf[128]; gp = dp->d_geom; if (gp != NULL) LIST_FOREACH(pp, &gp->provider, provider) (void)g_attr_changed(pp, attr, flag); snprintf(devnamebuf, sizeof(devnamebuf), "devname=%s%d", dp->d_name, dp->d_unit); devctl_notify("GEOM", "disk", attr, devnamebuf); } /* Pass a media-changed notification to the disk's single provider, if any. */ void disk_media_changed(struct disk *dp, int flag) { struct g_geom *gp; struct g_provider *pp; gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_media_changed(pp, flag); } } } /* Pass a media-gone notification to the disk's single provider, if any. */ void disk_media_gone(struct disk 
*dp, int flag) { struct g_geom *gp; struct g_provider *pp; gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_media_gone(pp, flag); } } } /* * Ask for the disk's providers to be resized by posting g_disk_resize(); * flag is passed to g_post_event(), whose result is returned. Nothing is * done, and 0 is returned, if the disk is destroyed or has no geom yet. */ int disk_resize(struct disk *dp, int flag) { if (dp->d_destroyed || dp->d_geom == NULL) return (0); return (g_post_event(g_disk_resize, dp, flag, NULL)); } /* * Event handler used by sysctl_disks(): write the names of all geoms of * the DISK class, separated by spaces, into the sbuf and finish it. */ static void g_kern_disks(void *p, int flag __unused) { struct sbuf *sb; struct g_geom *gp; char *sp; sb = p; sp = ""; g_topology_assert(); LIST_FOREACH(gp, &g_disk_class.geom, geom) { sbuf_printf(sb, "%s%s", sp, gp->name); sp = " "; } sbuf_finish(sb); } /* * Handler of the per-disk "flags" sysctl: report d_flags of the disk passed * in arg1, decoded into flag names with the %b format. */ static int g_disk_sysctl_flags(SYSCTL_HANDLER_ARGS) { struct disk *dp; struct sbuf *sb; int error; sb = sbuf_new_auto(); dp = (struct disk *)arg1; sbuf_printf(sb, "%b", dp->d_flags, "\20" "\2OPEN" "\3CANDELETE" "\4CANFLUSHCACHE" "\5UNMAPPEDBIO" "\6DIRECTCOMPLETION" "\10CANZONE" "\11WRITEPROTECT"); sbuf_finish(sb); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); return (error); } /* * Handler of the kern.disks sysctl: run g_kern_disks() as a GEOM event, * wait for it, and copy out the resulting string including its NUL. */ static int sysctl_disks(SYSCTL_HANDLER_ARGS) { int error; struct sbuf *sb; sb = sbuf_new_auto(); g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); return error; } SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_disks, "A", "names of available disks"); Index: head/sys/geom/geom_dump.c =================================================================== --- head/sys/geom/geom_dump.c (revision 361014) +++ head/sys/geom/geom_dump.c (revision 361015) @@ -1,332 +1,332 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. 
* under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* * Emit the DOT node for a consumer, labelled with its access counts, and * an edge to the provider it is attached to, if any. */ static void g_confdot_consumer(struct sbuf *sb, struct g_consumer *cp) { sbuf_printf(sb, "z%p [label=\"r%dw%de%d\"];\n", cp, cp->acr, cp->acw, cp->ace); if (cp->provider) sbuf_printf(sb, "z%p -> z%p;\n", cp, cp->provider); } /* * Emit the DOT node for a provider, labelled with its name, access counts, * error, sector size and stripe size. */ static void g_confdot_provider(struct sbuf *sb, struct g_provider *pp) { sbuf_printf(sb, "z%p [shape=hexagon,label=\"%s\\nr%dw%de%d\\nerr#%d\\n" "sector=%u\\nstripe=%ju\"];\n", pp, pp->name, pp->acr, pp->acw, pp->ace, pp->error, pp->sectorsize, (uintmax_t)pp->stripesize); } /* * Emit the DOT node for a geom together with the nodes of its consumers * and providers, with edges geom -> consumer and provider -> geom. */ static void g_confdot_geom(struct sbuf *sb, struct g_geom *gp) { struct g_consumer *cp; struct g_provider *pp; sbuf_printf(sb, "z%p [shape=box,label=\"%s\\n%s\\nr#%d\"];\n", gp, gp->class->name, gp->name, gp->rank); LIST_FOREACH(cp, &gp->consumer, consumer) { g_confdot_consumer(sb, cp); sbuf_printf(sb, "z%p -> z%p;\n", gp, cp); } LIST_FOREACH(pp, &gp->provider, provider) { g_confdot_provider(sb, pp); sbuf_printf(sb, "z%p -> z%p;\n", pp, gp); } } /* Emit the DOT description of every geom of a class. */ static void g_confdot_class(struct sbuf *sb, struct g_class *mp) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) g_confdot_geom(sb, gp); } /* * GEOM event callback: write all classes as one DOT "digraph geom" into * the sbuf passed in p and finish the sbuf. Must not be cancelled. */ void g_confdot(void *p, int flag ) { struct g_class *mp; struct sbuf *sb; KASSERT(flag != EV_CANCEL, ("g_confdot was cancelled")); sb = p; g_topology_assert(); sbuf_cat(sb, "digraph geom {\n"); LIST_FOREACH(mp, &g_classes, class) g_confdot_class(sb, mp); sbuf_cat(sb, "}\n"); sbuf_finish(sb); } /* * Emit one text line per provider of a geom (level, class name, provider * name, media size, sector size, plus whatever the geom's dumpconf adds * when called with a NULL indent), then recurse with level + 1 into the * geoms that consume the provider. Withering geoms are skipped. */ static void g_conftxt_geom(struct sbuf *sb, struct g_geom *gp, int level) { struct g_provider *pp; struct g_consumer *cp; if (gp->flags & G_GEOM_WITHER) return; LIST_FOREACH(pp, &gp->provider, provider) { sbuf_printf(sb, "%d %s %s %ju %u", level, gp->class->name, pp->name, (uintmax_t)pp->mediasize, pp->sectorsize); if (gp->dumpconf != NULL) gp->dumpconf(sb, NULL, gp, NULL, pp); sbuf_cat(sb, "\n"); LIST_FOREACH(cp, &pp->consumers, consumers) g_conftxt_geom(sb, cp->geom, level + 1); } } /* Emit the text form of every geom of a class, starting at level 0. */ static void 
g_conftxt_class(struct sbuf *sb, struct g_class *mp) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) g_conftxt_geom(sb, gp, 0); } void g_conftxt(void *p, int flag) { struct g_class *mp; struct sbuf *sb; KASSERT(flag != EV_CANCEL, ("g_conftxt was cancelled")); sb = p; g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { if (!strcmp(mp->name, G_DISK_CLASS_NAME) || !strcmp(mp->name, "MD")) g_conftxt_class(sb, mp); } sbuf_finish(sb); } void g_conf_cat_escaped(struct sbuf *sb, const char *buf) { const u_char *c; for (c = buf; *c != '\0'; c++) { if (*c == '&' || *c == '<' || *c == '>' || *c == '\'' || *c == '"' || *c > 0x7e) sbuf_printf(sb, "&#x%X;", *c); else if (*c == '\t' || *c == '\n' || *c == '\r' || *c > 0x1f) sbuf_putc(sb, *c); else sbuf_putc(sb, '?'); } } void g_conf_printf_escaped(struct sbuf *sb, const char *fmt, ...) { struct sbuf *s; va_list ap; s = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(s, fmt, ap); va_end(ap); sbuf_finish(s); g_conf_cat_escaped(sb, sbuf_data(s)); sbuf_delete(s); } static void g_conf_consumer(struct sbuf *sb, struct g_consumer *cp) { sbuf_printf(sb, "\t\n", cp); sbuf_printf(sb, "\t \n", cp->geom); if (cp->provider != NULL) sbuf_printf(sb, "\t \n", cp->provider); sbuf_printf(sb, "\t r%dw%de%d\n", cp->acr, cp->acw, cp->ace); if (cp->geom->flags & G_GEOM_WITHER) ; else if (cp->geom->dumpconf != NULL) { sbuf_cat(sb, "\t \n"); cp->geom->dumpconf(sb, "\t ", cp->geom, cp, NULL); sbuf_cat(sb, "\t \n"); } sbuf_cat(sb, "\t\n"); } static void g_conf_provider(struct sbuf *sb, struct g_provider *pp) { + struct g_geom_alias *gap; sbuf_printf(sb, "\t\n", pp); sbuf_printf(sb, "\t \n", pp->geom); sbuf_printf(sb, "\t r%dw%de%d\n", pp->acr, pp->acw, pp->ace); sbuf_cat(sb, "\t "); g_conf_cat_escaped(sb, pp->name); sbuf_cat(sb, "\n"); + LIST_FOREACH(gap, &pp->aliases, ga_next) { + sbuf_cat(sb, "\t "); + g_conf_cat_escaped(sb, gap->ga_alias); + sbuf_cat(sb, "\n"); + } sbuf_printf(sb, "\t %jd\n", (intmax_t)pp->mediasize); sbuf_printf(sb, "\t 
%u\n", pp->sectorsize); sbuf_printf(sb, "\t %ju\n", (uintmax_t)pp->stripesize); sbuf_printf(sb, "\t %ju\n", (uintmax_t)pp->stripeoffset); if (pp->flags & G_PF_WITHER) sbuf_cat(sb, "\t \n"); else if (pp->geom->flags & G_GEOM_WITHER) ; else if (pp->geom->dumpconf != NULL) { sbuf_cat(sb, "\t \n"); pp->geom->dumpconf(sb, "\t ", pp->geom, NULL, pp); sbuf_cat(sb, "\t \n"); } sbuf_cat(sb, "\t\n"); } static void g_conf_geom(struct sbuf *sb, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp) { struct g_consumer *cp2; struct g_provider *pp2; - struct g_geom_alias *gap; sbuf_printf(sb, " \n", gp); sbuf_printf(sb, " \n", gp->class); sbuf_cat(sb, " "); g_conf_cat_escaped(sb, gp->name); sbuf_cat(sb, "\n"); sbuf_printf(sb, " %d\n", gp->rank); if (gp->flags & G_GEOM_WITHER) sbuf_cat(sb, " \n"); else if (gp->dumpconf != NULL) { sbuf_cat(sb, " \n"); gp->dumpconf(sb, "\t", gp, NULL, NULL); sbuf_cat(sb, " \n"); } LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp != NULL && cp != cp2) continue; g_conf_consumer(sb, cp2); } LIST_FOREACH(pp2, &gp->provider, provider) { if (pp != NULL && pp != pp2) continue; g_conf_provider(sb, pp2); - } - LIST_FOREACH(gap, &gp->aliases, ga_next) { - sbuf_cat(sb, " \n"); - g_conf_cat_escaped(sb, gap->ga_alias); - sbuf_cat(sb, " \n"); } sbuf_cat(sb, " \n"); } static void g_conf_class(struct sbuf *sb, struct g_class *mp, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp) { struct g_geom *gp2; sbuf_printf(sb, " \n", mp); sbuf_cat(sb, " "); g_conf_cat_escaped(sb, mp->name); sbuf_cat(sb, "\n"); LIST_FOREACH(gp2, &mp->geom, geom) { if (gp != NULL && gp != gp2) continue; g_conf_geom(sb, gp2, pp, cp); } sbuf_cat(sb, " \n"); } void g_conf_specific(struct sbuf *sb, struct g_class *mp, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp) { struct g_class *mp2; g_topology_assert(); sbuf_cat(sb, "\n"); LIST_FOREACH(mp2, &g_classes, class) { if (mp != NULL && mp != mp2) continue; g_conf_class(sb, mp2, gp, pp, cp); } sbuf_cat(sb, 
"\n"); sbuf_finish(sb); } void g_confxml(void *p, int flag) { KASSERT(flag != EV_CANCEL, ("g_confxml was cancelled")); g_topology_assert(); g_conf_specific(p, NULL, NULL, NULL, NULL); } void (g_trace)(int level, const char *fmt, ...) { va_list ap; if (!(g_debugflags & level)) return; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); } Index: head/sys/geom/geom_subr.c =================================================================== --- head/sys/geom/geom_subr.c (revision 361014) +++ head/sys/geom/geom_subr.c (revision 361015) @@ -1,1661 +1,1670 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #ifdef KDB #include #endif SDT_PROVIDER_DEFINE(geom); struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes); static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms); char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim; struct g_hh00 { struct g_class *mp; struct g_provider *pp; off_t size; int error; int post; }; void g_dbg_printf(const char *classname, int lvl, struct bio *bp, const char *format, ...) { #ifndef PRINTF_BUFR_SIZE #define PRINTF_BUFR_SIZE 64 #endif char bufr[PRINTF_BUFR_SIZE]; struct sbuf sb, *sbp __unused; va_list ap; sbp = sbuf_new(&sb, bufr, sizeof(bufr), SBUF_FIXEDLEN); KASSERT(sbp != NULL, ("sbuf_new misused?")); sbuf_set_drain(&sb, sbuf_printf_drain, NULL); sbuf_cat(&sb, classname); if (lvl >= 0) sbuf_printf(&sb, "[%d]", lvl); va_start(ap, format); sbuf_vprintf(&sb, format, ap); va_end(ap); if (bp != NULL) { sbuf_putc(&sb, ' '); g_format_bio(&sb, bp); } /* Terminate the debug line with a single '\n'. */ sbuf_nl_terminate(&sb); /* Flush line to printf. */ sbuf_finish(&sb); sbuf_delete(&sb); } /* * This event offers a new class a chance to taste all preexisting providers. 
*/ static void g_load_class(void *arg, int flag) { struct g_hh00 *hh; struct g_class *mp2, *mp; struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (flag == EV_CANCEL) /* XXX: can't happen ? */ return; if (g_shutdown) return; hh = arg; mp = hh->mp; hh->error = 0; if (hh->post) { g_free(hh); hh = NULL; } g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name); KASSERT(mp->name != NULL && *mp->name != '\0', ("GEOM class has no name")); LIST_FOREACH(mp2, &g_classes, class) { if (mp2 == mp) { printf("The GEOM class %s is already loaded.\n", mp2->name); if (hh != NULL) hh->error = EEXIST; return; } else if (strcmp(mp2->name, mp->name) == 0) { printf("A GEOM class %s is already loaded.\n", mp2->name); if (hh != NULL) hh->error = EEXIST; return; } } LIST_INIT(&mp->geom); LIST_INSERT_HEAD(&g_classes, mp, class); if (mp->init != NULL) mp->init(mp); if (mp->taste == NULL) return; LIST_FOREACH(mp2, &g_classes, class) { if (mp == mp2) continue; LIST_FOREACH(gp, &mp2->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { mp->taste(mp, pp, 0); g_topology_assert(); } } } } static int g_unload_class(struct g_class *mp) { struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; int error; g_topology_lock(); g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name); retry: G_VALID_CLASS(mp); LIST_FOREACH(gp, &mp->geom, geom) { /* We refuse to unload if anything is open */ LIST_FOREACH(pp, &gp->provider, provider) if (pp->acr || pp->acw || pp->ace) { g_topology_unlock(); return (EBUSY); } LIST_FOREACH(cp, &gp->consumer, consumer) if (cp->acr || cp->acw || cp->ace) { g_topology_unlock(); return (EBUSY); } /* If the geom is withering, wait for it to finish. */ if (gp->flags & G_GEOM_WITHER) { g_topology_sleep(mp, 1); goto retry; } } /* * We allow unloading if we have no geoms, or a class * method we can use to get rid of them. 
*/ if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) { g_topology_unlock(); return (EOPNOTSUPP); } /* Bar new entries */ mp->taste = NULL; mp->config = NULL; LIST_FOREACH(gp, &mp->geom, geom) { error = mp->destroy_geom(NULL, mp, gp); if (error != 0) { g_topology_unlock(); return (error); } } /* Wait for withering to finish. */ for (;;) { gp = LIST_FIRST(&mp->geom); if (gp == NULL) break; KASSERT(gp->flags & G_GEOM_WITHER, ("Non-withering geom in class %s", mp->name)); g_topology_sleep(mp, 1); } G_VALID_CLASS(mp); if (mp->fini != NULL) mp->fini(mp); LIST_REMOVE(mp, class); g_topology_unlock(); return (0); } int g_modevent(module_t mod, int type, void *data) { struct g_hh00 *hh; int error; static int g_ignition; struct g_class *mp; mp = data; if (mp->version != G_VERSION) { printf("GEOM class %s has Wrong version %x\n", mp->name, mp->version); return (EINVAL); } if (!g_ignition) { g_ignition++; g_init(); } error = EOPNOTSUPP; switch (type) { case MOD_LOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name); hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->mp = mp; /* * Once the system is not cold, MOD_LOAD calls will be * from the userland and the g_event thread will be able * to acknowledge their completion. */ if (cold) { hh->post = 1; error = g_post_event(g_load_class, hh, M_WAITOK, NULL); } else { error = g_waitfor_event(g_load_class, hh, M_WAITOK, NULL); if (error == 0) error = hh->error; g_free(hh); } break; case MOD_UNLOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", mp->name); error = g_unload_class(mp); if (error == 0) { KASSERT(LIST_EMPTY(&mp->geom), ("Unloaded class (%s) still has geom", mp->name)); } break; } return (error); } static void g_retaste_event(void *arg, int flag) { struct g_class *mp, *mp2; struct g_geom *gp; struct g_hh00 *hh; struct g_provider *pp; struct g_consumer *cp; g_topology_assert(); if (flag == EV_CANCEL) /* XXX: can't happen ? 
*/ return; if (g_shutdown || g_notaste) return; hh = arg; mp = hh->mp; hh->error = 0; if (hh->post) { g_free(hh); hh = NULL; } g_trace(G_T_TOPOLOGY, "g_retaste(%s)", mp->name); LIST_FOREACH(mp2, &g_classes, class) { LIST_FOREACH(gp, &mp2->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (pp->acr || pp->acw || pp->ace) continue; LIST_FOREACH(cp, &pp->consumers, consumers) { if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; } if (cp != NULL) { cp->flags |= G_CF_ORPHAN; g_wither_geom(cp->geom, ENXIO); } mp->taste(mp, pp, 0); g_topology_assert(); } } } } int g_retaste(struct g_class *mp) { struct g_hh00 *hh; int error; if (mp->taste == NULL) return (EINVAL); hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->mp = mp; if (cold) { hh->post = 1; error = g_post_event(g_retaste_event, hh, M_WAITOK, NULL); } else { error = g_waitfor_event(g_retaste_event, hh, M_WAITOK, NULL); if (error == 0) error = hh->error; g_free(hh); } return (error); } struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) 
{ struct g_geom *gp; va_list ap; struct sbuf *sb; g_topology_assert(); G_VALID_CLASS(mp); sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO); gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO); gp->class = mp; gp->rank = 1; LIST_INIT(&gp->consumer); LIST_INIT(&gp->provider); - LIST_INIT(&gp->aliases); LIST_INSERT_HEAD(&mp->geom, gp, geom); TAILQ_INSERT_HEAD(&geoms, gp, geoms); strcpy(gp->name, sbuf_data(sb)); sbuf_delete(sb); /* Fill in defaults from class */ gp->start = mp->start; gp->spoiled = mp->spoiled; gp->attrchanged = mp->attrchanged; gp->providergone = mp->providergone; gp->dumpconf = mp->dumpconf; gp->access = mp->access; gp->orphan = mp->orphan; gp->ioctl = mp->ioctl; gp->resize = mp->resize; return (gp); } void g_destroy_geom(struct g_geom *gp) { - struct g_geom_alias *gap, *gaptmp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name); KASSERT(LIST_EMPTY(&gp->consumer), ("g_destroy_geom(%s) with consumer(s) [%p]", gp->name, LIST_FIRST(&gp->consumer))); KASSERT(LIST_EMPTY(&gp->provider), ("g_destroy_geom(%s) with provider(s) [%p]", gp->name, LIST_FIRST(&gp->provider))); g_cancel_event(gp); LIST_REMOVE(gp, geom); TAILQ_REMOVE(&geoms, gp, geoms); - LIST_FOREACH_SAFE(gap, &gp->aliases, ga_next, gaptmp) - g_free(gap); g_free(gp->name); g_free(gp); } /* * This function is called (repeatedly) until the geom has withered away. */ void g_wither_geom(struct g_geom *gp, int error) { struct g_provider *pp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name); if (!(gp->flags & G_GEOM_WITHER)) { gp->flags |= G_GEOM_WITHER; LIST_FOREACH(pp, &gp->provider, provider) if (!(pp->flags & G_PF_ORPHAN)) g_orphan_provider(pp, error); } g_do_wither(); } /* * Convenience function to destroy a particular provider. 
*/ void g_wither_provider(struct g_provider *pp, int error) { pp->flags |= G_PF_WITHER; if (!(pp->flags & G_PF_ORPHAN)) g_orphan_provider(pp, error); } /* * This function is called (repeatedly) until the geom has withered away. */ void g_wither_geom_close(struct g_geom *gp, int error) { struct g_consumer *cp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name); LIST_FOREACH(cp, &gp->consumer, consumer) if (cp->acr || cp->acw || cp->ace) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_wither_geom(gp, error); } /* * This function is called (repeatedly) until we can't wash away more * withered bits at present. */ void g_wither_washer() { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp, *pp2; struct g_consumer *cp, *cp2; g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) { if (!(pp->flags & G_PF_WITHER)) continue; if (LIST_EMPTY(&pp->consumers)) g_destroy_provider(pp); } if (!(gp->flags & G_GEOM_WITHER)) continue; LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) { if (LIST_EMPTY(&pp->consumers)) g_destroy_provider(pp); } LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) { if (cp->acr || cp->acw || cp->ace) continue; if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); } if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) g_destroy_geom(gp); } } } struct g_consumer * g_new_consumer(struct g_geom *gp) { struct g_consumer *cp; g_topology_assert(); G_VALID_GEOM(gp); KASSERT(!(gp->flags & G_GEOM_WITHER), ("g_new_consumer on WITHERing geom(%s) (class %s)", gp->name, gp->class->name)); KASSERT(gp->orphan != NULL, ("g_new_consumer on geom(%s) (class %s) without orphan", gp->name, gp->class->name)); cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO); cp->geom = gp; cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); 
LIST_INSERT_HEAD(&gp->consumer, cp, consumer); return(cp); } void g_destroy_consumer(struct g_consumer *cp) { struct g_geom *gp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp); KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached")); KASSERT (cp->acr == 0, ("g_destroy_consumer with acr")); KASSERT (cp->acw == 0, ("g_destroy_consumer with acw")); KASSERT (cp->ace == 0, ("g_destroy_consumer with ace")); g_cancel_event(cp); gp = cp->geom; LIST_REMOVE(cp, consumer); devstat_remove_entry(cp->stat); g_free(cp); if (gp->flags & G_GEOM_WITHER) g_do_wither(); } static void g_new_provider_event(void *arg, int flag) { struct g_class *mp; struct g_provider *pp; struct g_consumer *cp, *next_cp; g_topology_assert(); if (flag == EV_CANCEL) return; if (g_shutdown) return; pp = arg; G_VALID_PROVIDER(pp); KASSERT(!(pp->flags & G_PF_WITHER), ("g_new_provider_event but withered")); LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, next_cp) { if ((cp->flags & G_CF_ORPHAN) == 0 && cp->geom->attrchanged != NULL) cp->geom->attrchanged(cp, "GEOM::media"); } if (g_notaste) return; LIST_FOREACH(mp, &g_classes, class) { if (mp->taste == NULL) continue; LIST_FOREACH(cp, &pp->consumers, consumers) if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; if (cp != NULL) continue; mp->taste(mp, pp, 0); g_topology_assert(); } } struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) 
{ struct g_provider *pp; struct sbuf *sb; va_list ap; g_topology_assert(); G_VALID_GEOM(gp); KASSERT(gp->access != NULL, ("new provider on geom(%s) without ->access (class %s)", gp->name, gp->class->name)); KASSERT(gp->start != NULL, ("new provider on geom(%s) without ->start (class %s)", gp->name, gp->class->name)); KASSERT(!(gp->flags & G_GEOM_WITHER), ("new provider on WITHERing geom(%s) (class %s)", gp->name, gp->class->name)); sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); pp->name = (char *)(pp + 1); strcpy(pp->name, sbuf_data(sb)); sbuf_delete(sb); LIST_INIT(&pp->consumers); + LIST_INIT(&pp->aliases); pp->error = ENXIO; pp->geom = gp; pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); LIST_INSERT_HEAD(&gp->provider, pp, provider); g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL); return (pp); } void +g_provider_add_alias(struct g_provider *pp, const char *fmt, ...) +{ + struct sbuf *sb; + struct g_geom_alias *gap; + va_list ap; + + /* + * Generate the alias string and save it in the list. 
+ */ + sb = sbuf_new_auto(); + va_start(ap, fmt); + sbuf_vprintf(sb, fmt, ap); + va_end(ap); + sbuf_finish(sb); + gap = g_malloc(sizeof(*gap) + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); + memcpy((char *)(gap + 1), sbuf_data(sb), sbuf_len(sb)); + sbuf_delete(sb); + gap->ga_alias = (const char *)(gap + 1); + LIST_INSERT_HEAD(&pp->aliases, gap, ga_next); +} + +void g_error_provider(struct g_provider *pp, int error) { /* G_VALID_PROVIDER(pp); We may not have g_topology */ pp->error = error; } static void g_resize_provider_event(void *arg, int flag) { struct g_hh00 *hh; struct g_class *mp; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp, *cp2; off_t size; g_topology_assert(); if (g_shutdown) return; hh = arg; pp = hh->pp; size = hh->size; g_free(hh); G_VALID_PROVIDER(pp); KASSERT(!(pp->flags & G_PF_WITHER), ("g_resize_provider_event but withered")); g_trace(G_T_TOPOLOGY, "g_resize_provider_event(%p)", pp); LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if (gp->resize == NULL && size < pp->mediasize) { /* * XXX: the g_dev_orphan method does deferred destroying, * so it is possible that another event has already * called the orphan method. Check the consumer's flags so * that it is not scheduled twice. */ if (cp->flags & G_CF_ORPHAN) continue; cp->flags |= G_CF_ORPHAN; cp->geom->orphan(cp); } } pp->mediasize = size; LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if ((gp->flags & G_GEOM_WITHER) == 0 && gp->resize != NULL) gp->resize(cp); } /* * After resizing, the previously invalid GEOM class metadata * might become valid. This means we should retaste. 
*/ LIST_FOREACH(mp, &g_classes, class) { if (mp->taste == NULL) continue; LIST_FOREACH(cp, &pp->consumers, consumers) if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; if (cp != NULL) continue; mp->taste(mp, pp, 0); g_topology_assert(); } } void g_resize_provider(struct g_provider *pp, off_t size) { struct g_hh00 *hh; G_VALID_PROVIDER(pp); if (pp->flags & G_PF_WITHER) return; if (size == pp->mediasize) return; hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->pp = pp; hh->size = size; g_post_event(g_resize_provider_event, hh, M_WAITOK, NULL); } #ifndef _PATH_DEV #define _PATH_DEV "/dev/" #endif struct g_provider * g_provider_by_name(char const *arg) { struct g_class *cp; struct g_geom *gp; struct g_provider *pp, *wpp; if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) arg += sizeof(_PATH_DEV) - 1; wpp = NULL; LIST_FOREACH(cp, &g_classes, class) { LIST_FOREACH(gp, &cp->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (strcmp(arg, pp->name) != 0) continue; if ((gp->flags & G_GEOM_WITHER) == 0 && (pp->flags & G_PF_WITHER) == 0) return (pp); else wpp = pp; } } } return (wpp); } void g_destroy_provider(struct g_provider *pp) { struct g_geom *gp; + struct g_geom_alias *gap, *gaptmp; g_topology_assert(); G_VALID_PROVIDER(pp); KASSERT(LIST_EMPTY(&pp->consumers), ("g_destroy_provider but attached")); KASSERT (pp->acr == 0, ("g_destroy_provider with acr")); KASSERT (pp->acw == 0, ("g_destroy_provider with acw")); KASSERT (pp->ace == 0, ("g_destroy_provider with ace")); g_cancel_event(pp); LIST_REMOVE(pp, provider); gp = pp->geom; devstat_remove_entry(pp->stat); /* * If a callback was provided, send notification that the provider * is now gone. */ if (gp->providergone != NULL) gp->providergone(pp); - + LIST_FOREACH_SAFE(gap, &pp->aliases, ga_next, gaptmp) + g_free(gap); g_free(pp); if ((gp->flags & G_GEOM_WITHER)) g_do_wither(); } /* * We keep the "geoms" list sorted by topological order (== increasing * numerical rank) at all times. 
* When an attach is done, the attaching geom's rank is invalidated * and it is moved to the tail of the list. * All geoms later in the sequence have their ranks reevaluated in * sequence. If we cannot assign rank to a geom because its * prerequisites do not have rank, we move that element to the tail * of the sequence with invalid rank as well. * At some point we encounter our original geom and if we still fail * to assign it a rank, there must be a loop and we fail back to * g_attach() which detaches again and calls redo_rank again * to fix up the damage. * It would be much simpler code-wise to do it recursively, but we * can't risk that on the kernel stack. */ static int redo_rank(struct g_geom *gp) { struct g_consumer *cp; struct g_geom *gp1, *gp2; int n, m; g_topology_assert(); G_VALID_GEOM(gp); /* Invalidate this geom's rank and move it to the tail */ gp1 = TAILQ_NEXT(gp, geoms); if (gp1 != NULL) { gp->rank = 0; TAILQ_REMOVE(&geoms, gp, geoms); TAILQ_INSERT_TAIL(&geoms, gp, geoms); } else { gp1 = gp; } /* re-rank the rest of the sequence */ for (; gp1 != NULL; gp1 = gp2) { gp1->rank = 0; m = 1; LIST_FOREACH(cp, &gp1->consumer, consumer) { if (cp->provider == NULL) continue; n = cp->provider->geom->rank; if (n == 0) { m = 0; break; } else if (n >= m) m = n + 1; } gp1->rank = m; gp2 = TAILQ_NEXT(gp1, geoms); /* got a rank, moving on */ if (m != 0) continue; /* no rank to original geom means loop */ if (gp == gp1) return (ELOOP); /* no rank, put it at the end move on */ TAILQ_REMOVE(&geoms, gp1, geoms); TAILQ_INSERT_TAIL(&geoms, gp1, geoms); } return (0); } int g_attach(struct g_consumer *cp, struct g_provider *pp) { int error; g_topology_assert(); G_VALID_CONSUMER(cp); G_VALID_PROVIDER(pp); g_trace(G_T_TOPOLOGY, "g_attach(%p, %p)", cp, pp); KASSERT(cp->provider == NULL, ("attach but attached")); cp->provider = pp; cp->flags &= ~G_CF_ORPHAN; LIST_INSERT_HEAD(&pp->consumers, cp, consumers); error = redo_rank(cp->geom); if (error) { LIST_REMOVE(cp, consumers); 
cp->provider = NULL; redo_rank(cp->geom); } return (error); } void g_detach(struct g_consumer *cp) { struct g_provider *pp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp); KASSERT(cp->provider != NULL, ("detach but not attached")); KASSERT(cp->acr == 0, ("detach but nonzero acr")); KASSERT(cp->acw == 0, ("detach but nonzero acw")); KASSERT(cp->ace == 0, ("detach but nonzero ace")); KASSERT(cp->nstart == cp->nend, ("detach with active requests")); pp = cp->provider; LIST_REMOVE(cp, consumers); cp->provider = NULL; if ((cp->geom->flags & G_GEOM_WITHER) || (pp->geom->flags & G_GEOM_WITHER) || (pp->flags & G_PF_WITHER)) g_do_wither(); redo_rank(cp->geom); } /* * g_access() * * Access-check with delta values. The question asked is "can provider * "cp" change the access counters by the relative amounts dc[rwe] ?" */ int g_access(struct g_consumer *cp, int dcr, int dcw, int dce) { struct g_provider *pp; struct g_geom *gp; int pw, pe; #ifdef INVARIANTS int sr, sw, se; #endif int error; g_topology_assert(); G_VALID_CONSUMER(cp); pp = cp->provider; KASSERT(pp != NULL, ("access but not attached")); G_VALID_PROVIDER(pp); gp = pp->geom; g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)", cp, pp->name, dcr, dcw, dce); KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr")); KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw")); KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace")); KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request")); KASSERT(cp->acr + dcr != 0 || cp->acw + dcw != 0 || cp->ace + dce != 0 || cp->nstart == cp->nend, ("Last close with active requests")); KASSERT(gp->access != NULL, ("NULL geom->access")); /* * If our class cares about being spoiled, and we have been, we * are probably just ahead of the event telling us that. Fail * now rather than having to unravel this later. 
*/ if (cp->geom->spoiled != NULL && (cp->flags & G_CF_SPOILED) && (dcr > 0 || dcw > 0 || dce > 0)) return (ENXIO); /* * A number of GEOM classes either need to perform an I/O on the first * open or to acquire a different subsystem's lock. To do that they * may have to drop the topology lock. * Other GEOM classes perform special actions when opening a lower rank * geom for the first time. As a result, more than one thread may * end up performing the special actions. * So, we prevent concurrent "first" opens by marking the consumer with * special flag. * * Note that if the geom's access method never drops the topology lock, * then we will never see G_GEOM_IN_ACCESS here. */ while ((gp->flags & G_GEOM_IN_ACCESS) != 0) { g_trace(G_T_ACCESS, "%s: race on geom %s via provider %s and consumer of %s", __func__, gp->name, pp->name, cp->geom->name); gp->flags |= G_GEOM_ACCESS_WAIT; g_topology_sleep(gp, 0); } /* * Figure out what counts the provider would have had, if this * consumer had (r0w0e0) at this time. */ pw = pp->acw - cp->acw; pe = pp->ace - cp->ace; g_trace(G_T_ACCESS, "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)", dcr, dcw, dce, cp->acr, cp->acw, cp->ace, pp->acr, pp->acw, pp->ace, pp, pp->name); /* If foot-shooting is enabled, any open on rank#1 is OK */ if ((g_debugflags & G_F_FOOTSHOOTING) && gp->rank == 1) ; /* If we try exclusive but already write: fail */ else if (dce > 0 && pw > 0) return (EPERM); /* If we try write but already exclusive: fail */ else if (dcw > 0 && pe > 0) return (EPERM); /* If we try to open more but provider is error'ed: fail */ else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0) { printf("%s(%d): provider %s has error %d set\n", __func__, __LINE__, pp->name, pp->error); return (pp->error); } /* Ok then... 
*/ #ifdef INVARIANTS sr = cp->acr; sw = cp->acw; se = cp->ace; #endif gp->flags |= G_GEOM_IN_ACCESS; error = gp->access(pp, dcr, dcw, dce); KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0, ("Geom provider %s::%s dcr=%d dcw=%d dce=%d error=%d failed " "closing ->access()", gp->class->name, pp->name, dcr, dcw, dce, error)); g_topology_assert(); gp->flags &= ~G_GEOM_IN_ACCESS; KASSERT(cp->acr == sr && cp->acw == sw && cp->ace == se, ("Access counts changed during geom->access")); if ((gp->flags & G_GEOM_ACCESS_WAIT) != 0) { gp->flags &= ~G_GEOM_ACCESS_WAIT; wakeup(gp); } if (!error) { /* * If we open first write, spoil any partner consumers. * If we close last write and provider is not errored, * trigger re-taste. */ if (pp->acw == 0 && dcw != 0) g_spoil(pp, cp); else if (pp->acw != 0 && pp->acw == -dcw && pp->error == 0 && !(gp->flags & G_GEOM_WITHER)) g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL); pp->acr += dcr; pp->acw += dcw; pp->ace += dce; cp->acr += dcr; cp->acw += dcw; cp->ace += dce; if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) KASSERT(pp->sectorsize > 0, ("Provider %s lacks sectorsize", pp->name)); if ((cp->geom->flags & G_GEOM_WITHER) && cp->acr == 0 && cp->acw == 0 && cp->ace == 0) g_do_wither(); } return (error); } int g_handleattr_int(struct bio *bp, const char *attribute, int val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_str(struct bio *bp, const char *attribute, const char *str) { return (g_handleattr(bp, attribute, str, 0)); } int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len) { int error = 0; if (strcmp(bp->bio_attribute, attribute)) return (0); if (len == 0) { bzero(bp->bio_data, 
bp->bio_length); if (strlcpy(bp->bio_data, val, bp->bio_length) >= bp->bio_length) { printf("%s: %s %s bio_length %jd strlen %zu -> EFAULT\n", __func__, bp->bio_to->name, attribute, (intmax_t)bp->bio_length, strlen(val)); error = EFAULT; } } else if (bp->bio_length == len) { bcopy(val, bp->bio_data, len); } else { printf("%s: %s %s bio_length %jd len %d -> EFAULT\n", __func__, bp->bio_to->name, attribute, (intmax_t)bp->bio_length, len); error = EFAULT; } if (error == 0) bp->bio_completed = bp->bio_length; g_io_deliver(bp, error); return (1); } int g_std_access(struct g_provider *pp, int dr __unused, int dw __unused, int de __unused) { g_topology_assert(); G_VALID_PROVIDER(pp); return (0); } void g_std_done(struct bio *bp) { struct bio *bp2; bp2 = bp->bio_parent; if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_completed; g_destroy_bio(bp); bp2->bio_inbed++; if (bp2->bio_children == bp2->bio_inbed) { if (bp2->bio_cmd == BIO_SPEEDUP) bp2->bio_completed = bp2->bio_length; g_io_deliver(bp2, bp2->bio_error); } } /* XXX: maybe this is only g_slice_spoiled */ void g_std_spoiled(struct g_consumer *cp) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp); cp->flags |= G_CF_ORPHAN; g_detach(cp); gp = cp->geom; LIST_FOREACH(pp, &gp->provider, provider) g_orphan_provider(pp, ENXIO); g_destroy_consumer(cp); if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) g_destroy_geom(gp); else gp->flags |= G_GEOM_WITHER; } /* * Spoiling happens when a provider is opened for writing, but consumers * which are configured by in-band data are attached (slicers for instance). * Since the write might potentially change the in-band data, such consumers * need to re-evaluate their existence after the writing session closes. * We do this by (offering to) tear them down when the open for write happens * in return for a re-taste when it closes again. 
* Together with the fact that such consumers grab an 'e' bit whenever they * are open, regardless of mode, this ends up DTRT. */ static void g_spoil_event(void *arg, int flag) { struct g_provider *pp; struct g_consumer *cp, *cp2; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); g_trace(G_T_TOPOLOGY, "%s %p(%s:%s:%s)", __func__, pp, pp->geom->class->name, pp->geom->name, pp->name); for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) { cp2 = LIST_NEXT(cp, consumers); if ((cp->flags & G_CF_SPOILED) == 0) continue; cp->flags &= ~G_CF_SPOILED; if (cp->geom->spoiled == NULL) continue; cp->geom->spoiled(cp); g_topology_assert(); } } void g_spoil(struct g_provider *pp, struct g_consumer *cp) { struct g_consumer *cp2; g_topology_assert(); G_VALID_PROVIDER(pp); G_VALID_CONSUMER(cp); LIST_FOREACH(cp2, &pp->consumers, consumers) { if (cp2 == cp) continue; /* KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr)); KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw)); */ KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace)); cp2->flags |= G_CF_SPOILED; } g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL); } static void g_media_changed_event(void *arg, int flag) { struct g_provider *pp; int retaste; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); /* * If provider was not open for writing, queue retaste after spoiling. * If it was, retaste will happen automatically on close. 
*/ retaste = (pp->acw == 0 && pp->error == 0 && !(pp->geom->flags & G_GEOM_WITHER)); g_spoil_event(arg, flag); if (retaste) g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL); } int g_media_changed(struct g_provider *pp, int flag) { struct g_consumer *cp; LIST_FOREACH(cp, &pp->consumers, consumers) cp->flags |= G_CF_SPOILED; return (g_post_event(g_media_changed_event, pp, flag, pp, NULL)); } int g_media_gone(struct g_provider *pp, int flag) { struct g_consumer *cp; LIST_FOREACH(cp, &pp->consumers, consumers) cp->flags |= G_CF_SPOILED; return (g_post_event(g_spoil_event, pp, flag, pp, NULL)); } int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len) { int error, i; i = len; error = g_io_getattr(attr, cp, &i, var); if (error) return (error); if (i != len) return (EINVAL); return (0); } static int g_get_device_prefix_len(const char *name) { int len; if (strncmp(name, "ada", 3) == 0) len = 3; else if (strncmp(name, "ad", 2) == 0) len = 2; else return (0); if (name[len] < '0' || name[len] > '9') return (0); do { len++; } while (name[len] >= '0' && name[len] <= '9'); return (len); } int g_compare_names(const char *namea, const char *nameb) { int deva, devb; if (strcmp(namea, nameb) == 0) return (1); deva = g_get_device_prefix_len(namea); if (deva == 0) return (0); devb = g_get_device_prefix_len(nameb); if (devb == 0) return (0); if (strcmp(namea + deva, nameb + devb) == 0) return (1); return (0); -} - -void -g_geom_add_alias(struct g_geom *gp, const char *alias) -{ - struct g_geom_alias *gap; - - gap = (struct g_geom_alias *)g_malloc( - sizeof(struct g_geom_alias) + strlen(alias) + 1, M_WAITOK); - strcpy((char *)(gap + 1), alias); - gap->ga_alias = (const char *)(gap + 1); - LIST_INSERT_HEAD(&gp->aliases, gap, ga_next); } #if defined(DIAGNOSTIC) || defined(DDB) /* * This function walks the mesh and returns a non-zero integer if it * finds the argument pointer is an object. 
The return value indicates * which type of object it is believed to be. If topology is not locked, * this function is potentially dangerous, but we don't assert that the * topology lock is held when called from debugger. */ int g_valid_obj(void const *ptr) { struct g_class *mp; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; #ifdef KDB if (kdb_active == 0) #endif g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { if (ptr == mp) return (1); LIST_FOREACH(gp, &mp->geom, geom) { if (ptr == gp) return (2); LIST_FOREACH(cp, &gp->consumer, consumer) if (ptr == cp) return (3); LIST_FOREACH(pp, &gp->provider, provider) if (ptr == pp) return (4); } } return(0); } #endif #ifdef DDB #define gprintf(...) do { \ db_printf("%*s", indent, ""); \ db_printf(__VA_ARGS__); \ } while (0) #define gprintln(...) do { \ gprintf(__VA_ARGS__); \ db_printf("\n"); \ } while (0) #define ADDFLAG(obj, flag, sflag) do { \ if ((obj)->flags & (flag)) { \ if (comma) \ strlcat(str, ",", size); \ strlcat(str, (sflag), size); \ comma = 1; \ } \ } while (0) static char * provider_flags_to_string(struct g_provider *pp, char *str, size_t size) { int comma = 0; bzero(str, size); if (pp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER"); ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN"); return (str); } static char * geom_flags_to_string(struct g_geom *gp, char *str, size_t size) { int comma = 0; bzero(str, size); if (gp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER"); return (str); } static void db_show_geom_consumer(int indent, struct g_consumer *cp) { if (indent == 0) { gprintln("consumer: %p", cp); gprintln(" class: %s (%p)", cp->geom->class->name, cp->geom->class); gprintln(" geom: %s (%p)", cp->geom->name, cp->geom); if (cp->provider == NULL) gprintln(" provider: none"); else { gprintln(" provider: %s (%p)", cp->provider->name, cp->provider); } gprintln(" access: r%dw%de%d", cp->acr, 
cp->acw, cp->ace); gprintln(" flags: 0x%04x", cp->flags); #ifdef INVARIANTS gprintln(" nstart: %u", cp->nstart); gprintln(" nend: %u", cp->nend); #endif } else { gprintf("consumer: %p (%s), access=r%dw%de%d", cp, cp->provider != NULL ? cp->provider->name : "none", cp->acr, cp->acw, cp->ace); if (cp->flags) db_printf(", flags=0x%04x", cp->flags); db_printf("\n"); } } static void db_show_geom_provider(int indent, struct g_provider *pp) { struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("provider: %s (%p)", pp->name, pp); gprintln(" class: %s (%p)", pp->geom->class->name, pp->geom->class); gprintln(" geom: %s (%p)", pp->geom->name, pp->geom); gprintln(" mediasize: %jd", (intmax_t)pp->mediasize); gprintln(" sectorsize: %u", pp->sectorsize); gprintln(" stripesize: %ju", (uintmax_t)pp->stripesize); gprintln(" stripeoffset: %ju", (uintmax_t)pp->stripeoffset); gprintln(" access: r%dw%de%d", pp->acr, pp->acw, pp->ace); gprintln(" flags: %s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); gprintln(" error: %d", pp->error); if (LIST_EMPTY(&pp->consumers)) gprintln(" consumers: none"); } else { gprintf("provider: %s (%p), access=r%dw%de%d", pp->name, pp, pp->acr, pp->acw, pp->ace); if (pp->flags != 0) { db_printf(", flags=%s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&pp->consumers)) { LIST_FOREACH(cp, &pp->consumers, consumers) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_geom(int indent, struct g_geom *gp) { struct g_provider *pp; struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("geom: %s (%p)", gp->name, gp); gprintln(" class: %s (%p)", gp->class->name, gp->class); gprintln(" flags: %s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); gprintln(" rank: %d", gp->rank); if (LIST_EMPTY(&gp->provider)) gprintln(" providers: none"); if (LIST_EMPTY(&gp->consumer)) gprintln(" 
consumers: none"); } else { gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank); if (gp->flags != 0) { db_printf(", flags=%s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&gp->provider)) { LIST_FOREACH(pp, &gp->provider, provider) { db_show_geom_provider(indent + 2, pp); if (db_pager_quit) break; } } if (!LIST_EMPTY(&gp->consumer)) { LIST_FOREACH(cp, &gp->consumer, consumer) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_class(struct g_class *mp) { struct g_geom *gp; db_printf("class: %s (%p)\n", mp->name, mp); LIST_FOREACH(gp, &mp->geom, geom) { db_show_geom_geom(2, gp); if (db_pager_quit) break; } } /* * Print the GEOM topology or the given object. */ DB_SHOW_COMMAND(geom, db_show_geom) { struct g_class *mp; if (!have_addr) { /* No address given, print the entire topology. */ LIST_FOREACH(mp, &g_classes, class) { db_show_geom_class(mp); db_printf("\n"); if (db_pager_quit) break; } } else { switch (g_valid_obj((void *)addr)) { case 1: db_show_geom_class((struct g_class *)addr); break; case 2: db_show_geom_geom(0, (struct g_geom *)addr); break; case 3: db_show_geom_consumer(0, (struct g_consumer *)addr); break; case 4: db_show_geom_provider(0, (struct g_provider *)addr); break; default: db_printf("Not a GEOM object.\n"); break; } } }
/*
 * Print the symbolic name of a bio's command (bp->bio_cmd) on one line of
 * debugger output.  Commands without a case below are shown as "UNKNOWN".
 */
static void
db_print_bio_cmd(struct bio *bp)
{
	db_printf(" cmd: ");
	switch (bp->bio_cmd) {
	case BIO_READ: db_printf("BIO_READ"); break;
	case BIO_WRITE: db_printf("BIO_WRITE"); break;
	case BIO_DELETE: db_printf("BIO_DELETE"); break;
	case BIO_GETATTR: db_printf("BIO_GETATTR"); break;
	case BIO_FLUSH: db_printf("BIO_FLUSH"); break;
	/* g_std_done() handles BIO_SPEEDUP bios, so decode them here too. */
	case BIO_SPEEDUP: db_printf("BIO_SPEEDUP"); break;
	case BIO_CMD0: db_printf("BIO_CMD0"); break;
	case BIO_CMD1: db_printf("BIO_CMD1"); break;
	case BIO_CMD2: db_printf("BIO_CMD2"); break;
	case BIO_ZONE: db_printf("BIO_ZONE"); break;
	default: db_printf("UNKNOWN"); break;
	}
	db_printf("\n");
}
static void db_print_bio_flags(struct bio *bp) { int
comma; comma = 0; db_printf(" flags: "); if (bp->bio_flags & BIO_ERROR) { db_printf("BIO_ERROR"); comma = 1; } if (bp->bio_flags & BIO_DONE) { db_printf("%sBIO_DONE", (comma ? ", " : "")); comma = 1; } if (bp->bio_flags & BIO_ONQUEUE) db_printf("%sBIO_ONQUEUE", (comma ? ", " : "")); db_printf("\n"); } /* * Print useful information in a BIO */ DB_SHOW_COMMAND(bio, db_show_bio) { struct bio *bp; if (have_addr) { bp = (struct bio *)addr; db_printf("BIO %p\n", bp); db_print_bio_cmd(bp); db_print_bio_flags(bp); db_printf(" cflags: 0x%hx\n", bp->bio_cflags); db_printf(" pflags: 0x%hx\n", bp->bio_pflags); db_printf(" offset: %jd\n", (intmax_t)bp->bio_offset); db_printf(" length: %jd\n", (intmax_t)bp->bio_length); db_printf(" bcount: %ld\n", bp->bio_bcount); db_printf(" resid: %ld\n", bp->bio_resid); db_printf(" completed: %jd\n", (intmax_t)bp->bio_completed); db_printf(" children: %u\n", bp->bio_children); db_printf(" inbed: %u\n", bp->bio_inbed); db_printf(" error: %d\n", bp->bio_error); db_printf(" parent: %p\n", bp->bio_parent); db_printf(" driver1: %p\n", bp->bio_driver1); db_printf(" driver2: %p\n", bp->bio_driver2); db_printf(" caller1: %p\n", bp->bio_caller1); db_printf(" caller2: %p\n", bp->bio_caller2); db_printf(" bio_from: %p\n", bp->bio_from); db_printf(" bio_to: %p\n", bp->bio_to); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) db_printf(" bio_track_bp: %p\n", bp->bio_track_bp); #endif } } #undef gprintf #undef gprintln #undef ADDFLAG #endif /* DDB */ Index: head/sys/geom/mountver/g_mountver.c =================================================================== --- head/sys/geom/mountver/g_mountver.c (revision 361014) +++ head/sys/geom/mountver/g_mountver.c (revision 361015) @@ -1,702 +1,705 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Edward Tomasz Napierala * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, mountver, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_MOUNTVER stuff"); static u_int g_mountver_debug = 0; static u_int g_mountver_check_ident = 1; SYSCTL_UINT(_kern_geom_mountver, OID_AUTO, debug, CTLFLAG_RW, &g_mountver_debug, 0, "Debug level"); SYSCTL_UINT(_kern_geom_mountver, OID_AUTO, check_ident, CTLFLAG_RW, &g_mountver_check_ident, 0, "Check disk ident when reattaching"); static eventhandler_tag g_mountver_pre_sync = NULL; static void g_mountver_queue(struct bio *bp); static void g_mountver_orphan(struct g_consumer *cp); static void g_mountver_resize(struct g_consumer *cp); static int g_mountver_destroy(struct g_geom *gp, boolean_t force); static g_taste_t g_mountver_taste; static int g_mountver_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_mountver_config(struct gctl_req *req, struct g_class *mp, const char *verb); static void g_mountver_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); static void g_mountver_init(struct g_class *mp); static void g_mountver_fini(struct g_class *mp); struct g_class g_mountver_class = { .name = G_MOUNTVER_CLASS_NAME, .version = G_VERSION, .ctlreq = g_mountver_config, .taste = g_mountver_taste, .destroy_geom = g_mountver_destroy_geom, .init = g_mountver_init, .fini = g_mountver_fini }; static void g_mountver_detach(void *arg, int flags __unused) { struct g_consumer *cp = arg; g_topology_assert(); if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); } static void g_mountver_done(struct bio *bp) { struct g_mountver_softc *sc; struct g_geom *gp; struct g_consumer *cp; struct bio *pbp; cp = bp->bio_from; gp = cp->geom; if (bp->bio_error != 
ENXIO) { g_std_done(bp); goto done; } /* * When the device goes away, it's possible that few requests * will be completed with ENXIO before g_mountver_orphan() * gets called. To work around that, we have to queue requests * that failed with ENXIO, in order to send them later. */ pbp = bp->bio_parent; KASSERT(pbp->bio_to == LIST_FIRST(&gp->provider), ("parent request was for someone else")); g_destroy_bio(bp); pbp->bio_inbed++; g_mountver_queue(pbp); done: sc = gp->softc; mtx_lock(&sc->sc_mtx); if (--cp->index == 0 && sc->sc_orphaned) g_post_event(g_mountver_detach, cp, M_NOWAIT, NULL); mtx_unlock(&sc->sc_mtx); } /* * Send the BIO down. The function is called with sc_mtx held to cover * the race with orphan, but drops it before external calls. */ static void g_mountver_send(struct g_geom *gp, struct bio *bp) { struct g_mountver_softc *sc = gp->softc; struct g_consumer *cp; struct bio *cbp; mtx_assert(&sc->sc_mtx, MA_OWNED); cbp = g_clone_bio(bp); if (cbp == NULL) { mtx_unlock(&sc->sc_mtx); g_io_deliver(bp, ENOMEM); return; } cp = LIST_FIRST(&gp->consumer); cp->index++; mtx_unlock(&sc->sc_mtx); cbp->bio_done = g_mountver_done; g_io_request(cbp, cp); } static void g_mountver_queue(struct bio *bp) { struct g_mountver_softc *sc; struct g_geom *gp; gp = bp->bio_to->geom; sc = gp->softc; mtx_lock(&sc->sc_mtx); TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_mtx); } static void g_mountver_send_queued(struct g_geom *gp) { struct g_mountver_softc *sc; struct bio *bp; sc = gp->softc; mtx_lock(&sc->sc_mtx); while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL && !sc->sc_orphaned) { TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); G_MOUNTVER_LOGREQ(bp, "Sending queued request."); /* sc_mtx is dropped inside */ g_mountver_send(gp, bp); mtx_lock(&sc->sc_mtx); } mtx_unlock(&sc->sc_mtx); } static void g_mountver_discard_queued(struct g_geom *gp) { struct g_mountver_softc *sc; struct bio *bp; sc = gp->softc; mtx_lock(&sc->sc_mtx); while ((bp = TAILQ_FIRST(&sc->sc_queue)) 
!= NULL) { TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_mtx); G_MOUNTVER_LOGREQ(bp, "Discarding queued request."); g_io_deliver(bp, ENXIO); mtx_lock(&sc->sc_mtx); } mtx_unlock(&sc->sc_mtx); } static void g_mountver_start(struct bio *bp) { struct g_mountver_softc *sc; struct g_geom *gp; gp = bp->bio_to->geom; sc = gp->softc; G_MOUNTVER_LOGREQ(bp, "Request received."); /* * It is possible that some bios were returned with ENXIO, even though * orphaning didn't happen yet. In that case, queue all subsequent * requests in order to maintain ordering. */ mtx_lock(&sc->sc_mtx); if (sc->sc_orphaned || !TAILQ_EMPTY(&sc->sc_queue)) { mtx_unlock(&sc->sc_mtx); if (sc->sc_shutting_down) { G_MOUNTVER_LOGREQ(bp, "Discarding request due to shutdown."); g_io_deliver(bp, ENXIO); return; } G_MOUNTVER_LOGREQ(bp, "Queueing request."); g_mountver_queue(bp); if (!sc->sc_orphaned) g_mountver_send_queued(gp); } else { G_MOUNTVER_LOGREQ(bp, "Sending request."); /* sc_mtx is dropped inside */ g_mountver_send(gp, bp); } } static int g_mountver_access(struct g_provider *pp, int dr, int dw, int de) { struct g_mountver_softc *sc; struct g_geom *gp; struct g_consumer *cp; g_topology_assert(); gp = pp->geom; cp = LIST_FIRST(&gp->consumer); sc = gp->softc; if (sc == NULL && dr <= 0 && dw <= 0 && de <= 0) return (0); KASSERT(sc != NULL, ("Trying to access withered provider \"%s\".", pp->name)); sc->sc_access_r += dr; sc->sc_access_w += dw; sc->sc_access_e += de; if (sc->sc_orphaned) return (0); return (g_access(cp, dr, dw, de)); } static int g_mountver_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp) { struct g_mountver_softc *sc; struct g_geom *gp; struct g_provider *newpp; struct g_consumer *cp; + struct g_geom_alias *gap; char name[64]; int error; int identsize = DISK_IDENT_SIZE; g_topology_assert(); gp = NULL; newpp = NULL; cp = NULL; snprintf(name, sizeof(name), "%s%s", pp->name, G_MOUNTVER_SUFFIX); LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, 
name) == 0) { gctl_error(req, "Provider %s already exists.", name); return (EEXIST); } } gp = g_new_geomf(mp, "%s", name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "gmountver", NULL, MTX_DEF | MTX_RECURSE); TAILQ_INIT(&sc->sc_queue); sc->sc_provider_name = strdup(pp->name, M_GEOM); gp->softc = sc; gp->start = g_mountver_start; gp->orphan = g_mountver_orphan; gp->resize = g_mountver_resize; gp->access = g_mountver_access; gp->dumpconf = g_mountver_dumpconf; newpp = g_new_providerf(gp, "%s", gp->name); newpp->mediasize = pp->mediasize; newpp->sectorsize = pp->sectorsize; newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; + LIST_FOREACH(gap, &pp->aliases, ga_next) + g_provider_add_alias(newpp, "%s%s", gap->ga_alias, G_MOUNTVER_SUFFIX); if ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0) { G_MOUNTVER_DEBUG(0, "Unmapped supported for %s.", gp->name); newpp->flags |= G_PF_ACCEPT_UNMAPPED; } else { G_MOUNTVER_DEBUG(0, "Unmapped unsupported for %s.", gp->name); newpp->flags &= ~G_PF_ACCEPT_UNMAPPED; } cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach to provider %s.", pp->name); goto fail; } error = g_access(cp, 1, 0, 0); if (error != 0) { gctl_error(req, "Cannot access provider %s.", pp->name); goto fail; } error = g_io_getattr("GEOM::ident", cp, &identsize, sc->sc_ident); g_access(cp, -1, 0, 0); if (error != 0) { if (g_mountver_check_ident) { gctl_error(req, "Cannot get disk ident from %s; error = %d.", pp->name, error); goto fail; } G_MOUNTVER_DEBUG(0, "Cannot get disk ident from %s; error = %d.", pp->name, error); sc->sc_ident[0] = '\0'; } g_error_provider(newpp, 0); G_MOUNTVER_DEBUG(0, "Device %s created.", gp->name); return (0); fail: g_free(sc->sc_provider_name); if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); g_destroy_provider(newpp); g_free(gp->softc); g_destroy_geom(gp); return (error); } static int 
g_mountver_destroy(struct g_geom *gp, boolean_t force) { struct g_mountver_softc *sc; struct g_provider *pp; g_topology_assert(); if (gp->softc == NULL) return (ENXIO); sc = gp->softc; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_MOUNTVER_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_MOUNTVER_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else { G_MOUNTVER_DEBUG(0, "Device %s removed.", gp->name); } if (pp != NULL) g_wither_provider(pp, ENXIO); g_mountver_discard_queued(gp); g_free(sc->sc_provider_name); g_free(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static int g_mountver_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_mountver_destroy(gp, 0)); } static void g_mountver_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_MOUNTVER_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } if (g_mountver_create(req, mp, pp) != 0) return; } } static struct g_geom * g_mountver_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) return (gp); } return (NULL); } static void g_mountver_ctl_destroy(struct 
gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_geom *gp; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); gp = g_mountver_find_geom(mp, name); if (gp == NULL) { G_MOUNTVER_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } error = g_mountver_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", gp->name, error); return; } } } static void g_mountver_orphan(struct g_consumer *cp) { struct g_mountver_softc *sc; int done; g_topology_assert(); sc = cp->geom->softc; mtx_lock(&sc->sc_mtx); sc->sc_orphaned = 1; done = (cp->index == 0); mtx_unlock(&sc->sc_mtx); if (done) g_mountver_detach(cp, 0); G_MOUNTVER_DEBUG(0, "%s is offline. 
Mount verification in progress.", sc->sc_provider_name); } static void g_mountver_resize(struct g_consumer *cp) { struct g_geom *gp; struct g_provider *pp; gp = cp->geom; LIST_FOREACH(pp, &gp->provider, provider) g_resize_provider(pp, cp->provider->mediasize); } static int g_mountver_ident_matches(struct g_geom *gp) { struct g_consumer *cp; struct g_mountver_softc *sc; char ident[DISK_IDENT_SIZE]; int error, identsize = DISK_IDENT_SIZE; sc = gp->softc; cp = LIST_FIRST(&gp->consumer); if (g_mountver_check_ident == 0) return (0); error = g_access(cp, 1, 0, 0); if (error != 0) { G_MOUNTVER_DEBUG(0, "Cannot access %s; " "not attaching; error = %d.", gp->name, error); return (1); } error = g_io_getattr("GEOM::ident", cp, &identsize, ident); g_access(cp, -1, 0, 0); if (error != 0) { G_MOUNTVER_DEBUG(0, "Cannot get disk ident for %s; " "not attaching; error = %d.", gp->name, error); return (1); } if (strcmp(ident, sc->sc_ident) != 0) { G_MOUNTVER_DEBUG(1, "Disk ident for %s (\"%s\") is different " "from expected \"%s\", not attaching.", gp->name, ident, sc->sc_ident); return (1); } return (0); } static struct g_geom * g_mountver_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_mountver_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); G_MOUNTVER_DEBUG(2, "Tasting %s.", pp->name); /* * Let's check if device already exists. */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; /* Already attached? 
*/ if (pp == LIST_FIRST(&gp->provider)) return (NULL); if (sc->sc_orphaned && strcmp(pp->name, sc->sc_provider_name) == 0) break; } if (gp == NULL) return (NULL); cp = LIST_FIRST(&gp->consumer); g_attach(cp, pp); error = g_mountver_ident_matches(gp); if (error != 0) { g_detach(cp); return (NULL); } if (sc->sc_access_r > 0 || sc->sc_access_w > 0 || sc->sc_access_e > 0) { error = g_access(cp, sc->sc_access_r, sc->sc_access_w, sc->sc_access_e); if (error != 0) { G_MOUNTVER_DEBUG(0, "Cannot access %s; error = %d.", pp->name, error); g_detach(cp); return (NULL); } } sc->sc_orphaned = 0; g_mountver_send_queued(gp); G_MOUNTVER_DEBUG(0, "%s has completed mount verification.", sc->sc_provider_name); return (gp); } static void g_mountver_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_MOUNTVER_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_mountver_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0) { g_mountver_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_mountver_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_mountver_softc *sc; if (pp != NULL || cp != NULL) return; sc = gp->softc; sbuf_printf(sb, "%s%s\n", indent, sc->sc_orphaned ? 
"OFFLINE" : "ONLINE"); sbuf_printf(sb, "%s%s\n", indent, sc->sc_provider_name); sbuf_printf(sb, "%s%s\n", indent, sc->sc_ident); } static void g_mountver_shutdown_pre_sync(void *arg, int howto) { struct g_mountver_softc *sc; struct g_class *mp; struct g_geom *gp, *gp2; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if (gp->softc == NULL) continue; sc = gp->softc; sc->sc_shutting_down = 1; if (sc->sc_orphaned) g_mountver_destroy(gp, 1); } g_topology_unlock(); } static void g_mountver_init(struct g_class *mp) { g_mountver_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, g_mountver_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); if (g_mountver_pre_sync == NULL) G_MOUNTVER_DEBUG(0, "Warning! Cannot register shutdown event."); } static void g_mountver_fini(struct g_class *mp) { if (g_mountver_pre_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_mountver_pre_sync); } DECLARE_GEOM_CLASS(g_mountver_class, g_mountver); MODULE_VERSION(geom_mountver, 0); Index: head/sys/geom/nop/g_nop.c =================================================================== --- head/sys/geom/nop/g_nop.c (revision 361014) +++ head/sys/geom/nop/g_nop.c (revision 361015) @@ -1,998 +1,1001 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Pawel Jakub Dawidek * Copyright (c) 2019 Mariusz Zaborski * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
*
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * NOTE(review): every #include directive below has lost its header name in
 * this text (only the bare directive is left).  Restore the names from the
 * repository copy of the file before attempting to build.
 */
#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Sysctl subtree for this class plus a single debug level knob. */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, nop, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_NOP stuff");
static u_int g_nop_debug = 0;
SYSCTL_UINT(_kern_geom_nop, OID_AUTO, debug, CTLFLAG_RW, &g_nop_debug, 0,
    "Debug level");

/* Forward declarations of the class and geom methods defined below. */
static int g_nop_destroy(struct g_geom *gp, boolean_t force);
static int g_nop_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static void g_nop_config(struct gctl_req *req, struct g_class *mp,
    const char *verb);
static g_access_t g_nop_access;
static g_dumpconf_t g_nop_dumpconf;
static g_orphan_t g_nop_orphan;
static g_provgone_t g_nop_providergone;
static g_resize_t g_nop_resize;
static g_start_t g_nop_start;

/* Method table that wires the functions in this file into the class. */
struct g_class g_nop_class = {
	.name = G_NOP_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_nop_config,
	.destroy_geom = g_nop_destroy_geom,
	.access = g_nop_access,
	.dumpconf = g_nop_dumpconf,
	.orphan = g_nop_orphan,
	.providergone = g_nop_providergone,
	.resize = g_nop_resize,
	.start = g_nop_start,
};

/*
 * Bookkeeping for one delayed request: the callout that fires later, the
 * cloned bio to forward at that point, and the linkage on the per-device
 * sc_head_delay list.
 */
struct g_nop_delay {
	struct callout			 dl_cal;
	struct bio			*dl_bio;
	TAILQ_ENTRY(g_nop_delay)	 dl_next;
};

/*
 * Check a caller-supplied device name.  Returns true only when every
 * character satisfies isalpha() or isdigit(); a zero-length name passes
 * because the loop body never runs.
 *
 * NOTE(review): strlen(name) is evaluated again on each iteration and is
 * compared against a signed int index.
 */
static bool
g_nop_verify_nprefix(const char *name)
{
	int i;

	for (i = 0; i < strlen(name); i++) {
		if (isalpha(name[i]) == 0 && isdigit(name[i]) == 0) {
			return (false);
		}
	}

	return (true);
}

/*
 * Orphan method: with the topology lock asserted, force-destroy the geom
 * that owns the consumer.
 */
static void
g_nop_orphan(struct g_consumer *cp)
{

	g_topology_assert();
	g_nop_destroy(cp->geom, 1);
}

/*
 * Resize method.  Nothing is done when an explicit size was recorded at
 * creation time.  If the underlying media is now smaller than sc_offset the
 * geom is force-destroyed; otherwise every provider of the geom is resized
 * to the media size minus sc_offset.
 */
static void
g_nop_resize(struct g_consumer *cp)
{
	struct g_nop_softc *sc;
	struct g_geom *gp;
	struct g_provider *pp;
	off_t size;

	g_topology_assert();

	gp = cp->geom;
	sc = gp->softc;

	if (sc->sc_explicitsize != 0)
		return;
	if (cp->provider->mediasize < sc->sc_offset) {
		g_nop_destroy(gp, 1);
		return;
	}
	size = cp->provider->mediasize - sc->sc_offset;
	LIST_FOREACH(pp, &gp->provider, provider)
		g_resize_provider(pp, size);
}

/*
 * Dumper callback installed by g_nop_kerneldump().  It ignores all of its
 * arguments and always returns 0.
 */
static int
g_nop_dumper(void *priv, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{

	return (0);
}

/*
 * Answer a GEOM::kerneldump attribute request: fill in the dumper
 * description carried in bp->bio_data and deliver the bio.  ENODEV is
 * delivered when the requested offset is beyond sc_explicitsize; a request
 * that extends past sc_explicitsize has its length clamped.
 *
 * NOTE(review): g_nop_create() stores the caller-supplied size in
 * sc_explicitsize before it defaults a zero size to the remaining media, so
 * sc_explicitsize can be 0 here -- confirm that bounding by it is intended.
 */
static void
g_nop_kerneldump(struct bio *bp, struct g_nop_softc *sc)
{
	struct g_kerneldump *gkd;
	struct g_geom *gp;
	struct g_provider *pp;

	gkd = (struct g_kerneldump *)bp->bio_data;
	gp = bp->bio_to->geom;
	g_trace(G_T_TOPOLOGY, "%s(%s, %jd, %jd)", __func__, gp->name,
	    (intmax_t)gkd->offset, (intmax_t)gkd->length);

	pp = LIST_FIRST(&gp->provider);

	gkd->di.dumper = g_nop_dumper;
	gkd->di.priv = sc;
	gkd->di.blocksize = pp->sectorsize;
	gkd->di.maxiosize = DFLTPHYS;
	gkd->di.mediaoffset = sc->sc_offset + gkd->offset;
	if (gkd->offset > sc->sc_explicitsize) {
		g_io_deliver(bp, ENODEV);
		return;
	}
	if (gkd->offset + gkd->length > sc->sc_explicitsize)
		gkd->length = sc->sc_explicitsize - gkd->offset;
	gkd->di.mediasize = gkd->length;
	g_io_deliver(bp, 0);
}

/* Log the cloned request and send it to the first consumer of the geom. */
static void
g_nop_pass(struct bio *cbp, struct g_geom *gp)
{

	G_NOP_LOGREQ(cbp, "Sending request.");
	g_io_request(cbp, LIST_FIRST(&gp->consumer));
}

/*
 * Callout handler armed by g_nop_start() for a delayed request.  The delay
 * record is unlinked from sc_head_delay under sc_lock, the stored bio is
 * forwarded with g_nop_pass(), and the record is freed.
 */
static void
g_nop_pass_timeout(void *data)
{
	struct g_nop_softc *sc;
	struct g_geom *gp;
	struct g_nop_delay *gndelay;

	gndelay = (struct g_nop_delay *)data;

	gp = gndelay->dl_bio->bio_to->geom;
	sc = gp->softc;

	mtx_lock(&sc->sc_lock);
	TAILQ_REMOVE(&sc->sc_head_delay, gndelay, dl_next);
	mtx_unlock(&sc->sc_lock);

	g_nop_pass(gndelay->dl_bio, gp);

	g_free(data);
}

/*
 * Start method.  Under sc_lock it bumps the per-command counter and, for
 * reads and writes, either consumes one unit of sc_count_until_fail or picks
 * up the configured failure/delay probabilities.  After the lock is dropped
 * the request may be failed with sc_error, delayed through a callout, or
 * cloned and passed straight to the consumer.
 */
static void
g_nop_start(struct bio *bp)
{
	struct g_nop_softc *sc;
	struct g_geom *gp;
	struct g_provider *pp;
	struct bio *cbp;
	u_int failprob, delayprob, delaytime;

	failprob = delayprob = delaytime = 0;

	gp = bp->bio_to->geom;
	sc = gp->softc;

	G_NOP_LOGREQ(bp, "Request received.");
	mtx_lock(&sc->sc_lock);
	switch (bp->bio_cmd) {
	case BIO_READ:
		sc->sc_reads++;
		sc->sc_readbytes += bp->bio_length;
		if (sc->sc_count_until_fail != 0) {
			sc->sc_count_until_fail -= 1;
		} else {
			failprob = sc->sc_rfailprob;
			delayprob = sc->sc_rdelayprob;
			delaytime = sc->sc_delaymsec;
		}
		break;
	case BIO_WRITE:
		sc->sc_writes++;
		sc->sc_wrotebytes += bp->bio_length;
		if (sc->sc_count_until_fail != 0) {
			sc->sc_count_until_fail -= 1;
		} else {
			failprob = sc->sc_wfailprob;
			delayprob = sc->sc_wdelayprob;
			delaytime = sc->sc_delaymsec;
		}
		break;
	case BIO_DELETE:
		sc->sc_deletes++;
		break;
	case BIO_GETATTR:
		sc->sc_getattrs++;
		/*
		 * The first two branches handle the attribute here and fall
		 * out to the unlock-and-return below; only the final else
		 * breaks out of the switch to forward the request.
		 */
		if (sc->sc_physpath &&
		    g_handleattr_str(bp, "GEOM::physpath", sc->sc_physpath))
			;
		else if (strcmp(bp->bio_attribute, "GEOM::kerneldump") == 0)
			g_nop_kerneldump(bp, sc);
		else
			/*
			 * Fallthrough to forwarding the GETATTR down to the
			 * lower level device.
			 */
			break;
		mtx_unlock(&sc->sc_lock);
		return;
	case BIO_FLUSH:
		sc->sc_flushes++;
		break;
	case BIO_SPEEDUP:
		sc->sc_speedups++;
		break;
	case BIO_CMD0:
		sc->sc_cmd0s++;
		break;
	case BIO_CMD1:
		sc->sc_cmd1s++;
		break;
	case BIO_CMD2:
		sc->sc_cmd2s++;
		break;
	}
	mtx_unlock(&sc->sc_lock);

	/* Fail the request with sc_error in roughly failprob percent of cases. */
	if (failprob > 0) {
		u_int rval;

		rval = arc4random() % 100;
		if (rval < failprob) {
			G_NOP_LOGREQLVL(1, bp, "Returning error=%d.", sc->sc_error);
			g_io_deliver(bp, sc->sc_error);
			return;
		}
	}

	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	cbp->bio_offset = bp->bio_offset + sc->sc_offset;
	pp = LIST_FIRST(&gp->provider);
	KASSERT(pp != NULL, ("NULL pp"));
	cbp->bio_to = pp;

	/*
	 * Delay the clone in roughly delayprob percent of cases.  If the
	 * M_NOWAIT allocation of the delay record fails, the request is
	 * simply passed on without a delay.
	 */
	if (delayprob > 0) {
		struct g_nop_delay *gndelay;
		u_int rval;

		rval = arc4random() % 100;
		if (rval < delayprob) {
			gndelay = g_malloc(sizeof(*gndelay), M_NOWAIT | M_ZERO);
			if (gndelay != NULL) {
				callout_init(&gndelay->dl_cal, 1);

				gndelay->dl_bio = cbp;

				mtx_lock(&sc->sc_lock);
				TAILQ_INSERT_TAIL(&sc->sc_head_delay, gndelay,
				    dl_next);
				mtx_unlock(&sc->sc_lock);

				callout_reset(&gndelay->dl_cal,
				    MSEC_2_TICKS(delaytime), g_nop_pass_timeout,
				    gndelay);
				return;
			}
		}
	}

	g_nop_pass(cbp, gp);
}

/*
 * Access method: apply the same access-count deltas to the first consumer
 * of the geom and return whatever g_access() reports.
 */
static int
g_nop_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	gp = pp->geom;
	cp = LIST_FIRST(&gp->consumer);
	error = g_access(cp, dr, dw, de);

	return (error);
}

/*
 * Validate the requested geometry and create a new geom with one provider
 * layered on top of pp.  The new name is gnopname (when given) or pp->name,
 * followed by G_NOP_SUFFIX.  Returns 0 on success; on failure an errno value
 * (EINVAL, EEXIST, or the g_attach() error) is returned and a message is
 * recorded with gctl_error().  The lines marked with + are the ones added
 * by this revision: aliases of pp are copied to the new provider with
 * G_NOP_SUFFIX appended.
 */
static int
g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp,
    const char *gnopname, int ioerror, u_int count_until_fail,
    u_int rfailprob, u_int wfailprob, u_int delaymsec, u_int rdelayprob,
    u_int wdelayprob, off_t offset, off_t size, u_int secsize, off_t stripesize,
    off_t stripeoffset, const char *physpath)
{
	struct g_nop_softc *sc;
	struct g_geom *gp;
	struct g_provider *newpp;
	struct g_consumer *cp;
+	struct g_geom_alias *gap;
	char name[64];
	int error, n;
	off_t explicitsize;

	g_topology_assert();

	gp = NULL;
	newpp = NULL;
	cp = NULL;

	if ((offset % pp->sectorsize) != 0) {
		gctl_error(req, "Invalid offset for provider %s.", pp->name);
		return (EINVAL);
	}
	if ((size % pp->sectorsize) != 0) {
		gctl_error(req, "Invalid size for provider %s.", pp->name);
		return (EINVAL);
	}
	if (offset >= pp->mediasize) {
		gctl_error(req, "Invalid offset for provider %s.", pp->name);
		return (EINVAL);
	}
	/* Remember the caller value (possibly 0) before defaulting size. */
	explicitsize = size;
	if (size == 0)
		size = pp->mediasize - offset;
	if (offset + size > pp->mediasize) {
		gctl_error(req, "Invalid size for provider %s.", pp->name);
		return (EINVAL);
	}
	if (secsize == 0)
		secsize = pp->sectorsize;
	else if ((secsize % pp->sectorsize) != 0) {
		gctl_error(req, "Invalid secsize for provider %s.", pp->name);
		return (EINVAL);
	}
	if (secsize > MAXPHYS) {
		gctl_error(req, "secsize is too big.");
		return (EINVAL);
	}
	/* Round the size down to a whole number of sectors. */
	size -= size % secsize;
	if ((stripesize % pp->sectorsize) != 0) {
		gctl_error(req, "Invalid stripesize for provider %s.", pp->name);
		return (EINVAL);
	}
	if ((stripeoffset % pp->sectorsize) != 0) {
		gctl_error(req, "Invalid stripeoffset for provider %s.", pp->name);
		return (EINVAL);
	}
	if (stripesize != 0 && stripeoffset >= stripesize) {
		gctl_error(req, "stripeoffset is too big.");
		return (EINVAL);
	}
	if (gnopname != NULL && !g_nop_verify_nprefix(gnopname)) {
		gctl_error(req, "Name %s is invalid.", gnopname);
		return (EINVAL);
	}

	if (gnopname != NULL) {
		n = snprintf(name, sizeof(name), "%s%s", gnopname,
		    G_NOP_SUFFIX);
	} else {
		n = snprintf(name, sizeof(name), "%s%s", pp->name,
		    G_NOP_SUFFIX);
	}
	/* Reject a name that was truncated by the fixed-size buffer. */
	if (n <= 0 || n >= sizeof(name)) {
		gctl_error(req, "Invalid provider name.");
		return (EINVAL);
	}
	LIST_FOREACH(gp, &mp->geom, geom) {
		if (strcmp(gp->name, name) == 0) {
			gctl_error(req, "Provider %s already exists.", name);
			return (EEXIST);
		}
	}
	gp = g_new_geomf(mp, "%s", name);
	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
	sc->sc_offset = offset;
	sc->sc_explicitsize = explicitsize;
	sc->sc_stripesize = stripesize;
	sc->sc_stripeoffset = stripeoffset;
	/*
	 * A physpath is stored only when one was given and it differs from
	 * G_NOP_PHYSPATH_PASSTHROUGH (strcmp() returns non-zero).
	 */
	if (physpath && strcmp(physpath, G_NOP_PHYSPATH_PASSTHROUGH)) {
		sc->sc_physpath = strndup(physpath, MAXPATHLEN, M_GEOM);
	} else
		sc->sc_physpath = NULL;
	sc->sc_error = ioerror;
	sc->sc_count_until_fail = count_until_fail;
	sc->sc_rfailprob = rfailprob;
	sc->sc_wfailprob = wfailprob;
	sc->sc_delaymsec = delaymsec;
	sc->sc_rdelayprob = rdelayprob;
	sc->sc_wdelayprob = wdelayprob;
	sc->sc_reads = 0;
	sc->sc_writes = 0;
	sc->sc_deletes = 0;
	sc->sc_getattrs = 0;
	sc->sc_flushes = 0;
	sc->sc_speedups = 0;
	sc->sc_cmd0s = 0;
	sc->sc_cmd1s = 0;
	sc->sc_cmd2s = 0;
	sc->sc_readbytes = 0;
	sc->sc_wrotebytes = 0;
	TAILQ_INIT(&sc->sc_head_delay);
	mtx_init(&sc->sc_lock, "gnop lock", NULL, MTX_DEF);
	gp->softc = sc;

	newpp = g_new_providerf(gp, "%s", gp->name);
	newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
	newpp->mediasize = size;
	newpp->sectorsize = secsize;
	newpp->stripesize = stripesize;
	newpp->stripeoffset = stripeoffset;
+	LIST_FOREACH(gap, &pp->aliases, ga_next)
+		g_provider_add_alias(newpp, "%s%s", gap->ga_alias, G_NOP_SUFFIX);

	cp = g_new_consumer(gp);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		gctl_error(req, "Cannot attach to provider %s.", pp->name);
		goto fail;
	}

	newpp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED;
	g_error_provider(newpp, 0);
	G_NOP_DEBUG(0, "Device %s created.", gp->name);
	return (0);
fail:
	/* Undo everything built above, in reverse order of construction. */
	if (cp->provider != NULL)
		g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_provider(newpp);
	mtx_destroy(&sc->sc_lock);
	free(sc->sc_physpath, M_GEOM);
	g_free(gp->softc);
	g_destroy_geom(gp);
	return (error);
}

/*
 * Providergone method: release the softc.  Asserts that no delayed request
 * is still queued, clears gp->softc, then frees the stored physpath, the
 * mutex and the softc itself.
 */
static void
g_nop_providergone(struct g_provider *pp)
{
	struct g_geom *gp = pp->geom;
	struct g_nop_softc *sc = gp->softc;

	KASSERT(TAILQ_EMPTY(&sc->sc_head_delay),
	    ("delayed request list is not empty"));

	gp->softc = NULL;
	free(sc->sc_physpath, M_GEOM);
	mtx_destroy(&sc->sc_lock);
	g_free(sc);
}

/*
 * Destroy a geom.  Returns ENXIO when the softc is already gone and EBUSY
 * when the first provider is still open and force is not set.  In every
 * other case (including a forced destroy of an open device) the geom is
 * withered with ENXIO and 0 is returned.
 */
static int
g_nop_destroy(struct g_geom *gp, boolean_t force)
{
	struct g_nop_softc *sc;
	struct g_provider *pp;

	g_topology_assert();
	sc = gp->softc;
	if (sc == NULL)
		return (ENXIO);
	pp = LIST_FIRST(&gp->provider);
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		if (force) {
			G_NOP_DEBUG(0, "Device %s is still open, so it "
			    "can't be definitely removed.", pp->name);
		} else {
			G_NOP_DEBUG(1, "Device %s is still open (r%dw%de%d).",
			    pp->name, pp->acr, pp->acw, pp->ace);
			return (EBUSY);
		}
	} else {
		G_NOP_DEBUG(0, "Device %s removed.", gp->name);
	}

	g_wither_geom(gp, ENXIO);

	return (0);
}

/* Class destroy_geom method: a non-forced g_nop_destroy(). */
static int
g_nop_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
{

	return (g_nop_destroy(gp, 0));
}

/*
 * Handler for the create verb.  Reads and range-checks the optional numeric
 * parameters (-1 means that the parameter was not supplied), then calls
 * g_nop_create() for each argN provider name, stripping a leading /dev/
 * prefix.  Processing stops at the first error; the error text is left in
 * the request by gctl_error().
 */
static void
g_nop_ctl_create(struct gctl_req *req, struct g_class *mp)
{
	struct g_provider *pp;
	intmax_t *val, error, rfailprob, wfailprob, count_until_fail, offset,
	    secsize, size, stripesize, stripeoffset, delaymsec,
	    rdelayprob, wdelayprob;
	const char *name, *physpath, *gnopname;
	char param[16];
	int i, *nargs;

	g_topology_assert();

	error = -1;
	rfailprob = -1;
	wfailprob = -1;
	count_until_fail = -1;
	offset = 0;
	secsize = 0;
	size = 0;
	stripesize = 0;
	stripeoffset = 0;
	delaymsec = -1;
	rdelayprob = -1;
	wdelayprob = -1;

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument", "nargs");
		return;
	}
	if (*nargs <= 0) {
		gctl_error(req, "Missing device(s).");
		return;
	}
	val = gctl_get_paraml_opt(req, "error", sizeof(*val));
	if (val != NULL) {
		error = *val;
	}
	val = gctl_get_paraml_opt(req, "rfailprob", sizeof(*val));
	if (val != NULL) {
		rfailprob = *val;
		if (rfailprob < -1 || rfailprob > 100) {
			gctl_error(req, "Invalid '%s' argument", "rfailprob");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "wfailprob", sizeof(*val));
	if (val != NULL) {
		wfailprob = *val;
		if (wfailprob < -1 || wfailprob > 100) {
			gctl_error(req, "Invalid '%s' argument", "wfailprob");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "delaymsec", sizeof(*val));
	if (val != NULL) {
		delaymsec = *val;
		if (delaymsec < 1 && delaymsec != -1) {
			gctl_error(req, "Invalid '%s' argument", "delaymsec");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "rdelayprob", sizeof(*val));
	if (val != NULL) {
		rdelayprob = *val;
		if (rdelayprob < -1 || rdelayprob > 100) {
			gctl_error(req, "Invalid '%s' argument", "rdelayprob");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "wdelayprob", sizeof(*val));
	if (val != NULL) {
		wdelayprob = *val;
		if (wdelayprob < -1 || wdelayprob > 100) {
			gctl_error(req, "Invalid '%s' argument", "wdelayprob");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "count_until_fail", sizeof(*val));
	if (val != NULL) {
		count_until_fail = *val;
		if (count_until_fail < -1) {
			gctl_error(req, "Invalid '%s' argument",
			    "count_until_fail");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "offset", sizeof(*val));
	if (val != NULL) {
		offset = *val;
		if (offset < 0) {
			gctl_error(req, "Invalid '%s' argument", "offset");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "size", sizeof(*val));
	if (val != NULL) {
		size = *val;
		if (size < 0) {
			gctl_error(req, "Invalid '%s' argument", "size");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "secsize", sizeof(*val));
	if (val != NULL) {
		secsize = *val;
		if (secsize < 0) {
			gctl_error(req, "Invalid '%s' argument", "secsize");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "stripesize", sizeof(*val));
	if (val != NULL) {
		stripesize = *val;
		if (stripesize < 0) {
			gctl_error(req, "Invalid '%s' argument", "stripesize");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "stripeoffset", sizeof(*val));
	if (val != NULL) {
		stripeoffset = *val;
		if (stripeoffset < 0) {
			gctl_error(req, "Invalid '%s' argument",
			    "stripeoffset");
			return;
		}
	}
	physpath = gctl_get_asciiparam(req, "physpath");
	gnopname = gctl_get_asciiparam(req, "gnopname");

	for (i = 0; i < *nargs; i++) {
		snprintf(param, sizeof(param), "arg%d", i);
		name = gctl_get_asciiparam(req, param);
		if (name == NULL) {
			gctl_error(req, "No 'arg%d' argument", i);
			return;
		}
		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
			name += strlen("/dev/");
		pp = g_provider_by_name(name);
		if (pp == NULL) {
			G_NOP_DEBUG(1, "Provider %s is invalid.", name);
			gctl_error(req, "Provider %s is invalid.", name);
			return;
		}
		/* Unset (-1) values are replaced by their defaults here. */
		if (g_nop_create(req, mp, pp,
		    gnopname,
		    error == -1 ? EIO : (int)error,
		    count_until_fail == -1 ? 0 : (u_int)count_until_fail,
		    rfailprob == -1 ? 0 : (u_int)rfailprob,
		    wfailprob == -1 ? 0 : (u_int)wfailprob,
		    delaymsec == -1 ? 1 : (u_int)delaymsec,
		    rdelayprob == -1 ? 0 : (u_int)rdelayprob,
		    wdelayprob == -1 ? 0 : (u_int)wdelayprob,
		    (off_t)offset, (off_t)size, (u_int)secsize,
		    (off_t)stripesize, (off_t)stripeoffset,
		    physpath) != 0) {
			return;
		}
	}
}

/*
 * Handler for the configure verb.  Each optional parameter that was supplied
 * (value other than -1) overwrites the matching softc field of every named
 * provider that belongs to this class.
 *
 * NOTE(review): unlike g_nop_ctl_create(), count_until_fail is not
 * range-checked here before the cast to u_int, and the softc fields are
 * written without taking sc_lock although g_nop_start() reads them under
 * that lock -- confirm whether either is intentional.
 */
static void
g_nop_ctl_configure(struct gctl_req *req, struct g_class *mp)
{
	struct g_nop_softc *sc;
	struct g_provider *pp;
	intmax_t *val, delaymsec, error, rdelayprob, rfailprob, wdelayprob,
	    wfailprob, count_until_fail;
	const char *name;
	char param[16];
	int i, *nargs;

	g_topology_assert();

	count_until_fail = -1;
	delaymsec = -1;
	error = -1;
	rdelayprob = -1;
	rfailprob = -1;
	wdelayprob = -1;
	wfailprob = -1;

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument", "nargs");
		return;
	}
	if (*nargs <= 0) {
		gctl_error(req, "Missing device(s).");
		return;
	}
	val = gctl_get_paraml_opt(req, "error", sizeof(*val));
	if (val != NULL) {
		error = *val;
	}
	val = gctl_get_paraml_opt(req, "count_until_fail", sizeof(*val));
	if (val != NULL) {
		count_until_fail = *val;
	}
	val = gctl_get_paraml_opt(req, "rfailprob", sizeof(*val));
	if (val != NULL) {
		rfailprob = *val;
		if (rfailprob < -1 || rfailprob > 100) {
			gctl_error(req, "Invalid '%s' argument", "rfailprob");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "wfailprob", sizeof(*val));
	if (val != NULL) {
		wfailprob = *val;
		if (wfailprob < -1 || wfailprob > 100) {
			gctl_error(req, "Invalid '%s' argument", "wfailprob");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "delaymsec", sizeof(*val));
	if (val != NULL) {
		delaymsec = *val;
		if (delaymsec < 1 && delaymsec != -1) {
			gctl_error(req, "Invalid '%s' argument", "delaymsec");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "rdelayprob", sizeof(*val));
	if (val != NULL) {
		rdelayprob = *val;
		if (rdelayprob < -1 || rdelayprob > 100) {
			gctl_error(req, "Invalid '%s' argument", "rdelayprob");
			return;
		}
	}
	val = gctl_get_paraml_opt(req, "wdelayprob", sizeof(*val));
	if (val != NULL) {
		wdelayprob = *val;
		if (wdelayprob < -1 || wdelayprob > 100) {
			gctl_error(req, "Invalid '%s' argument", "wdelayprob");
			return;
		}
	}

	for (i = 0; i < *nargs; i++) {
		snprintf(param, sizeof(param), "arg%d", i);
		name = gctl_get_asciiparam(req, param);
		if (name == NULL) {
			gctl_error(req, "No 'arg%d' argument", i);
			return;
		}
		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
			name += strlen("/dev/");
		pp = g_provider_by_name(name);
		if (pp == NULL || pp->geom->class != mp) {
			G_NOP_DEBUG(1, "Provider %s is invalid.", name);
			gctl_error(req, "Provider %s is invalid.", name);
			return;
		}
		sc = pp->geom->softc;
		if (error != -1)
			sc->sc_error = (int)error;
		if (rfailprob != -1)
			sc->sc_rfailprob = (u_int)rfailprob;
		if (wfailprob != -1)
			sc->sc_wfailprob = (u_int)wfailprob;
		if (rdelayprob != -1)
			sc->sc_rdelayprob = (u_int)rdelayprob;
		if (wdelayprob != -1)
			sc->sc_wdelayprob = (u_int)wdelayprob;
		if (delaymsec != -1)
			sc->sc_delaymsec = (u_int)delaymsec;
		if (count_until_fail != -1)
			sc->sc_count_until_fail = (u_int)count_until_fail;
	}
}

/* Return the geom of class mp whose name equals name, or NULL if none. */
static struct g_geom *
g_nop_find_geom(struct g_class *mp, const char *name)
{
	struct g_geom *gp;

	LIST_FOREACH(gp, &mp->geom, geom) {
		if (strcmp(gp->name, name) == 0)
			return (gp);
	}
	return (NULL);
}

/*
 * Handler for the destroy verb.  Requires the nargs and force parameters,
 * then destroys each named geom (a leading /dev/ prefix is stripped),
 * stopping at the first name that cannot be found or destroyed.
 */
static void
g_nop_ctl_destroy(struct gctl_req *req, struct g_class *mp)
{
	int *nargs, *force, error, i;
	struct g_geom *gp;
	const char *name;
	char param[16];

	g_topology_assert();

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument", "nargs");
		return;
	}
	if (*nargs <= 0) {
		gctl_error(req, "Missing device(s).");
		return;
	}
	force = gctl_get_paraml(req, "force", sizeof(*force));
	if (force == NULL) {
		gctl_error(req, "No 'force' argument");
		return;
	}

	for (i = 0; i < *nargs; i++) {
		snprintf(param, sizeof(param), "arg%d", i);
		name = gctl_get_asciiparam(req, param);
		if (name == NULL) {
			gctl_error(req, "No 'arg%d' argument", i);
			return;
		}
		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
			name += strlen("/dev/");
		gp = g_nop_find_geom(mp, name);
		if (gp == NULL) {
			G_NOP_DEBUG(1, "Device %s is invalid.", name);
			gctl_error(req, "Device %s is invalid.", name);
			return;
		}
		error = g_nop_destroy(gp, *force);
		if (error != 0) {
			gctl_error(req, "Cannot destroy device %s (error=%d).",
			    gp->name, error);
			return;
		}
	}
}

/*
 * Handler for the reset verb: zero all request and byte counters in the
 * softc of every named provider that belongs to this class.
 *
 * NOTE(review): the counters are cleared without sc_lock, whereas
 * g_nop_start() increments them with the lock held -- confirm.
 */
static void
g_nop_ctl_reset(struct gctl_req *req, struct g_class *mp)
{
	struct g_nop_softc *sc;
	struct g_provider *pp;
	const char *name;
	char param[16];
	int i, *nargs;

	g_topology_assert();

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument", "nargs");
		return;
	}
	if (*nargs <= 0) {
		gctl_error(req, "Missing device(s).");
		return;
	}

	for (i = 0; i < *nargs; i++) {
		snprintf(param, sizeof(param), "arg%d", i);
		name = gctl_get_asciiparam(req, param);
		if (name == NULL) {
			gctl_error(req, "No 'arg%d' argument", i);
			return;
		}
		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
			name += strlen("/dev/");
		pp = g_provider_by_name(name);
		if (pp == NULL || pp->geom->class != mp) {
			G_NOP_DEBUG(1, "Provider %s is invalid.", name);
			gctl_error(req, "Provider %s is invalid.", name);
			return;
		}
		sc = pp->geom->softc;
		sc->sc_reads = 0;
		sc->sc_writes = 0;
		sc->sc_deletes = 0;
		sc->sc_getattrs = 0;
		sc->sc_flushes = 0;
		sc->sc_speedups = 0;
		sc->sc_cmd0s = 0;
		sc->sc_cmd1s = 0;
		sc->sc_cmd2s = 0;
		sc->sc_readbytes = 0;
		sc->sc_wrotebytes = 0;
	}
}

/*
 * Class ctlreq method.  Verifies that the version parameter matches
 * G_NOP_VERSION and dispatches on the verb string to the create, configure,
 * destroy or reset handler; any other verb is reported as an error.
 */
static void
g_nop_config(struct gctl_req *req, struct g_class *mp, const char *verb)
{
	uint32_t *version;

	g_topology_assert();

	version = gctl_get_paraml(req, "version", sizeof(*version));
	if (version == NULL) {
		gctl_error(req, "No '%s' argument.", "version");
		return;
	}
	if (*version != G_NOP_VERSION) {
		gctl_error(req, "Userland and kernel parts are out of sync.");
		return;
	}

	if (strcmp(verb, "create") == 0) {
		g_nop_ctl_create(req, mp);
		return;
	} else if (strcmp(verb, "configure") == 0) {
		g_nop_ctl_configure(req, mp);
		return;
	} else if (strcmp(verb, "destroy") == 0) {
		g_nop_ctl_destroy(req, mp);
		return;
	} else if (strcmp(verb, "reset") == 0) {
		g_nop_ctl_reset(req, mp);
		return;
	}

	gctl_error(req, "Unknown verb.");
}

/*
 * Dumpconf method.  Emits the softc settings and counters into sb, one
 * sbuf_printf() per value, but only for the geom-level call (both pp and cp
 * are NULL).
 *
 * NOTE(review): each format string below is just the indent followed by a
 * bare value; it looks as if element tags around the values were lost when
 * this text was extracted -- confirm against the repository copy.
 */
static void
g_nop_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_nop_softc *sc;

	if (pp != NULL || cp != NULL)
		return;
	sc = gp->softc;
	sbuf_printf(sb, "%s%jd\n", indent,
	    (intmax_t)sc->sc_offset);
	sbuf_printf(sb, "%s%u\n", indent,
	    sc->sc_rfailprob);
	sbuf_printf(sb, "%s%u\n", indent,
	    sc->sc_wfailprob);
	sbuf_printf(sb, "%s%u\n", indent,
	    sc->sc_rdelayprob);
	sbuf_printf(sb, "%s%u\n", indent,
	    sc->sc_wdelayprob);
	sbuf_printf(sb, "%s%d\n", indent, sc->sc_delaymsec);
	sbuf_printf(sb, "%s%u\n", indent,
	    sc->sc_count_until_fail);
	sbuf_printf(sb, "%s%d\n", indent, sc->sc_error);
	sbuf_printf(sb, "%s%ju\n", indent, sc->sc_reads);
	sbuf_printf(sb, "%s%ju\n", indent, sc->sc_writes);
	sbuf_printf(sb, "%s%ju\n", indent, sc->sc_deletes);
	sbuf_printf(sb, "%s%ju\n", indent, sc->sc_getattrs);
	sbuf_printf(sb, "%s%ju\n", indent, sc->sc_flushes);
	sbuf_printf(sb, "%s%ju\n", indent, sc->sc_speedups);
	sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd0s);
	sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd1s);
	sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd2s);
	sbuf_printf(sb, "%s%ju\n", indent,
	    sc->sc_readbytes);
	sbuf_printf(sb, "%s%ju\n", indent,
	    sc->sc_wrotebytes);
}

DECLARE_GEOM_CLASS(g_nop_class, g_nop);
MODULE_VERSION(geom_nop, 0);
Index: head/sys/geom/part/g_part.c
===================================================================
--- head/sys/geom/part/g_part.c	(revision 361014)
+++ head/sys/geom/part/g_part.c	(revision 361015)
@@ -1,2436 +1,2434 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002, 2005-2009 Marcel Moolenaar
 * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/*
 * NOTE(review): all but the last #include directive below have lost their
 * header names in this text.  Restore them from the repository copy of the
 * file before attempting to build.
 */
#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "g_part_if.h"

#ifndef _PATH_DEV
#define _PATH_DEV "/dev/"
#endif

/*
 * Placeholder scheme with an empty method table; g_part_parm_scheme()
 * skips it when looking a scheme up by name.
 */
static kobj_method_t g_part_null_methods[] = {
	{ 0, 0 }
};

static struct g_part_scheme g_part_null_scheme = {
	"(none)",
	g_part_null_methods,
	sizeof(struct g_part_table),
};

/* List of partitioning schemes known to this class. */
TAILQ_HEAD(, g_part_scheme) g_part_schemes =
    TAILQ_HEAD_INITIALIZER(g_part_schemes);

/*
 * Table mapping each partition-type alias enumerator to its textual name.
 * g_part_alias_name() scans it linearly.
 */
struct g_part_alias_list {
	const char *lexeme;
	enum g_part_alias alias;
} g_part_alias_list[G_PART_ALIAS_COUNT] = {
	{ "apple-apfs", G_PART_ALIAS_APPLE_APFS },
	{ "apple-boot", G_PART_ALIAS_APPLE_BOOT },
	{ "apple-core-storage", G_PART_ALIAS_APPLE_CORE_STORAGE },
	{ "apple-hfs", G_PART_ALIAS_APPLE_HFS },
	{ "apple-label", G_PART_ALIAS_APPLE_LABEL },
	{ "apple-raid", G_PART_ALIAS_APPLE_RAID },
	{ "apple-raid-offline", G_PART_ALIAS_APPLE_RAID_OFFLINE },
	{ "apple-tv-recovery", G_PART_ALIAS_APPLE_TV_RECOVERY },
	{ "apple-ufs", G_PART_ALIAS_APPLE_UFS },
	{ "bios-boot", G_PART_ALIAS_BIOS_BOOT },
	{ "chromeos-firmware", G_PART_ALIAS_CHROMEOS_FIRMWARE },
	{ "chromeos-kernel", G_PART_ALIAS_CHROMEOS_KERNEL },
	{ "chromeos-reserved", G_PART_ALIAS_CHROMEOS_RESERVED },
	{ "chromeos-root", G_PART_ALIAS_CHROMEOS_ROOT },
	{ "dragonfly-ccd", G_PART_ALIAS_DFBSD_CCD },
	{ "dragonfly-hammer", G_PART_ALIAS_DFBSD_HAMMER },
	{ "dragonfly-hammer2", G_PART_ALIAS_DFBSD_HAMMER2 },
	{ "dragonfly-label32", G_PART_ALIAS_DFBSD },
	{ "dragonfly-label64", G_PART_ALIAS_DFBSD64 },
	{ "dragonfly-legacy", G_PART_ALIAS_DFBSD_LEGACY },
	{ "dragonfly-swap", G_PART_ALIAS_DFBSD_SWAP },
	{ "dragonfly-ufs", G_PART_ALIAS_DFBSD_UFS },
	{ "dragonfly-vinum", G_PART_ALIAS_DFBSD_VINUM },
	{ "ebr", G_PART_ALIAS_EBR },
	{ "efi", G_PART_ALIAS_EFI },
	{ "fat16", G_PART_ALIAS_MS_FAT16 },
	{ "fat32", G_PART_ALIAS_MS_FAT32 },
	{ "fat32lba", G_PART_ALIAS_MS_FAT32LBA },
	{ "freebsd", G_PART_ALIAS_FREEBSD },
	{ "freebsd-boot", G_PART_ALIAS_FREEBSD_BOOT },
	{ "freebsd-nandfs", G_PART_ALIAS_FREEBSD_NANDFS },
	{ "freebsd-swap", G_PART_ALIAS_FREEBSD_SWAP },
	{ "freebsd-ufs", G_PART_ALIAS_FREEBSD_UFS },
	{ "freebsd-vinum", G_PART_ALIAS_FREEBSD_VINUM },
	{ "freebsd-zfs", G_PART_ALIAS_FREEBSD_ZFS },
	{ "linux-data", G_PART_ALIAS_LINUX_DATA },
	{ "linux-lvm", G_PART_ALIAS_LINUX_LVM },
	{ "linux-raid", G_PART_ALIAS_LINUX_RAID },
	{ "linux-swap", G_PART_ALIAS_LINUX_SWAP },
	{ "mbr", G_PART_ALIAS_MBR },
	{ "ms-basic-data", G_PART_ALIAS_MS_BASIC_DATA },
	{ "ms-ldm-data", G_PART_ALIAS_MS_LDM_DATA },
	{ "ms-ldm-metadata", G_PART_ALIAS_MS_LDM_METADATA },
	{ "ms-recovery", G_PART_ALIAS_MS_RECOVERY },
	{ "ms-reserved", G_PART_ALIAS_MS_RESERVED },
	{ "ms-spaces", G_PART_ALIAS_MS_SPACES },
	{ "netbsd-ccd", G_PART_ALIAS_NETBSD_CCD },
	{ "netbsd-cgd", G_PART_ALIAS_NETBSD_CGD },
	{ "netbsd-ffs", G_PART_ALIAS_NETBSD_FFS },
	{ "netbsd-lfs", G_PART_ALIAS_NETBSD_LFS },
	{ "netbsd-raid", G_PART_ALIAS_NETBSD_RAID },
	{ "netbsd-swap", G_PART_ALIAS_NETBSD_SWAP },
	{ "ntfs", G_PART_ALIAS_MS_NTFS },
	{ "openbsd-data", G_PART_ALIAS_OPENBSD_DATA },
	{ "prep-boot", G_PART_ALIAS_PREP_BOOT },
	{ "vmware-reserved", G_PART_ALIAS_VMRESERVED },
	{ "vmware-vmfs", G_PART_ALIAS_VMFS },
	{ "vmware-vmkdiag", G_PART_ALIAS_VMKDIAG },
	{ "vmware-vsanhdr", G_PART_ALIAS_VMVSANHDR },
};

/*
 * Sysctl subtree for this class.  check_integrity is consulted by
 * g_part_check_integrity() to decide whether a failed check is fatal; the
 * other knobs are used outside this part of the file.
 */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, part, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_PART stuff");
static u_int check_integrity = 1;
SYSCTL_UINT(_kern_geom_part, OID_AUTO, check_integrity,
    CTLFLAG_RWTUN, &check_integrity, 1,
    "Enable integrity checking");
static u_int auto_resize = 1;
SYSCTL_UINT(_kern_geom_part, OID_AUTO, auto_resize,
    CTLFLAG_RWTUN, &auto_resize, 1,
    "Enable auto resize");
static u_int allow_nesting = 0;
SYSCTL_UINT(_kern_geom_part, OID_AUTO, allow_nesting,
    CTLFLAG_RWTUN, &allow_nesting, 0,
    "Allow additional levels of nesting");
char g_part_separator[MAXPATHLEN] = "";
SYSCTL_STRING(_kern_geom_part, OID_AUTO, separator,
    CTLFLAG_RDTUN, &g_part_separator, sizeof(g_part_separator),
    "Partition name separator");

/*
 * The GEOM partitioning class.
 */
static g_ctl_req_t g_part_ctlreq;
static g_ctl_destroy_geom_t g_part_destroy_geom;
static g_fini_t g_part_fini;
static g_init_t g_part_init;
static g_taste_t g_part_taste;

static g_access_t g_part_access;
static g_dumpconf_t g_part_dumpconf;
static g_orphan_t g_part_orphan;
static g_spoiled_t g_part_spoiled;
static g_start_t g_part_start;
static g_resize_t g_part_resize;
static g_ioctl_t g_part_ioctl;

static struct g_class g_part_class = {
	.name = "PART",
	.version = G_VERSION,
	/* Class methods. */
	.ctlreq = g_part_ctlreq,
	.destroy_geom = g_part_destroy_geom,
	.fini = g_part_fini,
	.init = g_part_init,
	.taste = g_part_taste,
	/* Geom methods. */
	.access = g_part_access,
	.dumpconf = g_part_dumpconf,
	.orphan = g_part_orphan,
	.spoiled = g_part_spoiled,
	.start = g_part_start,
	.resize = g_part_resize,
	.ioctl = g_part_ioctl,
};

DECLARE_GEOM_CLASS(g_part_class, g_part);
MODULE_VERSION(g_part, 0);

/*
 * Support functions.
*/ static void g_part_wither(struct g_geom *, int); const char * g_part_alias_name(enum g_part_alias alias) { int i; for (i = 0; i < G_PART_ALIAS_COUNT; i++) { if (g_part_alias_list[i].alias != alias) continue; return (g_part_alias_list[i].lexeme); } return (NULL); } void g_part_geometry_heads(off_t blocks, u_int sectors, off_t *bestchs, u_int *bestheads) { static u_int candidate_heads[] = { 1, 2, 16, 32, 64, 128, 255, 0 }; off_t chs, cylinders; u_int heads; int idx; *bestchs = 0; *bestheads = 0; for (idx = 0; candidate_heads[idx] != 0; idx++) { heads = candidate_heads[idx]; cylinders = blocks / heads / sectors; if (cylinders < heads || cylinders < sectors) break; if (cylinders > 1023) continue; chs = cylinders * heads * sectors; if (chs > *bestchs || (chs == *bestchs && *bestheads == 1)) { *bestchs = chs; *bestheads = heads; } } } static void g_part_geometry(struct g_part_table *table, struct g_consumer *cp, off_t blocks) { static u_int candidate_sectors[] = { 1, 9, 17, 33, 63, 0 }; off_t chs, bestchs; u_int heads, sectors; int idx; if (g_getattr("GEOM::fwsectors", cp, §ors) != 0 || sectors == 0 || g_getattr("GEOM::fwheads", cp, &heads) != 0 || heads == 0) { table->gpt_fixgeom = 0; table->gpt_heads = 0; table->gpt_sectors = 0; bestchs = 0; for (idx = 0; candidate_sectors[idx] != 0; idx++) { sectors = candidate_sectors[idx]; g_part_geometry_heads(blocks, sectors, &chs, &heads); if (chs == 0) continue; /* * Prefer a geometry with sectors > 1, but only if * it doesn't bump down the number of heads to 1. */ if (chs > bestchs || (chs == bestchs && heads > 1 && table->gpt_sectors == 1)) { bestchs = chs; table->gpt_heads = heads; table->gpt_sectors = sectors; } } /* * If we didn't find a geometry at all, then the disk is * too big. This means we can use the maximum number of * heads and sectors. 
*/ if (bestchs == 0) { table->gpt_heads = 255; table->gpt_sectors = 63; } } else { table->gpt_fixgeom = 1; table->gpt_heads = heads; table->gpt_sectors = sectors; } } static void g_part_get_physpath_done(struct bio *bp) { struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct g_provider *pp; struct bio *pbp; pbp = bp->bio_parent; pp = pbp->bio_to; gp = pp->geom; table = gp->softc; entry = pp->private; if (bp->bio_error == 0) { char *end; size_t len, remainder; len = strlcat(bp->bio_data, "/", bp->bio_length); if (len < bp->bio_length) { end = bp->bio_data + len; remainder = bp->bio_length - len; G_PART_NAME(table, entry, end, remainder); } } g_std_done(bp); } #define DPRINTF(...) if (bootverbose) { \ printf("GEOM_PART: " __VA_ARGS__); \ } static int g_part_check_integrity(struct g_part_table *table, struct g_consumer *cp) { struct g_part_entry *e1, *e2; struct g_provider *pp; off_t offset; int failed; failed = 0; pp = cp->provider; if (table->gpt_last < table->gpt_first) { DPRINTF("last LBA is below first LBA: %jd < %jd\n", (intmax_t)table->gpt_last, (intmax_t)table->gpt_first); failed++; } if (table->gpt_last > pp->mediasize / pp->sectorsize - 1) { DPRINTF("last LBA extends beyond mediasize: " "%jd > %jd\n", (intmax_t)table->gpt_last, (intmax_t)pp->mediasize / pp->sectorsize - 1); failed++; } LIST_FOREACH(e1, &table->gpt_entry, gpe_entry) { if (e1->gpe_deleted || e1->gpe_internal) continue; if (e1->gpe_start < table->gpt_first) { DPRINTF("partition %d has start offset below first " "LBA: %jd < %jd\n", e1->gpe_index, (intmax_t)e1->gpe_start, (intmax_t)table->gpt_first); failed++; } if (e1->gpe_start > table->gpt_last) { DPRINTF("partition %d has start offset beyond last " "LBA: %jd > %jd\n", e1->gpe_index, (intmax_t)e1->gpe_start, (intmax_t)table->gpt_last); failed++; } if (e1->gpe_end < e1->gpe_start) { DPRINTF("partition %d has end offset below start " "offset: %jd < %jd\n", e1->gpe_index, (intmax_t)e1->gpe_end, 
(intmax_t)e1->gpe_start); failed++; } if (e1->gpe_end > table->gpt_last) { DPRINTF("partition %d has end offset beyond last " "LBA: %jd > %jd\n", e1->gpe_index, (intmax_t)e1->gpe_end, (intmax_t)table->gpt_last); failed++; } if (pp->stripesize > 0) { offset = e1->gpe_start * pp->sectorsize; if (e1->gpe_offset > offset) offset = e1->gpe_offset; if ((offset + pp->stripeoffset) % pp->stripesize) { DPRINTF("partition %d on (%s, %s) is not " "aligned on %ju bytes\n", e1->gpe_index, pp->name, table->gpt_scheme->name, (uintmax_t)pp->stripesize); /* Don't treat this as a critical failure */ } } e2 = e1; while ((e2 = LIST_NEXT(e2, gpe_entry)) != NULL) { if (e2->gpe_deleted || e2->gpe_internal) continue; if (e1->gpe_start >= e2->gpe_start && e1->gpe_start <= e2->gpe_end) { DPRINTF("partition %d has start offset inside " "partition %d: start[%d] %jd >= start[%d] " "%jd <= end[%d] %jd\n", e1->gpe_index, e2->gpe_index, e2->gpe_index, (intmax_t)e2->gpe_start, e1->gpe_index, (intmax_t)e1->gpe_start, e2->gpe_index, (intmax_t)e2->gpe_end); failed++; } if (e1->gpe_end >= e2->gpe_start && e1->gpe_end <= e2->gpe_end) { DPRINTF("partition %d has end offset inside " "partition %d: start[%d] %jd >= end[%d] " "%jd <= end[%d] %jd\n", e1->gpe_index, e2->gpe_index, e2->gpe_index, (intmax_t)e2->gpe_start, e1->gpe_index, (intmax_t)e1->gpe_end, e2->gpe_index, (intmax_t)e2->gpe_end); failed++; } if (e1->gpe_start < e2->gpe_start && e1->gpe_end > e2->gpe_end) { DPRINTF("partition %d contains partition %d: " "start[%d] %jd > start[%d] %jd, end[%d] " "%jd < end[%d] %jd\n", e1->gpe_index, e2->gpe_index, e1->gpe_index, (intmax_t)e1->gpe_start, e2->gpe_index, (intmax_t)e2->gpe_start, e2->gpe_index, (intmax_t)e2->gpe_end, e1->gpe_index, (intmax_t)e1->gpe_end); failed++; } } } if (failed != 0) { printf("GEOM_PART: integrity check failed (%s, %s)\n", pp->name, table->gpt_scheme->name); if (check_integrity != 0) return (EINVAL); table->gpt_corrupt = 1; } return (0); } #undef DPRINTF struct g_part_entry * 
g_part_new_entry(struct g_part_table *table, int index, quad_t start,
    quad_t end)
{
	/*
	 * Return the table entry with the given index, creating it when it
	 * does not exist yet.  The search stops at the first entry whose
	 * index is larger than the requested one, so the list is presumably
	 * kept sorted by gpe_index; a new entry is inserted after the last
	 * entry with a smaller index (or at the head).  A new entry is
	 * zero-filled by g_malloc(M_ZERO); an existing entry gets its
	 * gpe_offset reset to 0.  In both cases gpe_start/gpe_end are set
	 * from the arguments.
	 */
	struct g_part_entry *entry, *last;

	last = NULL;
	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
		if (entry->gpe_index == index)
			break;
		if (entry->gpe_index > index) {
			/* Passed the slot: no entry with this index. */
			entry = NULL;
			break;
		}
		last = entry;
	}
	if (entry == NULL) {
		entry = g_malloc(table->gpt_scheme->gps_entrysz,
		    M_WAITOK | M_ZERO);
		entry->gpe_index = index;
		if (last == NULL)
			LIST_INSERT_HEAD(&table->gpt_entry, entry, gpe_entry);
		else
			LIST_INSERT_AFTER(last, entry, gpe_entry);
	} else
		entry->gpe_offset = 0;
	entry->gpe_start = start;
	entry->gpe_end = end;
	return (entry);
}

/*
 * Create (if the entry has none yet) and (re)initialise the provider that
 * represents a partition entry.  The size and stripe parameters of the
 * provider are derived from the provider the geom's first consumer is
 * attached to.
 */
static void
g_part_new_provider(struct g_geom *gp, struct g_part_table *table,
    struct g_part_entry *entry)
{
	struct g_consumer *cp;
	struct g_provider *pp;
	struct sbuf *sb;
	struct g_geom_alias *gap;
	off_t offset;

	cp = LIST_FIRST(&gp->consumer);
	pp = cp->provider;

	/* gpe_offset is never allowed to be below the byte offset of gpe_start. */
	offset = entry->gpe_start * pp->sectorsize;
	if (entry->gpe_offset < offset)
		entry->gpe_offset = offset;

	/*
	 * NOTE(review): the lines prefixed with '+' and '-' below are patch
	 * hunk markers carried over from the diff this text was extracted
	 * from; they are preserved verbatim.
	 */
	if (entry->gpe_pp == NULL) {
+		sb = sbuf_new_auto();
+		G_PART_FULLNAME(table, entry, sb, gp->name);
+		sbuf_finish(sb);
+		entry->gpe_pp = g_new_providerf(gp, "%s", sbuf_data(sb));
+		sbuf_delete(sb);
		/*
-		 * Add aliases to the geom before we create the provider so that
-		 * geom_dev can taste it with all the aliases in place so all
-		 * the aliased dev_t instances get created for each partition
-		 * (eg foo5p7 gets created for bar5p7 when foo is an alias of bar).
+		 * If our parent provider had any aliases, then copy them to our
+		 * provider so when geom DEV tastes things later, they will be
+		 * there for it to create the aliases with those name used in
+		 * place of the geom's name we use to create the provider. The
+		 * kobj interface that generates names makes this awkward.
		 */
-		LIST_FOREACH(gap, &table->gpt_gp->aliases, ga_next) {
+		LIST_FOREACH(gap, &pp->aliases, ga_next) {
			sb = sbuf_new_auto();
			G_PART_FULLNAME(table, entry, sb, gap->ga_alias);
			sbuf_finish(sb);
-			g_geom_add_alias(gp, sbuf_data(sb));
+			g_provider_add_alias(entry->gpe_pp, "%s", sbuf_data(sb));
			sbuf_delete(sb);
		}
-		sb = sbuf_new_auto();
-		G_PART_FULLNAME(table, entry, sb, gp->name);
-		sbuf_finish(sb);
-		entry->gpe_pp = g_new_providerf(gp, "%s", sbuf_data(sb));
-		sbuf_delete(sb);
		entry->gpe_pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
		entry->gpe_pp->private = entry;		/* Close the circle. */
	}
	entry->gpe_pp->index = entry->gpe_index - 1;	/* index is 1-based. */
	entry->gpe_pp->mediasize = (entry->gpe_end - entry->gpe_start + 1) *
	    pp->sectorsize;
	/* Shrink by whatever gpe_offset lies beyond the start of the entry. */
	entry->gpe_pp->mediasize -= entry->gpe_offset - offset;
	entry->gpe_pp->sectorsize = pp->sectorsize;
	entry->gpe_pp->stripesize = pp->stripesize;
	entry->gpe_pp->stripeoffset = pp->stripeoffset + entry->gpe_offset;
	if (pp->stripesize > 0)
		entry->gpe_pp->stripeoffset %= pp->stripesize;
	entry->gpe_pp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED;
	g_error_provider(entry->gpe_pp, 0);
}

/*
 * Look up a geom of the PART class by name, ignoring geoms that have
 * G_GEOM_WITHER set.  Returns NULL when no geom matches (the loop variable
 * is NULL once LIST_FOREACH runs off the end of the list).
 */
static struct g_geom*
g_part_find_geom(const char *name)
{
	struct g_geom *gp;
	LIST_FOREACH(gp, &g_part_class.geom, geom) {
		if ((gp->flags & G_GEOM_WITHER) == 0 &&
		    strcmp(name, gp->name) == 0)
			break;
	}
	return (gp);
}

/*
 * Resolve the ASCII request parameter `name' to a PART geom and store it in
 * *v.  A leading _PATH_DEV prefix is skipped.  Returns ENOATTR when the
 * parameter is absent, EINVAL (with a request error set) when no geom of
 * that name exists, 0 on success.
 */
static int
g_part_parm_geom(struct gctl_req *req, const char *name, struct g_geom **v)
{
	struct g_geom *gp;
	const char *gname;

	gname = gctl_get_asciiparam(req, name);
	if (gname == NULL)
		return (ENOATTR);
	if (strncmp(gname, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
		gname += sizeof(_PATH_DEV) - 1;
	gp = g_part_find_geom(gname);
	if (gp == NULL) {
		gctl_error(req, "%d %s '%s'", EINVAL, name, gname);
		return (EINVAL);
	}
	*v = gp;
	return (0);
}

/*
 * Resolve the ASCII request parameter `name' to a provider (looked up with
 * g_provider_by_name()) and store it in *v.  A leading _PATH_DEV prefix is
 * skipped.  Returns ENOATTR when the parameter is absent, EINVAL (with a
 * request error set) when the provider is not found, 0 on success.
 */
static int
g_part_parm_provider(struct gctl_req *req, const char *name,
    struct g_provider **v)
{
	struct g_provider *pp;
	const char *pname;

	pname = gctl_get_asciiparam(req, name);
	if (pname == NULL)
		return (ENOATTR);
	if (strncmp(pname, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
		pname += sizeof(_PATH_DEV) - 1;
	pp = g_provider_by_name(pname);
	if (pp == NULL) {
		gctl_error(req, "%d %s '%s'", EINVAL, name, pname);
		return (EINVAL);
	}
	*v = pp;
	return (0);
}

/*
 * Parse the ASCII request parameter `name' as a non-negative quad_t
 * (strtoq() with base 0) and store it in *v.  Trailing garbage or a
 * negative value yields EINVAL; a missing parameter yields ENOATTR.
 */
static int
g_part_parm_quad(struct gctl_req *req, const char *name, quad_t *v)
{
	const char *p;
	char *x;
	quad_t q;

	p = gctl_get_asciiparam(req, name);
	if (p == NULL)
		return (ENOATTR);
	q = strtoq(p, &x, 0);
	if (*x != '\0' || q < 0) {
		gctl_error(req, "%d %s '%s'", EINVAL, name, p);
		return (EINVAL);
	}
	*v = q;
	return (0);
}

/*
 * Resolve the ASCII request parameter `name' to a registered partitioning
 * scheme (case-insensitive name match) and store it in *v.  The null scheme
 * is never returned.  Returns ENOATTR when the parameter is absent and
 * EINVAL when no scheme matches.
 */
static int
g_part_parm_scheme(struct gctl_req *req, const char *name,
    struct g_part_scheme **v)
{
	struct g_part_scheme *s;
	const char *p;

	p = gctl_get_asciiparam(req, name);
	if (p == NULL)
		return (ENOATTR);
	TAILQ_FOREACH(s, &g_part_schemes, scheme_list) {
		if (s == &g_part_null_scheme)
			continue;
		if (!strcasecmp(s->name, p))
			break;
	}
	if (s == NULL) {
		gctl_error(req, "%d %s '%s'", EINVAL, name, p);
		return (EINVAL);
	}
	*v = s;
	return (0);
}

/*
 * Fetch the ASCII request parameter `name' and store the string pointer in
 * *v.  Returns ENOATTR when the parameter is absent and EINVAL when it is
 * empty, except for the parameter called "label".
 */
static int
g_part_parm_str(struct gctl_req *req, const char *name, const char **v)
{
	const char *p;

	p = gctl_get_asciiparam(req, name);
	if (p == NULL)
		return (ENOATTR);
	/* An empty label is always valid.
*/ if (strcmp(name, "label") != 0 && p[0] == '\0') { gctl_error(req, "%d %s '%s'", EINVAL, name, p); return (EINVAL); } *v = p; return (0); } static int g_part_parm_intmax(struct gctl_req *req, const char *name, u_int *v) { const intmax_t *p; int size; p = gctl_get_param(req, name, &size); if (p == NULL) return (ENOATTR); if (size != sizeof(*p) || *p < 0 || *p > INT_MAX) { gctl_error(req, "%d %s '%jd'", EINVAL, name, *p); return (EINVAL); } *v = (u_int)*p; return (0); } static int g_part_parm_uint32(struct gctl_req *req, const char *name, u_int *v) { const uint32_t *p; int size; p = gctl_get_param(req, name, &size); if (p == NULL) return (ENOATTR); if (size != sizeof(*p) || *p > INT_MAX) { gctl_error(req, "%d %s '%u'", EINVAL, name, (unsigned int)*p); return (EINVAL); } *v = (u_int)*p; return (0); } static int g_part_parm_bootcode(struct gctl_req *req, const char *name, const void **v, unsigned int *s) { const void *p; int size; p = gctl_get_param(req, name, &size); if (p == NULL) return (ENOATTR); *v = p; *s = size; return (0); } static int g_part_probe(struct g_geom *gp, struct g_consumer *cp, int depth) { struct g_part_scheme *iter, *scheme; struct g_part_table *table; int pri, probe; table = gp->softc; scheme = (table != NULL) ? table->gpt_scheme : NULL; pri = (scheme != NULL) ? G_PART_PROBE(table, cp) : INT_MIN; if (pri == 0) goto done; if (pri > 0) { /* error */ scheme = NULL; pri = INT_MIN; } TAILQ_FOREACH(iter, &g_part_schemes, scheme_list) { if (iter == &g_part_null_scheme) continue; table = (void *)kobj_create((kobj_class_t)iter, M_GEOM, M_WAITOK); table->gpt_gp = gp; table->gpt_scheme = iter; table->gpt_depth = depth; probe = G_PART_PROBE(table, cp); if (probe <= 0 && probe > pri) { pri = probe; scheme = iter; if (gp->softc != NULL) kobj_delete((kobj_t)gp->softc, M_GEOM); gp->softc = table; if (pri == 0) goto done; } else kobj_delete((kobj_t)table, M_GEOM); } done: return ((scheme == NULL) ? ENXIO : 0); } /* * Control request functions. 
*/ static int g_part_ctl_add(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_provider *pp; struct g_part_entry *delent, *last, *entry; struct g_part_table *table; struct sbuf *sb; quad_t end; unsigned int index; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); pp = LIST_FIRST(&gp->consumer)->provider; table = gp->softc; end = gpp->gpp_start + gpp->gpp_size - 1; if (gpp->gpp_start < table->gpt_first || gpp->gpp_start > table->gpt_last) { gctl_error(req, "%d start '%jd'", EINVAL, (intmax_t)gpp->gpp_start); return (EINVAL); } if (end < gpp->gpp_start || end > table->gpt_last) { gctl_error(req, "%d size '%jd'", EINVAL, (intmax_t)gpp->gpp_size); return (EINVAL); } if (gpp->gpp_index > table->gpt_entries) { gctl_error(req, "%d index '%d'", EINVAL, gpp->gpp_index); return (EINVAL); } delent = last = NULL; index = (gpp->gpp_index > 0) ? gpp->gpp_index : 1; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted) { if (entry->gpe_index == index) delent = entry; continue; } if (entry->gpe_index == index) index = entry->gpe_index + 1; if (entry->gpe_index < index) last = entry; if (entry->gpe_internal) continue; if (gpp->gpp_start >= entry->gpe_start && gpp->gpp_start <= entry->gpe_end) { gctl_error(req, "%d start '%jd'", ENOSPC, (intmax_t)gpp->gpp_start); return (ENOSPC); } if (end >= entry->gpe_start && end <= entry->gpe_end) { gctl_error(req, "%d end '%jd'", ENOSPC, (intmax_t)end); return (ENOSPC); } if (gpp->gpp_start < entry->gpe_start && end > entry->gpe_end) { gctl_error(req, "%d size '%jd'", ENOSPC, (intmax_t)gpp->gpp_size); return (ENOSPC); } } if (gpp->gpp_index > 0 && index != gpp->gpp_index) { gctl_error(req, "%d index '%d'", EEXIST, gpp->gpp_index); return (EEXIST); } if (index > table->gpt_entries) { gctl_error(req, "%d index '%d'", ENOSPC, index); return (ENOSPC); } entry = (delent == NULL) ? 
g_malloc(table->gpt_scheme->gps_entrysz, M_WAITOK | M_ZERO) : delent; entry->gpe_index = index; entry->gpe_start = gpp->gpp_start; entry->gpe_end = end; error = G_PART_ADD(table, entry, gpp); if (error) { gctl_error(req, "%d", error); if (delent == NULL) g_free(entry); return (error); } if (delent == NULL) { if (last == NULL) LIST_INSERT_HEAD(&table->gpt_entry, entry, gpe_entry); else LIST_INSERT_AFTER(last, entry, gpe_entry); entry->gpe_created = 1; } else { entry->gpe_deleted = 0; entry->gpe_modified = 1; } g_part_new_provider(gp, table, entry); /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); if (pp->stripesize > 0 && entry->gpe_pp->stripeoffset != 0) sbuf_printf(sb, " added, but partition is not " "aligned on %ju bytes\n", (uintmax_t)pp->stripesize); else sbuf_cat(sb, " added\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_bootcode(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_part_table *table; struct sbuf *sb; int error, sz; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; sz = table->gpt_scheme->gps_bootcodesz; if (sz == 0) { error = ENODEV; goto fail; } if (gpp->gpp_codesize > sz) { error = EFBIG; goto fail; } error = G_PART_BOOTCODE(table, gpp); if (error) goto fail; /* Provide feedback if so requested. 
*/ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "bootcode written to %s\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); fail: gctl_error(req, "%d", error); return (error); } static int g_part_ctl_commit(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_entry *entry, *tmp; struct g_part_table *table; char *buf; int error, i; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; if (!table->gpt_opened) { gctl_error(req, "%d", EPERM); return (EPERM); } g_topology_unlock(); cp = LIST_FIRST(&gp->consumer); if ((table->gpt_smhead | table->gpt_smtail) != 0) { pp = cp->provider; buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); while (table->gpt_smhead != 0) { i = ffs(table->gpt_smhead) - 1; error = g_write_data(cp, i * pp->sectorsize, buf, pp->sectorsize); if (error) { g_free(buf); goto fail; } table->gpt_smhead &= ~(1 << i); } while (table->gpt_smtail != 0) { i = ffs(table->gpt_smtail) - 1; error = g_write_data(cp, pp->mediasize - (i + 1) * pp->sectorsize, buf, pp->sectorsize); if (error) { g_free(buf); goto fail; } table->gpt_smtail &= ~(1 << i); } g_free(buf); } if (table->gpt_scheme == &g_part_null_scheme) { g_topology_lock(); g_access(cp, -1, -1, -1); g_part_wither(gp, ENXIO); return (0); } error = G_PART_WRITE(table, cp); if (error) goto fail; LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) { if (!entry->gpe_deleted) { /* Notify consumers that provider might be changed. 
*/ if (entry->gpe_modified && ( entry->gpe_pp->acw + entry->gpe_pp->ace + entry->gpe_pp->acr) == 0) g_media_changed(entry->gpe_pp, M_NOWAIT); entry->gpe_created = 0; entry->gpe_modified = 0; continue; } LIST_REMOVE(entry, gpe_entry); g_free(entry); } table->gpt_created = 0; table->gpt_opened = 0; g_topology_lock(); g_access(cp, -1, -1, -1); return (0); fail: g_topology_lock(); gctl_error(req, "%d", error); return (error); } static int g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_scheme *scheme; struct g_part_table *null, *table; struct sbuf *sb; int attr, error; pp = gpp->gpp_provider; scheme = gpp->gpp_scheme; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, pp->name)); g_topology_assert(); /* Check that there isn't already a g_part geom on the provider. */ gp = g_part_find_geom(pp->name); if (gp != NULL) { null = gp->softc; if (null->gpt_scheme != &g_part_null_scheme) { gctl_error(req, "%d geom '%s'", EEXIST, pp->name); return (EEXIST); } } else null = NULL; if ((gpp->gpp_parms & G_PART_PARM_ENTRIES) && (gpp->gpp_entries < scheme->gps_minent || gpp->gpp_entries > scheme->gps_maxent)) { gctl_error(req, "%d entries '%d'", EINVAL, gpp->gpp_entries); return (EINVAL); } if (null == NULL) gp = g_new_geomf(&g_part_class, "%s", pp->name); gp->softc = kobj_create((kobj_class_t)gpp->gpp_scheme, M_GEOM, M_WAITOK); table = gp->softc; table->gpt_gp = gp; table->gpt_scheme = gpp->gpp_scheme; table->gpt_entries = (gpp->gpp_parms & G_PART_PARM_ENTRIES) ? 
gpp->gpp_entries : scheme->gps_minent; LIST_INIT(&table->gpt_entry); if (null == NULL) { cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 1, 1); if (error != 0) { g_part_wither(gp, error); gctl_error(req, "%d geom '%s'", error, pp->name); return (error); } table->gpt_opened = 1; } else { cp = LIST_FIRST(&gp->consumer); table->gpt_opened = null->gpt_opened; table->gpt_smhead = null->gpt_smhead; table->gpt_smtail = null->gpt_smtail; } g_topology_unlock(); /* Make sure the provider has media. */ if (pp->mediasize == 0 || pp->sectorsize == 0) { error = ENODEV; goto fail; } /* Make sure we can nest and if so, determine our depth. */ error = g_getattr("PART::isleaf", cp, &attr); if (!error && attr) { error = ENODEV; goto fail; } error = g_getattr("PART::depth", cp, &attr); table->gpt_depth = (!error) ? attr + 1 : 0; /* * Synthesize a disk geometry. Some partitioning schemes * depend on it and since some file systems need it even * when the partitition scheme doesn't, we do it here in * scheme-independent code. */ g_part_geometry(table, cp, pp->mediasize / pp->sectorsize); error = G_PART_CREATE(table, gpp); if (error) goto fail; g_topology_lock(); table->gpt_created = 1; if (null != NULL) kobj_delete((kobj_t)null, M_GEOM); /* * Support automatic commit by filling in the gpp_geom * parameter. */ gpp->gpp_parms |= G_PART_PARM_GEOM; gpp->gpp_geom = gp; /* Provide feedback if so requested. 
*/ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "%s created\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); fail: g_topology_lock(); if (null == NULL) { g_access(cp, -1, -1, -1); g_part_wither(gp, error); } else { kobj_delete((kobj_t)gp->softc, M_GEOM); gp->softc = null; } gctl_error(req, "%d provider", error); return (error); } static int g_part_ctl_delete(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_provider *pp; struct g_part_entry *entry; struct g_part_table *table; struct sbuf *sb; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } pp = entry->gpe_pp; if (pp != NULL) { if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) { gctl_error(req, "%d", EBUSY); return (EBUSY); } pp->private = NULL; entry->gpe_pp = NULL; } if (pp != NULL) g_wither_provider(pp, ENXIO); /* Provide feedback if so requested. 
*/ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_cat(sb, " deleted\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } if (entry->gpe_created) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } else { entry->gpe_modified = 0; entry->gpe_deleted = 1; } return (0); } static int g_part_ctl_destroy(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_entry *entry, *tmp; struct g_part_table *null, *table; struct sbuf *sb; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; /* Check for busy providers. */ LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (gpp->gpp_force) { pp = entry->gpe_pp; if (pp == NULL) continue; if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) continue; } gctl_error(req, "%d", EBUSY); return (EBUSY); } if (gpp->gpp_force) { /* Destroy all providers. 
*/ LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) { pp = entry->gpe_pp; if (pp != NULL) { pp->private = NULL; g_wither_provider(pp, ENXIO); } LIST_REMOVE(entry, gpe_entry); g_free(entry); } } error = G_PART_DESTROY(table, gpp); if (error) { gctl_error(req, "%d", error); return (error); } gp->softc = kobj_create((kobj_class_t)&g_part_null_scheme, M_GEOM, M_WAITOK); null = gp->softc; null->gpt_gp = gp; null->gpt_scheme = &g_part_null_scheme; LIST_INIT(&null->gpt_entry); cp = LIST_FIRST(&gp->consumer); pp = cp->provider; null->gpt_last = pp->mediasize / pp->sectorsize - 1; null->gpt_depth = table->gpt_depth; null->gpt_opened = table->gpt_opened; null->gpt_smhead = table->gpt_smhead; null->gpt_smtail = table->gpt_smtail; while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } kobj_delete((kobj_t)table, M_GEOM); /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "%s destroyed\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_modify(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct sbuf *sb; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } error = G_PART_MODIFY(table, entry, gpp); if (error) { gctl_error(req, "%d", error); return (error); } if (!entry->gpe_created) entry->gpe_modified = 1; /* Provide feedback if so requested. 
*/ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_cat(sb, " modified\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_move(struct gctl_req *req, struct g_part_parms *gpp) { gctl_error(req, "%d verb 'move'", ENOSYS); return (ENOSYS); } static int g_part_ctl_recover(struct gctl_req *req, struct g_part_parms *gpp) { struct g_part_table *table; struct g_geom *gp; struct sbuf *sb; int error, recovered; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; error = recovered = 0; if (table->gpt_corrupt) { error = G_PART_RECOVER(table); if (error == 0) error = g_part_check_integrity(table, LIST_FIRST(&gp->consumer)); if (error) { gctl_error(req, "%d recovering '%s' failed", error, gp->name); return (error); } recovered = 1; } /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); if (recovered) sbuf_printf(sb, "%s recovered\n", gp->name); else sbuf_printf(sb, "%s recovering is not needed\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_resize(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_provider *pp; struct g_part_entry *pe, *entry; struct g_part_table *table; struct sbuf *sb; quad_t end; int error; off_t mediasize; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; /* check gpp_index */ LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } /* check gpp_size */ end = entry->gpe_start + gpp->gpp_size - 1; 
if (gpp->gpp_size < 1 || end > table->gpt_last) { gctl_error(req, "%d size '%jd'", EINVAL, (intmax_t)gpp->gpp_size); return (EINVAL); } LIST_FOREACH(pe, &table->gpt_entry, gpe_entry) { if (pe->gpe_deleted || pe->gpe_internal || pe == entry) continue; if (end >= pe->gpe_start && end <= pe->gpe_end) { gctl_error(req, "%d end '%jd'", ENOSPC, (intmax_t)end); return (ENOSPC); } if (entry->gpe_start < pe->gpe_start && end > pe->gpe_end) { gctl_error(req, "%d size '%jd'", ENOSPC, (intmax_t)gpp->gpp_size); return (ENOSPC); } } pp = entry->gpe_pp; if ((g_debugflags & G_F_FOOTSHOOTING) == 0 && (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)) { if (entry->gpe_end - entry->gpe_start + 1 > gpp->gpp_size) { /* Deny shrinking of an opened partition. */ gctl_error(req, "%d", EBUSY); return (EBUSY); } } error = G_PART_RESIZE(table, entry, gpp); if (error) { gctl_error(req, "%d%s", error, error != EBUSY ? "": " resizing will lead to unexpected shrinking" " due to alignment"); return (error); } if (!entry->gpe_created) entry->gpe_modified = 1; /* update mediasize of changed provider */ mediasize = (entry->gpe_end - entry->gpe_start + 1) * pp->sectorsize; g_resize_provider(pp, mediasize); /* Provide feedback if so requested. 
*/ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_cat(sb, " resized\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_setunset(struct gctl_req *req, struct g_part_parms *gpp, unsigned int set) { struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct sbuf *sb; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; if (gpp->gpp_parms & G_PART_PARM_INDEX) { LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } } else entry = NULL; error = G_PART_SETUNSET(table, entry, gpp->gpp_attrib, set); if (error) { gctl_error(req, "%d attrib '%s'", error, gpp->gpp_attrib); return (error); } /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "%s %sset on ", gpp->gpp_attrib, (set) ? 
"" : "un"); if (entry) G_PART_FULLNAME(table, entry, sb, gp->name); else sbuf_cat(sb, gp->name); sbuf_cat(sb, "\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_undo(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_provider *pp; struct g_geom *gp; struct g_part_entry *entry, *tmp; struct g_part_table *table; int error, reprobe; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; if (!table->gpt_opened) { gctl_error(req, "%d", EPERM); return (EPERM); } cp = LIST_FIRST(&gp->consumer); LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) { entry->gpe_modified = 0; if (entry->gpe_created) { pp = entry->gpe_pp; if (pp != NULL) { pp->private = NULL; entry->gpe_pp = NULL; g_wither_provider(pp, ENXIO); } entry->gpe_deleted = 1; } if (entry->gpe_deleted) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } } g_topology_unlock(); reprobe = (table->gpt_scheme == &g_part_null_scheme || table->gpt_created) ? 1 : 0; if (reprobe) { LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_internal) continue; error = EBUSY; goto fail; } while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } error = g_part_probe(gp, cp, table->gpt_depth); if (error) { g_topology_lock(); g_access(cp, -1, -1, -1); g_part_wither(gp, error); return (0); } table = gp->softc; /* * Synthesize a disk geometry. Some partitioning schemes * depend on it and since some file systems need it even * when the partitition scheme doesn't, we do it here in * scheme-independent code. 
*/ pp = cp->provider; g_part_geometry(table, cp, pp->mediasize / pp->sectorsize); } error = G_PART_READ(table, cp); if (error) goto fail; error = g_part_check_integrity(table, cp); if (error) goto fail; g_topology_lock(); LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (!entry->gpe_internal) g_part_new_provider(gp, table, entry); } table->gpt_opened = 0; g_access(cp, -1, -1, -1); return (0); fail: g_topology_lock(); gctl_error(req, "%d", error); return (error); } static void g_part_wither(struct g_geom *gp, int error) { struct g_part_entry *entry; struct g_part_table *table; struct g_provider *pp; table = gp->softc; if (table != NULL) { gp->softc = NULL; while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) { LIST_REMOVE(entry, gpe_entry); pp = entry->gpe_pp; entry->gpe_pp = NULL; if (pp != NULL) { pp->private = NULL; g_wither_provider(pp, error); } g_free(entry); } G_PART_DESTROY(table, NULL); kobj_delete((kobj_t)table, M_GEOM); } g_wither_geom(gp, error); } /* * Class methods. 
 */

/*
 * Control request entry point of the PART class.
 *
 * The work is done in four steps:
 *  1. Map the verb to a g_part_ctl value and to the sets of mandatory
 *     (mparms) and optional (oparms) parameters for that verb.
 *  2. Walk the request arguments, map each name to a G_PART_PARM_* bit,
 *     reject parameters the verb does not accept and decode the values
 *     into gpp.  All mandatory parameters must be present afterwards.
 *  3. For verbs that modify a table, refuse to work on a corrupt table
 *     (except for destroy and recover), obtain r1w1e1 access if the table
 *     is not opened yet, and let the scheme pre-check the parameters.
 *  4. Dispatch to the g_part_ctl_*() handler and, if the 'C' flag was
 *     given, commit right away.
 * Access obtained in step 3 is dropped again when the request fails.
 */
static void
g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb)
{
	struct g_part_parms gpp;
	struct g_part_table *table;
	struct gctl_req_arg *ap;
	enum g_part_ctl ctlreq;
	unsigned int i, mparms, oparms, parm;
	int auto_commit, close_on_error;
	int error, modifies;

	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, verb));
	g_topology_assert();

	ctlreq = G_PART_CTL_NONE;
	modifies = 1;
	mparms = 0;
	oparms = G_PART_PARM_FLAGS | G_PART_PARM_OUTPUT | G_PART_PARM_VERSION;
	/* Switch on the first letter, then compare the whole verb. */
	switch (*verb) {
	case 'a':
		if (!strcmp(verb, "add")) {
			ctlreq = G_PART_CTL_ADD;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_SIZE |
			    G_PART_PARM_START | G_PART_PARM_TYPE;
			oparms |= G_PART_PARM_INDEX | G_PART_PARM_LABEL;
		}
		break;
	case 'b':
		if (!strcmp(verb, "bootcode")) {
			ctlreq = G_PART_CTL_BOOTCODE;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_BOOTCODE;
			oparms |= G_PART_PARM_SKIP_DSN;
		}
		break;
	case 'c':
		if (!strcmp(verb, "commit")) {
			ctlreq = G_PART_CTL_COMMIT;
			mparms |= G_PART_PARM_GEOM;
			modifies = 0;
		} else if (!strcmp(verb, "create")) {
			ctlreq = G_PART_CTL_CREATE;
			mparms |= G_PART_PARM_PROVIDER | G_PART_PARM_SCHEME;
			oparms |= G_PART_PARM_ENTRIES;
		}
		break;
	case 'd':
		if (!strcmp(verb, "delete")) {
			ctlreq = G_PART_CTL_DELETE;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
		} else if (!strcmp(verb, "destroy")) {
			ctlreq = G_PART_CTL_DESTROY;
			mparms |= G_PART_PARM_GEOM;
			oparms |= G_PART_PARM_FORCE;
		}
		break;
	case 'm':
		if (!strcmp(verb, "modify")) {
			ctlreq = G_PART_CTL_MODIFY;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
			oparms |= G_PART_PARM_LABEL | G_PART_PARM_TYPE;
		} else if (!strcmp(verb, "move")) {
			ctlreq = G_PART_CTL_MOVE;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
		}
		break;
	case 'r':
		if (!strcmp(verb, "recover")) {
			ctlreq = G_PART_CTL_RECOVER;
			mparms |= G_PART_PARM_GEOM;
		} else if (!strcmp(verb, "resize")) {
			ctlreq = G_PART_CTL_RESIZE;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX |
			    G_PART_PARM_SIZE;
		}
		break;
	case 's':
		if (!strcmp(verb, "set")) {
			ctlreq = G_PART_CTL_SET;
			mparms |= G_PART_PARM_ATTRIB | G_PART_PARM_GEOM;
			oparms |= G_PART_PARM_INDEX;
		}
		break;
	case 'u':
		if (!strcmp(verb, "undo")) {
			ctlreq = G_PART_CTL_UNDO;
			mparms |= G_PART_PARM_GEOM;
			modifies = 0;
		} else if (!strcmp(verb, "unset")) {
			ctlreq = G_PART_CTL_UNSET;
			mparms |= G_PART_PARM_ATTRIB | G_PART_PARM_GEOM;
			oparms |= G_PART_PARM_INDEX;
		}
		break;
	}
	if (ctlreq == G_PART_CTL_NONE) {
		gctl_error(req, "%d verb '%s'", EINVAL, verb);
		return;
	}

	bzero(&gpp, sizeof(gpp));
	for (i = 0; i < req->narg; i++) {
		ap = &req->arg[i];
		parm = 0;
		switch (ap->name[0]) {
		case 'a':
			/*
			 * "arg0" stands for whichever of geom/provider the
			 * verb requires.
			 */
			if (!strcmp(ap->name, "arg0")) {
				parm = mparms &
				    (G_PART_PARM_GEOM | G_PART_PARM_PROVIDER);
			}
			if (!strcmp(ap->name, "attrib"))
				parm = G_PART_PARM_ATTRIB;
			break;
		case 'b':
			if (!strcmp(ap->name, "bootcode"))
				parm = G_PART_PARM_BOOTCODE;
			break;
		case 'c':
			/* "class" is skipped without further checks. */
			if (!strcmp(ap->name, "class"))
				continue;
			break;
		case 'e':
			if (!strcmp(ap->name, "entries"))
				parm = G_PART_PARM_ENTRIES;
			break;
		case 'f':
			if (!strcmp(ap->name, "flags"))
				parm = G_PART_PARM_FLAGS;
			else if (!strcmp(ap->name, "force"))
				parm = G_PART_PARM_FORCE;
			break;
		case 'i':
			if (!strcmp(ap->name, "index"))
				parm = G_PART_PARM_INDEX;
			break;
		case 'l':
			if (!strcmp(ap->name, "label"))
				parm = G_PART_PARM_LABEL;
			break;
		case 'o':
			if (!strcmp(ap->name, "output"))
				parm = G_PART_PARM_OUTPUT;
			break;
		case 's':
			if (!strcmp(ap->name, "scheme"))
				parm = G_PART_PARM_SCHEME;
			else if (!strcmp(ap->name, "size"))
				parm = G_PART_PARM_SIZE;
			else if (!strcmp(ap->name, "start"))
				parm = G_PART_PARM_START;
			else if (!strcmp(ap->name, "skip_dsn"))
				parm = G_PART_PARM_SKIP_DSN;
			break;
		case 't':
			if (!strcmp(ap->name, "type"))
				parm = G_PART_PARM_TYPE;
			break;
		case 'v':
			/* "verb" is skipped without further checks. */
			if (!strcmp(ap->name, "verb"))
				continue;
			else if (!strcmp(ap->name, "version"))
				parm = G_PART_PARM_VERSION;
			break;
		}
		/* Unknown name, or a parameter this verb does not take. */
		if ((parm & (mparms | oparms)) == 0) {
			gctl_error(req, "%d param '%s'", EINVAL, ap->name);
			return;
		}
		switch (parm) {
		case G_PART_PARM_ATTRIB:
			error = g_part_parm_str(req, ap->name,
			    &gpp.gpp_attrib);
			break;
		case G_PART_PARM_BOOTCODE:
			error = g_part_parm_bootcode(req, ap->name,
			    &gpp.gpp_codeptr, &gpp.gpp_codesize);
			break;
		case G_PART_PARM_ENTRIES:
			error = g_part_parm_intmax(req, ap->name,
			    &gpp.gpp_entries);
			break;
		case G_PART_PARM_FLAGS:
			error = g_part_parm_str(req, ap->name, &gpp.gpp_flags);
			break;
		case G_PART_PARM_FORCE:
			error = g_part_parm_uint32(req, ap->name,
			    &gpp.gpp_force);
			break;
		case G_PART_PARM_GEOM:
			error = g_part_parm_geom(req, ap->name, &gpp.gpp_geom);
			break;
		case G_PART_PARM_INDEX:
			error = g_part_parm_intmax(req, ap->name,
			    &gpp.gpp_index);
			break;
		case G_PART_PARM_LABEL:
			error = g_part_parm_str(req, ap->name, &gpp.gpp_label);
			break;
		case G_PART_PARM_OUTPUT:
			error = 0;	/* Write-only parameter */
			break;
		case G_PART_PARM_PROVIDER:
			error = g_part_parm_provider(req, ap->name,
			    &gpp.gpp_provider);
			break;
		case G_PART_PARM_SCHEME:
			error = g_part_parm_scheme(req, ap->name,
			    &gpp.gpp_scheme);
			break;
		case G_PART_PARM_SIZE:
			error = g_part_parm_quad(req, ap->name, &gpp.gpp_size);
			break;
		case G_PART_PARM_SKIP_DSN:
			error = g_part_parm_uint32(req, ap->name,
			    &gpp.gpp_skip_dsn);
			break;
		case G_PART_PARM_START:
			error = g_part_parm_quad(req, ap->name,
			    &gpp.gpp_start);
			break;
		case G_PART_PARM_TYPE:
			error = g_part_parm_str(req, ap->name, &gpp.gpp_type);
			break;
		case G_PART_PARM_VERSION:
			error = g_part_parm_uint32(req, ap->name,
			    &gpp.gpp_version);
			break;
		default:
			error = EDOOFUS;
			gctl_error(req, "%d %s", error, ap->name);
			break;
		}
		if (error != 0) {
			/*
			 * Only ENOATTR is reported here; the other failing
			 * paths above issue their own gctl_error().
			 */
			if (error == ENOATTR) {
				gctl_error(req, "%d param '%s'", error,
				    ap->name);
			}
			return;
		}
		gpp.gpp_parms |= parm;
	}
	if ((gpp.gpp_parms & mparms) != mparms) {
		/* Report the bit mask of the missing mandatory parameters. */
		parm = mparms - (gpp.gpp_parms & mparms);
		gctl_error(req, "%d param '%x'", ENOATTR, parm);
		return;
	}

	/* Obtain permissions if possible/necessary. */
	close_on_error = 0;
	table = NULL;
	if (modifies && (gpp.gpp_parms & G_PART_PARM_GEOM)) {
		table = gpp.gpp_geom->softc;
		if (table != NULL && table->gpt_corrupt &&
		    ctlreq != G_PART_CTL_DESTROY &&
		    ctlreq != G_PART_CTL_RECOVER) {
			gctl_error(req, "%d table '%s' is corrupt",
			    EPERM, gpp.gpp_geom->name);
			return;
		}
		if (table != NULL && !table->gpt_opened) {
			error = g_access(LIST_FIRST(&gpp.gpp_geom->consumer),
			    1, 1, 1);
			if (error) {
				gctl_error(req, "%d geom '%s'", error,
				    gpp.gpp_geom->name);
				return;
			}
			table->gpt_opened = 1;
			close_on_error = 1;
		}
	}

	/* Allow the scheme to check or modify the parameters. */
	if (table != NULL) {
		error = G_PART_PRECHECK(table, ctlreq, &gpp);
		if (error) {
			gctl_error(req, "%d pre-check failed", error);
			goto out;
		}
	} else
		error = EDOOFUS;	/* Prevent bogus uninit. warning. */

	switch (ctlreq) {
	case G_PART_CTL_NONE:
		panic("%s", __func__);
	case G_PART_CTL_ADD:
		error = g_part_ctl_add(req, &gpp);
		break;
	case G_PART_CTL_BOOTCODE:
		error = g_part_ctl_bootcode(req, &gpp);
		break;
	case G_PART_CTL_COMMIT:
		error = g_part_ctl_commit(req, &gpp);
		break;
	case G_PART_CTL_CREATE:
		error = g_part_ctl_create(req, &gpp);
		break;
	case G_PART_CTL_DELETE:
		error = g_part_ctl_delete(req, &gpp);
		break;
	case G_PART_CTL_DESTROY:
		error = g_part_ctl_destroy(req, &gpp);
		break;
	case G_PART_CTL_MODIFY:
		error = g_part_ctl_modify(req, &gpp);
		break;
	case G_PART_CTL_MOVE:
		error = g_part_ctl_move(req, &gpp);
		break;
	case G_PART_CTL_RECOVER:
		error = g_part_ctl_recover(req, &gpp);
		break;
	case G_PART_CTL_RESIZE:
		error = g_part_ctl_resize(req, &gpp);
		break;
	case G_PART_CTL_SET:
		error = g_part_ctl_setunset(req, &gpp, 1);
		break;
	case G_PART_CTL_UNDO:
		error = g_part_ctl_undo(req, &gpp);
		break;
	case G_PART_CTL_UNSET:
		error = g_part_ctl_setunset(req, &gpp, 0);
		break;
	}

	/* Implement automatic commit. */
	if (!error) {
		auto_commit = (modifies &&
		    (gpp.gpp_parms & G_PART_PARM_FLAGS) &&
		    strchr(gpp.gpp_flags, 'C') != NULL) ? 1 : 0;
		if (auto_commit) {
			KASSERT(gpp.gpp_parms & G_PART_PARM_GEOM, ("%s",
			    __func__));
			error = g_part_ctl_commit(req, &gpp);
		}
	}

 out:
	/* Undo the access obtained above when the request failed. */
	if (error && close_on_error) {
		g_access(LIST_FIRST(&gpp.gpp_geom->consumer), -1, -1, -1);
		table->gpt_opened = 0;
	}
}

/*
 * Class destroy_geom method: wither the geom (with EINVAL) and report
 * success unconditionally.
 */
static int
g_part_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{

	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, gp->name));
	g_topology_assert();

	g_part_wither(gp, EINVAL);
	return (0);
}

/*
 * Class taste method: try to recognise a partition table on provider pp.
 *
 * A geom and consumer are created and opened for reading, the schemes are
 * probed, the table is read and checked, and a provider is created for
 * every non-internal entry.  Returns the new geom, or NULL when the
 * provider is open for writing, has no media, reports itself as a leaf,
 * or no table could be read; in the failure cases the geom and consumer
 * are destroyed again.  Root mounting is held off while the media is
 * being read.
 *
 * NOTE(review): the lines prefixed with '-' below are patch hunk markers
 * carried over from the diff this text was extracted from; they are
 * preserved verbatim.
 */
static struct g_geom *
g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_consumer *cp;
	struct g_geom *gp;
	struct g_part_entry *entry;
	struct g_part_table *table;
	struct root_hold_token *rht;
-	struct g_geom_alias *gap;
	int attr, depth;
	int error;

	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name));
	g_topology_assert();

	/* Skip providers that are already open for writing. */
	if (pp->acw > 0)
		return (NULL);

	/*
	 * Create a GEOM with consumer and hook it up to the provider.
	 * With that we become part of the topology. Obtain read access
	 * to the provider.
	 */
	gp = g_new_geomf(mp, "%s", pp->name);
-	LIST_FOREACH(gap, &pp->geom->aliases, ga_next)
-		g_geom_add_alias(gp, gap->ga_alias);
	cp = g_new_consumer(gp);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error == 0)
		error = g_access(cp, 1, 0, 0);
	if (error != 0) {
		if (cp->provider)
			g_detach(cp);
		g_destroy_consumer(cp);
		g_destroy_geom(gp);
		return (NULL);
	}

	rht = root_mount_hold(mp->name);
	g_topology_unlock();

	/*
	 * Short-circuit the whole probing galore when there's no
	 * media present.
	 */
	if (pp->mediasize == 0 || pp->sectorsize == 0) {
		error = ENODEV;
		goto fail;
	}

	/* Make sure we can nest and if so, determine our depth. */
	error = g_getattr("PART::isleaf", cp, &attr);
	if (!error && attr) {
		error = ENODEV;
		goto fail;
	}
	error = g_getattr("PART::depth", cp, &attr);
	depth = (!error) ? attr + 1 : 0;

	error = g_part_probe(gp, cp, depth);
	if (error)
		goto fail;

	table = gp->softc;

	/*
	 * Synthesize a disk geometry. Some partitioning schemes
	 * depend on it and since some file systems need it even
	 * when the partition scheme doesn't, we do it here in
	 * scheme-independent code.
	 */
	g_part_geometry(table, cp, pp->mediasize / pp->sectorsize);

	error = G_PART_READ(table, cp);
	if (error)
		goto fail;
	error = g_part_check_integrity(table, cp);
	if (error)
		goto fail;

	g_topology_lock();
	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
		if (!entry->gpe_internal)
			g_part_new_provider(gp, table, entry);
	}

	root_mount_rel(rht);
	g_access(cp, -1, 0, 0);
	return (gp);

 fail:
	g_topology_lock();
	root_mount_rel(rht);
	g_access(cp, -1, 0, 0);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	return (NULL);
}

/*
 * Geom methods.
 */

/*
 * Geom access method: forward an access change on one of our providers to
 * the geom's first consumer.  Each write reference requested is also
 * counted as an exclusive reference on the underlying provider.
 */
static int
g_part_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_consumer *cp;

	G_PART_TRACE((G_T_ACCESS, "%s(%s,%d,%d,%d)", __func__, pp->name, dr,
	    dw, de));

	cp = LIST_FIRST(&pp->geom->consumer);

	/* We always gain write-exclusive access. */
	return (g_access(cp, dr, dw, dw + de));
}

static void
g_part_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	char buf[64];
	struct g_part_entry *entry;
	struct g_part_table *table;

	KASSERT(sb != NULL && gp != NULL, ("%s", __func__));
	table = gp->softc;

	if (indent == NULL) {
		KASSERT(cp == NULL && pp != NULL, ("%s", __func__));
		entry = pp->private;
		if (entry == NULL)
			return;
		sbuf_printf(sb, " i %u o %ju ty %s", entry->gpe_index,
		    (uintmax_t)entry->gpe_offset,
		    G_PART_TYPE(table, entry, buf, sizeof(buf)));
		/*
		 * libdisk compatibility quirk - the scheme dumps the
		 * slicer name and partition type in a way that is
		 * compatible with libdisk. When libdisk is not used
		 * anymore, this should go away.
		 */
		G_PART_DUMPCONF(table, entry, sb, indent);
	} else if (cp != NULL) {	/* Consumer configuration.
*/ KASSERT(pp == NULL, ("%s", __func__)); /* none */ } else if (pp != NULL) { /* Provider configuration. */ entry = pp->private; if (entry == NULL) return; sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)entry->gpe_start); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)entry->gpe_end); sbuf_printf(sb, "%s%u\n", indent, entry->gpe_index); sbuf_printf(sb, "%s%s\n", indent, G_PART_TYPE(table, entry, buf, sizeof(buf))); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)entry->gpe_offset); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)pp->mediasize); G_PART_DUMPCONF(table, entry, sb, indent); } else { /* Geom configuration. */ sbuf_printf(sb, "%s%s\n", indent, table->gpt_scheme->name); sbuf_printf(sb, "%s%u\n", indent, table->gpt_entries); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)table->gpt_first); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)table->gpt_last); sbuf_printf(sb, "%s%u\n", indent, table->gpt_sectors); sbuf_printf(sb, "%s%u\n", indent, table->gpt_heads); sbuf_printf(sb, "%s%s\n", indent, table->gpt_corrupt ? "CORRUPT": "OK"); sbuf_printf(sb, "%s%s\n", indent, table->gpt_opened ? "true": "false"); G_PART_DUMPCONF(table, NULL, sb, indent); } } /*- * This start routine is only called for non-trivial requests, all the * trivial ones are handled autonomously by the slice code. * For requests we handle here, we must call the g_io_deliver() on the * bio, and return non-zero to indicate to the slice code that we did so. * This code executes in the "DOWN" I/O path, this means: * * No sleeping. * * Don't grab the topology lock. 
* * Don't call biowait, g_getattr(), g_setattr() or g_read_data() */ static int g_part_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { struct g_part_table *table; table = pp->geom->softc; return G_PART_IOCTL(table, pp, cmd, data, fflag, td); } static void g_part_resize(struct g_consumer *cp) { struct g_part_table *table; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name)); g_topology_assert(); if (auto_resize == 0) return; table = cp->geom->softc; if (table->gpt_opened == 0) { if (g_access(cp, 1, 1, 1) != 0) return; table->gpt_opened = 1; } if (G_PART_RESIZE(table, NULL, NULL) == 0) printf("GEOM_PART: %s was automatically resized.\n" " Use `gpart commit %s` to save changes or " "`gpart undo %s` to revert them.\n", cp->geom->name, cp->geom->name, cp->geom->name); if (g_part_check_integrity(table, cp) != 0) { g_access(cp, -1, -1, -1); table->gpt_opened = 0; g_part_wither(table->gpt_gp, ENXIO); } } static void g_part_orphan(struct g_consumer *cp) { struct g_provider *pp; struct g_part_table *table; pp = cp->provider; KASSERT(pp != NULL, ("%s", __func__)); G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, pp->name)); g_topology_assert(); KASSERT(pp->error != 0, ("%s", __func__)); table = cp->geom->softc; if (table != NULL && table->gpt_opened) g_access(cp, -1, -1, -1); g_part_wither(cp->geom, pp->error); } static void g_part_spoiled(struct g_consumer *cp) { G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name)); g_topology_assert(); cp->flags |= G_CF_ORPHAN; g_part_wither(cp->geom, ENXIO); } static void g_part_start(struct bio *bp) { struct bio *bp2; struct g_consumer *cp; struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct g_kerneldump *gkd; struct g_provider *pp; void (*done_func)(struct bio *) = g_std_done; char buf[64]; biotrack(bp, __func__); pp = bp->bio_to; gp = pp->geom; table = gp->softc; cp = LIST_FIRST(&gp->consumer); G_PART_TRACE((G_T_BIO, "%s: cmd=%d, 
provider=%s", __func__, bp->bio_cmd, pp->name)); entry = pp->private; if (entry == NULL) { g_io_deliver(bp, ENXIO); return; } switch(bp->bio_cmd) { case BIO_DELETE: case BIO_READ: case BIO_WRITE: if (bp->bio_offset >= pp->mediasize) { g_io_deliver(bp, EIO); return; } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } if (bp2->bio_offset + bp2->bio_length > pp->mediasize) bp2->bio_length = pp->mediasize - bp2->bio_offset; bp2->bio_done = g_std_done; bp2->bio_offset += entry->gpe_offset; g_io_request(bp2, cp); return; case BIO_SPEEDUP: case BIO_FLUSH: break; case BIO_GETATTR: if (g_handleattr_int(bp, "GEOM::fwheads", table->gpt_heads)) return; if (g_handleattr_int(bp, "GEOM::fwsectors", table->gpt_sectors)) return; /* * allow_nesting overrides "isleaf" to false _unless_ the * provider offset is zero, since otherwise we would recurse. */ if (g_handleattr_int(bp, "PART::isleaf", table->gpt_isleaf && (allow_nesting == 0 || entry->gpe_offset == 0))) return; if (g_handleattr_int(bp, "PART::depth", table->gpt_depth)) return; if (g_handleattr_str(bp, "PART::scheme", table->gpt_scheme->name)) return; if (g_handleattr_str(bp, "PART::type", G_PART_TYPE(table, entry, buf, sizeof(buf)))) return; if (!strcmp("GEOM::physpath", bp->bio_attribute)) { done_func = g_part_get_physpath_done; break; } if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) { /* * Check that the partition is suitable for kernel * dumps. Typically only swap partitions should be * used. If the request comes from the nested scheme * we allow dumping there as well. 
*/ if ((bp->bio_from == NULL || bp->bio_from->geom->class != &g_part_class) && G_PART_DUMPTO(table, entry) == 0) { g_io_deliver(bp, ENODEV); printf("GEOM_PART: Partition '%s' not suitable" " for kernel dumps (wrong type?)\n", pp->name); return; } gkd = (struct g_kerneldump *)bp->bio_data; if (gkd->offset >= pp->mediasize) { g_io_deliver(bp, EIO); return; } if (gkd->offset + gkd->length > pp->mediasize) gkd->length = pp->mediasize - gkd->offset; gkd->offset += entry->gpe_offset; } break; default: g_io_deliver(bp, EOPNOTSUPP); return; } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = done_func; g_io_request(bp2, cp); } static void g_part_init(struct g_class *mp) { TAILQ_INSERT_HEAD(&g_part_schemes, &g_part_null_scheme, scheme_list); } static void g_part_fini(struct g_class *mp) { TAILQ_REMOVE(&g_part_schemes, &g_part_null_scheme, scheme_list); } static void g_part_unload_event(void *arg, int flag) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_scheme *scheme; struct g_part_table *table; uintptr_t *xchg; int acc, error; if (flag == EV_CANCEL) return; xchg = arg; error = 0; scheme = (void *)(*xchg); g_topology_assert(); LIST_FOREACH(gp, &g_part_class.geom, geom) { table = gp->softc; if (table->gpt_scheme != scheme) continue; acc = 0; LIST_FOREACH(pp, &gp->provider, provider) acc += pp->acr + pp->acw + pp->ace; LIST_FOREACH(cp, &gp->consumer, consumer) acc += cp->acr + cp->acw + cp->ace; if (!acc) g_part_wither(gp, ENOSYS); else error = EBUSY; } if (!error) TAILQ_REMOVE(&g_part_schemes, scheme, scheme_list); *xchg = error; } int g_part_modevent(module_t mod, int type, struct g_part_scheme *scheme) { struct g_part_scheme *iter; uintptr_t arg; int error; error = 0; switch (type) { case MOD_LOAD: TAILQ_FOREACH(iter, &g_part_schemes, scheme_list) { if (scheme == iter) { printf("GEOM_PART: scheme %s is already " "registered!\n", scheme->name); break; } } if (iter == NULL) { 
TAILQ_INSERT_TAIL(&g_part_schemes, scheme, scheme_list); g_retaste(&g_part_class); } break; case MOD_UNLOAD: arg = (uintptr_t)scheme; error = g_waitfor_event(g_part_unload_event, &arg, M_WAITOK, NULL); if (error == 0) error = arg; break; default: error = EOPNOTSUPP; break; } return (error); } Index: head/sys/geom/uzip/g_uzip.c =================================================================== --- head/sys/geom/uzip/g_uzip.c (revision 361014) +++ head/sys/geom/uzip/g_uzip.c (revision 361015) @@ -1,988 +1,991 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Max Khon * Copyright (c) 2014 Juniper Networks, Inc. * Copyright (c) 2006-2016 Maxim Sobolev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* NOTE(review): the header names of the bare #include directives below are missing in this copy of the file. */ #include __FBSDID("$FreeBSD$"); #include "opt_geom.h" #include "opt_zstdio.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ZSTDIO #include #endif #include MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures"); FEATURE(geom_uzip, "GEOM read-only compressed disks support"); /* One table-of-contents entry per cluster: "offset" is where the compressed cluster starts on the backing provider and "blen" how many bytes it occupies there (0 means an all-zero cluster). g_uzip_parse_toc() sets "last" on the final non-backref entry and "padded" on an entry whose blen it trimmed to the decompressor's max_blen. */ struct g_uzip_blk { uint64_t offset; uint32_t blen; unsigned char last:1; unsigned char padded:1; #define BLEN_UNDEF UINT32_MAX }; #ifndef ABS #define ABS(a) ((a) < 0 ? -(a) : (a)) #endif /* BLK_IN_RANGE(mcn, bcn, ilen): true when bcn is not BLEN_UNDEF and mcn lies between bcn and bcn + ilen inclusive; a negative ilen looks backwards from bcn. */ #define BLK_IN_RANGE(mcn, bcn, ilen) \ (((bcn) != BLEN_UNDEF) && ( \ ((ilen) >= 0 && (mcn >= bcn) && (mcn <= ((intmax_t)(bcn) + (ilen)))) || \ ((ilen) < 0 && (mcn <= bcn) && (mcn >= ((intmax_t)(bcn) + (ilen)))) \ )) #ifdef GEOM_UZIP_DEBUG # define GEOM_UZIP_DBG_DEFAULT 3 #else # define GEOM_UZIP_DBG_DEFAULT 0 #endif #define GUZ_DBG_ERR 1 #define GUZ_DBG_INFO 2 #define GUZ_DBG_IO 3 #define GUZ_DBG_TOC 4 #define GUZ_DEV_SUFX ".uzip" #define GUZ_DEV_NAME(p) (p GUZ_DEV_SUFX) static char g_uzip_attach_to[MAXPATHLEN] = {"*"}; static char g_uzip_noattach_to[MAXPATHLEN] = {GUZ_DEV_NAME("*")}; TUNABLE_STR("kern.geom.uzip.attach_to", g_uzip_attach_to, sizeof(g_uzip_attach_to)); TUNABLE_STR("kern.geom.uzip.noattach_to", g_uzip_noattach_to, sizeof(g_uzip_noattach_to)); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, uzip, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_UZIP stuff"); static u_int g_uzip_debug = GEOM_UZIP_DBG_DEFAULT; SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug, CTLFLAG_RWTUN, &g_uzip_debug, 0, "Debug level (0-4)"); static u_int g_uzip_debug_block = BLEN_UNDEF; SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug_block, CTLFLAG_RWTUN, &g_uzip_debug_block, 0, "Debug operations around specific cluster#"); /* Debug printing. DPRINTF prints when lvl is at or below g_uzip_debug; DPRINTF_BLK additionally prints when cluster cn is within 8 of g_uzip_debug_block; DPRINTF_BRNG additionally prints when g_uzip_debug_block falls inside [bcn, ecn] and asserts bcn < ecn. The "a" argument is a parenthesised printf argument list. NOTE(review): all three expand to a bare if statement rather than do { } while (0), so they are unsafe as the body of an un-braced if/else. */ #define DPRINTF(lvl, a) \ if ((lvl) <= g_uzip_debug) { \ printf a; \ } #define DPRINTF_BLK(lvl, cn, a) \ if ((lvl) <= g_uzip_debug || \ BLK_IN_RANGE(cn, g_uzip_debug_block, 8) 
|| \ BLK_IN_RANGE(cn, g_uzip_debug_block, -8)) { \ printf a; \ } #define DPRINTF_BRNG(lvl, bcn, ecn, a) \ KASSERT(bcn < ecn, ("DPRINTF_BRNG: invalid range (%ju, %ju)", \ (uintmax_t)bcn, (uintmax_t)ecn)); \ if (((lvl) <= g_uzip_debug) || \ BLK_IN_RANGE(g_uzip_debug_block, bcn, \ (intmax_t)ecn - (intmax_t)bcn)) { \ printf a; \ } #define UZIP_CLASS_NAME "UZIP" /* * Maximum allowed valid block size (to prevent foot-shooting) */ #define MAX_BLKSZ (MAXPHYS) static char CLOOP_MAGIC_START[] = "#!/bin/sh\n"; static void g_uzip_read_done(struct bio *bp); static void g_uzip_do(struct g_uzip_softc *, struct bio *bp); /* Tear down the softc attached to gp. Sets GUZ_SHUTDOWN and wakes whoever sleeps on sc (presumably the worker thread), then polls under queue_mtx every hz / 10 ticks until GUZ_EXITING is set. Afterwards releases the decompressor, the TOC, both mutexes, the one-cluster cache buffer and the softc itself, and clears gp->softc. */ static void g_uzip_softc_free(struct g_geom *gp) { struct g_uzip_softc *sc = gp->softc; DPRINTF(GUZ_DBG_INFO, ("%s: %d requests, %d cached\n", gp->name, sc->req_total, sc->req_cached)); mtx_lock(&sc->queue_mtx); sc->wrkthr_flags |= GUZ_SHUTDOWN; wakeup(sc); while (!(sc->wrkthr_flags & GUZ_EXITING)) { msleep(sc->procp, &sc->queue_mtx, PRIBIO, "guzfree", hz / 10); } mtx_unlock(&sc->queue_mtx); sc->dcp->free(sc->dcp); free(sc->toc, M_GEOM_UZIP); mtx_destroy(&sc->queue_mtx); mtx_destroy(&sc->last_mtx); free(sc->last_buf, M_GEOM_UZIP); free(sc, M_GEOM_UZIP); gp->softc = NULL; } /* Try to serve the next part of bp from the one-cluster cache (sc->last_buf, valid for cluster sc->last_blk), under last_mtx. Copies at most up to the end of that cluster and advances bio_completed / bio_resid. Returns 1 only when this finished the request and bp was delivered; returns 0 on a cache miss or when bytes are still outstanding. */ static int g_uzip_cached(struct g_geom *gp, struct bio *bp) { struct g_uzip_softc *sc; off_t ofs; size_t blk, blkofs, usz; sc = gp->softc; ofs = bp->bio_offset + bp->bio_completed; blk = ofs / sc->blksz; mtx_lock(&sc->last_mtx); if (blk == sc->last_blk) { blkofs = ofs % sc->blksz; usz = sc->blksz - blkofs; if (bp->bio_resid < usz) usz = bp->bio_resid; memcpy(bp->bio_data + bp->bio_completed, sc->last_buf + blkofs, usz); sc->req_cached++; mtx_unlock(&sc->last_mtx); DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: offset=%jd: got %jd bytes " "from cache\n", __func__, gp->name, bp, (intmax_t)ofs, (intmax_t)usz)); bp->bio_completed += usz; bp->bio_resid -= usz; if (bp->bio_resid == 0) { g_io_deliver(bp, 0); return (1); } } else mtx_unlock(&sc->last_mtx); return (0); } /* TOC helpers. BLK_ENDS: offset just past compressed cluster bi. BLK_IS_CONT: cluster bi starts exactly where cluster bi - 1 ends. BLK_IS_NIL: cluster bi has zero compressed length. TOFF_2_BOFF: cluster offset rounded down to a sector boundary of pp. TLEN_2_BLEN: bytes from bp's bio_offset to the end of cluster ei, rounded up to whole sectors. */ #define BLK_ENDS(sc, bi) 
((sc)->toc[(bi)].offset + \ (sc)->toc[(bi)].blen) #define BLK_IS_CONT(sc, bi) (BLK_ENDS((sc), (bi) - 1) == \ (sc)->toc[(bi)].offset) #define BLK_IS_NIL(sc, bi) ((sc)->toc[(bi)].blen == 0) #define TOFF_2_BOFF(sc, pp, bi) ((sc)->toc[(bi)].offset - \ (sc)->toc[(bi)].offset % (pp)->sectorsize) #define TLEN_2_BLEN(sc, pp, bp, ei) roundup(BLK_ENDS((sc), (ei)) - \ (bp)->bio_offset, (pp)->sectorsize) /* Make progress on the outstanding part of read request bp. Tries the cache first, zero-fills leading Nil clusters, then clones bp into one child read covering the longest run of contiguous clusters that fits in MAXPHYS (or a single cluster if even that does not fit). Returns 1 when bp has already been delivered (completed, or ENOMEM), and 0 when a child read was issued and completion continues via g_uzip_read_done(). */ static int g_uzip_request(struct g_geom *gp, struct bio *bp) { struct g_uzip_softc *sc; struct bio *bp2; struct g_consumer *cp; struct g_provider *pp; off_t ofs, start_blk_ofs; size_t i, start_blk, end_blk, zsize; if (g_uzip_cached(gp, bp) != 0) return (1); sc = gp->softc; cp = LIST_FIRST(&gp->consumer); pp = cp->provider; ofs = bp->bio_offset + bp->bio_completed; start_blk = ofs / sc->blksz; KASSERT(start_blk < sc->nblocks, ("start_blk out of range")); end_blk = howmany(ofs + bp->bio_resid, sc->blksz); KASSERT(end_blk <= sc->nblocks, ("end_blk out of range")); for (; BLK_IS_NIL(sc, start_blk) && start_blk < end_blk; start_blk++) { /* Fill in any leading Nil blocks */ start_blk_ofs = ofs % sc->blksz; zsize = MIN(sc->blksz - start_blk_ofs, bp->bio_resid); DPRINTF_BLK(GUZ_DBG_IO, start_blk, ("%s/%s: %p/%ju: " "filling %ju zero bytes\n", __func__, gp->name, gp, (uintmax_t)bp->bio_completed, (uintmax_t)zsize)); bzero(bp->bio_data + bp->bio_completed, zsize); bp->bio_completed += zsize; bp->bio_resid -= zsize; ofs += zsize; } if (start_blk == end_blk) { KASSERT(bp->bio_resid == 0, ("bp->bio_resid is invalid")); /* * No non-Nil data is left, complete request immediately. 
*/ DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: all done returning %ju " "bytes\n", __func__, gp->name, gp, (uintmax_t)bp->bio_completed)); g_io_deliver(bp, 0); return (1); } for (i = start_blk + 1; i < end_blk; i++) { /* Trim discontinuous areas if any */ if (!BLK_IS_CONT(sc, i)) { end_blk = i; break; } } DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: " "start=%u (%ju[%jd]), end=%u (%ju)\n", __func__, gp->name, bp, (u_int)start_blk, (uintmax_t)sc->toc[start_blk].offset, (intmax_t)sc->toc[start_blk].blen, (u_int)end_blk, (uintmax_t)BLK_ENDS(sc, end_blk - 1))); bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return (1); } bp2->bio_done = g_uzip_read_done; bp2->bio_offset = TOFF_2_BOFF(sc, pp, start_blk); /* Drop clusters from the tail until the sector-rounded read fits in MAXPHYS or only one cluster is left. */ while (1) { bp2->bio_length = TLEN_2_BLEN(sc, pp, bp2, end_blk - 1); if (bp2->bio_length <= MAXPHYS) { break; } if (end_blk == (start_blk + 1)) { break; } end_blk--; } DPRINTF(GUZ_DBG_IO, ("%s/%s: bp2->bio_length = %jd, " "bp2->bio_offset = %jd\n", __func__, gp->name, (intmax_t)bp2->bio_length, (intmax_t)bp2->bio_offset)); bp2->bio_data = malloc(bp2->bio_length, M_GEOM_UZIP, M_NOWAIT); if (bp2->bio_data == NULL) { g_destroy_bio(bp2); g_io_deliver(bp, ENOMEM); return (1); } DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: " "reading %jd bytes from offset %jd\n", __func__, gp->name, bp, (intmax_t)bp2->bio_length, (intmax_t)bp2->bio_offset)); g_io_request(bp2, cp); return (0); } /* bio_done callback of the child read: sorts the finished bio into sc->bio_queue under queue_mtx and wakes the sleeper on sc. No decompression happens here. */ static void g_uzip_read_done(struct bio *bp) { struct bio *bp2; struct g_geom *gp; struct g_uzip_softc *sc; bp2 = bp->bio_parent; gp = bp2->bio_to->geom; sc = gp->softc; mtx_lock(&sc->queue_mtx); bioq_disksort(&sc->bio_queue, bp); mtx_unlock(&sc->queue_mtx); wakeup(sc); } /* Returns non-zero when all "size" bytes at "memory" equal "val" (the first byte equals val and every byte equals its successor), and zero otherwise. Note the sense is the opposite of memcmp(). size must be at least 1: the first byte is read unconditionally and size - 1 is passed to memcmp(). */ static int g_uzip_memvcmp(const void *memory, unsigned char val, size_t size) { const u_char *mm; mm = (const u_char *)memory; return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; } /* Consume a completed child read bp on behalf of its parent request. Propagates the child's error, then decompresses cluster after cluster from the child's buffer into the parent's buffer (via the one-cluster cache sc->last_buf) while data and residual remain. Finally frees the child and either delivers the parent (error, or nothing left) or calls g_uzip_request() again for the remainder. */ static void g_uzip_do(struct g_uzip_softc *sc, struct bio *bp) { struct bio *bp2; struct g_provider *pp; struct 
g_consumer *cp; struct g_geom *gp; char *data, *data2; off_t ofs; size_t blk, blkofs, len, ulen, firstblk; int err; bp2 = bp->bio_parent; gp = bp2->bio_to->geom; cp = LIST_FIRST(&gp->consumer); pp = cp->provider; bp2->bio_error = bp->bio_error; if (bp2->bio_error != 0) goto done; /* Make sure there's forward progress. */ if (bp->bio_completed == 0) { bp2->bio_error = ECANCELED; goto done; } ofs = bp2->bio_offset + bp2->bio_completed; firstblk = blk = ofs / sc->blksz; blkofs = ofs % sc->blksz; /* The child read started at a sector boundary (TOFF_2_BOFF), so skip the slack in front of the first cluster. */ data = bp->bio_data + sc->toc[blk].offset % pp->sectorsize; data2 = bp2->bio_data + bp2->bio_completed; while (bp->bio_completed && bp2->bio_resid) { if (blk > firstblk && !BLK_IS_CONT(sc, blk)) { DPRINTF_BLK(GUZ_DBG_IO, blk, ("%s/%s: %p: backref'ed " "cluster #%u requested, looping around\n", __func__, gp->name, bp2, (u_int)blk)); goto done; } ulen = MIN(sc->blksz - blkofs, bp2->bio_resid); len = sc->toc[blk].blen; DPRINTF(GUZ_DBG_IO, ("%s/%s: %p/%ju: data2=%p, ulen=%u, " "data=%p, len=%u\n", __func__, gp->name, gp, bp->bio_completed, data2, (u_int)ulen, data, (u_int)len)); if (len == 0) { /* All zero block: no cache update */ zero_block: bzero(data2, ulen); } else if (len <= bp->bio_completed) { mtx_lock(&sc->last_mtx); err = sc->dcp->decompress(sc->dcp, gp->name, data, len, sc->last_buf); if (err != 0 && sc->toc[blk].last != 0) { /* * Last block decompression has failed, check * if it's just zero padding. 
*/ /* NOTE(review): g_uzip_memvcmp() returns non-zero when the buffer is entirely '\0', so this "== 0" branch is taken when the data is NOT all zeros. That looks inverted relative to the comment above; confirm before relying on it. */ if (g_uzip_memvcmp(data, '\0', len) == 0) { sc->toc[blk].blen = 0; sc->last_blk = -1; mtx_unlock(&sc->last_mtx); len = 0; goto zero_block; } } if (err != 0) { sc->last_blk = -1; mtx_unlock(&sc->last_mtx); bp2->bio_error = EILSEQ; DPRINTF(GUZ_DBG_ERR, ("%s/%s: decompress" "(%p, %ju, %ju) failed\n", __func__, gp->name, sc->dcp, (uintmax_t)blk, (uintmax_t)len)); goto done; } sc->last_blk = blk; memcpy(data2, sc->last_buf + blkofs, ulen); mtx_unlock(&sc->last_mtx); err = sc->dcp->rewind(sc->dcp, gp->name); if (err != 0) { bp2->bio_error = EILSEQ; DPRINTF(GUZ_DBG_ERR, ("%s/%s: rewind(%p) " "failed\n", __func__, gp->name, sc->dcp)); goto done; } data += len; } else break; data2 += ulen; bp2->bio_completed += ulen; bp2->bio_resid -= ulen; bp->bio_completed -= len; blkofs = 0; blk++; } done: /* Finish processing the request. */ free(bp->bio_data, M_GEOM_UZIP); g_destroy_bio(bp); if (bp2->bio_error != 0 || bp2->bio_resid == 0) g_io_deliver(bp2, bp2->bio_error); else g_uzip_request(gp, bp2); } /* Provider start method. Counts the request, then: a BIO_GETATTR whose attribute begins with "MNT:" is cloned and forwarded to the consumer; every other command except BIO_READ (other GETATTRs included) is rejected with EOPNOTSUPP; a BIO_READ has bio_resid / bio_completed initialised and is handed to g_uzip_request(). The declarations of gp and pp inside the GETATTR branch shadow the outer ones. */ static void g_uzip_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_uzip_softc *sc; pp = bp->bio_to; gp = pp->geom; DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: cmd=%d, offset=%jd, length=%jd, " "buffer=%p\n", __func__, gp->name, bp, bp->bio_cmd, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length, bp->bio_data)); sc = gp->softc; sc->req_total++; if (bp->bio_cmd == BIO_GETATTR) { struct bio *bp2; struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; /* pass on MNT:* requests and ignore others */ if (strncmp(bp->bio_attribute, "MNT:", 4) == 0) { bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = g_std_done; pp = bp->bio_to; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); g_io_request(bp2, cp); return; } } if (bp->bio_cmd != BIO_READ) { g_io_deliver(bp, EOPNOTSUPP); return; } bp->bio_resid = bp->bio_length; bp->bio_completed = 0; g_uzip_request(gp, bp); } /* Orphan method: withers the geom with ENXIO and frees the softc right away when the consumer holds no read/write/exclusive accesses. */ static void g_uzip_orphan(struct g_consumer *cp) { struct g_geom 
*gp; g_topology_assert(); G_VALID_CONSUMER(cp); gp = cp->geom; g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, gp->name); g_wither_geom(gp, ENXIO); /* * We can safely free the softc now if there are no accesses, * otherwise g_uzip_access() will do that after the last close. */ if ((cp->acr + cp->acw + cp->ace) == 0) g_uzip_softc_free(gp); } /* Spoiled method: marks the consumer G_CF_ORPHAN and reuses the orphan handler. */ static void g_uzip_spoiled(struct g_consumer *cp) { g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, cp->geom->name); cp->flags |= G_CF_ORPHAN; g_uzip_orphan(cp); } /* Provider access method. Returns EROFS for any change that would leave the consumer's write count positive; otherwise applies the deltas to the consumer through g_access() and returns its result. */ static int g_uzip_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; int error; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); KASSERT (cp != NULL, ("g_uzip_access but no consumer")); if (cp->acw + dw > 0) return (EROFS); error = g_access(cp, dr, dw, de); /* * Free the softc if all providers have been closed and this geom * is being removed. */ if (error == 0 && (gp->flags & G_GEOM_WITHER) != 0 && (cp->acr + cp->acw + cp->ace) == 0) g_uzip_softc_free(gp); return (error); } /* Derive and validate the compressed length (blen) of every TOC entry from the already loaded offsets. Returns 0 on success and -1 on a malformed TOC. First pass: each offset must lie between the end of the on-disk TOC (min_offset) and pp->mediasize. An entry whose offset is below max_offset (the last byte of the furthest cluster seen so far) is treated as a back-reference and inherits blen from an earlier non-Nil entry with the same offset. Any other entry gets blen from the distance to the next entry with a larger offset, and must end within the media. The last such "normal" entry is flagged "last". Second pass: a blen above the decompressor's max_blen is fatal, except on the "last" entry, where it is trimmed to max_blen and flagged "padded". */ static int g_uzip_parse_toc(struct g_uzip_softc *sc, struct g_provider *pp, struct g_geom *gp) { uint32_t i, j, backref_to; uint64_t max_offset, min_offset; struct g_uzip_blk *last_blk; min_offset = sizeof(struct cloop_header) + (sc->nblocks + 1) * sizeof(uint64_t); max_offset = sc->toc[0].offset - 1; last_blk = &sc->toc[0]; for (i = 0; i < sc->nblocks; i++) { /* First do some bounds checking */ if ((sc->toc[i].offset < min_offset) || (sc->toc[i].offset > pp->mediasize)) { goto error_offset; } DPRINTF_BLK(GUZ_DBG_IO, i, ("%s: cluster #%u " "offset=%ju max_offset=%ju\n", gp->name, (u_int)i, (uintmax_t)sc->toc[i].offset, (uintmax_t)max_offset)); backref_to = BLEN_UNDEF; if (sc->toc[i].offset < max_offset) { /* * For the backref'ed blocks search already parsed * TOC entries for the matching offset and copy the * size from matched entry. 
*/ for (j = 0; j <= i; j++) { if (sc->toc[j].offset == sc->toc[i].offset && !BLK_IS_NIL(sc, j)) { break; } if (j != i) { continue; } DPRINTF(GUZ_DBG_ERR, ("%s: cannot match " "backref'ed offset at cluster #%u\n", gp->name, i)); return (-1); } sc->toc[i].blen = sc->toc[j].blen; backref_to = j; } else { last_blk = &sc->toc[i]; /* * For the "normal blocks" seek forward until we hit * block whose offset is larger than ours and assume * it's going to be the next one. */ for (j = i + 1; j < sc->nblocks + 1; j++) { if (sc->toc[j].offset > max_offset) { break; } } /* NOTE(review): if no later entry has a larger offset the loop above exits with j == sc->nblocks + 1 and toc[j] is read below; confirm the TOC allocation covers that index or that such input cannot occur. */ sc->toc[i].blen = sc->toc[j].offset - sc->toc[i].offset; if (BLK_ENDS(sc, i) > pp->mediasize) { DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u " "extends past media boundary (%ju > %ju)\n", gp->name, (u_int)i, (uintmax_t)BLK_ENDS(sc, i), (intmax_t)pp->mediasize)); return (-1); } KASSERT(max_offset <= sc->toc[i].offset, ( "%s: max_offset is incorrect: %ju", gp->name, (uintmax_t)max_offset)); max_offset = BLK_ENDS(sc, i) - 1; } DPRINTF_BLK(GUZ_DBG_TOC, i, ("%s: cluster #%u, original %u " "bytes, in %u bytes", gp->name, i, sc->blksz, sc->toc[i].blen)); if (backref_to != BLEN_UNDEF) { DPRINTF_BLK(GUZ_DBG_TOC, i, (" (->#%u)", (u_int)backref_to)); } DPRINTF_BLK(GUZ_DBG_TOC, i, ("\n")); } last_blk->last = 1; /* Do a second pass to validate block lengths */ for (i = 0; i < sc->nblocks; i++) { if (sc->toc[i].blen > sc->dcp->max_blen) { if (sc->toc[i].last == 0) { DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u " "length (%ju) exceeds " "max_blen (%ju)\n", gp->name, i, (uintmax_t)sc->toc[i].blen, (uintmax_t)sc->dcp->max_blen)); return (-1); } DPRINTF(GUZ_DBG_INFO, ("%s: cluster #%u extra " "padding is detected, trimmed to %ju\n", gp->name, i, (uintmax_t)sc->dcp->max_blen)); sc->toc[i].blen = sc->dcp->max_blen; sc->toc[i].padded = 1; } } return (0); error_offset: DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u: invalid offset %ju, " "min_offset=%ju mediasize=%jd\n", gp->name, (u_int)i, sc->toc[i].offset, min_offset, pp->mediasize)); return (-1); } 
static struct g_geom * g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) { int error; uint32_t i, total_offsets, offsets_read, blk; void *buf; struct cloop_header *header; struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp2; struct g_uzip_softc *sc; + struct g_geom_alias *gap; enum { G_UZIP = 1, G_ULZMA, G_ZSTD, } type; char cloop_version; g_trace(G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); if ((fnmatch(g_uzip_attach_to, pp->name, 0) != 0) || (fnmatch(g_uzip_noattach_to, pp->name, 0) == 0)) { DPRINTF(GUZ_DBG_INFO, ("%s(%s,%s), ignoring\n", __func__, mp->name, pp->name)); return (NULL); } buf = NULL; /* * Create geom instance. */ gp = g_new_geomf(mp, GUZ_DEV_NAME("%s"), pp->name); cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); if (error) { goto e1; } g_topology_unlock(); /* * Read cloop header, look for CLOOP magic, perform * other validity checks. 
*/ DPRINTF(GUZ_DBG_INFO, ("%s: media sectorsize %u, mediasize %jd\n", gp->name, pp->sectorsize, (intmax_t)pp->mediasize)); buf = g_read_data(cp, 0, pp->sectorsize, NULL); if (buf == NULL) goto e2; header = (struct cloop_header *) buf; if (strncmp(header->magic, CLOOP_MAGIC_START, sizeof(CLOOP_MAGIC_START) - 1) != 0) { DPRINTF(GUZ_DBG_ERR, ("%s: no CLOOP magic\n", gp->name)); goto e3; } cloop_version = header->magic[CLOOP_OFS_VERSN]; switch (header->magic[CLOOP_OFS_COMPR]) { case CLOOP_COMP_LZMA: case CLOOP_COMP_LZMA_DDP: type = G_ULZMA; if (cloop_version < CLOOP_MINVER_LZMA) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; } DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_LZMA image found\n", gp->name)); break; case CLOOP_COMP_LIBZ: case CLOOP_COMP_LIBZ_DDP: type = G_UZIP; if (cloop_version < CLOOP_MINVER_ZLIB) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; } DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZLIB image found\n", gp->name)); break; case CLOOP_COMP_ZSTD: case CLOOP_COMP_ZSTD_DDP: if (cloop_version < CLOOP_MINVER_ZSTD) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; } #ifdef ZSTDIO DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZSTD image found.\n", gp->name)); type = G_ZSTD; #else DPRINTF(GUZ_DBG_ERR, ("%s: GEOM_UZIP_ZSTD image found, but " "this kernel was configured with Zstd disabled.\n", gp->name)); goto e3; #endif break; default: DPRINTF(GUZ_DBG_ERR, ("%s: unsupported image type\n", gp->name)); goto e3; } /* * Initialize softc and read offsets. 
*/ sc = malloc(sizeof(*sc), M_GEOM_UZIP, M_WAITOK | M_ZERO); gp->softc = sc; sc->blksz = ntohl(header->blksz); sc->nblocks = ntohl(header->nblocks); if (sc->blksz % 512 != 0) { printf("%s: block size (%u) should be multiple of 512.\n", gp->name, sc->blksz); goto e4; } if (sc->blksz > MAX_BLKSZ) { printf("%s: block size (%u) should not be larger than %d.\n", gp->name, sc->blksz, MAX_BLKSZ); } total_offsets = sc->nblocks + 1; if (sizeof(struct cloop_header) + total_offsets * sizeof(uint64_t) > pp->mediasize) { printf("%s: media too small for %u blocks\n", gp->name, sc->nblocks); goto e4; } sc->toc = malloc(total_offsets * sizeof(struct g_uzip_blk), M_GEOM_UZIP, M_WAITOK | M_ZERO); offsets_read = MIN(total_offsets, (pp->sectorsize - sizeof(*header)) / sizeof(uint64_t)); for (i = 0; i < offsets_read; i++) { sc->toc[i].offset = be64toh(((uint64_t *) (header + 1))[i]); sc->toc[i].blen = BLEN_UNDEF; } DPRINTF(GUZ_DBG_INFO, ("%s: %u offsets in the first sector\n", gp->name, offsets_read)); /* * The following invalidates the "header" pointer into the first * block's "buf." */ header = NULL; for (blk = 1; offsets_read < total_offsets; blk++) { uint32_t nread; free(buf, M_GEOM); buf = g_read_data( cp, blk * pp->sectorsize, pp->sectorsize, NULL); if (buf == NULL) goto e5; nread = MIN(total_offsets - offsets_read, pp->sectorsize / sizeof(uint64_t)); DPRINTF(GUZ_DBG_TOC, ("%s: %u offsets read from sector %d\n", gp->name, nread, blk)); for (i = 0; i < nread; i++) { sc->toc[offsets_read + i].offset = be64toh(((uint64_t *) buf)[i]); sc->toc[offsets_read + i].blen = BLEN_UNDEF; } offsets_read += nread; } free(buf, M_GEOM); buf = NULL; offsets_read -= 1; DPRINTF(GUZ_DBG_INFO, ("%s: done reading %u block offsets from %u " "sectors\n", gp->name, offsets_read, blk)); if (sc->nblocks != offsets_read) { DPRINTF(GUZ_DBG_ERR, ("%s: read %s offsets than expected " "blocks\n", gp->name, sc->nblocks < offsets_read ? 
"more" : "less")); goto e5; } switch (type) { case G_UZIP: sc->dcp = g_uzip_zlib_ctor(sc->blksz); break; case G_ULZMA: sc->dcp = g_uzip_lzma_ctor(sc->blksz); break; #ifdef ZSTDIO case G_ZSTD: sc->dcp = g_uzip_zstd_ctor(sc->blksz); break; #endif default: goto e5; } /* * The last+1 block was not always initialized by earlier versions of * mkuzip(8). However, *if* it is initialized, the difference between * its offset and the prior block's offset represents the length of the * final real compressed block, and this is significant to the * decompressor. */ if (cloop_version >= CLOOP_MINVER_RELIABLE_LASTBLKSZ && sc->toc[sc->nblocks].offset != 0) { if (sc->toc[sc->nblocks].offset > pp->mediasize) { DPRINTF(GUZ_DBG_ERR, ("%s: bogus n+1 offset %ju > mediasize %ju\n", gp->name, (uintmax_t)sc->toc[sc->nblocks].offset, (uintmax_t)pp->mediasize)); goto e6; } } else { sc->toc[sc->nblocks].offset = pp->mediasize; } /* Massage TOC (table of contents), make sure it is sound */ if (g_uzip_parse_toc(sc, pp, gp) != 0) { DPRINTF(GUZ_DBG_ERR, ("%s: TOC error\n", gp->name)); goto e6; } mtx_init(&sc->last_mtx, "geom_uzip cache", NULL, MTX_DEF); mtx_init(&sc->queue_mtx, "geom_uzip wrkthread", NULL, MTX_DEF); bioq_init(&sc->bio_queue); sc->last_blk = -1; sc->last_buf = malloc(sc->blksz, M_GEOM_UZIP, M_WAITOK); sc->req_total = 0; sc->req_cached = 0; sc->uzip_do = &g_uzip_do; error = kproc_create(g_uzip_wrkthr, sc, &sc->procp, 0, 0, "%s", gp->name); if (error != 0) { goto e7; } g_topology_lock(); pp2 = g_new_providerf(gp, "%s", gp->name); pp2->sectorsize = 512; pp2->mediasize = (off_t)sc->nblocks * sc->blksz; pp2->stripesize = pp->stripesize; pp2->stripeoffset = pp->stripeoffset; + LIST_FOREACH(gap, &pp->aliases, ga_next) + g_provider_add_alias(pp2, GUZ_DEV_NAME("%s"), gap->ga_alias); g_error_provider(pp2, 0); g_access(cp, -1, 0, 0); DPRINTF(GUZ_DBG_INFO, ("%s: taste ok (%d, %ju), (%ju, %ju), %x\n", gp->name, pp2->sectorsize, (uintmax_t)pp2->mediasize, (uintmax_t)pp2->stripeoffset, 
(uintmax_t)pp2->stripesize, pp2->flags)); DPRINTF(GUZ_DBG_INFO, ("%s: %u x %u blocks\n", gp->name, sc->nblocks, sc->blksz)); return (gp); e7: free(sc->last_buf, M_GEOM); mtx_destroy(&sc->queue_mtx); mtx_destroy(&sc->last_mtx); e6: sc->dcp->free(sc->dcp); e5: free(sc->toc, M_GEOM); e4: free(gp->softc, M_GEOM_UZIP); e3: if (buf != NULL) { free(buf, M_GEOM); } e2: g_topology_lock(); g_access(cp, -1, 0, 0); e1: g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } static int g_uzip_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_provider *pp; KASSERT(gp != NULL, ("NULL geom")); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, gp->name); g_topology_assert(); if (gp->softc == NULL) { DPRINTF(GUZ_DBG_ERR, ("%s(%s): gp->softc == NULL\n", __func__, gp->name)); return (ENXIO); } pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("NULL provider")); if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) return (EBUSY); g_wither_geom(gp, ENXIO); g_uzip_softc_free(gp); return (0); } static struct g_class g_uzip_class = { .name = UZIP_CLASS_NAME, .version = G_VERSION, .taste = g_uzip_taste, .destroy_geom = g_uzip_destroy_geom, .start = g_uzip_start, .orphan = g_uzip_orphan, .access = g_uzip_access, .spoiled = g_uzip_spoiled, }; DECLARE_GEOM_CLASS(g_uzip_class, g_uzip); MODULE_DEPEND(g_uzip, xz, 1, 1, 1); MODULE_DEPEND(g_uzip, zlib, 1, 1, 1); MODULE_VERSION(geom_uzip, 0);