diff --git a/sys/geom/vinum/geom_vinum.c b/sys/geom/vinum/geom_vinum.c index 86f5c9f08e1f..b0a278104194 100644 --- a/sys/geom/vinum/geom_vinum.c +++ b/sys/geom/vinum/geom_vinum.c @@ -1,1050 +1,1047 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, vinum, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_VINUM stuff"); u_int g_vinum_debug = 0; SYSCTL_UINT(_kern_geom_vinum, OID_AUTO, debug, CTLFLAG_RWTUN, &g_vinum_debug, 0, "Debug level"); static int gv_create(struct g_geom *, struct gctl_req *); static void gv_attach(struct gv_softc *, struct gctl_req *); static void gv_detach(struct gv_softc *, struct gctl_req *); static void gv_parityop(struct gv_softc *, struct gctl_req *); static void gv_orphan(struct g_consumer *cp) { struct g_geom *gp; struct gv_softc *sc; struct gv_drive *d; g_topology_assert(); KASSERT(cp != NULL, ("gv_orphan: null cp")); gp = cp->geom; KASSERT(gp != NULL, ("gv_orphan: null gp")); sc = gp->softc; KASSERT(sc != NULL, ("gv_orphan: null sc")); d = cp->private; KASSERT(d != NULL, ("gv_orphan: null d")); g_trace(G_T_TOPOLOGY, "gv_orphan(%s)", gp->name); gv_post_event(sc, GV_EVENT_DRIVE_LOST, d, NULL, 0, 0); } void gv_start(struct bio *bp) { struct g_geom *gp; struct gv_softc *sc; gp = bp->bio_to->geom; sc = gp->softc; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_GETATTR: default: g_io_deliver(bp, EOPNOTSUPP); return; } mtx_lock(&sc->bqueue_mtx); bioq_disksort(sc->bqueue_down, bp); wakeup(sc); mtx_unlock(&sc->bqueue_mtx); } void gv_done(struct bio *bp) { struct g_geom *gp; struct gv_softc *sc; KASSERT(bp != NULL, ("NULL bp")); gp = bp->bio_from->geom; sc = gp->softc; mtx_lock(&sc->bqueue_mtx); bioq_disksort(sc->bqueue_up, bp); wakeup(sc); mtx_unlock(&sc->bqueue_mtx); } int gv_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct gv_softc *sc; struct gv_drive *d, *d2; int error; gp = pp->geom; sc = gp->softc; /* * We want to modify the read count with the write count in case we have * plexes in a RAID-5 organization. */ dr += dw; LIST_FOREACH(d, &sc->drives, drive) { if (d->consumer == NULL) continue; error = g_access(d->consumer, dr, dw, de); if (error) { LIST_FOREACH(d2, &sc->drives, drive) { if (d == d2) break; g_access(d2->consumer, -dr, -dw, -de); } G_VINUM_DEBUG(0, "g_access '%s' failed: %d", d->name, error); return (error); } } return (0); } static void gv_init(struct g_class *mp) { struct g_geom *gp; struct gv_softc *sc; g_trace(G_T_TOPOLOGY, "gv_init(%p)", mp); gp = g_new_geomf(mp, "VINUM"); gp->spoiled = gv_orphan; gp->orphan = gv_orphan; gp->access = gv_access; gp->start = gv_start; gp->softc = g_malloc(sizeof(struct gv_softc), M_WAITOK | M_ZERO); sc = gp->softc; sc->geom = gp; sc->bqueue_down = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); sc->bqueue_up = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(sc->bqueue_down); bioq_init(sc->bqueue_up); LIST_INIT(&sc->drives); LIST_INIT(&sc->subdisks); LIST_INIT(&sc->plexes); LIST_INIT(&sc->volumes); TAILQ_INIT(&sc->equeue); mtx_init(&sc->config_mtx, "gv_config", NULL, MTX_DEF); mtx_init(&sc->equeue_mtx, "gv_equeue", NULL, MTX_DEF); mtx_init(&sc->bqueue_mtx, "gv_bqueue", NULL, MTX_DEF); kproc_create(gv_worker, sc, &sc->worker, 0, 0, "gv_worker"); } static int gv_unload(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct gv_softc *sc; g_trace(G_T_TOPOLOGY, "gv_unload(%p)", mp); g_topology_assert(); sc = gp->softc; if (sc != NULL) { gv_worker_exit(sc); gp->softc = NULL; g_wither_geom(gp, ENXIO); } return (0); } /* Handle userland request of attaching object. */ static void gv_attach(struct gv_softc *sc, struct gctl_req *req) { struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; off_t *offset; int *rename, type_child, type_parent; char *child, *parent; child = gctl_get_param(req, "child", NULL); if (child == NULL) { gctl_error(req, "no child given"); return; } parent = gctl_get_param(req, "parent", NULL); if (parent == NULL) { gctl_error(req, "no parent given"); return; } offset = gctl_get_paraml(req, "offset", sizeof(*offset)); if (offset == NULL) { gctl_error(req, "no offset given"); return; } rename = gctl_get_paraml(req, "rename", sizeof(*rename)); if (rename == NULL) { gctl_error(req, "no rename flag given"); return; } type_child = gv_object_type(sc, child); type_parent = gv_object_type(sc, parent); switch (type_child) { case GV_TYPE_PLEX: if (type_parent != GV_TYPE_VOL) { gctl_error(req, "no such volume to attach to"); return; } v = gv_find_vol(sc, parent); p = gv_find_plex(sc, child); gv_post_event(sc, GV_EVENT_ATTACH_PLEX, p, v, *offset, *rename); break; case GV_TYPE_SD: if (type_parent != GV_TYPE_PLEX) { gctl_error(req, "no such plex to attach to"); return; } p = gv_find_plex(sc, parent); s = gv_find_sd(sc, child); gv_post_event(sc, GV_EVENT_ATTACH_SD, s, p, *offset, *rename); break; default: gctl_error(req, "invalid child type"); break; } } /* Handle userland request of detaching object. */ static void gv_detach(struct gv_softc *sc, struct gctl_req *req) { struct gv_plex *p; struct gv_sd *s; int *flags, type; char *object; object = gctl_get_param(req, "object", NULL); if (object == NULL) { gctl_error(req, "no argument given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); type = gv_object_type(sc, object); switch (type) { case GV_TYPE_PLEX: p = gv_find_plex(sc, object); gv_post_event(sc, GV_EVENT_DETACH_PLEX, p, NULL, *flags, 0); break; case GV_TYPE_SD: s = gv_find_sd(sc, object); gv_post_event(sc, GV_EVENT_DETACH_SD, s, NULL, *flags, 0); break; default: gctl_error(req, "invalid object type"); break; } } /* Handle userland requests for creating new objects. */ static int gv_create(struct g_geom *gp, struct gctl_req *req) { struct gv_softc *sc; struct gv_drive *d, *d2; struct gv_plex *p, *p2; struct gv_sd *s, *s2; struct gv_volume *v, *v2; struct g_provider *pp; - int error, i, *drives, *flags, *plexes, *subdisks, *volumes; + int i, *drives, *flags, *plexes, *subdisks, *volumes; char buf[20]; g_topology_assert(); sc = gp->softc; /* Find out how many of each object have been passed in. */ volumes = gctl_get_paraml(req, "volumes", sizeof(*volumes)); plexes = gctl_get_paraml(req, "plexes", sizeof(*plexes)); subdisks = gctl_get_paraml(req, "subdisks", sizeof(*subdisks)); drives = gctl_get_paraml(req, "drives", sizeof(*drives)); if (volumes == NULL || plexes == NULL || subdisks == NULL || drives == NULL) { gctl_error(req, "number of objects not given"); return (-1); } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "flags not given"); return (-1); } /* First, handle drive definitions ... */ for (i = 0; i < *drives; i++) { snprintf(buf, sizeof(buf), "drive%d", i); d2 = gctl_get_paraml(req, buf, sizeof(*d2)); if (d2 == NULL) { gctl_error(req, "no drive definition given"); return (-1); } /* * Make sure that the device specified in the drive config is * an active GEOM provider. */ pp = g_provider_by_name(d2->device); if (pp == NULL) { gctl_error(req, "%s: device not found", d2->device); goto error; } if (gv_find_drive(sc, d2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "drive '%s' already exists", d2->name); goto error; } if (gv_find_drive_device(sc, d2->device) != NULL) { gctl_error(req, "device '%s' already configured in " "gvinum", d2->device); goto error; } d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); bcopy(d2, d, sizeof(*d)); gv_post_event(sc, GV_EVENT_CREATE_DRIVE, d, NULL, 0, 0); } /* ... then volume definitions ... */ for (i = 0; i < *volumes; i++) { - error = 0; snprintf(buf, sizeof(buf), "volume%d", i); v2 = gctl_get_paraml(req, buf, sizeof(*v2)); if (v2 == NULL) { gctl_error(req, "no volume definition given"); return (-1); } if (gv_find_vol(sc, v2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "volume '%s' already exists", v2->name); goto error; } v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO); bcopy(v2, v, sizeof(*v)); gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0); } /* ... then plex definitions ... */ for (i = 0; i < *plexes; i++) { - error = 0; snprintf(buf, sizeof(buf), "plex%d", i); p2 = gctl_get_paraml(req, buf, sizeof(*p2)); if (p2 == NULL) { gctl_error(req, "no plex definition given"); return (-1); } if (gv_find_plex(sc, p2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "plex '%s' already exists", p2->name); goto error; } p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO); bcopy(p2, p, sizeof(*p)); gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0); } /* ... and, finally, subdisk definitions. */ for (i = 0; i < *subdisks; i++) { - error = 0; snprintf(buf, sizeof(buf), "sd%d", i); s2 = gctl_get_paraml(req, buf, sizeof(*s2)); if (s2 == NULL) { gctl_error(req, "no subdisk definition given"); return (-1); } if (gv_find_sd(sc, s2->name) != NULL) { /* Ignore error. */ if (*flags & GV_FLAG_F) continue; gctl_error(req, "sd '%s' already exists", s2->name); goto error; } s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO); bcopy(s2, s, sizeof(*s)); gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0); } error: gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); return (0); } static void gv_config(struct gctl_req *req, struct g_class *mp, char const *verb) { struct g_geom *gp; struct gv_softc *sc; struct sbuf *sb; char *comment; g_topology_assert(); gp = LIST_FIRST(&mp->geom); sc = gp->softc; if (!strcmp(verb, "attach")) { gv_attach(sc, req); } else if (!strcmp(verb, "concat")) { gv_concat(gp, req); } else if (!strcmp(verb, "detach")) { gv_detach(sc, req); } else if (!strcmp(verb, "list")) { gv_list(gp, req); /* Save our configuration back to disk. */ } else if (!strcmp(verb, "saveconfig")) { gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); /* Return configuration in string form. */ } else if (!strcmp(verb, "getconfig")) { comment = gctl_get_param(req, "comment", NULL); if (comment == NULL) { gctl_error(req, "no comment parameter given"); return; } sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); gv_format_config(sc, sb, 0, comment); sbuf_finish(sb); gctl_set_param(req, "config", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } else if (!strcmp(verb, "create")) { gv_create(gp, req); } else if (!strcmp(verb, "mirror")) { gv_mirror(gp, req); } else if (!strcmp(verb, "move")) { gv_move(gp, req); } else if (!strcmp(verb, "raid5")) { gv_raid5(gp, req); } else if (!strcmp(verb, "rebuildparity") || !strcmp(verb, "checkparity")) { gv_parityop(sc, req); } else if (!strcmp(verb, "remove")) { gv_remove(gp, req); } else if (!strcmp(verb, "rename")) { gv_rename(gp, req); } else if (!strcmp(verb, "resetconfig")) { gv_post_event(sc, GV_EVENT_RESET_CONFIG, sc, NULL, 0, 0); } else if (!strcmp(verb, "start")) { gv_start_obj(gp, req); } else if (!strcmp(verb, "stripe")) { gv_stripe(gp, req); } else if (!strcmp(verb, "setstate")) { gv_setstate(gp, req); } else gctl_error(req, "Unknown verb parameter"); } static void gv_parityop(struct gv_softc *sc, struct gctl_req *req) { struct gv_plex *p; int *flags, *rebuild, type; char *plex; plex = gctl_get_param(req, "plex", NULL); if (plex == NULL) { gctl_error(req, "no plex given"); return; } flags = gctl_get_paraml(req, "flags", sizeof(*flags)); if (flags == NULL) { gctl_error(req, "no flags given"); return; } rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild)); if (rebuild == NULL) { gctl_error(req, "no operation given"); return; } type = gv_object_type(sc, plex); if (type != GV_TYPE_PLEX) { gctl_error(req, "'%s' is not a plex", plex); return; } p = gv_find_plex(sc, plex); if (p->state != GV_PLEX_UP) { gctl_error(req, "plex %s is not completely accessible", p->name); return; } if (p->org != GV_PLEX_RAID5) { gctl_error(req, "plex %s is not a RAID5 plex", p->name); return; } /* Put it in the event queue. */ /* XXX: The state of the plex might have changed when this event is * picked up ... We should perhaps check this afterwards. */ if (*rebuild) gv_post_event(sc, GV_EVENT_PARITY_REBUILD, p, NULL, 0, 0); else gv_post_event(sc, GV_EVENT_PARITY_CHECK, p, NULL, 0, 0); } static struct g_geom * gv_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_geom *gp; struct g_consumer *cp; struct gv_softc *sc; struct gv_hdr vhdr; int error; g_topology_assert(); g_trace(G_T_TOPOLOGY, "gv_taste(%s, %s)", mp->name, pp->name); gp = LIST_FIRST(&mp->geom); if (gp == NULL) { G_VINUM_DEBUG(0, "error: tasting, but not initialized?"); return (NULL); } sc = gp->softc; cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); return (NULL); } if (g_access(cp, 1, 0, 0) != 0) { g_detach(cp); g_destroy_consumer(cp); return (NULL); } g_topology_unlock(); error = gv_read_header(cp, &vhdr); g_topology_lock(); g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); /* Check if what we've been given is a valid vinum drive. */ if (!error) gv_post_event(sc, GV_EVENT_DRIVE_TASTED, pp, NULL, 0, 0); return (NULL); } void gv_worker(void *arg) { struct g_provider *pp; struct gv_softc *sc; struct gv_event *ev; struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; struct gv_drive *d; struct bio *bp; int newstate, flags, err, rename; char *newname; off_t offset; sc = arg; KASSERT(sc != NULL, ("NULL sc")); for (;;) { /* Look at the events first... */ ev = gv_get_event(sc); if (ev != NULL) { gv_remove_event(sc, ev); switch (ev->type) { case GV_EVENT_DRIVE_TASTED: G_VINUM_DEBUG(2, "event 'drive tasted'"); pp = ev->arg1; gv_drive_tasted(sc, pp); break; case GV_EVENT_DRIVE_LOST: G_VINUM_DEBUG(2, "event 'drive lost'"); d = ev->arg1; gv_drive_lost(sc, d); break; case GV_EVENT_CREATE_DRIVE: G_VINUM_DEBUG(2, "event 'create drive'"); d = ev->arg1; gv_create_drive(sc, d); break; case GV_EVENT_CREATE_VOLUME: G_VINUM_DEBUG(2, "event 'create volume'"); v = ev->arg1; gv_create_volume(sc, v); break; case GV_EVENT_CREATE_PLEX: G_VINUM_DEBUG(2, "event 'create plex'"); p = ev->arg1; gv_create_plex(sc, p); break; case GV_EVENT_CREATE_SD: G_VINUM_DEBUG(2, "event 'create sd'"); s = ev->arg1; gv_create_sd(sc, s); break; case GV_EVENT_RM_DRIVE: G_VINUM_DEBUG(2, "event 'remove drive'"); d = ev->arg1; flags = ev->arg3; gv_rm_drive(sc, d, flags); /*gv_setup_objects(sc);*/ break; case GV_EVENT_RM_VOLUME: G_VINUM_DEBUG(2, "event 'remove volume'"); v = ev->arg1; gv_rm_vol(sc, v); /*gv_setup_objects(sc);*/ break; case GV_EVENT_RM_PLEX: G_VINUM_DEBUG(2, "event 'remove plex'"); p = ev->arg1; gv_rm_plex(sc, p); /*gv_setup_objects(sc);*/ break; case GV_EVENT_RM_SD: G_VINUM_DEBUG(2, "event 'remove sd'"); s = ev->arg1; gv_rm_sd(sc, s); /*gv_setup_objects(sc);*/ break; case GV_EVENT_SAVE_CONFIG: G_VINUM_DEBUG(2, "event 'save config'"); gv_save_config(sc); break; case GV_EVENT_SET_SD_STATE: G_VINUM_DEBUG(2, "event 'setstate sd'"); s = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_sd_state(s, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting subdisk" " state: error code %d", err); break; case GV_EVENT_SET_DRIVE_STATE: G_VINUM_DEBUG(2, "event 'setstate drive'"); d = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_drive_state(d, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting drive " "state: error code %d", err); break; case GV_EVENT_SET_VOL_STATE: G_VINUM_DEBUG(2, "event 'setstate volume'"); v = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_vol_state(v, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting volume " "state: error code %d", err); break; case GV_EVENT_SET_PLEX_STATE: G_VINUM_DEBUG(2, "event 'setstate plex'"); p = ev->arg1; newstate = ev->arg3; flags = ev->arg4; err = gv_set_plex_state(p, newstate, flags); if (err) G_VINUM_DEBUG(0, "error setting plex " "state: error code %d", err); break; case GV_EVENT_SETUP_OBJECTS: G_VINUM_DEBUG(2, "event 'setup objects'"); gv_setup_objects(sc); break; case GV_EVENT_RESET_CONFIG: G_VINUM_DEBUG(2, "event 'resetconfig'"); err = gv_resetconfig(sc); if (err) G_VINUM_DEBUG(0, "error resetting " "config: error code %d", err); break; case GV_EVENT_PARITY_REBUILD: /* * Start the rebuild. The gv_plex_done will * handle issuing of the remaining rebuild bio's * until it's finished. */ G_VINUM_DEBUG(2, "event 'rebuild'"); p = ev->arg1; if (p->state != GV_PLEX_UP) { G_VINUM_DEBUG(0, "plex %s is not " "completely accessible", p->name); break; } if (p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING || p->flags & GV_PLEX_GROWING) { G_VINUM_DEBUG(0, "plex %s is busy with " "syncing or parity build", p->name); break; } p->synced = 0; p->flags |= GV_PLEX_REBUILDING; g_topology_assert_not(); g_topology_lock(); err = gv_access(p->vol_sc->provider, 1, 1, 0); if (err) { G_VINUM_DEBUG(0, "unable to access " "provider"); break; } g_topology_unlock(); gv_parity_request(p, GV_BIO_CHECK | GV_BIO_PARITY, 0); break; case GV_EVENT_PARITY_CHECK: /* Start parity check. */ G_VINUM_DEBUG(2, "event 'check'"); p = ev->arg1; if (p->state != GV_PLEX_UP) { G_VINUM_DEBUG(0, "plex %s is not " "completely accessible", p->name); break; } if (p->flags & GV_PLEX_SYNCING || p->flags & GV_PLEX_REBUILDING || p->flags & GV_PLEX_GROWING) { G_VINUM_DEBUG(0, "plex %s is busy with " "syncing or parity build", p->name); break; } p->synced = 0; g_topology_assert_not(); g_topology_lock(); err = gv_access(p->vol_sc->provider, 1, 1, 0); if (err) { G_VINUM_DEBUG(0, "unable to access " "provider"); break; } g_topology_unlock(); gv_parity_request(p, GV_BIO_CHECK, 0); break; case GV_EVENT_START_PLEX: G_VINUM_DEBUG(2, "event 'start' plex"); p = ev->arg1; gv_start_plex(p); break; case GV_EVENT_START_VOLUME: G_VINUM_DEBUG(2, "event 'start' volume"); v = ev->arg1; gv_start_vol(v); break; case GV_EVENT_ATTACH_PLEX: G_VINUM_DEBUG(2, "event 'attach' plex"); p = ev->arg1; v = ev->arg2; rename = ev->arg4; err = gv_attach_plex(p, v, rename); if (err) G_VINUM_DEBUG(0, "error attaching %s to" " %s: error code %d", p->name, v->name, err); break; case GV_EVENT_ATTACH_SD: G_VINUM_DEBUG(2, "event 'attach' sd"); s = ev->arg1; p = ev->arg2; offset = ev->arg3; rename = ev->arg4; err = gv_attach_sd(s, p, offset, rename); if (err) G_VINUM_DEBUG(0, "error attaching %s to" " %s: error code %d", s->name, p->name, err); break; case GV_EVENT_DETACH_PLEX: G_VINUM_DEBUG(2, "event 'detach' plex"); p = ev->arg1; flags = ev->arg3; err = gv_detach_plex(p, flags); if (err) G_VINUM_DEBUG(0, "error detaching %s: " "error code %d", p->name, err); break; case GV_EVENT_DETACH_SD: G_VINUM_DEBUG(2, "event 'detach' sd"); s = ev->arg1; flags = ev->arg3; err = gv_detach_sd(s, flags); if (err) G_VINUM_DEBUG(0, "error detaching %s: " "error code %d", s->name, err); break; case GV_EVENT_RENAME_VOL: G_VINUM_DEBUG(2, "event 'rename' volume"); v = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_vol(sc, v, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", v->name, newname, err); g_free(newname); /* Destroy and recreate the provider if we can. */ if (gv_provider_is_open(v->provider)) { G_VINUM_DEBUG(0, "unable to rename " "provider to %s: provider in use", v->name); break; } g_topology_lock(); g_wither_provider(v->provider, ENOENT); g_topology_unlock(); v->provider = NULL; gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0); break; case GV_EVENT_RENAME_PLEX: G_VINUM_DEBUG(2, "event 'rename' plex"); p = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_plex(sc, p, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", p->name, newname, err); g_free(newname); break; case GV_EVENT_RENAME_SD: G_VINUM_DEBUG(2, "event 'rename' sd"); s = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_sd(sc, s, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", s->name, newname, err); g_free(newname); break; case GV_EVENT_RENAME_DRIVE: G_VINUM_DEBUG(2, "event 'rename' drive"); d = ev->arg1; newname = ev->arg2; flags = ev->arg3; err = gv_rename_drive(sc, d, newname, flags); if (err) G_VINUM_DEBUG(0, "error renaming %s to " "%s: error code %d", d->name, newname, err); g_free(newname); break; case GV_EVENT_MOVE_SD: G_VINUM_DEBUG(2, "event 'move' sd"); s = ev->arg1; d = ev->arg2; flags = ev->arg3; err = gv_move_sd(sc, s, d, flags); if (err) G_VINUM_DEBUG(0, "error moving %s to " "%s: error code %d", s->name, d->name, err); break; case GV_EVENT_THREAD_EXIT: G_VINUM_DEBUG(2, "event 'thread exit'"); g_free(ev); mtx_lock(&sc->equeue_mtx); mtx_lock(&sc->bqueue_mtx); gv_cleanup(sc); mtx_destroy(&sc->bqueue_mtx); mtx_destroy(&sc->equeue_mtx); g_free(sc->bqueue_down); g_free(sc->bqueue_up); g_free(sc); kproc_exit(0); /* NOTREACHED */ default: G_VINUM_DEBUG(1, "unknown event %d", ev->type); } g_free(ev); continue; } /* ... then do I/O processing. */ mtx_lock(&sc->bqueue_mtx); /* First do new requests. */ bp = bioq_takefirst(sc->bqueue_down); if (bp != NULL) { mtx_unlock(&sc->bqueue_mtx); /* A bio that interfered with another bio. */ if (bp->bio_pflags & GV_BIO_ONHOLD) { s = bp->bio_caller1; p = s->plex_sc; /* Is it still locked out? */ if (gv_stripe_active(p, bp)) { /* Park the bio on the waiting queue. */ bioq_disksort(p->wqueue, bp); } else { bp->bio_pflags &= ~GV_BIO_ONHOLD; g_io_request(bp, s->drive_sc->consumer); } /* A special request requireing special handling. */ } else if (bp->bio_pflags & GV_BIO_INTERNAL) { p = bp->bio_caller1; gv_plex_start(p, bp); } else { gv_volume_start(sc, bp); } mtx_lock(&sc->bqueue_mtx); } /* Then do completed requests. */ bp = bioq_takefirst(sc->bqueue_up); if (bp == NULL) { msleep(sc, &sc->bqueue_mtx, PRIBIO, "-", hz/10); mtx_unlock(&sc->bqueue_mtx); continue; } mtx_unlock(&sc->bqueue_mtx); gv_bio_done(sc, bp); } } #define VINUM_CLASS_NAME "VINUM" static struct g_class g_vinum_class = { .name = VINUM_CLASS_NAME, .version = G_VERSION, .init = gv_init, .taste = gv_taste, .ctlreq = gv_config, .destroy_geom = gv_unload, }; DECLARE_GEOM_CLASS(g_vinum_class, g_vinum); MODULE_VERSION(geom_vinum, 0); diff --git a/sys/geom/vinum/geom_vinum_subr.c b/sys/geom/vinum/geom_vinum_subr.c index 54dd6db95e5e..b68541bf00a7 100644 --- a/sys/geom/vinum/geom_vinum_subr.c +++ b/sys/geom/vinum/geom_vinum_subr.c @@ -1,1281 +1,1280 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2004, 2007 Lukas Ertl * Copyright (c) 2007, 2009 Ulf Lilleengen * Copyright (c) 1997, 1998, 1999 * Nan Yang Computer Services Limited. All rights reserved. * * Parts written by Greg Lehey * * This software is distributed under the so-called ``Berkeley * License'': * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Nan Yang Computer * Services Limited. * 4. Neither the name of the Company nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * This software is provided ``as is'', and any express or implied * warranties, including, but not limited to, the implied warranties of * merchantability and fitness for a particular purpose are disclaimed. * In no event shall the company or contributors be liable for any * direct, indirect, incidental, special, exemplary, or consequential * damages (including, but not limited to, procurement of substitute * goods or services; loss of use, data, or profits; or business * interruption) however caused and on any theory of liability, whether * in contract, strict liability, or tort (including negligence or * otherwise) arising in any way out of the use of this software, even if * advised of the possibility of such damage. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include int gv_drive_is_newer(struct gv_softc *, struct gv_drive *); static off_t gv_plex_smallest_sd(struct gv_plex *); void gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d) { char *aptr, *bptr, *cptr; struct gv_volume *v, *v2; struct gv_plex *p, *p2; struct gv_sd *s, *s2; int error, is_newer, tokens; char *token[GV_MAXARGS]; is_newer = gv_drive_is_newer(sc, d); /* Until the end of the string *buf. */ for (aptr = buf; *aptr != '\0'; aptr = bptr) { bptr = aptr; cptr = aptr; /* Separate input lines. */ while (*bptr != '\n') bptr++; *bptr = '\0'; bptr++; tokens = gv_tokenize(cptr, token, GV_MAXARGS); if (tokens <= 0) continue; if (!strcmp(token[0], "volume")) { v = gv_new_volume(tokens, token); if (v == NULL) { G_VINUM_DEBUG(0, "config parse failed volume"); break; } v2 = gv_find_vol(sc, v->name); if (v2 != NULL) { if (is_newer) { v2->state = v->state; G_VINUM_DEBUG(2, "newer volume found!"); } g_free(v); continue; } gv_create_volume(sc, v); } else if (!strcmp(token[0], "plex")) { p = gv_new_plex(tokens, token); if (p == NULL) { G_VINUM_DEBUG(0, "config parse failed plex"); break; } p2 = gv_find_plex(sc, p->name); if (p2 != NULL) { /* XXX */ if (is_newer) { p2->state = p->state; G_VINUM_DEBUG(2, "newer plex found!"); } g_free(p); continue; } error = gv_create_plex(sc, p); if (error) continue; /* * These flags were set in gv_create_plex() and are not * needed here (on-disk config parsing). */ p->flags &= ~GV_PLEX_ADDED; } else if (!strcmp(token[0], "sd")) { s = gv_new_sd(tokens, token); if (s == NULL) { G_VINUM_DEBUG(0, "config parse failed subdisk"); break; } s2 = gv_find_sd(sc, s->name); if (s2 != NULL) { /* XXX */ if (is_newer) { s2->state = s->state; G_VINUM_DEBUG(2, "newer subdisk found!"); } g_free(s); continue; } /* * Signal that this subdisk was tasted, and could * possibly reference a drive that isn't in our config * yet. */ s->flags |= GV_SD_TASTED; if (s->state == GV_SD_UP) s->flags |= GV_SD_CANGOUP; error = gv_create_sd(sc, s); if (error) continue; /* * This flag was set in gv_create_sd() and is not * needed here (on-disk config parsing). */ s->flags &= ~GV_SD_NEWBORN; s->flags &= ~GV_SD_GROW; } } } /* * Format the vinum configuration properly. If ondisk is non-zero then the * configuration is intended to be written to disk later. */ void gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix) { struct gv_drive *d; struct gv_sd *s; struct gv_plex *p; struct gv_volume *v; /* * We don't need the drive configuration if we're not writing the * config to disk. */ if (!ondisk) { LIST_FOREACH(d, &sc->drives, drive) { sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix, d->name, d->device); } } LIST_FOREACH(v, &sc->volumes, volume) { if (!ondisk) sbuf_printf(sb, "%s", prefix); sbuf_printf(sb, "volume %s", v->name); if (ondisk) sbuf_printf(sb, " state %s", gv_volstate(v->state)); sbuf_printf(sb, "\n"); } LIST_FOREACH(p, &sc->plexes, plex) { if (!ondisk) sbuf_printf(sb, "%s", prefix); sbuf_printf(sb, "plex name %s org %s ", p->name, gv_plexorg(p->org)); if (gv_is_striped(p)) sbuf_printf(sb, "%ds ", p->stripesize / 512); if (p->vol_sc != NULL) sbuf_printf(sb, "vol %s", p->volume); if (ondisk) sbuf_printf(sb, " state %s", gv_plexstate(p->state)); sbuf_printf(sb, "\n"); } LIST_FOREACH(s, &sc->subdisks, sd) { if (!ondisk) sbuf_printf(sb, "%s", prefix); sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset " "%jds", s->name, s->drive, s->size / 512, s->drive_offset / 512); if (s->plex_sc != NULL) { sbuf_printf(sb, " plex %s plexoffset %jds", s->plex, s->plex_offset / 512); } if (ondisk) sbuf_printf(sb, " state %s", gv_sdstate(s->state)); sbuf_printf(sb, "\n"); } } static off_t gv_plex_smallest_sd(struct gv_plex *p) { struct gv_sd *s; off_t smallest; KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p")); s = LIST_FIRST(&p->subdisks); if (s == NULL) return (-1); smallest = s->size; LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->size < smallest) smallest = s->size; } return (smallest); } /* Walk over plexes in a volume and count how many are down. */ int gv_plexdown(struct gv_volume *v) { int plexdown; struct gv_plex *p; KASSERT(v != NULL, ("gv_plexdown: NULL v")); plexdown = 0; LIST_FOREACH(p, &v->plexes, plex) { if (p->state == GV_PLEX_DOWN) plexdown++; } return (plexdown); } int gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p) { struct gv_sd *s2; off_t psizeorig, remainder, smallest; /* If this subdisk was already given to this plex, do nothing. */ if (s->plex_sc == p) return (0); /* Check correct size of this subdisk. */ s2 = LIST_FIRST(&p->subdisks); /* Adjust the subdisk-size if necessary. */ if (s2 != NULL && gv_is_striped(p)) { /* First adjust to the stripesize. */ remainder = s->size % p->stripesize; if (remainder) { G_VINUM_DEBUG(1, "size of sd %s is not a " "multiple of plex stripesize, taking off " "%jd bytes", s->name, (intmax_t)remainder); gv_adjust_freespace(s, remainder); } smallest = gv_plex_smallest_sd(p); /* Then take off extra if other subdisks are smaller. */ remainder = s->size - smallest; /* * Don't allow a remainder below zero for running plexes, it's too * painful, and if someone were to accidentally do this, the * resulting array might be smaller than the original... not god */ if (remainder < 0) { if (!(p->flags & GV_PLEX_NEWBORN)) { G_VINUM_DEBUG(0, "sd %s too small for plex %s!", s->name, p->name); return (GV_ERR_BADSIZE); } /* Adjust other subdisks. */ LIST_FOREACH(s2, &p->subdisks, in_plex) { G_VINUM_DEBUG(1, "size of sd %s is to big, " "taking off %jd bytes", s->name, (intmax_t)remainder); gv_adjust_freespace(s2, (remainder * -1)); } } else if (remainder > 0) { G_VINUM_DEBUG(1, "size of sd %s is to big, " "taking off %jd bytes", s->name, (intmax_t)remainder); gv_adjust_freespace(s, remainder); } } /* Find the correct plex offset for this subdisk, if needed. */ if (s->plex_offset == -1) { /* * First set it to 0 to catch the case where we had a detached * subdisk that didn't get any good offset. */ s->plex_offset = 0; if (p->sdcount) { LIST_FOREACH(s2, &p->subdisks, in_plex) { if (gv_is_striped(p)) s->plex_offset = p->sdcount * p->stripesize; else s->plex_offset = s2->plex_offset + s2->size; } } } /* There are no subdisks for this plex yet, just insert it. */ if (LIST_EMPTY(&p->subdisks)) { LIST_INSERT_HEAD(&p->subdisks, s, in_plex); /* Insert in correct order, depending on plex_offset. */ } else { LIST_FOREACH(s2, &p->subdisks, in_plex) { if (s->plex_offset < s2->plex_offset) { LIST_INSERT_BEFORE(s2, s, in_plex); break; } else if (LIST_NEXT(s2, in_plex) == NULL) { LIST_INSERT_AFTER(s2, s, in_plex); break; } } } s->plex_sc = p; /* Adjust the size of our plex. We check if the plex misses a subdisk, * so we don't make the plex smaller than it actually should be. */ psizeorig = p->size; p->size = gv_plex_size(p); /* Make sure the size is not changed. */ if (p->sddetached > 0) { if (p->size < psizeorig) { p->size = psizeorig; /* We make sure wee need another subdisk. */ if (p->sddetached == 1) p->sddetached++; } p->sddetached--; } else { if ((p->org == GV_PLEX_RAID5 || p->org == GV_PLEX_STRIPED) && !(p->flags & GV_PLEX_NEWBORN) && p->state == GV_PLEX_UP) { s->flags |= GV_SD_GROW; } p->sdcount++; } return (0); } void gv_update_vol_size(struct gv_volume *v, off_t size) { if (v == NULL) return; if (v->provider != NULL) { g_topology_lock(); v->provider->mediasize = size; g_topology_unlock(); } v->size = size; } /* Return how many subdisks that constitute the original plex. */ int gv_sdcount(struct gv_plex *p, int growing) { struct gv_sd *s; int sdcount; sdcount = p->sdcount; if (growing) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) sdcount--; } } return (sdcount); } /* Calculates the plex size. */ off_t gv_plex_size(struct gv_plex *p) { struct gv_sd *s; off_t size; int sdcount; KASSERT(p != NULL, ("gv_plex_size: NULL p")); /* Adjust the size of our plex. */ size = 0; sdcount = gv_sdcount(p, 1); switch (p->org) { case GV_PLEX_CONCAT: LIST_FOREACH(s, &p->subdisks, in_plex) size += s->size; break; case GV_PLEX_STRIPED: s = LIST_FIRST(&p->subdisks); size = ((s != NULL) ? (sdcount * s->size) : 0); break; case GV_PLEX_RAID5: s = LIST_FIRST(&p->subdisks); size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0); break; } return (size); } /* Returns the size of a volume. */ off_t gv_vol_size(struct gv_volume *v) { struct gv_plex *p; off_t minplexsize; KASSERT(v != NULL, ("gv_vol_size: NULL v")); p = LIST_FIRST(&v->plexes); if (p == NULL) return (0); minplexsize = p->size; LIST_FOREACH(p, &v->plexes, in_volume) { if (p->size < minplexsize) { minplexsize = p->size; } } return (minplexsize); } void gv_update_plex_config(struct gv_plex *p) { struct gv_sd *s, *s2; off_t remainder; int required_sds, state; KASSERT(p != NULL, ("gv_update_plex_config: NULL p")); /* The plex was added to an already running volume. */ if (p->flags & GV_PLEX_ADDED) gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); switch (p->org) { case GV_PLEX_STRIPED: required_sds = 2; break; case GV_PLEX_RAID5: required_sds = 3; break; case GV_PLEX_CONCAT: default: required_sds = 0; break; } if (required_sds) { if (p->sdcount < required_sds) { gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); } /* * The subdisks in striped plexes must all have the same size. */ s = LIST_FIRST(&p->subdisks); LIST_FOREACH(s2, &p->subdisks, in_plex) { if (s->size != s2->size) { G_VINUM_DEBUG(0, "subdisk size mismatch %s" "(%jd) <> %s (%jd)", s->name, s->size, s2->name, s2->size); gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); } } LIST_FOREACH(s, &p->subdisks, in_plex) { /* Trim subdisk sizes to match the stripe size. */ remainder = s->size % p->stripesize; if (remainder) { G_VINUM_DEBUG(1, "size of sd %s is not a " "multiple of plex stripesize, taking off " "%jd bytes", s->name, (intmax_t)remainder); gv_adjust_freespace(s, remainder); } } } p->size = gv_plex_size(p); if (p->sdcount == 0) gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) { LIST_FOREACH(s, &p->subdisks, in_plex) gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE); /* If added to a volume, we want the plex to be down. */ state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP; gv_set_plex_state(p, state, GV_SETSTATE_FORCE); p->flags &= ~GV_PLEX_ADDED; } else if (p->flags & GV_PLEX_ADDED) { LIST_FOREACH(s, &p->subdisks, in_plex) gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); p->flags &= ~GV_PLEX_ADDED; } else if (p->state == GV_PLEX_UP) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) { gv_set_plex_state(p, GV_PLEX_GROWABLE, GV_SETSTATE_FORCE); break; } } } /* Our plex is grown up now. */ p->flags &= ~GV_PLEX_NEWBORN; } /* * Give a subdisk to a drive, check and adjust several parameters, adjust * freelist. */ int gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d) { struct gv_sd *s2; struct gv_freelist *fl, *fl2; off_t tmp; int i; fl2 = NULL; /* Shortcut for "referenced" drives. */ if (d->flags & GV_DRIVE_REFERENCED) { s->drive_sc = d; return (0); } /* Check if this subdisk was already given to this drive. */ if (s->drive_sc != NULL) { if (s->drive_sc == d) { if (!(s->flags & GV_SD_TASTED)) { return (0); } } else { G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' " "(already on '%s')", s->name, d->name, s->drive_sc->name); return (GV_ERR_ISATTACHED); } } /* Preliminary checks. */ if ((s->size > d->avail) || (d->freelist_entries == 0)) { G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name, s->name); return (GV_ERR_NOSPACE); } /* If no size was given for this subdisk, try to auto-size it... */ if (s->size == -1) { /* Find the largest available slot. */ LIST_FOREACH(fl, &d->freelist, freelist) { if (fl->size < s->size) continue; s->size = fl->size; s->drive_offset = fl->offset; fl2 = fl; } /* No good slot found? */ if (s->size == -1) { G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'", s->name, d->name); return (GV_ERR_BADSIZE); } /* * ... or check if we have a free slot that's large enough for the * given size. */ } else { i = 0; LIST_FOREACH(fl, &d->freelist, freelist) { if (fl->size < s->size) continue; /* Assign drive offset, if not given. */ if (s->drive_offset == -1) s->drive_offset = fl->offset; fl2 = fl; i++; break; } /* Couldn't find a good free slot. */ if (i == 0) { G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'", s->name, d->name); return (GV_ERR_NOSPACE); } } /* No drive offset given, try to calculate it. */ if (s->drive_offset == -1) { /* Add offsets and sizes from other subdisks on this drive. */ LIST_FOREACH(s2, &d->subdisks, from_drive) { s->drive_offset = s2->drive_offset + s2->size; } /* * If there are no other subdisks yet, then set the default * offset to GV_DATA_START. */ if (s->drive_offset == -1) s->drive_offset = GV_DATA_START; /* Check if we have a free slot at the given drive offset. */ } else { i = 0; LIST_FOREACH(fl, &d->freelist, freelist) { /* Yes, this subdisk fits. */ if ((fl->offset <= s->drive_offset) && (fl->offset + fl->size >= s->drive_offset + s->size)) { i++; fl2 = fl; break; } } /* Couldn't find a good free slot. */ if (i == 0) { G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit " "on '%s'", s->name, d->name); return (GV_ERR_NOSPACE); } } /* * Now that all parameters are checked and set up, we can give the * subdisk to the drive and adjust the freelist. */ /* First, adjust the freelist. */ LIST_FOREACH(fl, &d->freelist, freelist) { /* Look for the free slot that we have found before. */ if (fl != fl2) continue; /* The subdisk starts at the beginning of the free slot. */ if (fl->offset == s->drive_offset) { fl->offset += s->size; fl->size -= s->size; /* The subdisk uses the whole slot, so remove it. */ if (fl->size == 0) { d->freelist_entries--; LIST_REMOVE(fl, freelist); } /* * The subdisk does not start at the beginning of the free * slot. */ } else { tmp = fl->offset + fl->size; fl->size = s->drive_offset - fl->offset; /* * The subdisk didn't use the complete rest of the free * slot, so we need to split it. */ if (s->drive_offset + s->size != tmp) { fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO); fl2->offset = s->drive_offset + s->size; fl2->size = tmp - fl2->offset; LIST_INSERT_AFTER(fl, fl2, freelist); d->freelist_entries++; } } break; } /* * This is the first subdisk on this drive, just insert it into the * list. */ if (LIST_EMPTY(&d->subdisks)) { LIST_INSERT_HEAD(&d->subdisks, s, from_drive); /* There are other subdisks, so insert this one in correct order. */ } else { LIST_FOREACH(s2, &d->subdisks, from_drive) { if (s->drive_offset < s2->drive_offset) { LIST_INSERT_BEFORE(s2, s, from_drive); break; } else if (LIST_NEXT(s2, from_drive) == NULL) { LIST_INSERT_AFTER(s2, s, from_drive); break; } } } d->sdcount++; d->avail -= s->size; s->flags &= ~GV_SD_TASTED; /* Link back from the subdisk to this drive. */ s->drive_sc = d; return (0); } void gv_free_sd(struct gv_sd *s) { struct gv_drive *d; struct gv_freelist *fl, *fl2; KASSERT(s != NULL, ("gv_free_sd: NULL s")); d = s->drive_sc; if (d == NULL) return; /* * First, find the free slot that's immediately before or after this * subdisk. */ fl = NULL; LIST_FOREACH(fl, &d->freelist, freelist) { if (fl->offset == s->drive_offset + s->size) break; if (fl->offset + fl->size == s->drive_offset) break; } /* If there is no free slot behind this subdisk, so create one. */ if (fl == NULL) { fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); fl->size = s->size; fl->offset = s->drive_offset; if (d->freelist_entries == 0) { LIST_INSERT_HEAD(&d->freelist, fl, freelist); } else { LIST_FOREACH(fl2, &d->freelist, freelist) { if (fl->offset < fl2->offset) { LIST_INSERT_BEFORE(fl2, fl, freelist); break; } else if (LIST_NEXT(fl2, freelist) == NULL) { LIST_INSERT_AFTER(fl2, fl, freelist); break; } } } d->freelist_entries++; /* Expand the free slot we just found. */ } else { fl->size += s->size; if (fl->offset > s->drive_offset) fl->offset = s->drive_offset; } d->avail += s->size; d->sdcount--; } void gv_adjust_freespace(struct gv_sd *s, off_t remainder) { struct gv_drive *d; struct gv_freelist *fl, *fl2; KASSERT(s != NULL, ("gv_adjust_freespace: NULL s")); d = s->drive_sc; KASSERT(d != NULL, ("gv_adjust_freespace: NULL d")); /* First, find the free slot that's immediately after this subdisk. */ fl = NULL; LIST_FOREACH(fl, &d->freelist, freelist) { if (fl->offset == s->drive_offset + s->size) break; } /* If there is no free slot behind this subdisk, so create one. */ if (fl == NULL) { fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); fl->size = remainder; fl->offset = s->drive_offset + s->size - remainder; if (d->freelist_entries == 0) { LIST_INSERT_HEAD(&d->freelist, fl, freelist); } else { LIST_FOREACH(fl2, &d->freelist, freelist) { if (fl->offset < fl2->offset) { LIST_INSERT_BEFORE(fl2, fl, freelist); break; } else if (LIST_NEXT(fl2, freelist) == NULL) { LIST_INSERT_AFTER(fl2, fl, freelist); break; } } } d->freelist_entries++; /* Expand the free slot we just found. */ } else { fl->offset -= remainder; fl->size += remainder; } s->size -= remainder; d->avail += remainder; } /* Check if the given plex is a striped one. */ int gv_is_striped(struct gv_plex *p) { KASSERT(p != NULL, ("gv_is_striped: NULL p")); switch(p->org) { case GV_PLEX_STRIPED: case GV_PLEX_RAID5: return (1); default: return (0); } } /* Find a volume by name. */ struct gv_volume * gv_find_vol(struct gv_softc *sc, char *name) { struct gv_volume *v; LIST_FOREACH(v, &sc->volumes, volume) { if (!strncmp(v->name, name, GV_MAXVOLNAME)) return (v); } return (NULL); } /* Find a plex by name. */ struct gv_plex * gv_find_plex(struct gv_softc *sc, char *name) { struct gv_plex *p; LIST_FOREACH(p, &sc->plexes, plex) { if (!strncmp(p->name, name, GV_MAXPLEXNAME)) return (p); } return (NULL); } /* Find a subdisk by name. */ struct gv_sd * gv_find_sd(struct gv_softc *sc, char *name) { struct gv_sd *s; LIST_FOREACH(s, &sc->subdisks, sd) { if (!strncmp(s->name, name, GV_MAXSDNAME)) return (s); } return (NULL); } /* Find a drive by name. */ struct gv_drive * gv_find_drive(struct gv_softc *sc, char *name) { struct gv_drive *d; LIST_FOREACH(d, &sc->drives, drive) { if (!strncmp(d->name, name, GV_MAXDRIVENAME)) return (d); } return (NULL); } /* Find a drive given a device. */ struct gv_drive * gv_find_drive_device(struct gv_softc *sc, char *device) { struct gv_drive *d; LIST_FOREACH(d, &sc->drives, drive) { if(!strcmp(d->device, device)) return (d); } return (NULL); } /* Check if any consumer of the given geom is open. */ int gv_consumer_is_open(struct g_consumer *cp) { if (cp == NULL) return (0); if (cp->acr || cp->acw || cp->ace) return (1); return (0); } int gv_provider_is_open(struct g_provider *pp) { if (pp == NULL) return (0); if (pp->acr || pp->acw || pp->ace) return (1); return (0); } /* * Compare the modification dates of the drives. * Return 1 if a > b, 0 otherwise. */ int gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d) { struct gv_drive *d2; struct timeval *a, *b; KASSERT(!LIST_EMPTY(&sc->drives), ("gv_is_drive_newer: empty drive list")); a = &d->hdr->label.last_update; LIST_FOREACH(d2, &sc->drives, drive) { if ((d == d2) || (d2->state != GV_DRIVE_UP) || (d2->hdr == NULL)) continue; b = &d2->hdr->label.last_update; if (timevalcmp(a, b, >)) return (1); } return (0); } /* Return the type of object identified by string 'name'. */ int gv_object_type(struct gv_softc *sc, char *name) { struct gv_drive *d; struct gv_plex *p; struct gv_sd *s; struct gv_volume *v; LIST_FOREACH(v, &sc->volumes, volume) { if (!strncmp(v->name, name, GV_MAXVOLNAME)) return (GV_TYPE_VOL); } LIST_FOREACH(p, &sc->plexes, plex) { if (!strncmp(p->name, name, GV_MAXPLEXNAME)) return (GV_TYPE_PLEX); } LIST_FOREACH(s, &sc->subdisks, sd) { if (!strncmp(s->name, name, GV_MAXSDNAME)) return (GV_TYPE_SD); } LIST_FOREACH(d, &sc->drives, drive) { if (!strncmp(d->name, name, GV_MAXDRIVENAME)) return (GV_TYPE_DRIVE); } return (GV_ERR_NOTFOUND); } void gv_setup_objects(struct gv_softc *sc) { struct g_provider *pp; struct gv_volume *v; struct gv_plex *p; struct gv_sd *s; struct gv_drive *d; LIST_FOREACH(s, &sc->subdisks, sd) { d = gv_find_drive(sc, s->drive); if (d != NULL) gv_sd_to_drive(s, d); p = gv_find_plex(sc, s->plex); if (p != NULL) gv_sd_to_plex(s, p); gv_update_sd_state(s); } LIST_FOREACH(p, &sc->plexes, plex) { gv_update_plex_config(p); v = gv_find_vol(sc, p->volume); if (v != NULL && p->vol_sc != v) { p->vol_sc = v; v->plexcount++; LIST_INSERT_HEAD(&v->plexes, p, in_volume); } gv_update_plex_config(p); } LIST_FOREACH(v, &sc->volumes, volume) { v->size = gv_vol_size(v); if (v->provider == NULL) { g_topology_lock(); pp = g_new_providerf(sc->geom, "gvinum/%s", v->name); pp->mediasize = v->size; pp->sectorsize = 512; /* XXX */ g_error_provider(pp, 0); v->provider = pp; pp->private = v; g_topology_unlock(); } else if (v->provider->mediasize != v->size) { g_topology_lock(); v->provider->mediasize = v->size; g_topology_unlock(); } v->flags &= ~GV_VOL_NEWBORN; gv_update_vol_state(v); } } void gv_cleanup(struct gv_softc *sc) { struct gv_volume *v, *v2; struct gv_plex *p, *p2; struct gv_sd *s, *s2; struct gv_drive *d, *d2; struct gv_freelist *fl, *fl2; mtx_lock(&sc->config_mtx); LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) { LIST_REMOVE(v, volume); g_free(v->wqueue); g_free(v); } LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) { LIST_REMOVE(p, plex); g_free(p->bqueue); g_free(p->rqueue); g_free(p->wqueue); g_free(p); } LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) { LIST_REMOVE(s, sd); g_free(s); } LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) { LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) { LIST_REMOVE(fl, freelist); g_free(fl); } LIST_REMOVE(d, drive); g_free(d->hdr); g_free(d); } mtx_destroy(&sc->config_mtx); } /* General 'attach' routine. */ int gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename) { struct gv_sd *s; struct gv_softc *sc __diagused; g_topology_assert(); sc = p->vinumconf; KASSERT(sc != NULL, ("NULL sc")); if (p->vol_sc != NULL) { G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s", p->name, p->volume); return (GV_ERR_ISATTACHED); } /* Stale all subdisks of this plex. */ LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->state != GV_SD_STALE) gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); } /* Attach to volume. Make sure volume is not up and running. */ if (gv_provider_is_open(v->provider)) { G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy", p->name, v->name); return (GV_ERR_ISBUSY); } p->vol_sc = v; strlcpy(p->volume, v->name, sizeof(p->volume)); v->plexcount++; if (rename) { snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, v->plexcount); } LIST_INSERT_HEAD(&v->plexes, p, in_volume); /* Get plex up again. */ gv_update_vol_size(v, gv_vol_size(v)); gv_set_plex_state(p, GV_PLEX_UP, 0); gv_save_config(p->vinumconf); return (0); } int gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename) { struct gv_sd *s2; - int error, sdcount; + int error; g_topology_assert(); /* If subdisk is attached, don't do it. */ if (s->plex_sc != NULL) { G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s", s->name, s->plex); return (GV_ERR_ISATTACHED); } gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); /* First check that this subdisk has a correct offset. If none other * starts at the same, and it's correct module stripesize, it is */ if (offset != -1 && offset % p->stripesize != 0) return (GV_ERR_BADOFFSET); LIST_FOREACH(s2, &p->subdisks, in_plex) { if (s2->plex_offset == offset) return (GV_ERR_BADOFFSET); } /* Attach the subdisk to the plex at given offset. */ s->plex_offset = offset; strlcpy(s->plex, p->name, sizeof(s->plex)); - sdcount = p->sdcount; error = gv_sd_to_plex(s, p); if (error) return (error); gv_update_plex_config(p); if (rename) { snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex, p->sdcount); } if (p->vol_sc != NULL) gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc)); gv_save_config(p->vinumconf); /* We don't update the subdisk state since the user might have to * initiate a rebuild/sync first. */ return (0); } /* Detach a plex from a volume. */ int gv_detach_plex(struct gv_plex *p, int flags) { struct gv_volume *v; g_topology_assert(); v = p->vol_sc; if (v == NULL) { G_VINUM_DEBUG(1, "unable to detach %s: already detached", p->name); return (0); /* Not an error. */ } /* * Only proceed if forced or volume inactive. */ if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) || p->state == GV_PLEX_UP)) { G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy", p->name, p->volume); return (GV_ERR_ISBUSY); } v->plexcount--; /* Make sure someone don't read us when gone. */ v->last_read_plex = NULL; LIST_REMOVE(p, in_volume); p->vol_sc = NULL; memset(p->volume, 0, GV_MAXVOLNAME); gv_update_vol_size(v, gv_vol_size(v)); gv_save_config(p->vinumconf); return (0); } /* Detach a subdisk from a plex. */ int gv_detach_sd(struct gv_sd *s, int flags) { struct gv_plex *p; g_topology_assert(); p = s->plex_sc; if (p == NULL) { G_VINUM_DEBUG(1, "unable to detach %s: already detached", s->name); return (0); /* Not an error. */ } /* * Don't proceed if we're not forcing, and the plex is up, or degraded * with this subdisk up. */ if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) || ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) { G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy", s->name, s->plex); return (GV_ERR_ISBUSY); } LIST_REMOVE(s, in_plex); s->plex_sc = NULL; memset(s->plex, 0, GV_MAXPLEXNAME); p->sddetached++; gv_save_config(s->vinumconf); return (0); }