Index: head/sys/geom/geom.h =================================================================== --- head/sys/geom/geom.h (revision 339814) +++ head/sys/geom/geom.h (revision 339815) @@ -1,435 +1,435 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _GEOM_GEOM_H_ #define _GEOM_GEOM_H_ #include #include #include #include #include #include #include struct g_class; struct g_geom; struct g_consumer; struct g_provider; struct g_stat; struct thread; struct bio; struct sbuf; struct gctl_req; struct g_configargs; struct disk_zone_args; typedef int g_config_t (struct g_configargs *ca); typedef void g_ctl_req_t (struct gctl_req *, struct g_class *cp, char const *verb); typedef int g_ctl_create_geom_t (struct gctl_req *, struct g_class *cp, struct g_provider *pp); typedef int g_ctl_destroy_geom_t (struct gctl_req *, struct g_class *cp, struct g_geom *gp); typedef int g_ctl_config_geom_t (struct gctl_req *, struct g_geom *gp, const char *verb); typedef void g_init_t (struct g_class *mp); typedef void g_fini_t (struct g_class *mp); typedef struct g_geom * g_taste_t (struct g_class *, struct g_provider *, int flags); typedef int g_ioctl_t(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td); #define G_TF_NORMAL 0 #define G_TF_INSIST 1 #define G_TF_TRANSPARENT 2 typedef int g_access_t (struct g_provider *, int, int, int); /* XXX: not sure about the thread arg */ typedef void g_orphan_t (struct g_consumer *); typedef void g_start_t (struct bio *); typedef void g_spoiled_t (struct g_consumer *); typedef void g_attrchanged_t (struct g_consumer *, const char *attr); typedef void g_provgone_t (struct g_provider *); typedef void g_dumpconf_t (struct sbuf *, const char 
*indent, struct g_geom *, struct g_consumer *, struct g_provider *); typedef void g_resize_t(struct g_consumer *cp); /* * The g_class structure describes a transformation class. In other words * all BSD disklabel handlers share one g_class, all MBR handlers share * one common g_class and so on. * Certain operations are instantiated on the class, most notably the * taste and config_geom functions. */ struct g_class { const char *name; u_int version; u_int spare0; g_taste_t *taste; g_config_t *config; g_ctl_req_t *ctlreq; g_init_t *init; g_fini_t *fini; g_ctl_destroy_geom_t *destroy_geom; /* * Default values for geom methods */ g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare1; void *spare2; /* * The remaining elements are private */ LIST_ENTRY(g_class) class; LIST_HEAD(,g_geom) geom; }; /* * The g_geom_alias is a list node for aliases for the geom name * for device node creation. */ struct g_geom_alias { LIST_ENTRY(g_geom_alias) ga_next; const char *ga_alias; }; #define G_VERSION_00 0x19950323 #define G_VERSION_01 0x20041207 /* add fflag to g_ioctl_t */ #define G_VERSION G_VERSION_01 /* * The g_geom is an instance of a g_class. */ struct g_geom { char *name; struct g_class *class; LIST_ENTRY(g_geom) geom; LIST_HEAD(,g_consumer) consumer; LIST_HEAD(,g_provider) provider; TAILQ_ENTRY(g_geom) geoms; /* XXX: better name */ int rank; g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare0; void *spare1; void *softc; unsigned flags; #define G_GEOM_WITHER 0x01 #define G_GEOM_VOLATILE_BIO 0x02 #define G_GEOM_IN_ACCESS 0x04 #define G_GEOM_ACCESS_WAIT 0x08 LIST_HEAD(,g_geom_alias) aliases; }; /* * The g_bioq is a queue of struct bio's. 
* XXX: possibly collection point for statistics. * XXX: should (possibly) be collapsed with sys/bio.h::bio_queue_head. */ struct g_bioq { TAILQ_HEAD(, bio) bio_queue; struct mtx bio_queue_lock; int bio_queue_length; }; /* * A g_consumer is an attachment point for a g_provider. One g_consumer * can only be attached to one g_provider, but multiple g_consumers * can be attached to one g_provider. */ struct g_consumer { struct g_geom *geom; LIST_ENTRY(g_consumer) consumer; struct g_provider *provider; LIST_ENTRY(g_consumer) consumers; /* XXX: better name */ int acr, acw, ace; int flags; #define G_CF_SPOILED 0x1 #define G_CF_ORPHAN 0x4 #define G_CF_DIRECT_SEND 0x10 #define G_CF_DIRECT_RECEIVE 0x20 struct devstat *stat; u_int nstart, nend; /* Two fields for the implementing class to use */ void *private; u_int index; }; /* * A g_provider is a "logical disk". */ struct g_provider { char *name; LIST_ENTRY(g_provider) provider; struct g_geom *geom; LIST_HEAD(,g_consumer) consumers; int acr, acw, ace; int error; TAILQ_ENTRY(g_provider) orphan; off_t mediasize; u_int sectorsize; - u_int stripesize; - u_int stripeoffset; + off_t stripesize; + off_t stripeoffset; struct devstat *stat; u_int nstart, nend; u_int flags; #define G_PF_WITHER 0x2 #define G_PF_ORPHAN 0x4 #define G_PF_ACCEPT_UNMAPPED 0x8 #define G_PF_DIRECT_SEND 0x10 #define G_PF_DIRECT_RECEIVE 0x20 /* Two fields for the implementing class to use */ void *private; u_int index; }; /* * Descriptor of a classifier. We can register a function and * an argument, which is called by g_io_request() on bio's * that are not previously classified. */ struct g_classifier_hook { TAILQ_ENTRY(g_classifier_hook) link; int (*func)(void *arg, struct bio *bp); void *arg; }; /* BIO_GETATTR("GEOM::setstate") argument values. 
*/ #define G_STATE_FAILED 0 #define G_STATE_REBUILD 1 #define G_STATE_RESYNC 2 #define G_STATE_ACTIVE 3 /* geom_dev.c */ struct cdev; void g_dev_print(void); void g_dev_physpath_changed(void); struct g_provider *g_dev_getprovider(struct cdev *dev); /* geom_dump.c */ void g_trace(int level, const char *, ...); # define G_T_TOPOLOGY 1 # define G_T_BIO 2 # define G_T_ACCESS 4 /* geom_event.c */ typedef void g_event_t(void *, int flag); #define EV_CANCEL 1 int g_post_event(g_event_t *func, void *arg, int flag, ...); int g_waitfor_event(g_event_t *func, void *arg, int flag, ...); void g_cancel_event(void *ref); int g_attr_changed(struct g_provider *pp, const char *attr, int flag); int g_media_changed(struct g_provider *pp, int flag); int g_media_gone(struct g_provider *pp, int flag); void g_orphan_provider(struct g_provider *pp, int error); void g_waitidlelock(void); /* geom_subr.c */ int g_access(struct g_consumer *cp, int nread, int nwrite, int nexcl); int g_attach(struct g_consumer *cp, struct g_provider *pp); int g_compare_names(const char *namea, const char *nameb); void g_destroy_consumer(struct g_consumer *cp); void g_destroy_geom(struct g_geom *pp); void g_destroy_provider(struct g_provider *pp); void g_detach(struct g_consumer *cp); void g_error_provider(struct g_provider *pp, int error); struct g_provider *g_provider_by_name(char const *arg); void g_geom_add_alias(struct g_geom *gp, const char *alias); int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len); #define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof *(v)) int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len); int g_handleattr_int(struct bio *bp, const char *attribute, int val); int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val); int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val); int g_handleattr_str(struct bio *bp, const char *attribute, const char *str); struct g_consumer * 
g_new_consumer(struct g_geom *gp); struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) __printflike(2, 3); struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) __printflike(2, 3); void g_resize_provider(struct g_provider *pp, off_t size); int g_retaste(struct g_class *mp); void g_spoil(struct g_provider *pp, struct g_consumer *cp); int g_std_access(struct g_provider *pp, int dr, int dw, int de); void g_std_done(struct bio *bp); void g_std_spoiled(struct g_consumer *cp); void g_wither_geom(struct g_geom *gp, int error); void g_wither_geom_close(struct g_geom *gp, int error); void g_wither_provider(struct g_provider *pp, int error); #if defined(DIAGNOSTIC) || defined(DDB) int g_valid_obj(void const *ptr); #endif #ifdef DIAGNOSTIC #define G_VALID_CLASS(foo) \ KASSERT(g_valid_obj(foo) == 1, ("%p is not a g_class", foo)) #define G_VALID_GEOM(foo) \ KASSERT(g_valid_obj(foo) == 2, ("%p is not a g_geom", foo)) #define G_VALID_CONSUMER(foo) \ KASSERT(g_valid_obj(foo) == 3, ("%p is not a g_consumer", foo)) #define G_VALID_PROVIDER(foo) \ KASSERT(g_valid_obj(foo) == 4, ("%p is not a g_provider", foo)) #else #define G_VALID_CLASS(foo) do { } while (0) #define G_VALID_GEOM(foo) do { } while (0) #define G_VALID_CONSUMER(foo) do { } while (0) #define G_VALID_PROVIDER(foo) do { } while (0) #endif int g_modevent(module_t, int, void *); /* geom_io.c */ struct bio * g_clone_bio(struct bio *); struct bio * g_duplicate_bio(struct bio *); void g_destroy_bio(struct bio *); void g_io_deliver(struct bio *bp, int error); int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr); int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp); int g_io_flush(struct g_consumer *cp); int g_register_classifier(struct g_classifier_hook *hook); void g_unregister_classifier(struct g_classifier_hook *hook); void g_io_request(struct bio *bp, struct g_consumer *cp); struct bio *g_new_bio(void); struct bio *g_alloc_bio(void); 
void g_reset_bio(struct bio *); void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error); int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length); int g_delete_data(struct g_consumer *cp, off_t offset, off_t length); void g_print_bio(struct bio *bp); int g_use_g_read_data(void *, off_t, void **, int); int g_use_g_write_data(void *, off_t, void *, int); /* geom_kern.c / geom_kernsim.c */ #ifdef _KERNEL extern struct sx topology_lock; struct g_kerneldump { off_t offset; off_t length; struct dumperinfo di; }; MALLOC_DECLARE(M_GEOM); static __inline void * g_malloc(int size, int flags) { void *p; p = malloc(size, M_GEOM, flags); return (p); } static __inline void g_free(void *ptr) { #ifdef DIAGNOSTIC if (sx_xlocked(&topology_lock)) { KASSERT(g_valid_obj(ptr) == 0, ("g_free(%p) of live object, type %d", ptr, g_valid_obj(ptr))); } #endif free(ptr, M_GEOM); } #define g_topology_lock() \ do { \ sx_xlock(&topology_lock); \ } while (0) #define g_topology_try_lock() sx_try_xlock(&topology_lock) #define g_topology_unlock() \ do { \ sx_xunlock(&topology_lock); \ } while (0) #define g_topology_assert() \ do { \ sx_assert(&topology_lock, SX_XLOCKED); \ } while (0) #define g_topology_assert_not() \ do { \ sx_assert(&topology_lock, SX_UNLOCKED); \ } while (0) #define g_topology_sleep(chan, timo) \ sx_sleep(chan, &topology_lock, 0, "gtopol", timo) #define DECLARE_GEOM_CLASS(class, name) \ static moduledata_t name##_mod = { \ #name, g_modevent, &class \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); int g_is_geom_thread(struct thread *td); #endif /* _KERNEL */ /* geom_ctl.c */ int gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len); void gctl_set_param_err(struct gctl_req *req, const char *param, void const *ptr, int len); void *gctl_get_param(struct gctl_req *req, const char *param, int *len); char const *gctl_get_asciiparam(struct gctl_req *req, const char *param); void 
*gctl_get_paraml(struct gctl_req *req, const char *param, int len); int gctl_error(struct gctl_req *req, const char *fmt, ...) __printflike(2, 3); struct g_class *gctl_get_class(struct gctl_req *req, char const *arg); struct g_geom *gctl_get_geom(struct gctl_req *req, struct g_class *mpr, char const *arg); struct g_provider *gctl_get_provider(struct gctl_req *req, char const *arg); #endif /* _GEOM_GEOM_H_ */ Index: head/sys/geom/geom_disk.h =================================================================== --- head/sys/geom/geom_disk.h (revision 339814) +++ head/sys/geom/geom_disk.h (revision 339815) @@ -1,157 +1,158 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2003 Poul-Henning Kamp * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _GEOM_GEOM_DISK_H_ #define _GEOM_GEOM_DISK_H_ #define DISK_RR_UNKNOWN 0 #define DISK_RR_NON_ROTATING 1 #define DISK_RR_MIN 0x0401 #define DISK_RR_MAX 0xfffe #ifdef _KERNEL #include #include #include #include #define G_DISK_CLASS_NAME "DISK" struct disk; typedef int disk_open_t(struct disk *); typedef int disk_close_t(struct disk *); typedef void disk_strategy_t(struct bio *bp); typedef int disk_getattr_t(struct bio *bp); typedef void disk_gone_t(struct disk *); typedef int disk_ioctl_t(struct disk *, u_long cmd, void *data, int fflag, struct thread *td); /* NB: disk_ioctl_t SHALL be cast'able to d_ioctl_t */ struct g_geom; struct devstat; typedef enum { DISK_INIT_NONE, DISK_INIT_START, DISK_INIT_DONE } disk_init_level; struct disk_alias { LIST_ENTRY(disk_alias) da_next; const char *da_alias; }; struct disk { /* Fields which are private to geom_disk */ struct g_geom *d_geom; struct devstat *d_devstat; int d_goneflag; int d_destroyed; disk_init_level d_init_level; /* Shared fields */ u_int d_flags; const char *d_name; u_int d_unit; struct bio_queue_head *d_queue; struct mtx *d_lock; /* Disk methods */ disk_open_t *d_open; disk_close_t *d_close; disk_strategy_t *d_strategy; disk_ioctl_t *d_ioctl; dumper_t *d_dump; disk_getattr_t *d_getattr; disk_gone_t *d_gone; /* Info fields from driver to geom_disk.c. 
Valid when open */ u_int d_sectorsize; off_t d_mediasize; u_int d_fwsectors; u_int d_fwheads; u_int d_maxsize; off_t d_delmaxsize; - u_int d_stripeoffset; - u_int d_stripesize; + off_t d_stripeoffset; + off_t d_stripesize; char d_ident[DISK_IDENT_SIZE]; char d_descr[DISK_IDENT_SIZE]; uint16_t d_hba_vendor; uint16_t d_hba_device; uint16_t d_hba_subvendor; uint16_t d_hba_subdevice; uint16_t d_rotation_rate; /* Fields private to the driver */ void *d_drv1; /* Fields private to geom_disk, to be moved on next version bump */ LIST_HEAD(,disk_alias) d_aliases; }; #define DISKFLAG_RESERVED 0x0001 /* Was NEEDSGIANT */ #define DISKFLAG_OPEN 0x0002 #define DISKFLAG_CANDELETE 0x0004 #define DISKFLAG_CANFLUSHCACHE 0x0008 #define DISKFLAG_UNMAPPED_BIO 0x0010 #define DISKFLAG_DIRECT_COMPLETION 0x0020 #define DISKFLAG_CANZONE 0x0080 #define DISKFLAG_WRITE_PROTECT 0x0100 struct disk *disk_alloc(void); void disk_create(struct disk *disk, int version); void disk_destroy(struct disk *disk); void disk_gone(struct disk *disk); void disk_attr_changed(struct disk *dp, const char *attr, int flag); void disk_media_changed(struct disk *dp, int flag); void disk_media_gone(struct disk *dp, int flag); int disk_resize(struct disk *dp, int flag); void disk_add_alias(struct disk *disk, const char *); #define DISK_VERSION_00 0x58561059 #define DISK_VERSION_01 0x5856105a #define DISK_VERSION_02 0x5856105b #define DISK_VERSION_03 0x5856105c #define DISK_VERSION_04 0x5856105d #define DISK_VERSION_05 0x5856105e -#define DISK_VERSION DISK_VERSION_05 +#define DISK_VERSION_06 0x5856105f +#define DISK_VERSION DISK_VERSION_06 #endif /* _KERNEL */ #endif /* _GEOM_GEOM_DISK_H_ */ Index: head/sys/geom/geom_dump.c =================================================================== --- head/sys/geom/geom_dump.c (revision 339814) +++ head/sys/geom/geom_dump.c (revision 339815) @@ -1,326 +1,326 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks 
Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include static void g_confdot_consumer(struct sbuf *sb, struct g_consumer *cp) { sbuf_printf(sb, "z%p [label=\"r%dw%de%d\"];\n", cp, cp->acr, cp->acw, cp->ace); if (cp->provider) sbuf_printf(sb, "z%p -> z%p;\n", cp, cp->provider); } static void g_confdot_provider(struct sbuf *sb, struct g_provider *pp) { sbuf_printf(sb, "z%p [shape=hexagon,label=\"%s\\nr%dw%de%d\\nerr#%d\\n" - "sector=%u\\nstripe=%u\"];\n", pp, pp->name, pp->acr, pp->acw, - pp->ace, pp->error, pp->sectorsize, pp->stripesize); + "sector=%u\\nstripe=%ju\"];\n", pp, pp->name, pp->acr, pp->acw, + pp->ace, pp->error, pp->sectorsize, (uintmax_t)pp->stripesize); } static void g_confdot_geom(struct sbuf *sb, struct g_geom *gp) { struct g_consumer *cp; struct g_provider *pp; sbuf_printf(sb, "z%p [shape=box,label=\"%s\\n%s\\nr#%d\"];\n", gp, gp->class->name, gp->name, gp->rank); LIST_FOREACH(cp, &gp->consumer, consumer) { g_confdot_consumer(sb, cp); sbuf_printf(sb, "z%p -> z%p;\n", gp, cp); } LIST_FOREACH(pp, &gp->provider, provider) { g_confdot_provider(sb, pp); sbuf_printf(sb, "z%p -> z%p;\n", pp, gp); } } static void g_confdot_class(struct sbuf *sb, struct g_class *mp) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) g_confdot_geom(sb, gp); } void g_confdot(void *p, int flag ) { struct g_class *mp; struct sbuf *sb; KASSERT(flag != EV_CANCEL, ("g_confdot was cancelled")); sb = p; g_topology_assert(); sbuf_printf(sb, "digraph geom {\n"); LIST_FOREACH(mp, &g_classes, class) g_confdot_class(sb, mp); sbuf_printf(sb, "}\n"); sbuf_finish(sb); } static void g_conftxt_geom(struct sbuf *sb, struct g_geom *gp, int level) { struct g_provider *pp; struct g_consumer *cp; if (gp->flags & G_GEOM_WITHER) return; LIST_FOREACH(pp, &gp->provider, provider) { sbuf_printf(sb, "%d %s %s %ju %u", level, gp->class->name, pp->name, (uintmax_t)pp->mediasize, pp->sectorsize); if (gp->dumpconf != NULL) gp->dumpconf(sb, NULL, gp, 
NULL, pp); sbuf_printf(sb, "\n"); LIST_FOREACH(cp, &pp->consumers, consumers) g_conftxt_geom(sb, cp->geom, level + 1); } } static void g_conftxt_class(struct sbuf *sb, struct g_class *mp) { struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) g_conftxt_geom(sb, gp, 0); } void g_conftxt(void *p, int flag) { struct g_class *mp; struct sbuf *sb; KASSERT(flag != EV_CANCEL, ("g_conftxt was cancelled")); sb = p; g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { if (!strcmp(mp->name, G_DISK_CLASS_NAME) || !strcmp(mp->name, "MD")) g_conftxt_class(sb, mp); } sbuf_finish(sb); } void g_conf_printf_escaped(struct sbuf *sb, const char *fmt, ...) { struct sbuf *s; const u_char *c; va_list ap; s = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(s, fmt, ap); va_end(ap); sbuf_finish(s); for (c = sbuf_data(s); *c != '\0'; c++) { if (*c == '&' || *c == '<' || *c == '>' || *c == '\'' || *c == '"' || *c > 0x7e) sbuf_printf(sb, "&#x%X;", *c); else if (*c == '\t' || *c == '\n' || *c == '\r' || *c > 0x1f) sbuf_putc(sb, *c); else sbuf_putc(sb, '?'); } sbuf_delete(s); } static void g_conf_consumer(struct sbuf *sb, struct g_consumer *cp) { sbuf_printf(sb, "\t\n", cp); sbuf_printf(sb, "\t \n", cp->geom); if (cp->provider != NULL) sbuf_printf(sb, "\t \n", cp->provider); sbuf_printf(sb, "\t r%dw%de%d\n", cp->acr, cp->acw, cp->ace); if (cp->geom->flags & G_GEOM_WITHER) ; else if (cp->geom->dumpconf != NULL) { sbuf_printf(sb, "\t \n"); cp->geom->dumpconf(sb, "\t ", cp->geom, cp, NULL); sbuf_printf(sb, "\t \n"); } sbuf_printf(sb, "\t\n"); } static void g_conf_provider(struct sbuf *sb, struct g_provider *pp) { sbuf_printf(sb, "\t\n", pp); sbuf_printf(sb, "\t \n", pp->geom); sbuf_printf(sb, "\t r%dw%de%d\n", pp->acr, pp->acw, pp->ace); sbuf_printf(sb, "\t "); g_conf_printf_escaped(sb, "%s", pp->name); sbuf_printf(sb, "\n"); sbuf_printf(sb, "\t %jd\n", (intmax_t)pp->mediasize); sbuf_printf(sb, "\t %u\n", pp->sectorsize); - sbuf_printf(sb, "\t %u\n", pp->stripesize); - sbuf_printf(sb, "\t 
%u\n", pp->stripeoffset); + sbuf_printf(sb, "\t %ju\n", (uintmax_t)pp->stripesize); + sbuf_printf(sb, "\t %ju\n", (uintmax_t)pp->stripeoffset); if (pp->flags & G_PF_WITHER) sbuf_printf(sb, "\t \n"); else if (pp->geom->flags & G_GEOM_WITHER) ; else if (pp->geom->dumpconf != NULL) { sbuf_printf(sb, "\t \n"); pp->geom->dumpconf(sb, "\t ", pp->geom, NULL, pp); sbuf_printf(sb, "\t \n"); } sbuf_printf(sb, "\t\n"); } static void g_conf_geom(struct sbuf *sb, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp) { struct g_consumer *cp2; struct g_provider *pp2; struct g_geom_alias *gap; sbuf_printf(sb, " \n", gp); sbuf_printf(sb, " \n", gp->class); sbuf_printf(sb, " "); g_conf_printf_escaped(sb, "%s", gp->name); sbuf_printf(sb, "\n"); sbuf_printf(sb, " %d\n", gp->rank); if (gp->flags & G_GEOM_WITHER) sbuf_printf(sb, " \n"); else if (gp->dumpconf != NULL) { sbuf_printf(sb, " \n"); gp->dumpconf(sb, "\t", gp, NULL, NULL); sbuf_printf(sb, " \n"); } LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp != NULL && cp != cp2) continue; g_conf_consumer(sb, cp2); } LIST_FOREACH(pp2, &gp->provider, provider) { if (pp != NULL && pp != pp2) continue; g_conf_provider(sb, pp2); } LIST_FOREACH(gap, &gp->aliases, ga_next) { sbuf_printf(sb, " \n"); g_conf_printf_escaped(sb, "%s", gap->ga_alias); sbuf_printf(sb, " \n"); } sbuf_printf(sb, " \n"); } static void g_conf_class(struct sbuf *sb, struct g_class *mp, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp) { struct g_geom *gp2; sbuf_printf(sb, " \n", mp); sbuf_printf(sb, " "); g_conf_printf_escaped(sb, "%s", mp->name); sbuf_printf(sb, "\n"); LIST_FOREACH(gp2, &mp->geom, geom) { if (gp != NULL && gp != gp2) continue; g_conf_geom(sb, gp2, pp, cp); } sbuf_printf(sb, " \n"); } void g_conf_specific(struct sbuf *sb, struct g_class *mp, struct g_geom *gp, struct g_provider *pp, struct g_consumer *cp) { struct g_class *mp2; g_topology_assert(); sbuf_printf(sb, "\n"); LIST_FOREACH(mp2, &g_classes, class) { if (mp != NULL && 
mp != mp2) continue; g_conf_class(sb, mp2, gp, pp, cp); } sbuf_printf(sb, "\n"); sbuf_finish(sb); } void g_confxml(void *p, int flag) { KASSERT(flag != EV_CANCEL, ("g_confxml was cancelled")); g_topology_assert(); g_conf_specific(p, NULL, NULL, NULL, NULL); } void g_trace(int level, const char *fmt, ...) { va_list ap; if (!(g_debugflags & level)) return; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); } Index: head/sys/geom/geom_redboot.c =================================================================== --- head/sys/geom/geom_redboot.c (revision 339814) +++ head/sys/geom/geom_redboot.c (revision 339815) @@ -1,360 +1,360 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define REDBOOT_CLASS_NAME "REDBOOT" struct fis_image_desc { uint8_t name[16]; /* null-terminated name */ uint32_t offset; /* offset in flash */ uint32_t addr; /* address in memory */ uint32_t size; /* image size in bytes */ uint32_t entry; /* offset in image for entry point */ uint32_t dsize; /* data size in bytes */ uint8_t pad[256-(16+7*sizeof(uint32_t)+sizeof(void*))]; struct fis_image_desc *next; /* linked list (in memory) */ uint32_t dsum; /* descriptor checksum */ uint32_t fsum; /* checksum over image data */ }; #define FISDIR_NAME "FIS directory" #define REDBCFG_NAME "RedBoot config" #define REDBOOT_NAME "RedBoot" #define REDBOOT_MAXSLICE 64 #define REDBOOT_MAXOFF \ (REDBOOT_MAXSLICE*sizeof(struct fis_image_desc)) struct g_redboot_softc { uint32_t entry[REDBOOT_MAXSLICE]; uint32_t dsize[REDBOOT_MAXSLICE]; uint8_t readonly[REDBOOT_MAXSLICE]; g_access_t *parent_access; }; static void g_redboot_print(int i, struct fis_image_desc *fd) { printf("[%2d] \"%-15.15s\" %08x:%08x", i, fd->name, fd->offset, fd->size); printf(" addr %08x entry %08x\n", fd->addr, fd->entry); printf(" dsize 0x%x dsum 0x%x fsum 0x%x\n", fd->dsize, fd->dsum, fd->fsum); } static int g_redboot_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { return (ENOIOCTL); } static int g_redboot_access(struct g_provider *pp, 
int dread, int dwrite, int dexcl) { struct g_geom *gp = pp->geom; struct g_slicer *gsp = gp->softc; struct g_redboot_softc *sc = gsp->softc; if (dwrite > 0 && sc->readonly[pp->index]) return (EPERM); return (sc->parent_access(pp, dread, dwrite, dexcl)); } static int g_redboot_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_redboot_softc *sc; struct g_slicer *gsp; int idx; pp = bp->bio_to; idx = pp->index; gp = pp->geom; gsp = gp->softc; sc = gsp->softc; if (bp->bio_cmd == BIO_GETATTR) { if (g_handleattr_int(bp, REDBOOT_CLASS_NAME "::entry", sc->entry[idx])) return (1); if (g_handleattr_int(bp, REDBOOT_CLASS_NAME "::dsize", sc->dsize[idx])) return (1); } return (0); } static void g_redboot_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct g_redboot_softc *sc; struct g_slicer *gsp; gsp = gp->softc; sc = gsp->softc; g_slice_dumpconf(sb, indent, gp, cp, pp); if (pp != NULL) { if (indent == NULL) { sbuf_printf(sb, " entry %d", sc->entry[pp->index]); sbuf_printf(sb, " dsize %d", sc->dsize[pp->index]); } else { sbuf_printf(sb, "%s%d\n", indent, sc->entry[pp->index]); sbuf_printf(sb, "%s%d\n", indent, sc->dsize[pp->index]); } } } #include static int nameok(const char name[16]) { int i; /* descriptor names are null-terminated printable ascii */ for (i = 0; i < 15; i++) if (!isprint(name[i])) break; return (name[i] == '\0'); } static struct fis_image_desc * parse_fis_directory(u_char *buf, size_t bufsize, off_t offset, uint32_t offmask) { #define match(a,b) (bcmp(a, b, sizeof(b)-1) == 0) struct fis_image_desc *fd, *efd; struct fis_image_desc *fisdir, *redbcfg; struct fis_image_desc *head, **tail; int i; fd = (struct fis_image_desc *)buf; efd = fd + (bufsize / sizeof(struct fis_image_desc)); #if 0 /* * Find the start of the FIS table. 
*/ while (fd < efd && fd->name[0] != 0xff) fd++; if (fd == efd) return (NULL); if (bootverbose) printf("RedBoot FIS table starts at 0x%jx\n", offset + fd - (struct fis_image_desc *) buf); #endif /* * Scan forward collecting entries in a list. */ fisdir = redbcfg = NULL; *(tail = &head) = NULL; for (i = 0; fd < efd; i++, fd++) { if (fd->name[0] == 0xff) continue; if (match(fd->name, FISDIR_NAME)) fisdir = fd; else if (match(fd->name, REDBCFG_NAME)) redbcfg = fd; if (nameok(fd->name)) { /* * NB: flash address includes platform mapping; * strip it so we have only a flash offset. */ fd->offset &= offmask; if (bootverbose) g_redboot_print(i, fd); *tail = fd; *(tail = &fd->next) = NULL; } } if (fisdir == NULL) { if (bootverbose) printf("No RedBoot FIS table located at %lu\n", (long) offset); return (NULL); } if (redbcfg != NULL && fisdir->offset + fisdir->size == redbcfg->offset) { /* * Merged FIS/RedBoot config directory. */ if (bootverbose) printf("FIS/RedBoot merged at 0x%jx (not yet)\n", offset + fisdir->offset); /* XXX */ } return head; #undef match } static struct g_geom * g_redboot_taste(struct g_class *mp, struct g_provider *pp, int insist) { struct g_geom *gp; struct g_consumer *cp; struct g_redboot_softc *sc; int error, sectorsize, i; struct fis_image_desc *fd, *head; uint32_t offmask; - u_int blksize; /* NB: flash block size stored as stripesize */ + off_t blksize; /* NB: flash block size stored as stripesize */ u_char *buf; off_t offset; const char *value; char *op; offset = 0; if (resource_string_value("redboot", 0, "fisoffset", &value) == 0) { offset = strtouq(value, &op, 0); if (*op != '\0') { offset = 0; } } g_trace(G_T_TOPOLOGY, "redboot_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); if (!strcmp(pp->geom->class->name, REDBOOT_CLASS_NAME)) return (NULL); /* XXX only taste flash providers */ if (strncmp(pp->name, "cfi", 3) && strncmp(pp->name, "flash/spi", 9)) return (NULL); gp = g_slice_new(mp, REDBOOT_MAXSLICE, pp, &cp, &sc, sizeof(*sc), 
g_redboot_start); if (gp == NULL) return (NULL); /* interpose our access method */ sc->parent_access = gp->access; gp->access = g_redboot_access; sectorsize = cp->provider->sectorsize; blksize = cp->provider->stripesize; if (powerof2(cp->provider->mediasize)) offmask = cp->provider->mediasize-1; else offmask = 0xffffffff; /* XXX */ if (bootverbose) - printf("%s: mediasize %ld secsize %d blksize %d offmask 0x%x\n", + printf("%s: mediasize %ld secsize %d blksize %ju offmask 0x%x\n", __func__, (long) cp->provider->mediasize, sectorsize, - blksize, offmask); + (uintmax_t)blksize, offmask); if (sectorsize < sizeof(struct fis_image_desc) || (sectorsize % sizeof(struct fis_image_desc))) return (NULL); g_topology_unlock(); head = NULL; if(offset == 0) offset = cp->provider->mediasize - blksize; again: buf = g_read_data(cp, offset, blksize, NULL); if (buf != NULL) head = parse_fis_directory(buf, blksize, offset, offmask); if (head == NULL && offset != 0) { if (buf != NULL) g_free(buf); offset = 0; /* check the front */ goto again; } g_topology_lock(); if (head == NULL) { if (buf != NULL) g_free(buf); return NULL; } /* * Craft a slice for each entry. 
*/ for (fd = head, i = 0; fd != NULL; fd = fd->next) { if (fd->name[0] == '\0') continue; error = g_slice_config(gp, i, G_SLICE_CONFIG_SET, fd->offset, fd->size, sectorsize, "redboot/%s", fd->name); if (error) printf("%s: g_slice_config returns %d for \"%s\"\n", __func__, error, fd->name); sc->entry[i] = fd->entry; sc->dsize[i] = fd->dsize; /* disallow writing hard-to-recover entries */ sc->readonly[i] = (strcmp(fd->name, FISDIR_NAME) == 0) || (strcmp(fd->name, REDBOOT_NAME) == 0); i++; } g_free(buf); g_access(cp, -1, 0, 0); if (LIST_EMPTY(&gp->provider)) { g_slice_spoiled(cp); return (NULL); } return (gp); } static void g_redboot_config(struct gctl_req *req, struct g_class *mp, const char *verb) { struct g_geom *gp; g_topology_assert(); gp = gctl_get_geom(req, mp, "geom"); if (gp == NULL) return; gctl_error(req, "Unknown verb"); } static struct g_class g_redboot_class = { .name = REDBOOT_CLASS_NAME, .version = G_VERSION, .taste = g_redboot_taste, .dumpconf = g_redboot_dumpconf, .ctlreq = g_redboot_config, .ioctl = g_redboot_ioctl, }; DECLARE_GEOM_CLASS(g_redboot_class, g_redboot); MODULE_VERSION(geom_redboot, 0); Index: head/sys/geom/geom_subr.c =================================================================== --- head/sys/geom/geom_subr.c (revision 339814) +++ head/sys/geom/geom_subr.c (revision 339815) @@ -1,1613 +1,1613 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #ifdef KDB #include #endif struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes); static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms); char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim; struct g_hh00 { struct g_class *mp; struct g_provider *pp; off_t size; int error; int post; }; /* * This event offers a new class a chance to taste all preexisting providers. 
*/
static void
g_load_class(void *arg, int flag)
{
	struct g_hh00 *hh;
	struct g_class *mp2, *mp;
	struct g_geom *gp;
	struct g_provider *pp;

	g_topology_assert();
	if (flag == EV_CANCEL)	/* XXX: can't happen ? */
		return;
	if (g_shutdown)
		return;

	hh = arg;
	mp = hh->mp;
	hh->error = 0;
	/*
	 * When hh->post is set the request is released here, so the
	 * error paths below must check hh for NULL before recording
	 * a status in it.
	 */
	if (hh->post) {
		g_free(hh);
		hh = NULL;
	}
	g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name);
	KASSERT(mp->name != NULL && *mp->name != '\0',
	    ("GEOM class has no name"));
	/* Refuse a class that is already listed, by pointer or by name. */
	LIST_FOREACH(mp2, &g_classes, class) {
		if (mp2 == mp) {
			printf("The GEOM class %s is already loaded.\n",
			    mp2->name);
			if (hh != NULL)
				hh->error = EEXIST;
			return;
		} else if (strcmp(mp2->name, mp->name) == 0) {
			printf("A GEOM class %s is already loaded.\n",
			    mp2->name);
			if (hh != NULL)
				hh->error = EEXIST;
			return;
		}
	}

	/* Register the class and run its optional init method. */
	LIST_INIT(&mp->geom);
	LIST_INSERT_HEAD(&g_classes, mp, class);
	if (mp->init != NULL)
		mp->init(mp);
	if (mp->taste == NULL)
		return;
	/* Offer every provider of every other class to the new class. */
	LIST_FOREACH(mp2, &g_classes, class) {
		if (mp == mp2)
			continue;
		LIST_FOREACH(gp, &mp2->geom, geom) {
			LIST_FOREACH(pp, &gp->provider, provider) {
				mp->taste(mp, pp, 0);
				g_topology_assert();
			}
		}
	}
}

static int
g_unload_class(struct g_class *mp)
{
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *cp;
	int error;

	g_topology_lock();
	g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name);
retry:
	G_VALID_CLASS(mp);
	LIST_FOREACH(gp, &mp->geom, geom) {
		/* We refuse to unload if anything is open */
		LIST_FOREACH(pp, &gp->provider, provider)
			if (pp->acr || pp->acw || pp->ace) {
				g_topology_unlock();
				return (EBUSY);
			}
		LIST_FOREACH(cp, &gp->consumer, consumer)
			if (cp->acr || cp->acw || cp->ace) {
				g_topology_unlock();
				return (EBUSY);
			}
		/* If the geom is withering, wait for it to finish. */
		if (gp->flags & G_GEOM_WITHER) {
			g_topology_sleep(mp, 1);
			goto retry;
		}
	}

	/*
	 * We allow unloading if we have no geoms, or a class
	 * method we can use to get rid of them.
*/
	if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) {
		g_topology_unlock();
		return (EOPNOTSUPP);
	}

	/* Bar new entries */
	mp->taste = NULL;
	mp->config = NULL;

	LIST_FOREACH(gp, &mp->geom, geom) {
		error = mp->destroy_geom(NULL, mp, gp);
		if (error != 0) {
			g_topology_unlock();
			return (error);
		}
	}
	/* Wait for withering to finish. */
	for (;;) {
		gp = LIST_FIRST(&mp->geom);
		if (gp == NULL)
			break;
		KASSERT(gp->flags & G_GEOM_WITHER,
		    ("Non-withering geom in class %s", mp->name));
		g_topology_sleep(mp, 1);
	}
	G_VALID_CLASS(mp);
	if (mp->fini != NULL)
		mp->fini(mp);
	LIST_REMOVE(mp, class);
	g_topology_unlock();

	return (0);
}

/*
 * Module event handler for a GEOM class; "data" is the struct g_class.
 *
 * A class whose version differs from G_VERSION is rejected with EINVAL.
 * The first call ever made also runs g_init().  MOD_LOAD schedules
 * g_load_class() and MOD_UNLOAD calls g_unload_class(); any other event
 * type returns EOPNOTSUPP.  The "mod" argument is not used.
 */
int
g_modevent(module_t mod, int type, void *data)
{
	struct g_hh00 *hh;
	int error;
	static int g_ignition;
	struct g_class *mp;

	mp = data;
	if (mp->version != G_VERSION) {
		printf("GEOM class %s has Wrong version %x\n",
		    mp->name, mp->version);
		return (EINVAL);
	}
	/* One-time GEOM initialisation, triggered by the first class seen. */
	if (!g_ignition) {
		g_ignition++;
		g_init();
	}
	error = EOPNOTSUPP;
	switch (type) {
	case MOD_LOAD:
		g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name);
		hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
		hh->mp = mp;
		/*
		 * Once the system is not cold, MOD_LOAD calls will be
		 * from the userland and the g_event thread will be able
		 * to acknowledge their completion.
		 */
		if (cold) {
			/* Posted request: not freed here. */
			hh->post = 1;
			error = g_post_event(g_load_class, hh, M_WAITOK, NULL);
		} else {
			/* Waited-for request: read the status, then free it. */
			error = g_waitfor_event(g_load_class, hh, M_WAITOK,
			    NULL);
			if (error == 0)
				error = hh->error;
			g_free(hh);
		}
		break;
	case MOD_UNLOAD:
		g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", mp->name);
		error = g_unload_class(mp);
		if (error == 0) {
			KASSERT(LIST_EMPTY(&mp->geom),
			    ("Unloaded class (%s) still has geom", mp->name));
		}
		break;
	}
	return (error);
}

static void
g_retaste_event(void *arg, int flag)
{
	struct g_class *mp, *mp2;
	struct g_geom *gp;
	struct g_hh00 *hh;
	struct g_provider *pp;
	struct g_consumer *cp;

	g_topology_assert();
	if (flag == EV_CANCEL)  /* XXX: can't happen ? 
*/
		return;
	if (g_shutdown || g_notaste)
		return;

	hh = arg;
	mp = hh->mp;
	hh->error = 0;
	if (hh->post) {
		g_free(hh);
		hh = NULL;
	}
	g_trace(G_T_TOPOLOGY, "g_retaste(%s)", mp->name);

	LIST_FOREACH(mp2, &g_classes, class) {
		LIST_FOREACH(gp, &mp2->geom, geom) {
			LIST_FOREACH(pp, &gp->provider, provider) {
				/* Providers that are open are left alone. */
				if (pp->acr || pp->acw || pp->ace)
					continue;
				LIST_FOREACH(cp, &pp->consumers, consumers) {
					if (cp->geom->class == mp &&
					    (cp->flags & G_CF_ORPHAN) == 0)
						break;
				}
				/*
				 * The class already has a live consumer on
				 * this provider: orphan it and wither its
				 * geom before tasting again.
				 */
				if (cp != NULL) {
					cp->flags |= G_CF_ORPHAN;
					g_wither_geom(cp->geom, ENXIO);
				}
				mp->taste(mp, pp, 0);
				g_topology_assert();
			}
		}
	}
}

/*
 * Ask class "mp" to taste the providers again by scheduling
 * g_retaste_event().
 *
 * Returns EINVAL if the class has no taste method.  While the system is
 * cold the event is only posted and the result of g_post_event() is
 * returned; otherwise the call waits for the event and returns either
 * the g_waitfor_event() error or the status left in the request.
 */
int
g_retaste(struct g_class *mp)
{
	struct g_hh00 *hh;
	int error;

	if (mp->taste == NULL)
		return (EINVAL);

	hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
	hh->mp = mp;

	if (cold) {
		/* Posted request: not freed here. */
		hh->post = 1;
		error = g_post_event(g_retaste_event, hh, M_WAITOK, NULL);
	} else {
		error = g_waitfor_event(g_retaste_event, hh, M_WAITOK, NULL);
		if (error == 0)
			error = hh->error;
		g_free(hh);
	}

	return (error);
}

struct g_geom *
g_new_geomf(struct g_class *mp, const char *fmt, ...)
{
	struct g_geom *gp;
	va_list ap;
	struct sbuf *sb;

	g_topology_assert();
	G_VALID_CLASS(mp);
	/* Format the name into an sbuf first so its length is known. */
	sb = sbuf_new_auto();
	va_start(ap, fmt);
	sbuf_vprintf(sb, fmt, ap);
	va_end(ap);
	sbuf_finish(sb);
	gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO);
	gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
	gp->class = mp;
	gp->rank = 1;
	LIST_INIT(&gp->consumer);
	LIST_INIT(&gp->provider);
	LIST_INIT(&gp->aliases);
	/* Link into both the class's geom list and the global "geoms" queue. */
	LIST_INSERT_HEAD(&mp->geom, gp, geom);
	TAILQ_INSERT_HEAD(&geoms, gp, geoms);
	strcpy(gp->name, sbuf_data(sb));
	sbuf_delete(sb);
	/* Fill in defaults from class */
	gp->start = mp->start;
	gp->spoiled = mp->spoiled;
	gp->attrchanged = mp->attrchanged;
	gp->providergone = mp->providergone;
	gp->dumpconf = mp->dumpconf;
	gp->access = mp->access;
	gp->orphan = mp->orphan;
	gp->ioctl = mp->ioctl;
	gp->resize = mp->resize;
	return (gp);
}

/*
 * Free a geom that no longer has any consumers or providers (both are
 * asserted).  Pending events for the geom are cancelled, it is unlinked
 * from its class list and from the global "geoms" queue, and its
 * aliases, name and the geom itself are freed.
 */
void
g_destroy_geom(struct g_geom *gp)
{
	struct g_geom_alias *gap, *gaptmp;

	g_topology_assert();
	G_VALID_GEOM(gp);
	g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name);
	KASSERT(LIST_EMPTY(&gp->consumer),
	    ("g_destroy_geom(%s) with consumer(s) [%p]",
	    gp->name, LIST_FIRST(&gp->consumer)));
	KASSERT(LIST_EMPTY(&gp->provider),
	    ("g_destroy_geom(%s) with provider(s) [%p]",
	    gp->name, LIST_FIRST(&gp->provider)));
	g_cancel_event(gp);
	LIST_REMOVE(gp, geom);
	TAILQ_REMOVE(&geoms, gp, geoms);
	/* _SAFE variant: each alias entry is freed while iterating. */
	LIST_FOREACH_SAFE(gap, &gp->aliases, ga_next, gaptmp)
		g_free(gap);
	g_free(gp->name);
	g_free(gp);
}

/*
 * This function is called (repeatedly) until the geom has withered away.
 *
 * The first call sets G_GEOM_WITHER and orphans, with "error", every
 * provider that is not already orphaned; every call ends with
 * g_do_wither().
 */
void
g_wither_geom(struct g_geom *gp, int error)
{
	struct g_provider *pp;

	g_topology_assert();
	G_VALID_GEOM(gp);
	g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name);
	if (!(gp->flags & G_GEOM_WITHER)) {
		gp->flags |= G_GEOM_WITHER;
		LIST_FOREACH(pp, &gp->provider, provider)
			if (!(pp->flags & G_PF_ORPHAN))
				g_orphan_provider(pp, error);
	}
	g_do_wither();
}

/*
 * Convenience function to destroy a particular provider.
*/
void
g_wither_provider(struct g_provider *pp, int error)
{

	/* Flag the provider as withering; orphan it unless already orphaned. */
	pp->flags |= G_PF_WITHER;
	if (!(pp->flags & G_PF_ORPHAN))
		g_orphan_provider(pp, error);
}

/*
 * This function is called (repeatedly) until the geom has withered away.
 *
 * Any access counts still held by the geom's consumers are dropped
 * first, then the geom is handed to g_wither_geom() with "error".
 */
void
g_wither_geom_close(struct g_geom *gp, int error)
{
	struct g_consumer *cp;

	g_topology_assert();
	G_VALID_GEOM(gp);
	g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name);
	LIST_FOREACH(cp, &gp->consumer, consumer)
		if (cp->acr || cp->acw || cp->ace)
			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	g_wither_geom(gp, error);
}

/*
 * This function is called (repeatedly) until we can't wash away more
 * withered bits at present.
 *
 * One pass over every geom of every class:
 *  - providers flagged G_PF_WITHER that have no consumers are destroyed;
 *  - for geoms flagged G_GEOM_WITHER, all providers without consumers are
 *    destroyed, consumers holding no access counts are detached (if
 *    attached) and destroyed, and the geom itself is destroyed once it
 *    has neither providers nor consumers left.
 */
void
g_wither_washer()
{
	struct g_class *mp;
	struct g_geom *gp, *gp2;
	struct g_provider *pp, *pp2;
	struct g_consumer *cp, *cp2;

	g_topology_assert();
	LIST_FOREACH(mp, &g_classes, class) {
		/* _SAFE iteration throughout: entries are destroyed in-loop. */
		LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
				if (!(pp->flags & G_PF_WITHER))
					continue;
				if (LIST_EMPTY(&pp->consumers))
					g_destroy_provider(pp);
			}
			if (!(gp->flags & G_GEOM_WITHER))
				continue;
			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
				if (LIST_EMPTY(&pp->consumers))
					g_destroy_provider(pp);
			}
			LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) {
				/* Still open: cannot be torn down yet. */
				if (cp->acr || cp->acw || cp->ace)
					continue;
				if (cp->provider != NULL)
					g_detach(cp);
				g_destroy_consumer(cp);
			}
			if (LIST_EMPTY(&gp->provider) &&
			    LIST_EMPTY(&gp->consumer))
				g_destroy_geom(gp);
		}
	}
}

struct g_consumer *
g_new_consumer(struct g_geom *gp)
{
	struct g_consumer *cp;

	g_topology_assert();
	G_VALID_GEOM(gp);
	KASSERT(!(gp->flags & G_GEOM_WITHER),
	    ("g_new_consumer on WITHERing geom(%s) (class %s)",
	    gp->name, gp->class->name));
	KASSERT(gp->orphan != NULL,
	    ("g_new_consumer on geom(%s) (class %s) without orphan",
	    gp->name, gp->class->name));

	cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO);
	cp->geom = gp;
	cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED,
	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
LIST_INSERT_HEAD(&gp->consumer, cp, consumer);
	return(cp);
}

/*
 * Free a consumer that is detached and holds no access counts (all
 * asserted).  Pending events for it are cancelled, it is unlinked from
 * its geom and its devstat entry is removed.  If the owning geom is
 * withering, g_do_wither() is called afterwards.
 */
void
g_destroy_consumer(struct g_consumer *cp)
{
	struct g_geom *gp;

	g_topology_assert();
	G_VALID_CONSUMER(cp);
	g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp);
	KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached"));
	KASSERT (cp->acr == 0, ("g_destroy_consumer with acr"));
	KASSERT (cp->acw == 0, ("g_destroy_consumer with acw"));
	KASSERT (cp->ace == 0, ("g_destroy_consumer with ace"));
	g_cancel_event(cp);
	/* Remember the geom now; cp is freed before the flags are tested. */
	gp = cp->geom;
	LIST_REMOVE(cp, consumer);
	devstat_remove_entry(cp->stat);
	g_free(cp);
	if (gp->flags & G_GEOM_WITHER)
		g_do_wither();
}

/*
 * Event handler run for a provider ("arg"); does nothing when the event
 * was cancelled or the system is shutting down.
 *
 * Every non-orphaned consumer whose geom has an attrchanged method is
 * told that "GEOM::media" changed.  Then, unless g_notaste is set, the
 * provider is offered to each class that has a taste method and does
 * not already have a non-orphaned consumer attached to it.
 */
static void
g_new_provider_event(void *arg, int flag)
{
	struct g_class *mp;
	struct g_provider *pp;
	struct g_consumer *cp, *next_cp;

	g_topology_assert();
	if (flag == EV_CANCEL)
		return;
	if (g_shutdown)
		return;
	pp = arg;
	G_VALID_PROVIDER(pp);
	KASSERT(!(pp->flags & G_PF_WITHER),
	    ("g_new_provider_event but withered"));
	LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, next_cp) {
		if ((cp->flags & G_CF_ORPHAN) == 0 &&
		    cp->geom->attrchanged != NULL)
			cp->geom->attrchanged(cp, "GEOM::media");
	}
	if (g_notaste)
		return;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp->taste == NULL)
			continue;
		/* cp ends up non-NULL iff this class is already attached. */
		LIST_FOREACH(cp, &pp->consumers, consumers)
			if (cp->geom->class == mp &&
			    (cp->flags & G_CF_ORPHAN) == 0)
				break;
		if (cp != NULL)
			continue;
		mp->taste(mp, pp, 0);
		g_topology_assert();
	}
}

struct g_provider *
g_new_providerf(struct g_geom *gp, const char *fmt, ...)
{ struct g_provider *pp; struct sbuf *sb; va_list ap; g_topology_assert(); G_VALID_GEOM(gp); KASSERT(gp->access != NULL, ("new provider on geom(%s) without ->access (class %s)", gp->name, gp->class->name)); KASSERT(gp->start != NULL, ("new provider on geom(%s) without ->start (class %s)", gp->name, gp->class->name)); KASSERT(!(gp->flags & G_GEOM_WITHER), ("new provider on WITHERing geom(%s) (class %s)", gp->name, gp->class->name)); sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); pp->name = (char *)(pp + 1); strcpy(pp->name, sbuf_data(sb)); sbuf_delete(sb); LIST_INIT(&pp->consumers); pp->error = ENXIO; pp->geom = gp; pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); LIST_INSERT_HEAD(&gp->provider, pp, provider); g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL); return (pp); } void g_error_provider(struct g_provider *pp, int error) { /* G_VALID_PROVIDER(pp); We may not have g_topology */ pp->error = error; } static void g_resize_provider_event(void *arg, int flag) { struct g_hh00 *hh; struct g_class *mp; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp, *cp2; off_t size; g_topology_assert(); if (g_shutdown) return; hh = arg; pp = hh->pp; size = hh->size; g_free(hh); G_VALID_PROVIDER(pp); KASSERT(!(pp->flags & G_PF_WITHER), ("g_resize_provider_event but withered")); g_trace(G_T_TOPOLOGY, "g_resize_provider_event(%p)", pp); LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if (gp->resize == NULL && size < pp->mediasize) { /* * XXX: g_dev_orphan method does deferred destroying * and it is possible, that other event could already * call the orphan method. Check consumer's flags to * do not schedule it twice. 
*/ if (cp->flags & G_CF_ORPHAN) continue; cp->flags |= G_CF_ORPHAN; cp->geom->orphan(cp); } } pp->mediasize = size; LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if ((gp->flags & G_GEOM_WITHER) == 0 && gp->resize != NULL) gp->resize(cp); } /* * After resizing, the previously invalid GEOM class metadata * might become valid. This means we should retaste. */ LIST_FOREACH(mp, &g_classes, class) { if (mp->taste == NULL) continue; LIST_FOREACH(cp, &pp->consumers, consumers) if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; if (cp != NULL) continue; mp->taste(mp, pp, 0); g_topology_assert(); } } void g_resize_provider(struct g_provider *pp, off_t size) { struct g_hh00 *hh; G_VALID_PROVIDER(pp); if (pp->flags & G_PF_WITHER) return; if (size == pp->mediasize) return; hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->pp = pp; hh->size = size; g_post_event(g_resize_provider_event, hh, M_WAITOK, NULL); } #ifndef _PATH_DEV #define _PATH_DEV "/dev/" #endif struct g_provider * g_provider_by_name(char const *arg) { struct g_class *cp; struct g_geom *gp; struct g_provider *pp, *wpp; if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) arg += sizeof(_PATH_DEV) - 1; wpp = NULL; LIST_FOREACH(cp, &g_classes, class) { LIST_FOREACH(gp, &cp->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (strcmp(arg, pp->name) != 0) continue; if ((gp->flags & G_GEOM_WITHER) == 0 && (pp->flags & G_PF_WITHER) == 0) return (pp); else wpp = pp; } } } return (wpp); } void g_destroy_provider(struct g_provider *pp) { struct g_geom *gp; g_topology_assert(); G_VALID_PROVIDER(pp); KASSERT(LIST_EMPTY(&pp->consumers), ("g_destroy_provider but attached")); KASSERT (pp->acr == 0, ("g_destroy_provider with acr")); KASSERT (pp->acw == 0, ("g_destroy_provider with acw")); KASSERT (pp->ace == 0, ("g_destroy_provider with ace")); g_cancel_event(pp); LIST_REMOVE(pp, provider); gp = pp->geom; devstat_remove_entry(pp->stat); /* * If a callback was provided, send 
notification that the provider
	 * is now gone.
	 */
	if (gp->providergone != NULL)
		gp->providergone(pp);

	g_free(pp);
	if ((gp->flags & G_GEOM_WITHER))
		g_do_wither();
}

/*
 * We keep the "geoms" list sorted by topological order (== increasing
 * numerical rank) at all times.
 * When an attach is done, the attaching geom's rank is invalidated
 * and it is moved to the tail of the list.
 * All geoms later in the sequence have their ranks reevaluated in
 * sequence.  If we cannot assign rank to a geom because its
 * prerequisites do not have rank, we move that element to the tail
 * of the sequence with invalid rank as well.
 * At some point we encounter our original geom and if we still fail
 * to assign it a rank, there must be a loop and we fail back to
 * g_attach() which detaches again and calls redo_rank again
 * to fix up the damage.
 * It would be much simpler code wise to do it recursively, but we
 * can't risk that on the kernel stack.
 *
 * A rank of 0 means "no rank assigned".  Returns 0 on success, or ELOOP
 * when the original geom cannot be given a rank.
 */
static int
redo_rank(struct g_geom *gp)
{
	struct g_consumer *cp;
	struct g_geom *gp1, *gp2;
	int n, m;

	g_topology_assert();
	G_VALID_GEOM(gp);

	/* Invalidate this geom's rank and move it to the tail */
	gp1 = TAILQ_NEXT(gp, geoms);
	if (gp1 != NULL) {
		gp->rank = 0;
		TAILQ_REMOVE(&geoms, gp, geoms);
		TAILQ_INSERT_TAIL(&geoms, gp, geoms);
	} else {
		/* Already last in the list: only gp itself is re-ranked. */
		gp1 = gp;
	}

	/* re-rank the rest of the sequence */
	for (; gp1 != NULL; gp1 = gp2) {
		gp1->rank = 0;
		/*
		 * m becomes one more than the highest rank among the geoms
		 * this geom consumes from, or 0 if any of them is unranked.
		 */
		m = 1;
		LIST_FOREACH(cp, &gp1->consumer, consumer) {
			if (cp->provider == NULL)
				continue;
			n = cp->provider->geom->rank;
			if (n == 0) {
				m = 0;
				break;
			} else if (n >= m)
				m = n + 1;
		}
		gp1->rank = m;
		/* Fetch the successor before gp1 may be moved to the tail. */
		gp2 = TAILQ_NEXT(gp1, geoms);

		/* got a rank, moving on */
		if (m != 0)
			continue;

		/* no rank to original geom means loop */
		if (gp == gp1)
			return (ELOOP);

		/* no rank, put it at the end move on */
		TAILQ_REMOVE(&geoms, gp1, geoms);
		TAILQ_INSERT_TAIL(&geoms, gp1, geoms);
	}
	return (0);
}

int
g_attach(struct g_consumer *cp, struct g_provider *pp)
{
	int error;

	g_topology_assert();
	G_VALID_CONSUMER(cp);
	G_VALID_PROVIDER(pp);
g_trace(G_T_TOPOLOGY, "g_attach(%p, %p)", cp, pp); KASSERT(cp->provider == NULL, ("attach but attached")); cp->provider = pp; cp->flags &= ~G_CF_ORPHAN; LIST_INSERT_HEAD(&pp->consumers, cp, consumers); error = redo_rank(cp->geom); if (error) { LIST_REMOVE(cp, consumers); cp->provider = NULL; redo_rank(cp->geom); } return (error); } void g_detach(struct g_consumer *cp) { struct g_provider *pp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp); KASSERT(cp->provider != NULL, ("detach but not attached")); KASSERT(cp->acr == 0, ("detach but nonzero acr")); KASSERT(cp->acw == 0, ("detach but nonzero acw")); KASSERT(cp->ace == 0, ("detach but nonzero ace")); KASSERT(cp->nstart == cp->nend, ("detach with active requests")); pp = cp->provider; LIST_REMOVE(cp, consumers); cp->provider = NULL; if ((cp->geom->flags & G_GEOM_WITHER) || (pp->geom->flags & G_GEOM_WITHER) || (pp->flags & G_PF_WITHER)) g_do_wither(); redo_rank(cp->geom); } /* * g_access() * * Access-check with delta values. The question asked is "can provider * "cp" change the access counters by the relative amounts dc[rwe] ?" */ int g_access(struct g_consumer *cp, int dcr, int dcw, int dce) { struct g_provider *pp; struct g_geom *gp; int pw, pe; #ifdef INVARIANTS int sr, sw, se; #endif int error; g_topology_assert(); G_VALID_CONSUMER(cp); pp = cp->provider; KASSERT(pp != NULL, ("access but not attached")); G_VALID_PROVIDER(pp); gp = pp->geom; g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)", cp, pp->name, dcr, dcw, dce); KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr")); KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw")); KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace")); KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request")); KASSERT(gp->access != NULL, ("NULL geom->access")); /* * If our class cares about being spoiled, and we have been, we * are probably just ahead of the event telling us that. 
Fail * now rather than having to unravel this later. */ if (cp->geom->spoiled != NULL && (cp->flags & G_CF_SPOILED) && (dcr > 0 || dcw > 0 || dce > 0)) return (ENXIO); /* * A number of GEOM classes either need to perform an I/O on the first * open or to acquire a different subsystem's lock. To do that they * may have to drop the topology lock. * Other GEOM classes perform special actions when opening a lower rank * geom for the first time. As a result, more than one thread may * end up performing the special actions. * So, we prevent concurrent "first" opens by marking the consumer with * special flag. * * Note that if the geom's access method never drops the topology lock, * then we will never see G_GEOM_IN_ACCESS here. */ while ((gp->flags & G_GEOM_IN_ACCESS) != 0) { g_trace(G_T_ACCESS, "%s: race on geom %s via provider %s and consumer of %s", __func__, gp->name, pp->name, cp->geom->name); gp->flags |= G_GEOM_ACCESS_WAIT; g_topology_sleep(gp, 0); } /* * Figure out what counts the provider would have had, if this * consumer had (r0w0e0) at this time. */ pw = pp->acw - cp->acw; pe = pp->ace - cp->ace; g_trace(G_T_ACCESS, "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)", dcr, dcw, dce, cp->acr, cp->acw, cp->ace, pp->acr, pp->acw, pp->ace, pp, pp->name); /* If foot-shooting is enabled, any open on rank#1 is OK */ if ((g_debugflags & 16) && gp->rank == 1) ; /* If we try exclusive but already write: fail */ else if (dce > 0 && pw > 0) return (EPERM); /* If we try write but already exclusive: fail */ else if (dcw > 0 && pe > 0) return (EPERM); /* If we try to open more but provider is error'ed: fail */ else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0) { printf("%s(%d): provider %s has error %d set\n", __func__, __LINE__, pp->name, pp->error); return (pp->error); } /* Ok then... 
*/ #ifdef INVARIANTS sr = cp->acr; sw = cp->acw; se = cp->ace; #endif gp->flags |= G_GEOM_IN_ACCESS; error = gp->access(pp, dcr, dcw, dce); KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0, ("Geom provider %s::%s dcr=%d dcw=%d dce=%d error=%d failed " "closing ->access()", gp->class->name, pp->name, dcr, dcw, dce, error)); g_topology_assert(); gp->flags &= ~G_GEOM_IN_ACCESS; KASSERT(cp->acr == sr && cp->acw == sw && cp->ace == se, ("Access counts changed during geom->access")); if ((gp->flags & G_GEOM_ACCESS_WAIT) != 0) { gp->flags &= ~G_GEOM_ACCESS_WAIT; wakeup(gp); } if (!error) { /* * If we open first write, spoil any partner consumers. * If we close last write and provider is not errored, * trigger re-taste. */ if (pp->acw == 0 && dcw != 0) g_spoil(pp, cp); else if (pp->acw != 0 && pp->acw == -dcw && pp->error == 0 && !(gp->flags & G_GEOM_WITHER)) g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL); pp->acr += dcr; pp->acw += dcw; pp->ace += dce; cp->acr += dcr; cp->acw += dcw; cp->ace += dce; if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) KASSERT(pp->sectorsize > 0, ("Provider %s lacks sectorsize", pp->name)); if ((cp->geom->flags & G_GEOM_WITHER) && cp->acr == 0 && cp->acw == 0 && cp->ace == 0) g_do_wither(); } return (error); } int g_handleattr_int(struct bio *bp, const char *attribute, int val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_str(struct bio *bp, const char *attribute, const char *str) { return (g_handleattr(bp, attribute, str, 0)); } int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len) { int error = 0; if (strcmp(bp->bio_attribute, attribute)) return (0); if (len == 0) { bzero(bp->bio_data, 
bp->bio_length);
		if (strlcpy(bp->bio_data, val, bp->bio_length) >=
		    bp->bio_length) {
			printf("%s: %s %s bio_length %jd strlen %zu -> EFAULT\n",
			    __func__, bp->bio_to->name, attribute,
			    (intmax_t)bp->bio_length, strlen(val));
			error = EFAULT;
		}
	} else if (bp->bio_length == len) {
		bcopy(val, bp->bio_data, len);
	} else {
		printf("%s: %s %s bio_length %jd len %d -> EFAULT\n", __func__,
		    bp->bio_to->name, attribute, (intmax_t)bp->bio_length, len);
		error = EFAULT;
	}
	if (error == 0)
		bp->bio_completed = bp->bio_length;
	g_io_deliver(bp, error);
	return (1);
}

/*
 * Access method that accepts every request: after the topology and
 * provider sanity checks it returns 0 without looking at the deltas.
 */
int
g_std_access(struct g_provider *pp,
	int dr __unused, int dw __unused, int de __unused)
{

	g_topology_assert();
	G_VALID_PROVIDER(pp);
        return (0);
}

/*
 * Completion handler for a child bio.
 *
 * The child's error is copied to the parent only if the parent has no
 * error yet, and the child's completed byte count is added to the
 * parent's.  The child is then destroyed, and the parent is delivered
 * once bio_inbed has caught up with bio_children.
 */
void
g_std_done(struct bio *bp)
{
	struct bio *bp2;

	bp2 = bp->bio_parent;
	if (bp2->bio_error == 0)
		bp2->bio_error = bp->bio_error;
	bp2->bio_completed += bp->bio_completed;
	g_destroy_bio(bp);
	bp2->bio_inbed++;
	if (bp2->bio_children == bp2->bio_inbed)
		g_io_deliver(bp2, bp2->bio_error);
}

/* XXX: maybe this is only g_slice_spoiled */

/*
 * Spoiled method that tears the consumer's geom down: the consumer is
 * flagged orphaned and detached, every provider of its geom is orphaned
 * with ENXIO, and the consumer is destroyed.  The geom is destroyed at
 * once if it has no providers or consumers left; otherwise it is flagged
 * G_GEOM_WITHER.
 */
void
g_std_spoiled(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct g_provider *pp;

	g_topology_assert();
	G_VALID_CONSUMER(cp);
	g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp);
	cp->flags |= G_CF_ORPHAN;
	g_detach(cp);
	gp = cp->geom;
	LIST_FOREACH(pp, &gp->provider, provider)
		g_orphan_provider(pp, ENXIO);
	g_destroy_consumer(cp);
	if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
		g_destroy_geom(gp);
	else
		gp->flags |= G_GEOM_WITHER;
}

/*
 * Spoiling happens when a provider is opened for writing, but consumers
 * which are configured by in-band data are attached (slicers for instance).
 * Since the write might potentially change the in-band data, such consumers
 * need to re-evaluate their existence after the writing session closes.
 * We do this by (offering to) tear them down when the open for write happens
 * in return for a re-taste when it closes again.
* Together with the fact that such consumers grab an 'e' bit whenever they * are open, regardless of mode, this ends up DTRT. */ static void g_spoil_event(void *arg, int flag) { struct g_provider *pp; struct g_consumer *cp, *cp2; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); g_trace(G_T_TOPOLOGY, "%s %p(%s:%s:%s)", __func__, pp, pp->geom->class->name, pp->geom->name, pp->name); for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) { cp2 = LIST_NEXT(cp, consumers); if ((cp->flags & G_CF_SPOILED) == 0) continue; cp->flags &= ~G_CF_SPOILED; if (cp->geom->spoiled == NULL) continue; cp->geom->spoiled(cp); g_topology_assert(); } } void g_spoil(struct g_provider *pp, struct g_consumer *cp) { struct g_consumer *cp2; g_topology_assert(); G_VALID_PROVIDER(pp); G_VALID_CONSUMER(cp); LIST_FOREACH(cp2, &pp->consumers, consumers) { if (cp2 == cp) continue; /* KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr)); KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw)); */ KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace)); cp2->flags |= G_CF_SPOILED; } g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL); } static void g_media_changed_event(void *arg, int flag) { struct g_provider *pp; int retaste; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); /* * If provider was not open for writing, queue retaste after spoiling. * If it was, retaste will happen automatically on close. 
/*
 * Return the length of a leading "ada<digits>" or "ad<digits>" device
 * prefix in 'name', or 0 if the name does not start with one.  "ada" is
 * tried first; a name that starts with "ada" but has no digit after it
 * is rejected rather than retried as "ad".
 */
static int
g_get_device_prefix_len(const char *name)
{
	const char *p;

	if (strncmp(name, "ada", 3) == 0)
		p = name + 3;
	else if (strncmp(name, "ad", 2) == 0)
		p = name + 2;
	else
		return (0);

	/* At least one unit digit is required after the driver name. */
	if (*p < '0' || *p > '9')
		return (0);
	while (*p >= '0' && *p <= '9')
		p++;
	return ((int)(p - name));
}

/*
 * Compare two provider names, treating "ad<N>" and "ada<M>" device
 * prefixes as interchangeable.  Returns 1 when the names are identical,
 * or when both carry such a prefix and the text following the prefixes
 * is identical; returns 0 otherwise.
 */
int
g_compare_names(const char *namea, const char *nameb)
{
	int skipa, skipb;

	if (strcmp(namea, nameb) == 0)
		return (1);

	skipa = g_get_device_prefix_len(namea);
	if (skipa == 0)
		return (0);
	skipb = g_get_device_prefix_len(nameb);
	if (skipb == 0)
		return (0);

	return (strcmp(namea + skipa, nameb + skipb) == 0 ? 1 : 0);
}
If topology is not locked, * this function is potentially dangerous, but we don't assert that the * topology lock is held when called from debugger. */ int g_valid_obj(void const *ptr) { struct g_class *mp; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; #ifdef KDB if (kdb_active == 0) #endif g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { if (ptr == mp) return (1); LIST_FOREACH(gp, &mp->geom, geom) { if (ptr == gp) return (2); LIST_FOREACH(cp, &gp->consumer, consumer) if (ptr == cp) return (3); LIST_FOREACH(pp, &gp->provider, provider) if (ptr == pp) return (4); } } return(0); } #endif #ifdef DDB #define gprintf(...) do { \ db_printf("%*s", indent, ""); \ db_printf(__VA_ARGS__); \ } while (0) #define gprintln(...) do { \ gprintf(__VA_ARGS__); \ db_printf("\n"); \ } while (0) #define ADDFLAG(obj, flag, sflag) do { \ if ((obj)->flags & (flag)) { \ if (comma) \ strlcat(str, ",", size); \ strlcat(str, (sflag), size); \ comma = 1; \ } \ } while (0) static char * provider_flags_to_string(struct g_provider *pp, char *str, size_t size) { int comma = 0; bzero(str, size); if (pp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER"); ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN"); return (str); } static char * geom_flags_to_string(struct g_geom *gp, char *str, size_t size) { int comma = 0; bzero(str, size); if (gp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER"); return (str); } static void db_show_geom_consumer(int indent, struct g_consumer *cp) { if (indent == 0) { gprintln("consumer: %p", cp); gprintln(" class: %s (%p)", cp->geom->class->name, cp->geom->class); gprintln(" geom: %s (%p)", cp->geom->name, cp->geom); if (cp->provider == NULL) gprintln(" provider: none"); else { gprintln(" provider: %s (%p)", cp->provider->name, cp->provider); } gprintln(" access: r%dw%de%d", cp->acr, cp->acw, cp->ace); gprintln(" flags: 0x%04x", cp->flags); gprintln(" 
nstart: %u", cp->nstart); gprintln(" nend: %u", cp->nend); } else { gprintf("consumer: %p (%s), access=r%dw%de%d", cp, cp->provider != NULL ? cp->provider->name : "none", cp->acr, cp->acw, cp->ace); if (cp->flags) db_printf(", flags=0x%04x", cp->flags); db_printf("\n"); } } static void db_show_geom_provider(int indent, struct g_provider *pp) { struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("provider: %s (%p)", pp->name, pp); gprintln(" class: %s (%p)", pp->geom->class->name, pp->geom->class); gprintln(" geom: %s (%p)", pp->geom->name, pp->geom); gprintln(" mediasize: %jd", (intmax_t)pp->mediasize); gprintln(" sectorsize: %u", pp->sectorsize); - gprintln(" stripesize: %u", pp->stripesize); - gprintln(" stripeoffset: %u", pp->stripeoffset); + gprintln(" stripesize: %ju", (uintmax_t)pp->stripesize); + gprintln(" stripeoffset: %ju", (uintmax_t)pp->stripeoffset); gprintln(" access: r%dw%de%d", pp->acr, pp->acw, pp->ace); gprintln(" flags: %s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); gprintln(" error: %d", pp->error); gprintln(" nstart: %u", pp->nstart); gprintln(" nend: %u", pp->nend); if (LIST_EMPTY(&pp->consumers)) gprintln(" consumers: none"); } else { gprintf("provider: %s (%p), access=r%dw%de%d", pp->name, pp, pp->acr, pp->acw, pp->ace); if (pp->flags != 0) { db_printf(", flags=%s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&pp->consumers)) { LIST_FOREACH(cp, &pp->consumers, consumers) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_geom(int indent, struct g_geom *gp) { struct g_provider *pp; struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("geom: %s (%p)", gp->name, gp); gprintln(" class: %s (%p)", gp->class->name, gp->class); gprintln(" flags: %s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); gprintln(" rank: %d", gp->rank); if 
(LIST_EMPTY(&gp->provider)) gprintln(" providers: none"); if (LIST_EMPTY(&gp->consumer)) gprintln(" consumers: none"); } else { gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank); if (gp->flags != 0) { db_printf(", flags=%s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&gp->provider)) { LIST_FOREACH(pp, &gp->provider, provider) { db_show_geom_provider(indent + 2, pp); if (db_pager_quit) break; } } if (!LIST_EMPTY(&gp->consumer)) { LIST_FOREACH(cp, &gp->consumer, consumer) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_class(struct g_class *mp) { struct g_geom *gp; db_printf("class: %s (%p)\n", mp->name, mp); LIST_FOREACH(gp, &mp->geom, geom) { db_show_geom_geom(2, gp); if (db_pager_quit) break; } } /* * Print the GEOM topology or the given object. */ DB_SHOW_COMMAND(geom, db_show_geom) { struct g_class *mp; if (!have_addr) { /* No address given, print the entire topology. 
*/ LIST_FOREACH(mp, &g_classes, class) { db_show_geom_class(mp); db_printf("\n"); if (db_pager_quit) break; } } else { switch (g_valid_obj((void *)addr)) { case 1: db_show_geom_class((struct g_class *)addr); break; case 2: db_show_geom_geom(0, (struct g_geom *)addr); break; case 3: db_show_geom_consumer(0, (struct g_consumer *)addr); break; case 4: db_show_geom_provider(0, (struct g_provider *)addr); break; default: db_printf("Not a GEOM object.\n"); break; } } } static void db_print_bio_cmd(struct bio *bp) { db_printf(" cmd: "); switch (bp->bio_cmd) { case BIO_READ: db_printf("BIO_READ"); break; case BIO_WRITE: db_printf("BIO_WRITE"); break; case BIO_DELETE: db_printf("BIO_DELETE"); break; case BIO_GETATTR: db_printf("BIO_GETATTR"); break; case BIO_FLUSH: db_printf("BIO_FLUSH"); break; case BIO_CMD0: db_printf("BIO_CMD0"); break; case BIO_CMD1: db_printf("BIO_CMD1"); break; case BIO_CMD2: db_printf("BIO_CMD2"); break; case BIO_ZONE: db_printf("BIO_ZONE"); break; default: db_printf("UNKNOWN"); break; } db_printf("\n"); } static void db_print_bio_flags(struct bio *bp) { int comma; comma = 0; db_printf(" flags: "); if (bp->bio_flags & BIO_ERROR) { db_printf("BIO_ERROR"); comma = 1; } if (bp->bio_flags & BIO_DONE) { db_printf("%sBIO_DONE", (comma ? ", " : "")); comma = 1; } if (bp->bio_flags & BIO_ONQUEUE) db_printf("%sBIO_ONQUEUE", (comma ? 
", " : "")); db_printf("\n"); } /* * Print useful information in a BIO */ DB_SHOW_COMMAND(bio, db_show_bio) { struct bio *bp; if (have_addr) { bp = (struct bio *)addr; db_printf("BIO %p\n", bp); db_print_bio_cmd(bp); db_print_bio_flags(bp); db_printf(" cflags: 0x%hx\n", bp->bio_cflags); db_printf(" pflags: 0x%hx\n", bp->bio_pflags); db_printf(" offset: %jd\n", (intmax_t)bp->bio_offset); db_printf(" length: %jd\n", (intmax_t)bp->bio_length); db_printf(" bcount: %ld\n", bp->bio_bcount); db_printf(" resid: %ld\n", bp->bio_resid); db_printf(" completed: %jd\n", (intmax_t)bp->bio_completed); db_printf(" children: %u\n", bp->bio_children); db_printf(" inbed: %u\n", bp->bio_inbed); db_printf(" error: %d\n", bp->bio_error); db_printf(" parent: %p\n", bp->bio_parent); db_printf(" driver1: %p\n", bp->bio_driver1); db_printf(" driver2: %p\n", bp->bio_driver2); db_printf(" caller1: %p\n", bp->bio_caller1); db_printf(" caller2: %p\n", bp->bio_caller2); db_printf(" bio_from: %p\n", bp->bio_from); db_printf(" bio_to: %p\n", bp->bio_to); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) db_printf(" bio_track_bp: %p\n", bp->bio_track_bp); #endif } } #undef gprintf #undef gprintln #undef ADDFLAG #endif /* DDB */ Index: head/sys/geom/nop/g_nop.c =================================================================== --- head/sys/geom/nop/g_nop.c (revision 339814) +++ head/sys/geom/nop/g_nop.c (revision 339815) @@ -1,720 +1,720 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, nop, CTLFLAG_RW, 0, "GEOM_NOP stuff"); static u_int g_nop_debug = 0; SYSCTL_UINT(_kern_geom_nop, OID_AUTO, debug, CTLFLAG_RW, &g_nop_debug, 0, "Debug level"); static int g_nop_destroy(struct g_geom *gp, boolean_t force); static int g_nop_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_nop_config(struct gctl_req *req, struct g_class *mp, const char *verb); static void g_nop_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); struct g_class g_nop_class = { .name = G_NOP_CLASS_NAME, .version = G_VERSION, .ctlreq = g_nop_config, .destroy_geom = g_nop_destroy_geom }; static void g_nop_orphan(struct g_consumer *cp) { g_topology_assert(); g_nop_destroy(cp->geom, 1); } static void g_nop_resize(struct g_consumer *cp) { 
struct g_nop_softc *sc; struct g_geom *gp; struct g_provider *pp; off_t size; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc->sc_explicitsize != 0) return; if (cp->provider->mediasize < sc->sc_offset) { g_nop_destroy(gp, 1); return; } size = cp->provider->mediasize - sc->sc_offset; LIST_FOREACH(pp, &gp->provider, provider) g_resize_provider(pp, size); } static void g_nop_start(struct bio *bp) { struct g_nop_softc *sc; struct g_geom *gp; struct g_provider *pp; struct bio *cbp; u_int failprob = 0; gp = bp->bio_to->geom; sc = gp->softc; G_NOP_LOGREQ(bp, "Request received."); mtx_lock(&sc->sc_lock); switch (bp->bio_cmd) { case BIO_READ: sc->sc_reads++; sc->sc_readbytes += bp->bio_length; failprob = sc->sc_rfailprob; break; case BIO_WRITE: sc->sc_writes++; sc->sc_wrotebytes += bp->bio_length; failprob = sc->sc_wfailprob; break; case BIO_DELETE: sc->sc_deletes++; break; case BIO_GETATTR: sc->sc_getattrs++; if (sc->sc_physpath && g_handleattr_str(bp, "GEOM::physpath", sc->sc_physpath)) { mtx_unlock(&sc->sc_lock); return; } break; case BIO_FLUSH: sc->sc_flushes++; break; case BIO_CMD0: sc->sc_cmd0s++; break; case BIO_CMD1: sc->sc_cmd1s++; break; case BIO_CMD2: sc->sc_cmd2s++; break; } mtx_unlock(&sc->sc_lock); if (failprob > 0) { u_int rval; rval = arc4random() % 100; if (rval < failprob) { G_NOP_LOGREQLVL(1, bp, "Returning error=%d.", sc->sc_error); g_io_deliver(bp, sc->sc_error); return; } } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } cbp->bio_done = g_std_done; cbp->bio_offset = bp->bio_offset + sc->sc_offset; pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("NULL pp")); cbp->bio_to = pp; G_NOP_LOGREQ(cbp, "Sending request."); g_io_request(cbp, LIST_FIRST(&gp->consumer)); } static int g_nop_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; int error; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); error = g_access(cp, dr, dw, de); return (error); } static int 
g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, int ioerror, u_int rfailprob, u_int wfailprob, off_t offset, off_t size, - u_int secsize, u_int stripesize, u_int stripeoffset, const char *physpath) + u_int secsize, off_t stripesize, off_t stripeoffset, const char *physpath) { struct g_nop_softc *sc; struct g_geom *gp; struct g_provider *newpp; struct g_consumer *cp; char name[64]; int error; off_t explicitsize; g_topology_assert(); gp = NULL; newpp = NULL; cp = NULL; if ((offset % pp->sectorsize) != 0) { gctl_error(req, "Invalid offset for provider %s.", pp->name); return (EINVAL); } if ((size % pp->sectorsize) != 0) { gctl_error(req, "Invalid size for provider %s.", pp->name); return (EINVAL); } if (offset >= pp->mediasize) { gctl_error(req, "Invalid offset for provider %s.", pp->name); return (EINVAL); } explicitsize = size; if (size == 0) size = pp->mediasize - offset; if (offset + size > pp->mediasize) { gctl_error(req, "Invalid size for provider %s.", pp->name); return (EINVAL); } if (secsize == 0) secsize = pp->sectorsize; else if ((secsize % pp->sectorsize) != 0) { gctl_error(req, "Invalid secsize for provider %s.", pp->name); return (EINVAL); } if (secsize > MAXPHYS) { gctl_error(req, "secsize is too big."); return (EINVAL); } size -= size % secsize; if ((stripesize % pp->sectorsize) != 0) { gctl_error(req, "Invalid stripesize for provider %s.", pp->name); return (EINVAL); } if ((stripeoffset % pp->sectorsize) != 0) { gctl_error(req, "Invalid stripeoffset for provider %s.", pp->name); return (EINVAL); } if (stripesize != 0 && stripeoffset >= stripesize) { gctl_error(req, "stripeoffset is too big."); return (EINVAL); } snprintf(name, sizeof(name), "%s%s", pp->name, G_NOP_SUFFIX); LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) { gctl_error(req, "Provider %s already exists.", name); return (EEXIST); } } gp = g_new_geomf(mp, "%s", name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); sc->sc_offset = offset; 
sc->sc_explicitsize = explicitsize; sc->sc_stripesize = stripesize; sc->sc_stripeoffset = stripeoffset; if (physpath && strcmp(physpath, G_NOP_PHYSPATH_PASSTHROUGH)) { sc->sc_physpath = strndup(physpath, MAXPATHLEN, M_GEOM); } else sc->sc_physpath = NULL; sc->sc_error = ioerror; sc->sc_rfailprob = rfailprob; sc->sc_wfailprob = wfailprob; sc->sc_reads = 0; sc->sc_writes = 0; sc->sc_deletes = 0; sc->sc_getattrs = 0; sc->sc_flushes = 0; sc->sc_cmd0s = 0; sc->sc_cmd1s = 0; sc->sc_cmd2s = 0; sc->sc_readbytes = 0; sc->sc_wrotebytes = 0; mtx_init(&sc->sc_lock, "gnop lock", NULL, MTX_DEF); gp->softc = sc; gp->start = g_nop_start; gp->orphan = g_nop_orphan; gp->resize = g_nop_resize; gp->access = g_nop_access; gp->dumpconf = g_nop_dumpconf; newpp = g_new_providerf(gp, "%s", gp->name); newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; newpp->mediasize = size; newpp->sectorsize = secsize; newpp->stripesize = stripesize; newpp->stripeoffset = stripeoffset; cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach to provider %s.", pp->name); goto fail; } newpp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED; g_error_provider(newpp, 0); G_NOP_DEBUG(0, "Device %s created.", gp->name); return (0); fail: if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); g_destroy_provider(newpp); mtx_destroy(&sc->sc_lock); free(sc->sc_physpath, M_GEOM); g_free(gp->softc); g_destroy_geom(gp); return (error); } static int g_nop_destroy(struct g_geom *gp, boolean_t force) { struct g_nop_softc *sc; struct g_provider *pp; g_topology_assert(); sc = gp->softc; if (sc == NULL) return (ENXIO); free(sc->sc_physpath, M_GEOM); pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_NOP_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_NOP_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, 
pp->acr, pp->acw, pp->ace); return (EBUSY); } } else { G_NOP_DEBUG(0, "Device %s removed.", gp->name); } gp->softc = NULL; mtx_destroy(&sc->sc_lock); g_free(sc); g_wither_geom(gp, ENXIO); return (0); } static int g_nop_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { return (g_nop_destroy(gp, 0)); } static void g_nop_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; intmax_t *error, *rfailprob, *wfailprob, *offset, *secsize, *size, *stripesize, *stripeoffset; const char *name, *physpath; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } error = gctl_get_paraml(req, "error", sizeof(*error)); if (error == NULL) { gctl_error(req, "No '%s' argument", "error"); return; } rfailprob = gctl_get_paraml(req, "rfailprob", sizeof(*rfailprob)); if (rfailprob == NULL) { gctl_error(req, "No '%s' argument", "rfailprob"); return; } if (*rfailprob < -1 || *rfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "rfailprob"); return; } wfailprob = gctl_get_paraml(req, "wfailprob", sizeof(*wfailprob)); if (wfailprob == NULL) { gctl_error(req, "No '%s' argument", "wfailprob"); return; } if (*wfailprob < -1 || *wfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "wfailprob"); return; } offset = gctl_get_paraml(req, "offset", sizeof(*offset)); if (offset == NULL) { gctl_error(req, "No '%s' argument", "offset"); return; } if (*offset < 0) { gctl_error(req, "Invalid '%s' argument", "offset"); return; } size = gctl_get_paraml(req, "size", sizeof(*size)); if (size == NULL) { gctl_error(req, "No '%s' argument", "size"); return; } if (*size < 0) { gctl_error(req, "Invalid '%s' argument", "size"); return; } secsize = gctl_get_paraml(req, "secsize", sizeof(*secsize)); if (secsize == NULL) { gctl_error(req, "No '%s' argument", 
"secsize"); return; } if (*secsize < 0) { gctl_error(req, "Invalid '%s' argument", "secsize"); return; } stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "No '%s' argument", "stripesize"); return; } if (*stripesize < 0) { gctl_error(req, "Invalid '%s' argument", "stripesize"); return; } stripeoffset = gctl_get_paraml(req, "stripeoffset", sizeof(*stripeoffset)); if (stripeoffset == NULL) { gctl_error(req, "No '%s' argument", "stripeoffset"); return; } if (*stripeoffset < 0) { gctl_error(req, "Invalid '%s' argument", "stripeoffset"); return; } physpath = gctl_get_asciiparam(req, "physpath"); for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_NOP_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } if (g_nop_create(req, mp, pp, *error == -1 ? EIO : (int)*error, *rfailprob == -1 ? 0 : (u_int)*rfailprob, *wfailprob == -1 ? 
0 : (u_int)*wfailprob, (off_t)*offset, (off_t)*size, (u_int)*secsize, - (u_int)*stripesize, (u_int)*stripeoffset, + (off_t)*stripesize, (off_t)*stripeoffset, physpath) != 0) { return; } } } static void g_nop_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_nop_softc *sc; struct g_provider *pp; intmax_t *error, *rfailprob, *wfailprob; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } error = gctl_get_paraml(req, "error", sizeof(*error)); if (error == NULL) { gctl_error(req, "No '%s' argument", "error"); return; } rfailprob = gctl_get_paraml(req, "rfailprob", sizeof(*rfailprob)); if (rfailprob == NULL) { gctl_error(req, "No '%s' argument", "rfailprob"); return; } if (*rfailprob < -1 || *rfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "rfailprob"); return; } wfailprob = gctl_get_paraml(req, "wfailprob", sizeof(*wfailprob)); if (wfailprob == NULL) { gctl_error(req, "No '%s' argument", "wfailprob"); return; } if (*wfailprob < -1 || *wfailprob > 100) { gctl_error(req, "Invalid '%s' argument", "wfailprob"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL || pp->geom->class != mp) { G_NOP_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } sc = pp->geom->softc; if (*error != -1) sc->sc_error = (int)*error; if (*rfailprob != -1) sc->sc_rfailprob = (u_int)*rfailprob; if (*wfailprob != -1) sc->sc_wfailprob = (u_int)*wfailprob; } } static struct g_geom * g_nop_find_geom(struct g_class *mp, const char *name) { struct 
g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { if (strcmp(gp->name, name) == 0) return (gp); } return (NULL); } static void g_nop_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_geom *gp; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); gp = g_nop_find_geom(mp, name); if (gp == NULL) { G_NOP_DEBUG(1, "Device %s is invalid.", name); gctl_error(req, "Device %s is invalid.", name); return; } error = g_nop_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", gp->name, error); return; } } } static void g_nop_ctl_reset(struct gctl_req *req, struct g_class *mp) { struct g_nop_softc *sc; struct g_provider *pp; const char *name; char param[16]; int i, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL || pp->geom->class != mp) { G_NOP_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", 
name); return; } sc = pp->geom->softc; sc->sc_reads = 0; sc->sc_writes = 0; sc->sc_deletes = 0; sc->sc_getattrs = 0; sc->sc_flushes = 0; sc->sc_cmd0s = 0; sc->sc_cmd1s = 0; sc->sc_cmd2s = 0; sc->sc_readbytes = 0; sc->sc_wrotebytes = 0; } } static void g_nop_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_NOP_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_nop_ctl_create(req, mp); return; } else if (strcmp(verb, "configure") == 0) { g_nop_ctl_configure(req, mp); return; } else if (strcmp(verb, "destroy") == 0) { g_nop_ctl_destroy(req, mp); return; } else if (strcmp(verb, "reset") == 0) { g_nop_ctl_reset(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_nop_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_nop_softc *sc; if (pp != NULL || cp != NULL) return; sc = gp->softc; sbuf_printf(sb, "%s%jd\n", indent, (intmax_t)sc->sc_offset); sbuf_printf(sb, "%s%u\n", indent, sc->sc_rfailprob); sbuf_printf(sb, "%s%u\n", indent, sc->sc_wfailprob); sbuf_printf(sb, "%s%d\n", indent, sc->sc_error); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_reads); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_writes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_deletes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_getattrs); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_flushes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd0s); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd1s); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_cmd2s); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_readbytes); sbuf_printf(sb, "%s%ju\n", indent, sc->sc_wrotebytes); } DECLARE_GEOM_CLASS(g_nop_class, g_nop); MODULE_VERSION(geom_nop, 0); Index: 
head/sys/geom/part/g_part.c =================================================================== --- head/sys/geom/part/g_part.c (revision 339814) +++ head/sys/geom/part/g_part.c (revision 339815) @@ -1,2414 +1,2414 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002, 2005-2009 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" #ifndef _PATH_DEV #define _PATH_DEV "/dev/" #endif static kobj_method_t g_part_null_methods[] = { { 0, 0 } }; static struct g_part_scheme g_part_null_scheme = { "(none)", g_part_null_methods, sizeof(struct g_part_table), }; TAILQ_HEAD(, g_part_scheme) g_part_schemes = TAILQ_HEAD_INITIALIZER(g_part_schemes); struct g_part_alias_list { const char *lexeme; enum g_part_alias alias; } g_part_alias_list[G_PART_ALIAS_COUNT] = { { "apple-apfs", G_PART_ALIAS_APPLE_APFS }, { "apple-boot", G_PART_ALIAS_APPLE_BOOT }, { "apple-core-storage", G_PART_ALIAS_APPLE_CORE_STORAGE }, { "apple-hfs", G_PART_ALIAS_APPLE_HFS }, { "apple-label", G_PART_ALIAS_APPLE_LABEL }, { "apple-raid", G_PART_ALIAS_APPLE_RAID }, { "apple-raid-offline", G_PART_ALIAS_APPLE_RAID_OFFLINE }, { "apple-tv-recovery", G_PART_ALIAS_APPLE_TV_RECOVERY }, { "apple-ufs", G_PART_ALIAS_APPLE_UFS }, { "bios-boot", G_PART_ALIAS_BIOS_BOOT }, { "chromeos-firmware", G_PART_ALIAS_CHROMEOS_FIRMWARE }, { "chromeos-kernel", G_PART_ALIAS_CHROMEOS_KERNEL }, { "chromeos-reserved", G_PART_ALIAS_CHROMEOS_RESERVED }, { "chromeos-root", G_PART_ALIAS_CHROMEOS_ROOT }, { "dragonfly-ccd", G_PART_ALIAS_DFBSD_CCD }, { "dragonfly-hammer", G_PART_ALIAS_DFBSD_HAMMER }, { "dragonfly-hammer2", G_PART_ALIAS_DFBSD_HAMMER2 }, { "dragonfly-label32", G_PART_ALIAS_DFBSD }, { "dragonfly-label64", G_PART_ALIAS_DFBSD64 }, { "dragonfly-legacy", G_PART_ALIAS_DFBSD_LEGACY }, { "dragonfly-swap", G_PART_ALIAS_DFBSD_SWAP }, { "dragonfly-ufs", G_PART_ALIAS_DFBSD_UFS }, { "dragonfly-vinum", G_PART_ALIAS_DFBSD_VINUM }, { "ebr", G_PART_ALIAS_EBR }, { "efi", G_PART_ALIAS_EFI }, { "fat16", G_PART_ALIAS_MS_FAT16 }, { "fat32", G_PART_ALIAS_MS_FAT32 }, { "fat32lba", G_PART_ALIAS_MS_FAT32LBA }, { "freebsd", G_PART_ALIAS_FREEBSD }, { "freebsd-boot", 
G_PART_ALIAS_FREEBSD_BOOT }, { "freebsd-nandfs", G_PART_ALIAS_FREEBSD_NANDFS }, { "freebsd-swap", G_PART_ALIAS_FREEBSD_SWAP }, { "freebsd-ufs", G_PART_ALIAS_FREEBSD_UFS }, { "freebsd-vinum", G_PART_ALIAS_FREEBSD_VINUM }, { "freebsd-zfs", G_PART_ALIAS_FREEBSD_ZFS }, { "linux-data", G_PART_ALIAS_LINUX_DATA }, { "linux-lvm", G_PART_ALIAS_LINUX_LVM }, { "linux-raid", G_PART_ALIAS_LINUX_RAID }, { "linux-swap", G_PART_ALIAS_LINUX_SWAP }, { "mbr", G_PART_ALIAS_MBR }, { "ms-basic-data", G_PART_ALIAS_MS_BASIC_DATA }, { "ms-ldm-data", G_PART_ALIAS_MS_LDM_DATA }, { "ms-ldm-metadata", G_PART_ALIAS_MS_LDM_METADATA }, { "ms-recovery", G_PART_ALIAS_MS_RECOVERY }, { "ms-reserved", G_PART_ALIAS_MS_RESERVED }, { "ms-spaces", G_PART_ALIAS_MS_SPACES }, { "netbsd-ccd", G_PART_ALIAS_NETBSD_CCD }, { "netbsd-cgd", G_PART_ALIAS_NETBSD_CGD }, { "netbsd-ffs", G_PART_ALIAS_NETBSD_FFS }, { "netbsd-lfs", G_PART_ALIAS_NETBSD_LFS }, { "netbsd-raid", G_PART_ALIAS_NETBSD_RAID }, { "netbsd-swap", G_PART_ALIAS_NETBSD_SWAP }, { "ntfs", G_PART_ALIAS_MS_NTFS }, { "openbsd-data", G_PART_ALIAS_OPENBSD_DATA }, { "prep-boot", G_PART_ALIAS_PREP_BOOT }, { "vmware-reserved", G_PART_ALIAS_VMRESERVED }, { "vmware-vmfs", G_PART_ALIAS_VMFS }, { "vmware-vmkdiag", G_PART_ALIAS_VMKDIAG }, { "vmware-vsanhdr", G_PART_ALIAS_VMVSANHDR }, }; SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, part, CTLFLAG_RW, 0, "GEOM_PART stuff"); static u_int check_integrity = 1; SYSCTL_UINT(_kern_geom_part, OID_AUTO, check_integrity, CTLFLAG_RWTUN, &check_integrity, 1, "Enable integrity checking"); static u_int auto_resize = 1; SYSCTL_UINT(_kern_geom_part, OID_AUTO, auto_resize, CTLFLAG_RWTUN, &auto_resize, 1, "Enable auto resize"); /* * The GEOM partitioning class. 
*/ static g_ctl_req_t g_part_ctlreq; static g_ctl_destroy_geom_t g_part_destroy_geom; static g_fini_t g_part_fini; static g_init_t g_part_init; static g_taste_t g_part_taste; static g_access_t g_part_access; static g_dumpconf_t g_part_dumpconf; static g_orphan_t g_part_orphan; static g_spoiled_t g_part_spoiled; static g_start_t g_part_start; static g_resize_t g_part_resize; static g_ioctl_t g_part_ioctl; static struct g_class g_part_class = { .name = "PART", .version = G_VERSION, /* Class methods. */ .ctlreq = g_part_ctlreq, .destroy_geom = g_part_destroy_geom, .fini = g_part_fini, .init = g_part_init, .taste = g_part_taste, /* Geom methods. */ .access = g_part_access, .dumpconf = g_part_dumpconf, .orphan = g_part_orphan, .spoiled = g_part_spoiled, .start = g_part_start, .resize = g_part_resize, .ioctl = g_part_ioctl, }; DECLARE_GEOM_CLASS(g_part_class, g_part); MODULE_VERSION(g_part, 0); /* * Support functions. */ static void g_part_wither(struct g_geom *, int); const char * g_part_alias_name(enum g_part_alias alias) { int i; for (i = 0; i < G_PART_ALIAS_COUNT; i++) { if (g_part_alias_list[i].alias != alias) continue; return (g_part_alias_list[i].lexeme); } return (NULL); } void g_part_geometry_heads(off_t blocks, u_int sectors, off_t *bestchs, u_int *bestheads) { static u_int candidate_heads[] = { 1, 2, 16, 32, 64, 128, 255, 0 }; off_t chs, cylinders; u_int heads; int idx; *bestchs = 0; *bestheads = 0; for (idx = 0; candidate_heads[idx] != 0; idx++) { heads = candidate_heads[idx]; cylinders = blocks / heads / sectors; if (cylinders < heads || cylinders < sectors) break; if (cylinders > 1023) continue; chs = cylinders * heads * sectors; if (chs > *bestchs || (chs == *bestchs && *bestheads == 1)) { *bestchs = chs; *bestheads = heads; } } } static void g_part_geometry(struct g_part_table *table, struct g_consumer *cp, off_t blocks) { static u_int candidate_sectors[] = { 1, 9, 17, 33, 63, 0 }; off_t chs, bestchs; u_int heads, sectors; int idx; if 
(g_getattr("GEOM::fwsectors", cp, &sectors) != 0 || sectors == 0 || g_getattr("GEOM::fwheads", cp, &heads) != 0 || heads == 0) { table->gpt_fixgeom = 0; table->gpt_heads = 0; table->gpt_sectors = 0; bestchs = 0; for (idx = 0; candidate_sectors[idx] != 0; idx++) { sectors = candidate_sectors[idx]; g_part_geometry_heads(blocks, sectors, &chs, &heads); if (chs == 0) continue; /* * Prefer a geometry with sectors > 1, but only if * it doesn't bump down the number of heads to 1. */ if (chs > bestchs || (chs == bestchs && heads > 1 && table->gpt_sectors == 1)) { bestchs = chs; table->gpt_heads = heads; table->gpt_sectors = sectors; } } /* * If we didn't find a geometry at all, then the disk is * too big. This means we can use the maximum number of * heads and sectors. */ if (bestchs == 0) { table->gpt_heads = 255; table->gpt_sectors = 63; } } else { table->gpt_fixgeom = 1; table->gpt_heads = heads; table->gpt_sectors = sectors; } } static void g_part_get_physpath_done(struct bio *bp) { struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct g_provider *pp; struct bio *pbp; pbp = bp->bio_parent; pp = pbp->bio_to; gp = pp->geom; table = gp->softc; entry = pp->private; if (bp->bio_error == 0) { char *end; size_t len, remainder; len = strlcat(bp->bio_data, "/", bp->bio_length); if (len < bp->bio_length) { end = bp->bio_data + len; remainder = bp->bio_length - len; G_PART_NAME(table, entry, end, remainder); } } g_std_done(bp); } #define DPRINTF(...) 
if (bootverbose) { \ printf("GEOM_PART: " __VA_ARGS__); \ } static int g_part_check_integrity(struct g_part_table *table, struct g_consumer *cp) { struct g_part_entry *e1, *e2; struct g_provider *pp; off_t offset; int failed; failed = 0; pp = cp->provider; if (table->gpt_last < table->gpt_first) { DPRINTF("last LBA is below first LBA: %jd < %jd\n", (intmax_t)table->gpt_last, (intmax_t)table->gpt_first); failed++; } if (table->gpt_last > pp->mediasize / pp->sectorsize - 1) { DPRINTF("last LBA extends beyond mediasize: " "%jd > %jd\n", (intmax_t)table->gpt_last, (intmax_t)pp->mediasize / pp->sectorsize - 1); failed++; } LIST_FOREACH(e1, &table->gpt_entry, gpe_entry) { if (e1->gpe_deleted || e1->gpe_internal) continue; if (e1->gpe_start < table->gpt_first) { DPRINTF("partition %d has start offset below first " "LBA: %jd < %jd\n", e1->gpe_index, (intmax_t)e1->gpe_start, (intmax_t)table->gpt_first); failed++; } if (e1->gpe_start > table->gpt_last) { DPRINTF("partition %d has start offset beyond last " "LBA: %jd > %jd\n", e1->gpe_index, (intmax_t)e1->gpe_start, (intmax_t)table->gpt_last); failed++; } if (e1->gpe_end < e1->gpe_start) { DPRINTF("partition %d has end offset below start " "offset: %jd < %jd\n", e1->gpe_index, (intmax_t)e1->gpe_end, (intmax_t)e1->gpe_start); failed++; } if (e1->gpe_end > table->gpt_last) { DPRINTF("partition %d has end offset beyond last " "LBA: %jd > %jd\n", e1->gpe_index, (intmax_t)e1->gpe_end, (intmax_t)table->gpt_last); failed++; } if (pp->stripesize > 0) { offset = e1->gpe_start * pp->sectorsize; if (e1->gpe_offset > offset) offset = e1->gpe_offset; if ((offset + pp->stripeoffset) % pp->stripesize) { DPRINTF("partition %d on (%s, %s) is not " - "aligned on %u bytes\n", e1->gpe_index, + "aligned on %ju bytes\n", e1->gpe_index, pp->name, table->gpt_scheme->name, - pp->stripesize); + (uintmax_t)pp->stripesize); /* Don't treat this as a critical failure */ } } e2 = e1; while ((e2 = LIST_NEXT(e2, gpe_entry)) != NULL) { if (e2->gpe_deleted || 
e2->gpe_internal) continue; if (e1->gpe_start >= e2->gpe_start && e1->gpe_start <= e2->gpe_end) { DPRINTF("partition %d has start offset inside " "partition %d: start[%d] %jd >= start[%d] " "%jd <= end[%d] %jd\n", e1->gpe_index, e2->gpe_index, e2->gpe_index, (intmax_t)e2->gpe_start, e1->gpe_index, (intmax_t)e1->gpe_start, e2->gpe_index, (intmax_t)e2->gpe_end); failed++; } if (e1->gpe_end >= e2->gpe_start && e1->gpe_end <= e2->gpe_end) { DPRINTF("partition %d has end offset inside " "partition %d: start[%d] %jd >= end[%d] " "%jd <= end[%d] %jd\n", e1->gpe_index, e2->gpe_index, e2->gpe_index, (intmax_t)e2->gpe_start, e1->gpe_index, (intmax_t)e1->gpe_end, e2->gpe_index, (intmax_t)e2->gpe_end); failed++; } if (e1->gpe_start < e2->gpe_start && e1->gpe_end > e2->gpe_end) { DPRINTF("partition %d contains partition %d: " "start[%d] %jd > start[%d] %jd, end[%d] " "%jd < end[%d] %jd\n", e1->gpe_index, e2->gpe_index, e1->gpe_index, (intmax_t)e1->gpe_start, e2->gpe_index, (intmax_t)e2->gpe_start, e2->gpe_index, (intmax_t)e2->gpe_end, e1->gpe_index, (intmax_t)e1->gpe_end); failed++; } } } if (failed != 0) { printf("GEOM_PART: integrity check failed (%s, %s)\n", pp->name, table->gpt_scheme->name); if (check_integrity != 0) return (EINVAL); table->gpt_corrupt = 1; } return (0); } #undef DPRINTF struct g_part_entry * g_part_new_entry(struct g_part_table *table, int index, quad_t start, quad_t end) { struct g_part_entry *entry, *last; last = NULL; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_index == index) break; if (entry->gpe_index > index) { entry = NULL; break; } last = entry; } if (entry == NULL) { entry = g_malloc(table->gpt_scheme->gps_entrysz, M_WAITOK | M_ZERO); entry->gpe_index = index; if (last == NULL) LIST_INSERT_HEAD(&table->gpt_entry, entry, gpe_entry); else LIST_INSERT_AFTER(last, entry, gpe_entry); } else entry->gpe_offset = 0; entry->gpe_start = start; entry->gpe_end = end; return (entry); } static void g_part_new_provider(struct g_geom *gp, 
struct g_part_table *table, struct g_part_entry *entry) { struct g_consumer *cp; struct g_provider *pp; struct sbuf *sb; struct g_geom_alias *gap; off_t offset; cp = LIST_FIRST(&gp->consumer); pp = cp->provider; offset = entry->gpe_start * pp->sectorsize; if (entry->gpe_offset < offset) entry->gpe_offset = offset; if (entry->gpe_pp == NULL) { /* * Add aliases to the geom before we create the provider so that * geom_dev can taste it with all the aliases in place so all * the aliased dev_t instances get created for each partition * (eg foo5p7 gets created for bar5p7 when foo is an alias of bar). */ LIST_FOREACH(gap, &table->gpt_gp->aliases, ga_next) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gap->ga_alias); sbuf_finish(sb); g_geom_add_alias(gp, sbuf_data(sb)); sbuf_delete(sb); } sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_finish(sb); entry->gpe_pp = g_new_providerf(gp, "%s", sbuf_data(sb)); sbuf_delete(sb); entry->gpe_pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; entry->gpe_pp->private = entry; /* Close the circle. */ } entry->gpe_pp->index = entry->gpe_index - 1; /* index is 1-based. 
*/ entry->gpe_pp->mediasize = (entry->gpe_end - entry->gpe_start + 1) * pp->sectorsize; entry->gpe_pp->mediasize -= entry->gpe_offset - offset; entry->gpe_pp->sectorsize = pp->sectorsize; entry->gpe_pp->stripesize = pp->stripesize; entry->gpe_pp->stripeoffset = pp->stripeoffset + entry->gpe_offset; if (pp->stripesize > 0) entry->gpe_pp->stripeoffset %= pp->stripesize; entry->gpe_pp->flags |= pp->flags & G_PF_ACCEPT_UNMAPPED; g_error_provider(entry->gpe_pp, 0); } static struct g_geom* g_part_find_geom(const char *name) { struct g_geom *gp; LIST_FOREACH(gp, &g_part_class.geom, geom) { if ((gp->flags & G_GEOM_WITHER) == 0 && strcmp(name, gp->name) == 0) break; } return (gp); } static int g_part_parm_geom(struct gctl_req *req, const char *name, struct g_geom **v) { struct g_geom *gp; const char *gname; gname = gctl_get_asciiparam(req, name); if (gname == NULL) return (ENOATTR); if (strncmp(gname, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) gname += sizeof(_PATH_DEV) - 1; gp = g_part_find_geom(gname); if (gp == NULL) { gctl_error(req, "%d %s '%s'", EINVAL, name, gname); return (EINVAL); } *v = gp; return (0); } static int g_part_parm_provider(struct gctl_req *req, const char *name, struct g_provider **v) { struct g_provider *pp; const char *pname; pname = gctl_get_asciiparam(req, name); if (pname == NULL) return (ENOATTR); if (strncmp(pname, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) pname += sizeof(_PATH_DEV) - 1; pp = g_provider_by_name(pname); if (pp == NULL) { gctl_error(req, "%d %s '%s'", EINVAL, name, pname); return (EINVAL); } *v = pp; return (0); } static int g_part_parm_quad(struct gctl_req *req, const char *name, quad_t *v) { const char *p; char *x; quad_t q; p = gctl_get_asciiparam(req, name); if (p == NULL) return (ENOATTR); q = strtoq(p, &x, 0); if (*x != '\0' || q < 0) { gctl_error(req, "%d %s '%s'", EINVAL, name, p); return (EINVAL); } *v = q; return (0); } static int g_part_parm_scheme(struct gctl_req *req, const char *name, struct g_part_scheme **v) { struct 
g_part_scheme *s; const char *p; p = gctl_get_asciiparam(req, name); if (p == NULL) return (ENOATTR); TAILQ_FOREACH(s, &g_part_schemes, scheme_list) { if (s == &g_part_null_scheme) continue; if (!strcasecmp(s->name, p)) break; } if (s == NULL) { gctl_error(req, "%d %s '%s'", EINVAL, name, p); return (EINVAL); } *v = s; return (0); } static int g_part_parm_str(struct gctl_req *req, const char *name, const char **v) { const char *p; p = gctl_get_asciiparam(req, name); if (p == NULL) return (ENOATTR); /* An empty label is always valid. */ if (strcmp(name, "label") != 0 && p[0] == '\0') { gctl_error(req, "%d %s '%s'", EINVAL, name, p); return (EINVAL); } *v = p; return (0); } static int g_part_parm_intmax(struct gctl_req *req, const char *name, u_int *v) { const intmax_t *p; int size; p = gctl_get_param(req, name, &size); if (p == NULL) return (ENOATTR); if (size != sizeof(*p) || *p < 0 || *p > INT_MAX) { gctl_error(req, "%d %s '%jd'", EINVAL, name, *p); return (EINVAL); } *v = (u_int)*p; return (0); } static int g_part_parm_uint32(struct gctl_req *req, const char *name, u_int *v) { const uint32_t *p; int size; p = gctl_get_param(req, name, &size); if (p == NULL) return (ENOATTR); if (size != sizeof(*p) || *p > INT_MAX) { gctl_error(req, "%d %s '%u'", EINVAL, name, (unsigned int)*p); return (EINVAL); } *v = (u_int)*p; return (0); } static int g_part_parm_bootcode(struct gctl_req *req, const char *name, const void **v, unsigned int *s) { const void *p; int size; p = gctl_get_param(req, name, &size); if (p == NULL) return (ENOATTR); *v = p; *s = size; return (0); } static int g_part_probe(struct g_geom *gp, struct g_consumer *cp, int depth) { struct g_part_scheme *iter, *scheme; struct g_part_table *table; int pri, probe; table = gp->softc; scheme = (table != NULL) ? table->gpt_scheme : NULL; pri = (scheme != NULL) ? 
G_PART_PROBE(table, cp) : INT_MIN; if (pri == 0) goto done; if (pri > 0) { /* error */ scheme = NULL; pri = INT_MIN; } TAILQ_FOREACH(iter, &g_part_schemes, scheme_list) { if (iter == &g_part_null_scheme) continue; table = (void *)kobj_create((kobj_class_t)iter, M_GEOM, M_WAITOK); table->gpt_gp = gp; table->gpt_scheme = iter; table->gpt_depth = depth; probe = G_PART_PROBE(table, cp); if (probe <= 0 && probe > pri) { pri = probe; scheme = iter; if (gp->softc != NULL) kobj_delete((kobj_t)gp->softc, M_GEOM); gp->softc = table; if (pri == 0) goto done; } else kobj_delete((kobj_t)table, M_GEOM); } done: return ((scheme == NULL) ? ENXIO : 0); } /* * Control request functions. */ static int g_part_ctl_add(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_provider *pp; struct g_part_entry *delent, *last, *entry; struct g_part_table *table; struct sbuf *sb; quad_t end; unsigned int index; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); pp = LIST_FIRST(&gp->consumer)->provider; table = gp->softc; end = gpp->gpp_start + gpp->gpp_size - 1; if (gpp->gpp_start < table->gpt_first || gpp->gpp_start > table->gpt_last) { gctl_error(req, "%d start '%jd'", EINVAL, (intmax_t)gpp->gpp_start); return (EINVAL); } if (end < gpp->gpp_start || end > table->gpt_last) { gctl_error(req, "%d size '%jd'", EINVAL, (intmax_t)gpp->gpp_size); return (EINVAL); } if (gpp->gpp_index > table->gpt_entries) { gctl_error(req, "%d index '%d'", EINVAL, gpp->gpp_index); return (EINVAL); } delent = last = NULL; index = (gpp->gpp_index > 0) ? 
gpp->gpp_index : 1; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted) { if (entry->gpe_index == index) delent = entry; continue; } if (entry->gpe_index == index) index = entry->gpe_index + 1; if (entry->gpe_index < index) last = entry; if (entry->gpe_internal) continue; if (gpp->gpp_start >= entry->gpe_start && gpp->gpp_start <= entry->gpe_end) { gctl_error(req, "%d start '%jd'", ENOSPC, (intmax_t)gpp->gpp_start); return (ENOSPC); } if (end >= entry->gpe_start && end <= entry->gpe_end) { gctl_error(req, "%d end '%jd'", ENOSPC, (intmax_t)end); return (ENOSPC); } if (gpp->gpp_start < entry->gpe_start && end > entry->gpe_end) { gctl_error(req, "%d size '%jd'", ENOSPC, (intmax_t)gpp->gpp_size); return (ENOSPC); } } if (gpp->gpp_index > 0 && index != gpp->gpp_index) { gctl_error(req, "%d index '%d'", EEXIST, gpp->gpp_index); return (EEXIST); } if (index > table->gpt_entries) { gctl_error(req, "%d index '%d'", ENOSPC, index); return (ENOSPC); } entry = (delent == NULL) ? g_malloc(table->gpt_scheme->gps_entrysz, M_WAITOK | M_ZERO) : delent; entry->gpe_index = index; entry->gpe_start = gpp->gpp_start; entry->gpe_end = end; error = G_PART_ADD(table, entry, gpp); if (error) { gctl_error(req, "%d", error); if (delent == NULL) g_free(entry); return (error); } if (delent == NULL) { if (last == NULL) LIST_INSERT_HEAD(&table->gpt_entry, entry, gpe_entry); else LIST_INSERT_AFTER(last, entry, gpe_entry); entry->gpe_created = 1; } else { entry->gpe_deleted = 0; entry->gpe_modified = 1; } g_part_new_provider(gp, table, entry); /* Provide feedback if so requested. 
*/ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); if (pp->stripesize > 0 && entry->gpe_pp->stripeoffset != 0) sbuf_printf(sb, " added, but partition is not " - "aligned on %u bytes\n", pp->stripesize); + "aligned on %ju bytes\n", (uintmax_t)pp->stripesize); else sbuf_cat(sb, " added\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_bootcode(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_part_table *table; struct sbuf *sb; int error, sz; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; sz = table->gpt_scheme->gps_bootcodesz; if (sz == 0) { error = ENODEV; goto fail; } if (gpp->gpp_codesize > sz) { error = EFBIG; goto fail; } error = G_PART_BOOTCODE(table, gpp); if (error) goto fail; /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "bootcode written to %s\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); fail: gctl_error(req, "%d", error); return (error); } static int g_part_ctl_commit(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_entry *entry, *tmp; struct g_part_table *table; char *buf; int error, i; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; if (!table->gpt_opened) { gctl_error(req, "%d", EPERM); return (EPERM); } g_topology_unlock(); cp = LIST_FIRST(&gp->consumer); if ((table->gpt_smhead | table->gpt_smtail) != 0) { pp = cp->provider; buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); while (table->gpt_smhead != 0) { i = ffs(table->gpt_smhead) - 1; error = g_write_data(cp, i * pp->sectorsize, buf, 
pp->sectorsize); if (error) { g_free(buf); goto fail; } table->gpt_smhead &= ~(1 << i); } while (table->gpt_smtail != 0) { i = ffs(table->gpt_smtail) - 1; error = g_write_data(cp, pp->mediasize - (i + 1) * pp->sectorsize, buf, pp->sectorsize); if (error) { g_free(buf); goto fail; } table->gpt_smtail &= ~(1 << i); } g_free(buf); } if (table->gpt_scheme == &g_part_null_scheme) { g_topology_lock(); g_access(cp, -1, -1, -1); g_part_wither(gp, ENXIO); return (0); } error = G_PART_WRITE(table, cp); if (error) goto fail; LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) { if (!entry->gpe_deleted) { /* Notify consumers that provider might be changed. */ if (entry->gpe_modified && ( entry->gpe_pp->acw + entry->gpe_pp->ace + entry->gpe_pp->acr) == 0) g_media_changed(entry->gpe_pp, M_NOWAIT); entry->gpe_created = 0; entry->gpe_modified = 0; continue; } LIST_REMOVE(entry, gpe_entry); g_free(entry); } table->gpt_created = 0; table->gpt_opened = 0; g_topology_lock(); g_access(cp, -1, -1, -1); return (0); fail: g_topology_lock(); gctl_error(req, "%d", error); return (error); } static int g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_scheme *scheme; struct g_part_table *null, *table; struct sbuf *sb; int attr, error; pp = gpp->gpp_provider; scheme = gpp->gpp_scheme; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, pp->name)); g_topology_assert(); /* Check that there isn't already a g_part geom on the provider. 
*/ gp = g_part_find_geom(pp->name); if (gp != NULL) { null = gp->softc; if (null->gpt_scheme != &g_part_null_scheme) { gctl_error(req, "%d geom '%s'", EEXIST, pp->name); return (EEXIST); } } else null = NULL; if ((gpp->gpp_parms & G_PART_PARM_ENTRIES) && (gpp->gpp_entries < scheme->gps_minent || gpp->gpp_entries > scheme->gps_maxent)) { gctl_error(req, "%d entries '%d'", EINVAL, gpp->gpp_entries); return (EINVAL); } if (null == NULL) gp = g_new_geomf(&g_part_class, "%s", pp->name); gp->softc = kobj_create((kobj_class_t)gpp->gpp_scheme, M_GEOM, M_WAITOK); table = gp->softc; table->gpt_gp = gp; table->gpt_scheme = gpp->gpp_scheme; table->gpt_entries = (gpp->gpp_parms & G_PART_PARM_ENTRIES) ? gpp->gpp_entries : scheme->gps_minent; LIST_INIT(&table->gpt_entry); if (null == NULL) { cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 1, 1); if (error != 0) { g_part_wither(gp, error); gctl_error(req, "%d geom '%s'", error, pp->name); return (error); } table->gpt_opened = 1; } else { cp = LIST_FIRST(&gp->consumer); table->gpt_opened = null->gpt_opened; table->gpt_smhead = null->gpt_smhead; table->gpt_smtail = null->gpt_smtail; } g_topology_unlock(); /* Make sure the provider has media. */ if (pp->mediasize == 0 || pp->sectorsize == 0) { error = ENODEV; goto fail; } /* Make sure we can nest and if so, determine our depth. */ error = g_getattr("PART::isleaf", cp, &attr); if (!error && attr) { error = ENODEV; goto fail; } error = g_getattr("PART::depth", cp, &attr); table->gpt_depth = (!error) ? attr + 1 : 0; /* * Synthesize a disk geometry. Some partitioning schemes * depend on it and since some file systems need it even * when the partitition scheme doesn't, we do it here in * scheme-independent code. 
*/ g_part_geometry(table, cp, pp->mediasize / pp->sectorsize); error = G_PART_CREATE(table, gpp); if (error) goto fail; g_topology_lock(); table->gpt_created = 1; if (null != NULL) kobj_delete((kobj_t)null, M_GEOM); /* * Support automatic commit by filling in the gpp_geom * parameter. */ gpp->gpp_parms |= G_PART_PARM_GEOM; gpp->gpp_geom = gp; /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "%s created\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); fail: g_topology_lock(); if (null == NULL) { g_access(cp, -1, -1, -1); g_part_wither(gp, error); } else { kobj_delete((kobj_t)gp->softc, M_GEOM); gp->softc = null; } gctl_error(req, "%d provider", error); return (error); } static int g_part_ctl_delete(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_provider *pp; struct g_part_entry *entry; struct g_part_table *table; struct sbuf *sb; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } pp = entry->gpe_pp; if (pp != NULL) { if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) { gctl_error(req, "%d", EBUSY); return (EBUSY); } pp->private = NULL; entry->gpe_pp = NULL; } if (pp != NULL) g_wither_provider(pp, ENXIO); /* Provide feedback if so requested. 
*/ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_cat(sb, " deleted\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } if (entry->gpe_created) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } else { entry->gpe_modified = 0; entry->gpe_deleted = 1; } return (0); } static int g_part_ctl_destroy(struct gctl_req *req, struct g_part_parms *gpp) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_entry *entry, *tmp; struct g_part_table *null, *table; struct sbuf *sb; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; /* Check for busy providers. */ LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (gpp->gpp_force) { pp = entry->gpe_pp; if (pp == NULL) continue; if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) continue; } gctl_error(req, "%d", EBUSY); return (EBUSY); } if (gpp->gpp_force) { /* Destroy all providers. 
*/ LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) { pp = entry->gpe_pp; if (pp != NULL) { pp->private = NULL; g_wither_provider(pp, ENXIO); } LIST_REMOVE(entry, gpe_entry); g_free(entry); } } error = G_PART_DESTROY(table, gpp); if (error) { gctl_error(req, "%d", error); return (error); } gp->softc = kobj_create((kobj_class_t)&g_part_null_scheme, M_GEOM, M_WAITOK); null = gp->softc; null->gpt_gp = gp; null->gpt_scheme = &g_part_null_scheme; LIST_INIT(&null->gpt_entry); cp = LIST_FIRST(&gp->consumer); pp = cp->provider; null->gpt_last = pp->mediasize / pp->sectorsize - 1; null->gpt_depth = table->gpt_depth; null->gpt_opened = table->gpt_opened; null->gpt_smhead = table->gpt_smhead; null->gpt_smtail = table->gpt_smtail; while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) { LIST_REMOVE(entry, gpe_entry); g_free(entry); } kobj_delete((kobj_t)table, M_GEOM); /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); sbuf_printf(sb, "%s destroyed\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_modify(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_part_entry *entry; struct g_part_table *table; struct sbuf *sb; int error; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } error = G_PART_MODIFY(table, entry, gpp); if (error) { gctl_error(req, "%d", error); return (error); } if (!entry->gpe_created) entry->gpe_modified = 1; /* Provide feedback if so requested. 
*/ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); sbuf_cat(sb, " modified\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_move(struct gctl_req *req, struct g_part_parms *gpp) { gctl_error(req, "%d verb 'move'", ENOSYS); return (ENOSYS); } static int g_part_ctl_recover(struct gctl_req *req, struct g_part_parms *gpp) { struct g_part_table *table; struct g_geom *gp; struct sbuf *sb; int error, recovered; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; error = recovered = 0; if (table->gpt_corrupt) { error = G_PART_RECOVER(table); if (error == 0) error = g_part_check_integrity(table, LIST_FIRST(&gp->consumer)); if (error) { gctl_error(req, "%d recovering '%s' failed", error, gp->name); return (error); } recovered = 1; } /* Provide feedback if so requested. */ if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); if (recovered) sbuf_printf(sb, "%s recovered\n", gp->name); else sbuf_printf(sb, "%s recovering is not needed\n", gp->name); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); } return (0); } static int g_part_ctl_resize(struct gctl_req *req, struct g_part_parms *gpp) { struct g_geom *gp; struct g_provider *pp; struct g_part_entry *pe, *entry; struct g_part_table *table; struct sbuf *sb; quad_t end; int error; off_t mediasize; gp = gpp->gpp_geom; G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); g_topology_assert(); table = gp->softc; /* check gpp_index */ LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { if (entry->gpe_deleted || entry->gpe_internal) continue; if (entry->gpe_index == gpp->gpp_index) break; } if (entry == NULL) { gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); return (ENOENT); } /* check gpp_size */ end = entry->gpe_start + gpp->gpp_size - 1; 
if (gpp->gpp_size < 1 || end > table->gpt_last) { gctl_error(req, "%d size '%jd'", EINVAL, (intmax_t)gpp->gpp_size); return (EINVAL); } LIST_FOREACH(pe, &table->gpt_entry, gpe_entry) { if (pe->gpe_deleted || pe->gpe_internal || pe == entry) continue; if (end >= pe->gpe_start && end <= pe->gpe_end) { gctl_error(req, "%d end '%jd'", ENOSPC, (intmax_t)end); return (ENOSPC); } if (entry->gpe_start < pe->gpe_start && end > pe->gpe_end) { gctl_error(req, "%d size '%jd'", ENOSPC, (intmax_t)gpp->gpp_size); return (ENOSPC); } } pp = entry->gpe_pp; if ((g_debugflags & 16) == 0 && (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)) { if (entry->gpe_end - entry->gpe_start + 1 > gpp->gpp_size) { /* Deny shrinking of an opened partition. */ gctl_error(req, "%d", EBUSY); return (EBUSY); } } error = G_PART_RESIZE(table, entry, gpp); if (error) { gctl_error(req, "%d%s", error, error != EBUSY ? "": " resizing will lead to unexpected shrinking" " due to alignment"); return (error); } if (!entry->gpe_created) entry->gpe_modified = 1; /* update mediasize of changed provider */ mediasize = (entry->gpe_end - entry->gpe_start + 1) * pp->sectorsize; g_resize_provider(pp, mediasize); /* Provide feedback if so requested. 
*/
	if (gpp->gpp_parms & G_PART_PARM_OUTPUT) {
		sb = sbuf_new_auto();
		G_PART_FULLNAME(table, entry, sb, gp->name);
		sbuf_cat(sb, " resized\n");
		sbuf_finish(sb);
		gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
		sbuf_delete(sb);
	}
	return (0);
}

/*
 * Handler shared by the "set" and "unset" verbs.
 *
 * When an index was supplied the matching live entry (neither deleted nor
 * internal) is looked up; otherwise the scheme method is called with a NULL
 * entry.  The attribute name comes from gpp->gpp_attrib and `set` is passed
 * through to G_PART_SETUNSET() unchanged (callers in this file pass 1 for
 * "set" and 0 for "unset").
 *
 * Returns 0 on success, ENOENT when no entry has the requested index, or
 * the error reported by the scheme.  Errors are also recorded on `req`.
 */
static int
g_part_ctl_setunset(struct gctl_req *req, struct g_part_parms *gpp,
    unsigned int set)
{
	struct g_geom *gp;
	struct g_part_entry *entry;
	struct g_part_table *table;
	struct sbuf *sb;
	int error;

	gp = gpp->gpp_geom;
	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name));
	g_topology_assert();

	table = gp->softc;

	if (gpp->gpp_parms & G_PART_PARM_INDEX) {
		LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
			if (entry->gpe_deleted || entry->gpe_internal)
				continue;
			if (entry->gpe_index == gpp->gpp_index)
				break;
		}
		/* The loop ran off the end of the list: nothing matched. */
		if (entry == NULL) {
			gctl_error(req, "%d index '%d'", ENOENT,
			    gpp->gpp_index);
			return (ENOENT);
		}
	} else
		entry = NULL;

	error = G_PART_SETUNSET(table, entry, gpp->gpp_attrib, set);
	if (error) {
		gctl_error(req, "%d attrib '%s'", error, gpp->gpp_attrib);
		return (error);
	}

	/* Provide feedback if so requested. */
	if (gpp->gpp_parms & G_PART_PARM_OUTPUT) {
		sb = sbuf_new_auto();
		sbuf_printf(sb, "%s %sset on ", gpp->gpp_attrib,
		    (set) ? "" : "un");
		/* Name the partition if one was selected, else the geom. */
		if (entry)
			G_PART_FULLNAME(table, entry, sb, gp->name);
		else
			sbuf_cat(sb, gp->name);
		sbuf_cat(sb, "\n");
		sbuf_finish(sb);
		gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
		sbuf_delete(sb);
	}
	return (0);
}

/*
 * Handler for the "undo" verb: throw away the pending in-memory changes
 * and rebuild the table from what is on the media.
 *
 * The table must currently be opened (gpt_opened), otherwise EPERM is
 * returned.  Entries created since the open are withered and, together
 * with entries already marked deleted, removed from the list.  The
 * topology lock is dropped for the re-read and is re-acquired on every
 * return path that follows.
 *
 * If the table was created in this session, or uses the null scheme, the
 * provider is probed again from scratch; when that probe fails the geom
 * is withered and 0 is returned.  On success the providers are
 * re-created, gpt_opened is cleared and the access counts taken for the
 * open are released.
 */
static int
g_part_ctl_undo(struct gctl_req *req, struct g_part_parms *gpp)
{
	struct g_consumer *cp;
	struct g_provider *pp;
	struct g_geom *gp;
	struct g_part_entry *entry, *tmp;
	struct g_part_table *table;
	int error, reprobe;

	gp = gpp->gpp_geom;
	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name));
	g_topology_assert();

	table = gp->softc;
	if (!table->gpt_opened) {
		gctl_error(req, "%d", EPERM);
		return (EPERM);
	}

	cp = LIST_FIRST(&gp->consumer);
	/* The _SAFE variant is needed because entries are freed in the loop. */
	LIST_FOREACH_SAFE(entry, &table->gpt_entry, gpe_entry, tmp) {
		entry->gpe_modified = 0;
		if (entry->gpe_created) {
			pp = entry->gpe_pp;
			if (pp != NULL) {
				pp->private = NULL;
				entry->gpe_pp = NULL;
				g_wither_provider(pp, ENXIO);
			}
			/* Fall into the removal below. */
			entry->gpe_deleted = 1;
		}
		if (entry->gpe_deleted) {
			LIST_REMOVE(entry, gpe_entry);
			g_free(entry);
		}
	}

	g_topology_unlock();

	reprobe = (table->gpt_scheme == &g_part_null_scheme ||
	    table->gpt_created) ? 1 : 0;

	if (reprobe) {
		/* Any remaining non-internal entry blocks the reprobe. */
		LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
			if (entry->gpe_internal)
				continue;
			error = EBUSY;
			goto fail;
		}
		while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) {
			LIST_REMOVE(entry, gpe_entry);
			g_free(entry);
		}
		error = g_part_probe(gp, cp, table->gpt_depth);
		if (error) {
			g_topology_lock();
			g_access(cp, -1, -1, -1);
			g_part_wither(gp, error);
			return (0);
		}
		/* Reload the table pointer from the geom after the probe. */
		table = gp->softc;

		/*
		 * Synthesize a disk geometry. Some partitioning schemes
		 * depend on it and since some file systems need it even
		 * when the partition scheme doesn't, we do it here in
		 * scheme-independent code.
		 */
		pp = cp->provider;
		g_part_geometry(table, cp, pp->mediasize / pp->sectorsize);
	}

	error = G_PART_READ(table, cp);
	if (error)
		goto fail;
	error = g_part_check_integrity(table, cp);
	if (error)
		goto fail;

	g_topology_lock();
	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
		if (!entry->gpe_internal)
			g_part_new_provider(gp, table, entry);
	}

	table->gpt_opened = 0;
	g_access(cp, -1, -1, -1);
	return (0);

fail:
	/* Callers expect the topology lock to be held again on return. */
	g_topology_lock();
	gctl_error(req, "%d", error);
	return (error);
}

/*
 * Tear down the partition table attached to `gp`, if any, and wither the
 * geom with `error`.
 *
 * gp->softc is cleared first, then every entry is unlinked, its provider
 * (when present) is detached from the entry and withered, and the entry
 * is freed.  Finally the scheme's destroy method runs and the table
 * object itself is deleted.  A geom without a table is simply withered.
 */
static void
g_part_wither(struct g_geom *gp, int error)
{
	struct g_part_entry *entry;
	struct g_part_table *table;
	struct g_provider *pp;

	table = gp->softc;
	if (table != NULL) {
		gp->softc = NULL;
		while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) {
			LIST_REMOVE(entry, gpe_entry);
			pp = entry->gpe_pp;
			entry->gpe_pp = NULL;
			if (pp != NULL) {
				pp->private = NULL;
				g_wither_provider(pp, error);
			}
			g_free(entry);
		}
		G_PART_DESTROY(table, NULL);
		kobj_delete((kobj_t)table, M_GEOM);
	}
	g_wither_geom(gp, error);
}

/*
 * Class methods.
*/

/*
 * Control-request entry point of the class.
 *
 * Processing happens in these stages:
 *  1. Map `verb` to a ctlreq value and to the sets of mandatory (mparms)
 *     and optional (oparms) parameters.  "commit" and "undo" are the only
 *     verbs that clear `modifies`.
 *  2. Walk the request arguments, reject any parameter that is neither
 *     mandatory nor optional for the verb, and decode the rest into gpp.
 *  3. Verify that every mandatory parameter was supplied.
 *  4. For modifying verbs on a geom, refuse to touch a corrupt table
 *     (except for "destroy" and "recover") and open the table if it is
 *     not opened yet.
 *  5. Let the scheme pre-check the request, dispatch to the per-verb
 *     handler and, if the flags string contains 'C', commit right away.
 *  6. On error, release the access obtained in stage 4.
 *
 * Nothing is returned; failures are recorded on `req` via gctl_error().
 */
static void
g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb)
{
	struct g_part_parms gpp;
	struct g_part_table *table;
	struct gctl_req_arg *ap;
	enum g_part_ctl ctlreq;
	unsigned int i, mparms, oparms, parm;
	int auto_commit, close_on_error;
	int error, modifies;

	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, verb));
	g_topology_assert();

	ctlreq = G_PART_CTL_NONE;
	modifies = 1;
	mparms = 0;
	/* These three are accepted by every verb. */
	oparms = G_PART_PARM_FLAGS | G_PART_PARM_OUTPUT | G_PART_PARM_VERSION;
	/* Switch on the first character to limit the number of strcmp()s. */
	switch (*verb) {
	case 'a':
		if (!strcmp(verb, "add")) {
			ctlreq = G_PART_CTL_ADD;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_SIZE |
			    G_PART_PARM_START | G_PART_PARM_TYPE;
			oparms |= G_PART_PARM_INDEX | G_PART_PARM_LABEL;
		}
		break;
	case 'b':
		if (!strcmp(verb, "bootcode")) {
			ctlreq = G_PART_CTL_BOOTCODE;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_BOOTCODE;
		}
		break;
	case 'c':
		if (!strcmp(verb, "commit")) {
			ctlreq = G_PART_CTL_COMMIT;
			mparms |= G_PART_PARM_GEOM;
			modifies = 0;
		} else if (!strcmp(verb, "create")) {
			ctlreq = G_PART_CTL_CREATE;
			mparms |= G_PART_PARM_PROVIDER | G_PART_PARM_SCHEME;
			oparms |= G_PART_PARM_ENTRIES;
		}
		break;
	case 'd':
		if (!strcmp(verb, "delete")) {
			ctlreq = G_PART_CTL_DELETE;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
		} else if (!strcmp(verb, "destroy")) {
			ctlreq = G_PART_CTL_DESTROY;
			mparms |= G_PART_PARM_GEOM;
			oparms |= G_PART_PARM_FORCE;
		}
		break;
	case 'm':
		if (!strcmp(verb, "modify")) {
			ctlreq = G_PART_CTL_MODIFY;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
			oparms |= G_PART_PARM_LABEL | G_PART_PARM_TYPE;
		} else if (!strcmp(verb, "move")) {
			ctlreq = G_PART_CTL_MOVE;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX;
		}
		break;
	case 'r':
		if (!strcmp(verb, "recover")) {
			ctlreq = G_PART_CTL_RECOVER;
			mparms |= G_PART_PARM_GEOM;
		} else if (!strcmp(verb, "resize")) {
			ctlreq = G_PART_CTL_RESIZE;
			mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX |
			    G_PART_PARM_SIZE;
		}
		break;
	case 's':
		if (!strcmp(verb, "set")) {
			ctlreq = G_PART_CTL_SET;
			mparms |= G_PART_PARM_ATTRIB | G_PART_PARM_GEOM;
			oparms |= G_PART_PARM_INDEX;
		}
		break;
	case 'u':
		if (!strcmp(verb, "undo")) {
			ctlreq = G_PART_CTL_UNDO;
			mparms |= G_PART_PARM_GEOM;
			modifies = 0;
		} else if (!strcmp(verb, "unset")) {
			ctlreq = G_PART_CTL_UNSET;
			mparms |= G_PART_PARM_ATTRIB | G_PART_PARM_GEOM;
			oparms |= G_PART_PARM_INDEX;
		}
		break;
	}
	if (ctlreq == G_PART_CTL_NONE) {
		gctl_error(req, "%d verb '%s'", EINVAL, verb);
		return;
	}

	bzero(&gpp, sizeof(gpp));
	for (i = 0; i < req->narg; i++) {
		ap = &req->arg[i];
		parm = 0;
		switch (ap->name[0]) {
		case 'a':
			/*
			 * "arg0" stands for either the geom or the provider,
			 * whichever of the two the verb made mandatory.
			 */
			if (!strcmp(ap->name, "arg0")) {
				parm = mparms &
				    (G_PART_PARM_GEOM | G_PART_PARM_PROVIDER);
			}
			if (!strcmp(ap->name, "attrib"))
				parm = G_PART_PARM_ATTRIB;
			break;
		case 'b':
			if (!strcmp(ap->name, "bootcode"))
				parm = G_PART_PARM_BOOTCODE;
			break;
		case 'c':
			/* "class" is skipped without being validated. */
			if (!strcmp(ap->name, "class"))
				continue;
			break;
		case 'e':
			if (!strcmp(ap->name, "entries"))
				parm = G_PART_PARM_ENTRIES;
			break;
		case 'f':
			if (!strcmp(ap->name, "flags"))
				parm = G_PART_PARM_FLAGS;
			else if (!strcmp(ap->name, "force"))
				parm = G_PART_PARM_FORCE;
			break;
		case 'i':
			if (!strcmp(ap->name, "index"))
				parm = G_PART_PARM_INDEX;
			break;
		case 'l':
			if (!strcmp(ap->name, "label"))
				parm = G_PART_PARM_LABEL;
			break;
		case 'o':
			if (!strcmp(ap->name, "output"))
				parm = G_PART_PARM_OUTPUT;
			break;
		case 's':
			if (!strcmp(ap->name, "scheme"))
				parm = G_PART_PARM_SCHEME;
			else if (!strcmp(ap->name, "size"))
				parm = G_PART_PARM_SIZE;
			else if (!strcmp(ap->name, "start"))
				parm = G_PART_PARM_START;
			break;
		case 't':
			if (!strcmp(ap->name, "type"))
				parm = G_PART_PARM_TYPE;
			break;
		case 'v':
			/* "verb" is skipped; it was consumed by the caller. */
			if (!strcmp(ap->name, "verb"))
				continue;
			else if (!strcmp(ap->name, "version"))
				parm = G_PART_PARM_VERSION;
			break;
		}
		/* Unknown name (parm == 0) or not applicable to this verb. */
		if ((parm & (mparms | oparms)) == 0) {
			gctl_error(req, "%d param '%s'", EINVAL, ap->name);
			return;
		}
		switch (parm) {
		case G_PART_PARM_ATTRIB:
			error = g_part_parm_str(req, ap->name,
			    &gpp.gpp_attrib);
			break;
		case G_PART_PARM_BOOTCODE:
			error = g_part_parm_bootcode(req, ap->name,
			    &gpp.gpp_codeptr, &gpp.gpp_codesize);
			break;
		case G_PART_PARM_ENTRIES:
			error = g_part_parm_intmax(req, ap->name,
			    &gpp.gpp_entries);
			break;
		case G_PART_PARM_FLAGS:
			error = g_part_parm_str(req, ap->name, &gpp.gpp_flags);
			break;
		case G_PART_PARM_FORCE:
			error = g_part_parm_uint32(req, ap->name,
			    &gpp.gpp_force);
			break;
		case G_PART_PARM_GEOM:
			error = g_part_parm_geom(req, ap->name, &gpp.gpp_geom);
			break;
		case G_PART_PARM_INDEX:
			error = g_part_parm_intmax(req, ap->name,
			    &gpp.gpp_index);
			break;
		case G_PART_PARM_LABEL:
			error = g_part_parm_str(req, ap->name, &gpp.gpp_label);
			break;
		case G_PART_PARM_OUTPUT:
			error = 0;	/* Write-only parameter */
			break;
		case G_PART_PARM_PROVIDER:
			error = g_part_parm_provider(req, ap->name,
			    &gpp.gpp_provider);
			break;
		case G_PART_PARM_SCHEME:
			error = g_part_parm_scheme(req, ap->name,
			    &gpp.gpp_scheme);
			break;
		case G_PART_PARM_SIZE:
			error = g_part_parm_quad(req, ap->name, &gpp.gpp_size);
			break;
		case G_PART_PARM_START:
			error = g_part_parm_quad(req, ap->name,
			    &gpp.gpp_start);
			break;
		case G_PART_PARM_TYPE:
			error = g_part_parm_str(req, ap->name, &gpp.gpp_type);
			break;
		case G_PART_PARM_VERSION:
			error = g_part_parm_uint32(req, ap->name,
			    &gpp.gpp_version);
			break;
		default:
			error = EDOOFUS;
			gctl_error(req, "%d %s", error, ap->name);
			break;
		}
		if (error != 0) {
			/*
			 * Only ENOATTR is reported here.
			 * NOTE(review): presumably the g_part_parm_*()
			 * helpers report every other error themselves --
			 * confirm.
			 */
			if (error == ENOATTR) {
				gctl_error(req, "%d param '%s'", error,
				    ap->name);
			}
			return;
		}
		gpp.gpp_parms |= parm;
	}
	if ((gpp.gpp_parms & mparms) != mparms) {
		/* Report the bits of the mandatory set that are missing. */
		parm = mparms - (gpp.gpp_parms & mparms);
		gctl_error(req, "%d param '%x'", ENOATTR, parm);
		return;
	}

	/* Obtain permissions if possible/necessary. */
	close_on_error = 0;
	table = NULL;
	if (modifies && (gpp.gpp_parms & G_PART_PARM_GEOM)) {
		table = gpp.gpp_geom->softc;
		if (table != NULL && table->gpt_corrupt &&
		    ctlreq != G_PART_CTL_DESTROY &&
		    ctlreq != G_PART_CTL_RECOVER) {
			gctl_error(req, "%d table '%s' is corrupt",
			    EPERM, gpp.gpp_geom->name);
			return;
		}
		if (table != NULL && !table->gpt_opened) {
			error = g_access(LIST_FIRST(&gpp.gpp_geom->consumer),
			    1, 1, 1);
			if (error) {
				gctl_error(req, "%d geom '%s'", error,
				    gpp.gpp_geom->name);
				return;
			}
			table->gpt_opened = 1;
			/* Remember to undo the open if the verb fails. */
			close_on_error = 1;
		}
	}

	/* Allow the scheme to check or modify the parameters. */
	if (table != NULL) {
		error = G_PART_PRECHECK(table, ctlreq, &gpp);
		if (error) {
			gctl_error(req, "%d pre-check failed", error);
			goto out;
		}
	} else
		error = EDOOFUS;	/* Prevent bogus uninit. warning. */

	switch (ctlreq) {
	case G_PART_CTL_NONE:
		/* Unreachable: G_PART_CTL_NONE was rejected above. */
		panic("%s", __func__);
	case G_PART_CTL_ADD:
		error = g_part_ctl_add(req, &gpp);
		break;
	case G_PART_CTL_BOOTCODE:
		error = g_part_ctl_bootcode(req, &gpp);
		break;
	case G_PART_CTL_COMMIT:
		error = g_part_ctl_commit(req, &gpp);
		break;
	case G_PART_CTL_CREATE:
		error = g_part_ctl_create(req, &gpp);
		break;
	case G_PART_CTL_DELETE:
		error = g_part_ctl_delete(req, &gpp);
		break;
	case G_PART_CTL_DESTROY:
		error = g_part_ctl_destroy(req, &gpp);
		break;
	case G_PART_CTL_MODIFY:
		error = g_part_ctl_modify(req, &gpp);
		break;
	case G_PART_CTL_MOVE:
		error = g_part_ctl_move(req, &gpp);
		break;
	case G_PART_CTL_RECOVER:
		error = g_part_ctl_recover(req, &gpp);
		break;
	case G_PART_CTL_RESIZE:
		error = g_part_ctl_resize(req, &gpp);
		break;
	case G_PART_CTL_SET:
		error = g_part_ctl_setunset(req, &gpp, 1);
		break;
	case G_PART_CTL_UNDO:
		error = g_part_ctl_undo(req, &gpp);
		break;
	case G_PART_CTL_UNSET:
		error = g_part_ctl_setunset(req, &gpp, 0);
		break;
	}

	/* Implement automatic commit. */
	if (!error) {
		auto_commit = (modifies &&
		    (gpp.gpp_parms & G_PART_PARM_FLAGS) &&
		    strchr(gpp.gpp_flags, 'C') != NULL) ? 1 : 0;
		if (auto_commit) {
			KASSERT(gpp.gpp_parms & G_PART_PARM_GEOM, ("%s",
			    __func__));
			error = g_part_ctl_commit(req, &gpp);
		}
	}

out:
	/* Give back the access taken above if the request failed. */
	if (error && close_on_error) {
		g_access(LIST_FIRST(&gpp.gpp_geom->consumer), -1, -1, -1);
		table->gpt_opened = 0;
	}
}

/*
 * destroy_geom class method: withers the geom with EINVAL and always
 * reports success.
 */
static int
g_part_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{

	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, gp->name));
	g_topology_assert();

	g_part_wither(gp, EINVAL);
	return (0);
}

/*
 * taste class method: try to recognise a partition table on provider `pp`.
 *
 * A geom and consumer are created and attached with read access, then the
 * topology lock is dropped while the media is probed and read.  Returns
 * the new geom on success and NULL when the provider is skipped or any
 * step fails.
 */
static struct g_geom *
g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_consumer *cp;
	struct g_geom *gp;
	struct g_part_entry *entry;
	struct g_part_table *table;
	struct root_hold_token *rht;
	struct g_geom_alias *gap;
	int attr, depth;
	int error;

	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name));
	g_topology_assert();

	/* Skip providers that are already open for writing. */
	if (pp->acw > 0)
		return (NULL);

	/*
	 * Create a GEOM with consumer and hook it up to the provider.
	 * With that we become part of the topology. Obtain read access
	 * to the provider.
	 */
	gp = g_new_geomf(mp, "%s", pp->name);
	/* Carry over the aliases of the geom that owns the provider. */
	LIST_FOREACH(gap, &pp->geom->aliases, ga_next)
		g_geom_add_alias(gp, gap->ga_alias);
	cp = g_new_consumer(gp);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error == 0)
		error = g_access(cp, 1, 0, 0);
	if (error != 0) {
		/* Only detach if the attach itself had succeeded. */
		if (cp->provider)
			g_detach(cp);
		g_destroy_consumer(cp);
		g_destroy_geom(gp);
		return (NULL);
	}

	rht = root_mount_hold(mp->name);
	g_topology_unlock();

	/*
	 * Short-circuit the whole probing galore when there's no
	 * media present.
	 */
	if (pp->mediasize == 0 || pp->sectorsize == 0) {
		error = ENODEV;
		goto fail;
	}

	/* Make sure we can nest and if so, determine our depth. */
	error = g_getattr("PART::isleaf", cp, &attr);
	if (!error && attr) {
		error = ENODEV;
		goto fail;
	}
	error = g_getattr("PART::depth", cp, &attr);
	depth = (!error) ?
attr + 1 : 0;

	error = g_part_probe(gp, cp, depth);
	if (error)
		goto fail;

	table = gp->softc;

	/*
	 * Synthesize a disk geometry. Some partitioning schemes
	 * depend on it and since some file systems need it even
	 * when the partition scheme doesn't, we do it here in
	 * scheme-independent code.
	 */
	g_part_geometry(table, cp, pp->mediasize / pp->sectorsize);

	error = G_PART_READ(table, cp);
	if (error)
		goto fail;
	error = g_part_check_integrity(table, cp);
	if (error)
		goto fail;

	g_topology_lock();
	/* Expose every non-internal entry as a provider. */
	LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
		if (!entry->gpe_internal)
			g_part_new_provider(gp, table, entry);
	}

	root_mount_rel(rht);
	/* Drop the read access that was taken for tasting. */
	g_access(cp, -1, 0, 0);
	return (gp);

fail:
	/* Re-acquire the topology lock before undoing the setup. */
	g_topology_lock();
	root_mount_rel(rht);
	g_access(cp, -1, 0, 0);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	return (NULL);
}

/*
 * Geom methods.
 */

/*
 * access method: forward the requested access-count deltas of provider
 * `pp` to the geom's first consumer.  The write delta is added to the
 * exclusive delta as well.
 */
static int
g_part_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_consumer *cp;

	G_PART_TRACE((G_T_ACCESS, "%s(%s,%d,%d,%d)", __func__, pp->name, dr,
	    dw, de));

	cp = LIST_FIRST(&pp->geom->consumer);

	/* We always gain write-exclusive access. */
	return (g_access(cp, dr, dw, dw + de));
}

/*
 * dumpconf method: append configuration text for the geom, one of its
 * consumers or one of its providers to `sb`.
 *
 * The arguments select what is emitted:
 *  - indent == NULL: a one-line summary of the provider's entry
 *    (index, offset, type); cp must be NULL and pp non-NULL.
 *  - cp != NULL: consumer configuration; nothing is emitted.
 *  - pp != NULL: per-provider values (start, end, index, type, offset,
 *    media size).
 *  - otherwise: per-geom values (scheme, entries, first, last, sectors,
 *    heads, state, modified).
 * Providers without an entry (pp->private == NULL) produce no output.
 *
 * NOTE(review): the format strings below print only the indent and the
 * value; the element names that would normally wrap each value appear to
 * be missing from this copy of the file -- confirm against upstream.
 */
static void
g_part_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	char buf[64];
	struct g_part_entry *entry;
	struct g_part_table *table;

	KASSERT(sb != NULL && gp != NULL, ("%s", __func__));
	table = gp->softc;

	if (indent == NULL) {
		KASSERT(cp == NULL && pp != NULL, ("%s", __func__));
		entry = pp->private;
		if (entry == NULL)
			return;
		sbuf_printf(sb, " i %u o %ju ty %s", entry->gpe_index,
		    (uintmax_t)entry->gpe_offset,
		    G_PART_TYPE(table, entry, buf, sizeof(buf)));
		/*
		 * libdisk compatibility quirk - the scheme dumps the
		 * slicer name and partition type in a way that is
		 * compatible with libdisk. When libdisk is not used
		 * anymore, this should go away.
		 */
		G_PART_DUMPCONF(table, entry, sb, indent);
	} else if (cp != NULL) {	/* Consumer configuration. */
		KASSERT(pp == NULL, ("%s", __func__));
		/* none */
	} else if (pp != NULL) {	/* Provider configuration. */
		entry = pp->private;
		if (entry == NULL)
			return;
		sbuf_printf(sb, "%s%ju\n", indent,
		    (uintmax_t)entry->gpe_start);
		sbuf_printf(sb, "%s%ju\n", indent,
		    (uintmax_t)entry->gpe_end);
		sbuf_printf(sb, "%s%u\n", indent,
		    entry->gpe_index);
		sbuf_printf(sb, "%s%s\n", indent,
		    G_PART_TYPE(table, entry, buf, sizeof(buf)));
		sbuf_printf(sb, "%s%ju\n", indent,
		    (uintmax_t)entry->gpe_offset);
		sbuf_printf(sb, "%s%ju\n", indent,
		    (uintmax_t)pp->mediasize);
		/* Let the scheme append its own per-entry details. */
		G_PART_DUMPCONF(table, entry, sb, indent);
	} else {			/* Geom configuration. */
		sbuf_printf(sb, "%s%s\n", indent,
		    table->gpt_scheme->name);
		sbuf_printf(sb, "%s%u\n", indent,
		    table->gpt_entries);
		sbuf_printf(sb, "%s%ju\n", indent,
		    (uintmax_t)table->gpt_first);
		sbuf_printf(sb, "%s%ju\n", indent,
		    (uintmax_t)table->gpt_last);
		sbuf_printf(sb, "%s%u\n", indent,
		    table->gpt_sectors);
		sbuf_printf(sb, "%s%u\n", indent,
		    table->gpt_heads);
		sbuf_printf(sb, "%s%s\n", indent,
		    table->gpt_corrupt ? "CORRUPT": "OK");
		sbuf_printf(sb, "%s%s\n", indent,
		    table->gpt_opened ? "true": "false");
		/* A NULL entry asks the scheme for table-wide details. */
		G_PART_DUMPCONF(table, NULL, sb, indent);
	}
}

/*-
 * This start routine is only called for non-trivial requests, all the
 * trivial ones are handled autonomously by the slice code.
 * For requests we handle here, we must call the g_io_deliver() on the
 * bio, and return non-zero to indicate to the slice code that we did so.
 * This code executes in the "DOWN" I/O path, this means:
 * * No sleeping.
 * * Don't grab the topology lock.
* * Don't call biowait, g_getattr(), g_setattr() or g_read_data()
 */

/*
 * ioctl method: hand the request to the scheme's ioctl method together
 * with the table of the provider's geom.
 *
 * NOTE(review): the comment preceding this one talks about a start
 * routine, but the function that follows it is the ioctl method; it looks
 * stale -- confirm before relying on it.
 */
static int
g_part_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag,
    struct thread *td)
{
	struct g_part_table *table;

	table = pp->geom->softc;
	return G_PART_IOCTL(table, pp, cmd, data, fflag, td);
}

/*
 * resize method, called for consumer `cp`.
 *
 * Does nothing unless auto_resize is enabled.  Opens the table if it is
 * not opened yet, asks the scheme to resize the table (entry and
 * parameters are NULL) and prints a hint when that succeeds.  If the
 * table no longer passes the integrity check afterwards, the access is
 * released and the geom is withered with ENXIO.
 */
static void
g_part_resize(struct g_consumer *cp)
{
	struct g_part_table *table;

	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name));
	g_topology_assert();

	if (auto_resize == 0)
		return;

	table = cp->geom->softc;
	if (table->gpt_opened == 0) {
		if (g_access(cp, 1, 1, 1) != 0)
			return;
		table->gpt_opened = 1;
	}
	if (G_PART_RESIZE(table, NULL, NULL) == 0)
		printf("GEOM_PART: %s was automatically resized.\n"
		    " Use `gpart commit %s` to save changes or "
		    "`gpart undo %s` to revert them.\n",
		    cp->geom->name, cp->geom->name, cp->geom->name);
	if (g_part_check_integrity(table, cp) != 0) {
		g_access(cp, -1, -1, -1);
		table->gpt_opened = 0;
		g_part_wither(table->gpt_gp, ENXIO);
	}
}

/*
 * orphan method: the provider under `cp` is going away.  Release the
 * access held for an opened table, then wither the geom with the
 * provider's error.
 */
static void
g_part_orphan(struct g_consumer *cp)
{
	struct g_provider *pp;
	struct g_part_table *table;

	pp = cp->provider;
	KASSERT(pp != NULL, ("%s", __func__));
	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, pp->name));
	g_topology_assert();

	KASSERT(pp->error != 0, ("%s", __func__));
	table = cp->geom->softc;
	/* The table may already be gone; see g_part_wither(). */
	if (table != NULL && table->gpt_opened)
		g_access(cp, -1, -1, -1);
	g_part_wither(cp->geom, pp->error);
}

/*
 * spoiled method: flag the consumer as orphaned and wither the geom with
 * ENXIO.
 */
static void
g_part_spoiled(struct g_consumer *cp)
{

	G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name));
	g_topology_assert();

	cp->flags |= G_CF_ORPHAN;
	g_part_wither(cp->geom, ENXIO);
}

/*
 * start method: service a bio addressed to one of our providers.
 *
 * Requests on a provider without an entry fail with ENXIO.  Data requests
 * (delete/read/write) are cloned, clipped to the provider's media size,
 * shifted by the entry's offset and sent to the geom's first consumer.
 * Attribute requests are answered locally where possible; everything
 * that is not answered here, plus BIO_FLUSH, is cloned and passed down.
 * Unsupported commands fail with EOPNOTSUPP.
 */
static void
g_part_start(struct bio *bp)
{
	struct bio *bp2;
	struct g_consumer *cp;
	struct g_geom *gp;
	struct g_part_entry *entry;
	struct g_part_table *table;
	struct g_kerneldump *gkd;
	struct g_provider *pp;
	/* Completion handler for the pass-through clone at the bottom. */
	void (*done_func)(struct bio *) = g_std_done;
	char buf[64];

	biotrack(bp, __func__);

	pp = bp->bio_to;
	gp = pp->geom;
	table = gp->softc;
	cp = LIST_FIRST(&gp->consumer);

	G_PART_TRACE((G_T_BIO, "%s: cmd=%d, provider=%s", __func__, bp->bio_cmd,
	    pp->name));

	entry = pp->private;
	if (entry == NULL) {
		g_io_deliver(bp, ENXIO);
		return;
	}

	switch(bp->bio_cmd) {
	case BIO_DELETE:
	case BIO_READ:
	case BIO_WRITE:
		/* Requests starting at or past the end are rejected. */
		if (bp->bio_offset >= pp->mediasize) {
			g_io_deliver(bp, EIO);
			return;
		}
		bp2 = g_clone_bio(bp);
		if (bp2 == NULL) {
			g_io_deliver(bp, ENOMEM);
			return;
		}
		/* Truncate requests that run past the end of the partition. */
		if (bp2->bio_offset + bp2->bio_length > pp->mediasize)
			bp2->bio_length = pp->mediasize - bp2->bio_offset;
		bp2->bio_done = g_std_done;
		/* Translate to an offset on the underlying provider. */
		bp2->bio_offset += entry->gpe_offset;
		g_io_request(bp2, cp);
		return;
	case BIO_FLUSH:
		break;
	case BIO_GETATTR:
		if (g_handleattr_int(bp, "GEOM::fwheads", table->gpt_heads))
			return;
		if (g_handleattr_int(bp, "GEOM::fwsectors", table->gpt_sectors))
			return;
		if (g_handleattr_int(bp, "PART::isleaf", table->gpt_isleaf))
			return;
		if (g_handleattr_int(bp, "PART::depth", table->gpt_depth))
			return;
		if (g_handleattr_str(bp, "PART::scheme",
		    table->gpt_scheme->name))
			return;
		if (g_handleattr_str(bp, "PART::type",
		    G_PART_TYPE(table, entry, buf, sizeof(buf))))
			return;
		/* Passed down, but completed by a dedicated handler. */
		if (!strcmp("GEOM::physpath", bp->bio_attribute)) {
			done_func = g_part_get_physpath_done;
			break;
		}
		if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) {
			/*
			 * Check that the partition is suitable for kernel
			 * dumps. Typically only swap partitions should be
			 * used. If the request comes from the nested scheme
			 * we allow dumping there as well.
*/ if ((bp->bio_from == NULL || bp->bio_from->geom->class != &g_part_class) && G_PART_DUMPTO(table, entry) == 0) { g_io_deliver(bp, ENODEV); printf("GEOM_PART: Partition '%s' not suitable" " for kernel dumps (wrong type?)\n", pp->name); return; } gkd = (struct g_kerneldump *)bp->bio_data; if (gkd->offset >= pp->mediasize) { g_io_deliver(bp, EIO); return; } if (gkd->offset + gkd->length > pp->mediasize) gkd->length = pp->mediasize - gkd->offset; gkd->offset += entry->gpe_offset; } break; default: g_io_deliver(bp, EOPNOTSUPP); return; } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = done_func; g_io_request(bp2, cp); } static void g_part_init(struct g_class *mp) { TAILQ_INSERT_HEAD(&g_part_schemes, &g_part_null_scheme, scheme_list); } static void g_part_fini(struct g_class *mp) { TAILQ_REMOVE(&g_part_schemes, &g_part_null_scheme, scheme_list); } static void g_part_unload_event(void *arg, int flag) { struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; struct g_part_scheme *scheme; struct g_part_table *table; uintptr_t *xchg; int acc, error; if (flag == EV_CANCEL) return; xchg = arg; error = 0; scheme = (void *)(*xchg); g_topology_assert(); LIST_FOREACH(gp, &g_part_class.geom, geom) { table = gp->softc; if (table->gpt_scheme != scheme) continue; acc = 0; LIST_FOREACH(pp, &gp->provider, provider) acc += pp->acr + pp->acw + pp->ace; LIST_FOREACH(cp, &gp->consumer, consumer) acc += cp->acr + cp->acw + cp->ace; if (!acc) g_part_wither(gp, ENOSYS); else error = EBUSY; } if (!error) TAILQ_REMOVE(&g_part_schemes, scheme, scheme_list); *xchg = error; } int g_part_modevent(module_t mod, int type, struct g_part_scheme *scheme) { struct g_part_scheme *iter; uintptr_t arg; int error; error = 0; switch (type) { case MOD_LOAD: TAILQ_FOREACH(iter, &g_part_schemes, scheme_list) { if (scheme == iter) { printf("GEOM_PART: scheme %s is already " "registered!\n", scheme->name); break; } } if (iter == NULL) { 
TAILQ_INSERT_TAIL(&g_part_schemes, scheme, scheme_list); g_retaste(&g_part_class); } break; case MOD_UNLOAD: arg = (uintptr_t)scheme; error = g_waitfor_event(g_part_unload_event, &arg, M_WAITOK, NULL); if (error == 0) error = arg; break; default: error = EOPNOTSUPP; break; } return (error); } Index: head/sys/geom/raid/g_raid.c =================================================================== --- head/sys/geom/raid/g_raid.c (revision 339814) +++ head/sys/geom/raid/g_raid.c (revision 339815) @@ -1,2575 +1,2575 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/

/*
 * NOTE(review): the targets of the bare #include directives below are
 * missing from this copy of the file; restore them from the upstream
 * source before building.
 */
#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "g_raid_md_if.h"
#include "g_raid_tr_if.h"

static MALLOC_DEFINE(M_RAID, "raid_data", "GEOM_RAID Data");

/* Tunables exported under the kern.geom.raid sysctl node. */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, raid, CTLFLAG_RW, 0, "GEOM_RAID stuff");
int g_raid_enable = 1;
SYSCTL_INT(_kern_geom_raid, OID_AUTO, enable, CTLFLAG_RWTUN,
    &g_raid_enable, 0, "Enable on-disk metadata taste");
u_int g_raid_aggressive_spare = 0;
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, aggressive_spare, CTLFLAG_RWTUN,
    &g_raid_aggressive_spare, 0, "Use disks without metadata as spare");
u_int g_raid_debug = 0;
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, debug, CTLFLAG_RWTUN, &g_raid_debug, 0,
    "Debug level");
/*
 * NOTE(review): this variable is a signed int but is exported with
 * SYSCTL_UINT -- confirm which of the two is intended.
 */
int g_raid_read_err_thresh = 10;
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, read_err_thresh, CTLFLAG_RWTUN,
    &g_raid_read_err_thresh, 0,
    "Number of read errors equated to disk failure");
u_int g_raid_start_timeout = 30;
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, start_timeout, CTLFLAG_RWTUN,
    &g_raid_start_timeout, 0,
    "Time to wait for all array components");
static u_int g_raid_clean_time = 5;
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, clean_time, CTLFLAG_RWTUN,
    &g_raid_clean_time, 0, "Mark volume as clean when idling");
static u_int g_raid_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_raid_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_raid_name_format = 0;
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, name_format, CTLFLAG_RWTUN,
    &g_raid_name_format, 0, "Providers name format.");
static u_int g_raid_idle_threshold = 1000000;
SYSCTL_UINT(_kern_geom_raid, OID_AUTO, idle_threshold, CTLFLAG_RWTUN,
    &g_raid_idle_threshold, 1000000,
    "Time in microseconds to consider a volume idle.");

/*
 * msleep() wrapper that logs at debug level 4 before sleeping on `ident`
 * and again after waking up.  The msleep() result is stored in `rv`.
 */
#define	MSLEEP(rv, ident, mtx, priority, wmesg, timeout)	do {	\
	G_RAID_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));		\
	rv = msleep((ident), (mtx), (priority), (wmesg), (timeout));	\
	G_RAID_DEBUG(4, "%s: Woken up %p.", __func__, (ident));		\
} while (0)

/* Global lists of metadata classes, transformation classes and volumes. */
LIST_HEAD(, g_raid_md_class) g_raid_md_classes =
    LIST_HEAD_INITIALIZER(g_raid_md_classes);

LIST_HEAD(, g_raid_tr_class) g_raid_tr_classes =
    LIST_HEAD_INITIALIZER(g_raid_tr_classes);

LIST_HEAD(, g_raid_volume) g_raid_volumes =
    LIST_HEAD_INITIALIZER(g_raid_volumes);

static eventhandler_tag g_raid_post_sync = NULL;
static int g_raid_started = 0;
static int g_raid_shutdown = 0;

static int g_raid_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_raid_taste;
static void g_raid_init(struct g_class *mp);
static void g_raid_fini(struct g_class *mp);

/* GEOM class descriptor wiring up the class methods declared above. */
struct g_class g_raid_class = {
	.name = G_RAID_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_raid_ctl,
	.taste = g_raid_taste,
	.destroy_geom = g_raid_destroy_geom,
	.init = g_raid_init,
	.fini = g_raid_fini
};

static void g_raid_destroy_provider(struct g_raid_volume *vol);
static int g_raid_update_disk(struct g_raid_disk *disk, u_int event);
static int g_raid_update_subdisk(struct g_raid_subdisk *subdisk, u_int event);
static int g_raid_update_volume(struct g_raid_volume *vol, u_int event);
static int g_raid_update_node(struct g_raid_softc *sc, u_int event);
static void g_raid_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_raid_start(struct bio *bp);
static void g_raid_start_request(struct bio *bp);
static void g_raid_disk_done(struct bio *bp);
static void g_raid_poll(struct g_raid_softc *sc);

/*
 * Return the name of a node event, or "INVALID" for an unknown value.
 * The result is a string literal and must not be freed.
 */
static const char *
g_raid_node_event2str(int event)
{

	switch (event) {
	case G_RAID_NODE_E_WAKE:
		return ("WAKE");
	case G_RAID_NODE_E_START:
		return ("START");
	default:
		return ("INVALID");
	}
}

/*
 * Return the name of a disk state, or "INVALID" for an unknown value.
 * The result is a string literal and must not be freed.
 */
const char *
g_raid_disk_state2str(int state)
{

	switch (state) {
	case G_RAID_DISK_S_NONE:
		return ("NONE");
	case G_RAID_DISK_S_OFFLINE:
		return ("OFFLINE");
	case
G_RAID_DISK_S_DISABLED: return ("DISABLED"); case G_RAID_DISK_S_FAILED: return ("FAILED"); case G_RAID_DISK_S_STALE_FAILED: return ("STALE_FAILED"); case G_RAID_DISK_S_SPARE: return ("SPARE"); case G_RAID_DISK_S_STALE: return ("STALE"); case G_RAID_DISK_S_ACTIVE: return ("ACTIVE"); default: return ("INVALID"); } } static const char * g_raid_disk_event2str(int event) { switch (event) { case G_RAID_DISK_E_DISCONNECTED: return ("DISCONNECTED"); default: return ("INVALID"); } } const char * g_raid_subdisk_state2str(int state) { switch (state) { case G_RAID_SUBDISK_S_NONE: return ("NONE"); case G_RAID_SUBDISK_S_FAILED: return ("FAILED"); case G_RAID_SUBDISK_S_NEW: return ("NEW"); case G_RAID_SUBDISK_S_REBUILD: return ("REBUILD"); case G_RAID_SUBDISK_S_UNINITIALIZED: return ("UNINITIALIZED"); case G_RAID_SUBDISK_S_STALE: return ("STALE"); case G_RAID_SUBDISK_S_RESYNC: return ("RESYNC"); case G_RAID_SUBDISK_S_ACTIVE: return ("ACTIVE"); default: return ("INVALID"); } } static const char * g_raid_subdisk_event2str(int event) { switch (event) { case G_RAID_SUBDISK_E_NEW: return ("NEW"); case G_RAID_SUBDISK_E_FAILED: return ("FAILED"); case G_RAID_SUBDISK_E_DISCONNECTED: return ("DISCONNECTED"); default: return ("INVALID"); } } const char * g_raid_volume_state2str(int state) { switch (state) { case G_RAID_VOLUME_S_STARTING: return ("STARTING"); case G_RAID_VOLUME_S_BROKEN: return ("BROKEN"); case G_RAID_VOLUME_S_DEGRADED: return ("DEGRADED"); case G_RAID_VOLUME_S_SUBOPTIMAL: return ("SUBOPTIMAL"); case G_RAID_VOLUME_S_OPTIMAL: return ("OPTIMAL"); case G_RAID_VOLUME_S_UNSUPPORTED: return ("UNSUPPORTED"); case G_RAID_VOLUME_S_STOPPED: return ("STOPPED"); default: return ("INVALID"); } } static const char * g_raid_volume_event2str(int event) { switch (event) { case G_RAID_VOLUME_E_UP: return ("UP"); case G_RAID_VOLUME_E_DOWN: return ("DOWN"); case G_RAID_VOLUME_E_START: return ("START"); case G_RAID_VOLUME_E_STARTMD: return ("STARTMD"); default: return ("INVALID"); } } const 
char *
g_raid_volume_level2str(int level, int qual)
{

	/*
	 * Translate a RAID level and its qualifier into a textual name.
	 * For levels that have qualifiers, a qualifier that matches none of
	 * the tested values yields the bare level name.
	 */
	switch (level) {
	case G_RAID_VOLUME_RL_RAID0:
		return ("RAID0");
	case G_RAID_VOLUME_RL_RAID1:
		return ("RAID1");
	case G_RAID_VOLUME_RL_RAID3:
		if (qual == G_RAID_VOLUME_RLQ_R3P0)
			return ("RAID3-P0");
		if (qual == G_RAID_VOLUME_RLQ_R3PN)
			return ("RAID3-PN");
		return ("RAID3");
	case G_RAID_VOLUME_RL_RAID4:
		if (qual == G_RAID_VOLUME_RLQ_R4P0)
			return ("RAID4-P0");
		if (qual == G_RAID_VOLUME_RLQ_R4PN)
			return ("RAID4-PN");
		return ("RAID4");
	case G_RAID_VOLUME_RL_RAID5:
		if (qual == G_RAID_VOLUME_RLQ_R5RA)
			return ("RAID5-RA");
		if (qual == G_RAID_VOLUME_RLQ_R5RS)
			return ("RAID5-RS");
		if (qual == G_RAID_VOLUME_RLQ_R5LA)
			return ("RAID5-LA");
		if (qual == G_RAID_VOLUME_RLQ_R5LS)
			return ("RAID5-LS");
		return ("RAID5");
	case G_RAID_VOLUME_RL_RAID6:
		if (qual == G_RAID_VOLUME_RLQ_R6RA)
			return ("RAID6-RA");
		if (qual == G_RAID_VOLUME_RLQ_R6RS)
			return ("RAID6-RS");
		if (qual == G_RAID_VOLUME_RLQ_R6LA)
			return ("RAID6-LA");
		if (qual == G_RAID_VOLUME_RLQ_R6LS)
			return ("RAID6-LS");
		return ("RAID6");
	case G_RAID_VOLUME_RL_RAIDMDF:
		if (qual == G_RAID_VOLUME_RLQ_RMDFRA)
			return ("RAIDMDF-RA");
		if (qual == G_RAID_VOLUME_RLQ_RMDFRS)
			return ("RAIDMDF-RS");
		if (qual == G_RAID_VOLUME_RLQ_RMDFLA)
			return ("RAIDMDF-LA");
		if (qual == G_RAID_VOLUME_RLQ_RMDFLS)
			return ("RAIDMDF-LS");
		return ("RAIDMDF");
	case G_RAID_VOLUME_RL_RAID1E:
		if (qual == G_RAID_VOLUME_RLQ_R1EA)
			return ("RAID1E-A");
		if (qual == G_RAID_VOLUME_RLQ_R1EO)
			return ("RAID1E-O");
		return ("RAID1E");
	case G_RAID_VOLUME_RL_SINGLE:
		return ("SINGLE");
	case G_RAID_VOLUME_RL_CONCAT:
		return ("CONCAT");
	case G_RAID_VOLUME_RL_RAID5E:
		if (qual == G_RAID_VOLUME_RLQ_R5ERA)
			return ("RAID5E-RA");
		if (qual == G_RAID_VOLUME_RLQ_R5ERS)
			return ("RAID5E-RS");
		if (qual == G_RAID_VOLUME_RLQ_R5ELA)
			return ("RAID5E-LA");
		if (qual == G_RAID_VOLUME_RLQ_R5ELS)
			return ("RAID5E-LS");
		return ("RAID5E");
	case G_RAID_VOLUME_RL_RAID5EE:
		if (qual == G_RAID_VOLUME_RLQ_R5EERA)
			return ("RAID5EE-RA");
		if (qual ==
G_RAID_VOLUME_RLQ_R5EERS) return ("RAID5EE-RS"); if (qual == G_RAID_VOLUME_RLQ_R5EELA) return ("RAID5EE-LA"); if (qual == G_RAID_VOLUME_RLQ_R5EELS) return ("RAID5EE-LS"); return ("RAID5EE"); case G_RAID_VOLUME_RL_RAID5R: if (qual == G_RAID_VOLUME_RLQ_R5RRA) return ("RAID5R-RA"); if (qual == G_RAID_VOLUME_RLQ_R5RRS) return ("RAID5R-RS"); if (qual == G_RAID_VOLUME_RLQ_R5RLA) return ("RAID5R-LA"); if (qual == G_RAID_VOLUME_RLQ_R5RLS) return ("RAID5R-LS"); return ("RAID5E"); default: return ("UNKNOWN"); } } int g_raid_volume_str2level(const char *str, int *level, int *qual) { *level = G_RAID_VOLUME_RL_UNKNOWN; *qual = G_RAID_VOLUME_RLQ_NONE; if (strcasecmp(str, "RAID0") == 0) *level = G_RAID_VOLUME_RL_RAID0; else if (strcasecmp(str, "RAID1") == 0) *level = G_RAID_VOLUME_RL_RAID1; else if (strcasecmp(str, "RAID3-P0") == 0) { *level = G_RAID_VOLUME_RL_RAID3; *qual = G_RAID_VOLUME_RLQ_R3P0; } else if (strcasecmp(str, "RAID3-PN") == 0 || strcasecmp(str, "RAID3") == 0) { *level = G_RAID_VOLUME_RL_RAID3; *qual = G_RAID_VOLUME_RLQ_R3PN; } else if (strcasecmp(str, "RAID4-P0") == 0) { *level = G_RAID_VOLUME_RL_RAID4; *qual = G_RAID_VOLUME_RLQ_R4P0; } else if (strcasecmp(str, "RAID4-PN") == 0 || strcasecmp(str, "RAID4") == 0) { *level = G_RAID_VOLUME_RL_RAID4; *qual = G_RAID_VOLUME_RLQ_R4PN; } else if (strcasecmp(str, "RAID5-RA") == 0) { *level = G_RAID_VOLUME_RL_RAID5; *qual = G_RAID_VOLUME_RLQ_R5RA; } else if (strcasecmp(str, "RAID5-RS") == 0) { *level = G_RAID_VOLUME_RL_RAID5; *qual = G_RAID_VOLUME_RLQ_R5RS; } else if (strcasecmp(str, "RAID5") == 0 || strcasecmp(str, "RAID5-LA") == 0) { *level = G_RAID_VOLUME_RL_RAID5; *qual = G_RAID_VOLUME_RLQ_R5LA; } else if (strcasecmp(str, "RAID5-LS") == 0) { *level = G_RAID_VOLUME_RL_RAID5; *qual = G_RAID_VOLUME_RLQ_R5LS; } else if (strcasecmp(str, "RAID6-RA") == 0) { *level = G_RAID_VOLUME_RL_RAID6; *qual = G_RAID_VOLUME_RLQ_R6RA; } else if (strcasecmp(str, "RAID6-RS") == 0) { *level = G_RAID_VOLUME_RL_RAID6; *qual = 
G_RAID_VOLUME_RLQ_R6RS;
	} else if (strcasecmp(str, "RAID6") == 0 ||
		   strcasecmp(str, "RAID6-LA") == 0) {
		/* Bare "RAID6" is an alias for "RAID6-LA". */
		*level = G_RAID_VOLUME_RL_RAID6;
		*qual = G_RAID_VOLUME_RLQ_R6LA;
	} else if (strcasecmp(str, "RAID6-LS") == 0) {
		*level = G_RAID_VOLUME_RL_RAID6;
		*qual = G_RAID_VOLUME_RLQ_R6LS;
	} else if (strcasecmp(str, "RAIDMDF-RA") == 0) {
		*level = G_RAID_VOLUME_RL_RAIDMDF;
		*qual = G_RAID_VOLUME_RLQ_RMDFRA;
	} else if (strcasecmp(str, "RAIDMDF-RS") == 0) {
		*level = G_RAID_VOLUME_RL_RAIDMDF;
		*qual = G_RAID_VOLUME_RLQ_RMDFRS;
	} else if (strcasecmp(str, "RAIDMDF") == 0 ||
		   strcasecmp(str, "RAIDMDF-LA") == 0) {
		/* Bare "RAIDMDF" is an alias for "RAIDMDF-LA". */
		*level = G_RAID_VOLUME_RL_RAIDMDF;
		*qual = G_RAID_VOLUME_RLQ_RMDFLA;
	} else if (strcasecmp(str, "RAIDMDF-LS") == 0) {
		*level = G_RAID_VOLUME_RL_RAIDMDF;
		*qual = G_RAID_VOLUME_RLQ_RMDFLS;
	} else if (strcasecmp(str, "RAID10") == 0 ||
		   strcasecmp(str, "RAID1E") == 0 ||
		   strcasecmp(str, "RAID1E-A") == 0) {
		/* "RAID10" and bare "RAID1E" are aliases for "RAID1E-A". */
		*level = G_RAID_VOLUME_RL_RAID1E;
		*qual = G_RAID_VOLUME_RLQ_R1EA;
	} else if (strcasecmp(str, "RAID1E-O") == 0) {
		*level = G_RAID_VOLUME_RL_RAID1E;
		*qual = G_RAID_VOLUME_RLQ_R1EO;
	} else if (strcasecmp(str, "SINGLE") == 0)
		*level = G_RAID_VOLUME_RL_SINGLE;
	else if (strcasecmp(str, "CONCAT") == 0)
		*level = G_RAID_VOLUME_RL_CONCAT;
	else if (strcasecmp(str, "RAID5E-RA") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5E;
		*qual = G_RAID_VOLUME_RLQ_R5ERA;
	} else if (strcasecmp(str, "RAID5E-RS") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5E;
		*qual = G_RAID_VOLUME_RLQ_R5ERS;
	} else if (strcasecmp(str, "RAID5E") == 0 ||
		   strcasecmp(str, "RAID5E-LA") == 0) {
		/* Bare "RAID5E" is an alias for "RAID5E-LA". */
		*level = G_RAID_VOLUME_RL_RAID5E;
		*qual = G_RAID_VOLUME_RLQ_R5ELA;
	} else if (strcasecmp(str, "RAID5E-LS") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5E;
		*qual = G_RAID_VOLUME_RLQ_R5ELS;
	} else if (strcasecmp(str, "RAID5EE-RA") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5EE;
		*qual = G_RAID_VOLUME_RLQ_R5EERA;
	} else if (strcasecmp(str, "RAID5EE-RS") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5EE;
		*qual = G_RAID_VOLUME_RLQ_R5EERS;
	} else if (strcasecmp(str, "RAID5EE")
== 0 ||
		   strcasecmp(str, "RAID5EE-LA") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5EE;
		*qual = G_RAID_VOLUME_RLQ_R5EELA;
	} else if (strcasecmp(str, "RAID5EE-LS") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5EE;
		*qual = G_RAID_VOLUME_RLQ_R5EELS;
	} else if (strcasecmp(str, "RAID5R-RA") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5R;
		*qual = G_RAID_VOLUME_RLQ_R5RRA;
	} else if (strcasecmp(str, "RAID5R-RS") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5R;
		*qual = G_RAID_VOLUME_RLQ_R5RRS;
	} else if (strcasecmp(str, "RAID5R") == 0 ||
		   strcasecmp(str, "RAID5R-LA") == 0) {
		/* Bare "RAID5R" is an alias for "RAID5R-LA". */
		*level = G_RAID_VOLUME_RL_RAID5R;
		*qual = G_RAID_VOLUME_RLQ_R5RLA;
	} else if (strcasecmp(str, "RAID5R-LS") == 0) {
		*level = G_RAID_VOLUME_RL_RAID5R;
		*qual = G_RAID_VOLUME_RLQ_R5RLS;
	} else
		return (-1);	/* Name not recognized. */
	return (0);
}

/*
 * Return the name of the provider behind the disk's consumer, or the
 * literal "[unknown]" when the disk has no consumer or the consumer has no
 * provider.
 */
const char *
g_raid_get_diskname(struct g_raid_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_consumer->provider->name);
}

/*
 * Query the disk's consumer for the GEOM::kerneldump and GEOM::candelete
 * attributes and cache the answers in disk->d_kd and disk->d_candelete.
 * A failed query is recorded as "not supported" (NULL dumper, zero
 * d_candelete) and reported through a debug message.
 */
void
g_raid_get_disk_info(struct g_raid_disk *disk)
{
	struct g_consumer *cp = disk->d_consumer;
	int error, len;

	/* Read kernel dumping information. */
	disk->d_kd.offset = 0;
	disk->d_kd.length = OFF_MAX;
	len = sizeof(disk->d_kd);
	error = g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
	if (error)
		disk->d_kd.di.dumper = NULL;
	if (disk->d_kd.di.dumper == NULL)
		G_RAID_DEBUG1(2, disk->d_softc,
		    "Dumping not supported by %s: %d.",
		    cp->provider->name, error);

	/* Read BIO_DELETE support.
*/
	error = g_getattr("GEOM::candelete", cp, &disk->d_candelete);
	if (error)
		disk->d_candelete = 0;
	if (!disk->d_candelete)
		G_RAID_DEBUG1(2, disk->d_softc,
		    "BIO_DELETE not supported by %s: %d.",
		    cp->provider->name, error);
}

/*
 * Derive a G_STATE_* value for the disk and pass it to the underlying
 * provider through the "GEOM::setstate" attribute.  Does nothing when the
 * disk has no consumer.  The result of the attribute request is ignored.
 */
void
g_raid_report_disk_state(struct g_raid_disk *disk)
{
	struct g_raid_subdisk *sd;
	int len, state;
	uint32_t s;

	if (disk->d_consumer == NULL)
		return;
	if (disk->d_state == G_RAID_DISK_S_DISABLED) {
		s = G_STATE_ACTIVE; /* XXX */
	} else if (disk->d_state == G_RAID_DISK_S_FAILED ||
	    disk->d_state == G_RAID_DISK_S_STALE_FAILED) {
		s = G_STATE_FAILED;
	} else {
		/* Use the numerically lowest state among the disk's subdisks. */
		state = G_RAID_SUBDISK_S_ACTIVE;
		TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
			if (sd->sd_state < state)
				state = sd->sd_state;
		}
		if (state == G_RAID_SUBDISK_S_FAILED)
			s = G_STATE_FAILED;
		else if (state == G_RAID_SUBDISK_S_NEW ||
		    state == G_RAID_SUBDISK_S_REBUILD)
			s = G_STATE_REBUILD;
		else if (state == G_RAID_SUBDISK_S_STALE ||
		    state == G_RAID_SUBDISK_S_RESYNC)
			s = G_STATE_RESYNC;
		else
			s = G_STATE_ACTIVE;
	}
	len = sizeof(s);
	g_io_getattr("GEOM::setstate", disk->d_consumer, &len, &s);
	G_RAID_DEBUG1(2, disk->d_softc, "Disk %s state reported as %d.",
	    g_raid_get_diskname(disk), s);
}

/* Log and store a new disk state, then report it to the provider below. */
void
g_raid_change_disk_state(struct g_raid_disk *disk, int state)
{

	G_RAID_DEBUG1(0, disk->d_softc, "Disk %s state changed from %s to %s.",
	    g_raid_get_diskname(disk),
	    g_raid_disk_state2str(disk->d_state),
	    g_raid_disk_state2str(state));
	disk->d_state = state;
	g_raid_report_disk_state(disk);
}

/*
 * Log and store a new subdisk state; if the subdisk has a disk attached,
 * re-report that disk's state as well.
 */
void
g_raid_change_subdisk_state(struct g_raid_subdisk *sd, int state)
{

	G_RAID_DEBUG1(0, sd->sd_softc,
	    "Subdisk %s:%d-%s state changed from %s to %s.",
	    sd->sd_volume->v_name, sd->sd_pos,
	    sd->sd_disk ?
g_raid_get_diskname(sd->sd_disk) : "[none]",
	    g_raid_subdisk_state2str(sd->sd_state),
	    g_raid_subdisk_state2str(state));
	sd->sd_state = state;
	if (sd->sd_disk)
		g_raid_report_disk_state(sd->sd_disk);
}

/* Log and store a new volume state. */
void
g_raid_change_volume_state(struct g_raid_volume *vol, int state)
{

	G_RAID_DEBUG1(0, vol->v_softc,
	    "Volume %s state changed from %s to %s.",
	    vol->v_name,
	    g_raid_volume_state2str(vol->v_state),
	    g_raid_volume_state2str(state));
	vol->v_state = state;
}

/*
 * --- Events handling functions ---
 * Events in geom_raid are used to maintain subdisks and volumes status
 * from one thread to simplify locking.
 */
/* Release an event structure allocated by g_raid_event_send(). */
static void
g_raid_event_free(struct g_raid_event *ep)
{

	free(ep, M_RAID);
}

/*
 * Queue an event for the softc that owns 'arg' and wake the thread sleeping
 * on that softc.
 *
 * The flags select how 'arg' is interpreted (volume, disk, subdisk, or the
 * softc itself when none of those bits is set).  The allocation may sleep
 * only when the caller already holds sc_lock exclusively; otherwise it is
 * M_NOWAIT and ENOMEM is returned on failure.
 *
 * Without G_RAID_EVENT_WAIT the function returns 0 right after queueing.
 * With it, sc_lock (asserted exclusively held) is dropped, the caller
 * sleeps until the event is flagged G_RAID_EVENT_DONE, the event is freed
 * here, sc_lock is re-taken, and the event's e_error is returned.
 */
int
g_raid_event_send(void *arg, int event, int flags)
{
	struct g_raid_softc *sc;
	struct g_raid_event *ep;
	int error;

	if ((flags & G_RAID_EVENT_VOLUME) != 0) {
		sc = ((struct g_raid_volume *)arg)->v_softc;
	} else if ((flags & G_RAID_EVENT_DISK) != 0) {
		sc = ((struct g_raid_disk *)arg)->d_softc;
	} else if ((flags & G_RAID_EVENT_SUBDISK) != 0) {
		sc = ((struct g_raid_subdisk *)arg)->sd_softc;
	} else {
		sc = arg;
	}
	ep = malloc(sizeof(*ep), M_RAID,
	    sx_xlocked(&sc->sc_lock) ? M_WAITOK : M_NOWAIT);
	if (ep == NULL)
		return (ENOMEM);
	ep->e_tgt = arg;
	ep->e_event = event;
	ep->e_flags = flags;
	ep->e_error = 0;
	G_RAID_DEBUG1(4, sc, "Sending event %p.
Waking up %p.", ep, sc);
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);

	if ((flags & G_RAID_EVENT_WAIT) == 0)
		return (0);

	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_RAID_DEBUG1(4, sc, "Sleeping on %p.", ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_RAID_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_queue_mtx);
		/* PDROP: the mutex is released by the sleep, hence re-locked. */
		MSLEEP(error, ep, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_raid_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

/*
 * Remove every queued event whose target is 'tgt'.  Events queued without
 * G_RAID_EVENT_WAIT are freed here; waited-for events get e_error set to
 * ECANCELED and their sleeper is woken.
 *
 * NOTE(review): G_RAID_EVENT_DONE is not set on this path, while the
 * waiter in g_raid_event_send() loops until that flag is set -- confirm a
 * cancelled waiter can actually leave its loop.
 */
static void
g_raid_event_cancel(struct g_raid_softc *sc, void *tgt)
{
	struct g_raid_event *ep, *tmpep;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if (ep->e_tgt != tgt)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0)
			g_raid_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_queue_mtx);
}

/* Return 1 if any queued event targets 'tgt', 0 otherwise. */
static int
g_raid_event_check(struct g_raid_softc *sc, void *tgt)
{
	struct g_raid_event *ep;
	int	res = 0;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(ep, &sc->sc_events, e_next) {
		if (ep->e_tgt != tgt)
			continue;
		res = 1;
		break;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (res);
}

/*
 * Return the number of disks in given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_raid_ndisks(struct g_raid_softc *sc, int state)
{
	struct g_raid_disk *disk;
	u_int n;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	n = 0;
	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == state || state == -1)
			n++;
	}
	return (n);
}

/*
 * Return the number of subdisks in given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_raid_nsubdisks(struct g_raid_volume *vol, int state)
{
	struct g_raid_subdisk *subdisk;
	struct g_raid_softc *sc;
	u_int i, n ;

	sc = vol->v_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	n = 0;
	for (i = 0; i < vol->v_disks_count; i++) {
		subdisk = &vol->v_subdisks[i];
		/* With state == -1, every subdisk not in S_NONE is counted. */
		if ((state == -1 &&
		     subdisk->sd_state != G_RAID_SUBDISK_S_NONE) ||
		    subdisk->sd_state == state)
			n++;
	}
	return (n);
}

/*
 * Return the first subdisk in given state.
 * If state is equal to -1, then the first connected disks.
 */
struct g_raid_subdisk *
g_raid_get_subdisk(struct g_raid_volume *vol, int state)
{
	struct g_raid_subdisk *sd;
	struct g_raid_softc *sc;
	u_int i;

	sc = vol->v_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	for (i = 0; i < vol->v_disks_count; i++) {
		sd = &vol->v_subdisks[i];
		if ((state == -1 &&
		     sd->sd_state != G_RAID_SUBDISK_S_NONE) ||
		    sd->sd_state == state)
			return (sd);
	}
	return (NULL);
}

/*
 * Look up the provider called 'name' (an optional leading "/dev/" is
 * skipped), attach a new consumer of this node's geom to it and open it
 * with access counts 1/1/1.  Requires the topology lock.  Returns the
 * consumer, or NULL if the provider does not exist or attach/open fails;
 * on failure the consumer is detached and destroyed again.
 */
struct g_consumer *
g_raid_open_consumer(struct g_raid_softc *sc, const char *name)
{
	struct g_consumer *cp;
	struct g_provider *pp;

	g_topology_assert();

	if (strncmp(name, "/dev/", 5) == 0)
		name += 5;
	pp = g_provider_by_name(name);
	if (pp == NULL)
		return (NULL);
	cp = g_new_consumer(sc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	if (g_attach(cp, pp) != 0) {
		g_destroy_consumer(cp);
		return (NULL);
	}
	if (g_access(cp, 1, 1, 1) != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		return (NULL);
	}
	return (cp);
}

/* Count the bios waiting in sc_queue whose bio_from is the given consumer. */
static u_int
g_raid_nrequests(struct g_raid_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

/* Count the volumes of this node whose v_provider_open is non-zero. */
u_int
g_raid_nopens(struct g_raid_softc *sc)
{
	struct g_raid_volume *vol;
	u_int opens;

	opens = 0;
	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
		if (vol->v_provider_open != 0)
			opens++;
	}
	return (opens);
}

/*
 * Return 1 if the consumer must not be destroyed yet: either its index
 * counter is positive (g_raid_subdisk_iostart() increments it per request
 * sent, g_raid_disk_done_request() decrements it) or requests from it are
 * still sitting in sc_queue.  Return 0 otherwise.
 */
static int
g_raid_consumer_is_busy(struct g_raid_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_RAID_DEBUG1(2, sc,
		    "I/O
requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_raid_nrequests(sc, cp) > 0) {
		G_RAID_DEBUG1(2, sc,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

/*
 * Event callback (posted via g_post_event() from g_raid_kill_consumer()):
 * detach and destroy the consumer passed as 'arg'.
 */
static void
g_raid_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_RAID_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

/*
 * Close and get rid of a consumer.  Must be called without the topology
 * lock, which is taken here.  cp->private is cleared first; if the
 * consumer is still busy nothing else is done.  Otherwise all access
 * counts are dropped and the consumer is detached and destroyed -- either
 * immediately or, when it was open for writing on a non-withering geom,
 * from a posted event (see the comment below).
 */
void
g_raid_kill_consumer(struct g_raid_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert_not();

	g_topology_lock();
	cp->private = NULL;
	if (g_raid_consumer_is_busy(sc, cp))
		goto out;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After retaste event was send (inside g_access()), we can send
		 * event to detach and destroy consumer.
		 * A class, which has consumer to the given provider connected
		 * will not receive retaste event for the provider.
		 * This is the way how I ignore retaste events when I close
		 * consumers opened for write: I detach and destroy consumer
		 * after retaste event is sent.
		 */
		g_post_event(g_raid_destroy_consumer, cp, M_WAITOK, NULL);
		goto out;
	}
	G_RAID_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
out:
	g_topology_unlock();
}

/*
 * GEOM orphan callback: if the consumer still has a disk attached through
 * cp->private, queue a G_RAID_DISK_E_DISCONNECTED event for that disk.
 */
static void
g_raid_orphan(struct g_consumer *cp)
{
	struct g_raid_disk	*disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	g_raid_event_send(disk, G_RAID_DISK_E_DISCONNECTED,
	    G_RAID_EVENT_DISK);
}

/*
 * Mark a dirty volume clean and write its metadata.  Nothing happens if
 * the volume is not dirty or still has writes in flight.  When 'acw' is
 * positive -- or is -1 and the volume's provider is open for writing --
 * the volume is only cleaned once g_raid_clean_time seconds have passed
 * since the last write, unless g_raid_shutdown is set.
 */
static void
g_raid_clean(struct g_raid_volume *vol, int acw)
{
	struct g_raid_softc *sc;
	int timeout;

	sc = vol->v_softc;
	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

//	if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0)
//		return;
	if (!vol->v_dirty)
		return;
	if (vol->v_writes > 0)
		return;
	if (acw > 0 || (acw == -1 &&
	    vol->v_provider != NULL && vol->v_provider->acw > 0)) {
		timeout = g_raid_clean_time - (time_uptime - vol->v_last_write);
		if (!g_raid_shutdown && timeout > 0)
			return;
	}
	vol->v_dirty = 0;
	G_RAID_DEBUG1(1, sc, "Volume %s marked as clean.",
	    vol->v_name);
	g_raid_write_metadata(sc, vol, NULL, NULL);
}

/* Unconditionally mark the volume dirty and write its metadata. */
static void
g_raid_dirty(struct g_raid_volume *vol)
{
	struct g_raid_softc *sc;

	sc = vol->v_softc;
	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

//	if ((sc->sc_flags & G_RAID_DEVICE_FLAG_NOFAILSYNC) != 0)
//		return;
	vol->v_dirty = 1;
	G_RAID_DEBUG1(1, sc, "Volume %s marked as dirty.",
	    vol->v_name);
	g_raid_write_metadata(sc, vol, NULL, NULL);
}

/*
 * Send a clone of 'bp' to every subdisk of the volume that is neither in
 * state NONE nor FAILED.  If any clone cannot be allocated, no request is
 * sent and 'bp' is completed with ENOMEM (or its already recorded error).
 */
void
g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp)
{
	struct g_raid_volume *vol;
	struct g_raid_subdisk *sd;
	struct bio_queue_head queue;
	struct bio *cbp;
	int i;

	vol = tr->tro_volume;

	/*
	 * Allocate all bios before sending any request, so we can return
	 * ENOMEM in nice and clean way.
	 */
	bioq_init(&queue);
	for (i = 0; i < vol->v_disks_count; i++) {
		sd = &vol->v_subdisks[i];
		if (sd->sd_state == G_RAID_SUBDISK_S_NONE ||
		    sd->sd_state == G_RAID_SUBDISK_S_FAILED)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL)
			goto failure;
		/* Remember the target subdisk until the clone is issued. */
		cbp->bio_caller1 = sd;
		bioq_insert_tail(&queue, cbp);
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		sd = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		g_raid_subdisk_iostart(sd, cbp);
	}
	return;
failure:
	while ((cbp = bioq_takefirst(&queue)) != NULL)
		g_destroy_bio(cbp);
	if (bp->bio_error == 0)
		bp->bio_error = ENOMEM;
	g_raid_iodone(bp, bp->bio_error);
}

/* Completion callback for the dump bio: just flag it BIO_DONE. */
static void
g_raid_tr_kerneldump_common_done(struct bio *bp)
{

	bp->bio_flags |= BIO_DONE;
}

/*
 * Write 'length' bytes from 'virtual' at 'offset' of the volume by pushing
 * an on-stack BIO_WRITE through g_raid_start() and calling g_raid_poll()
 * until the bio is flagged BIO_DONE.  'physical' is not used here.
 * Returns 0 on success and EIO if the bio finished with any error.
 */
int
g_raid_tr_kerneldump_common(struct g_raid_tr_object *tr,
    void *virtual, vm_offset_t physical, off_t offset, size_t length)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol;
	struct bio bp;

	vol = tr->tro_volume;
	sc = vol->v_softc;

	g_reset_bio(&bp);
	bp.bio_cmd = BIO_WRITE;
	bp.bio_done = g_raid_tr_kerneldump_common_done;
	bp.bio_attribute = NULL;
	bp.bio_offset = offset;
	bp.bio_length = length;
	bp.bio_data = virtual;
	bp.bio_to = vol->v_provider;

	g_raid_start(&bp);
	while (!(bp.bio_flags & BIO_DONE)) {
		G_RAID_DEBUG1(4, sc, "Poll...");
		g_raid_poll(sc);
		DELAY(10);
	}

	return (bp.bio_error != 0 ?
EIO : 0);
}

/*
 * Dumper callback installed by g_raid_kerneldump(): 'arg' is the volume;
 * the write is forwarded to the volume's transformation via
 * G_RAID_TR_KERNELDUMP and its result is returned.
 */
static int
g_raid_dump(void *arg,
    void *virtual, vm_offset_t physical, off_t offset, size_t length)
{
	struct g_raid_volume *vol;
	int error;

	vol = (struct g_raid_volume *)arg;
	G_RAID_DEBUG1(3, vol->v_softc, "Dumping at off %llu len %llu.",
	    (long long unsigned)offset, (long long unsigned)length);

	error = G_RAID_TR_KERNELDUMP(vol->v_tr,
	    virtual, physical, offset, length);
	return (error);
}

/*
 * Answer a GEOM::kerneldump attribute request: fill the g_kerneldump
 * structure carried in bio_data with this volume's dumper, sector size and
 * media window (length clipped to the volume's media size) and deliver the
 * bio with no error.
 */
static void
g_raid_kerneldump(struct g_raid_softc *sc, struct bio *bp)
{
	struct g_kerneldump *gkd;
	struct g_provider *pp;
	struct g_raid_volume *vol;

	gkd = (struct g_kerneldump*)bp->bio_data;
	pp = bp->bio_to;
	vol = pp->private;
	g_trace(G_T_TOPOLOGY, "g_raid_kerneldump(%s, %jd, %jd)",
		pp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
	gkd->di.dumper = g_raid_dump;
	gkd->di.priv = vol;
	gkd->di.blocksize = vol->v_sectorsize;
	gkd->di.maxiosize = DFLTPHYS;
	gkd->di.mediaoffset = gkd->offset;
	if ((gkd->offset + gkd->length) > vol->v_mediasize)
		gkd->length = vol->v_mediasize - gkd->offset;
	gkd->di.mediasize = gkd->length;
	g_io_deliver(bp, 0);
}

/*
 * Answer a GEOM::candelete attribute request: the int in bio_data is set
 * to 1 if at least one subdisk not in state NONE sits on a disk with
 * d_candelete set, else 0.  The bio is delivered with no error.
 */
static void
g_raid_candelete(struct g_raid_softc *sc, struct bio *bp)
{
	struct g_provider *pp;
	struct g_raid_volume *vol;
	struct g_raid_subdisk *sd;
	int *val;
	int i;

	val = (int *)bp->bio_data;
	pp = bp->bio_to;
	vol = pp->private;
	*val = 0;
	for (i = 0; i < vol->v_disks_count; i++) {
		sd = &vol->v_subdisks[i];
		if (sd->sd_state == G_RAID_SUBDISK_S_NONE)
			continue;
		if (sd->sd_disk->d_candelete) {
			*val = 1;
			break;
		}
	}
	g_io_deliver(bp, 0);
}

/*
 * GEOM start routine.  READ/WRITE/DELETE/FLUSH requests are appended to
 * the node's queue (and the thread sleeping on the softc is woken unless a
 * kernel dump is in progress); the two supported GETATTR attributes are
 * answered directly; everything else is rejected with EOPNOTSUPP.
 */
static void
g_raid_start(struct bio *bp)
{
	struct g_raid_softc *sc;

	sc = bp->bio_to->geom->softc;
	/*
	 * If sc == NULL or there are no valid disks, provider's error
	 * should be set and g_raid_start() should not be called at all.
	 */
//	KASSERT(sc != NULL && sc->sc_state == G_RAID_VOLUME_S_RUNNING,
//	    ("Provider's error should be set (error=%d)(mirror=%s).",
//	    bp->bio_to->error, bp->bio_to->name));
	G_RAID_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_FLUSH:
		break;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete"))
			g_raid_candelete(sc, bp);
		else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
			g_raid_kerneldump(sc, bp);
		else
			g_io_deliver(bp, EOPNOTSUPP);
		return;
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	/* While dumping, the queue is drained by g_raid_poll() instead. */
	if (!dumping) {
		G_RAID_DEBUG1(4, sc, "Waking up %p.", sc);
		wakeup(sc);
	}
}

/*
 * Return 1 if the byte range of 'bp' intersects the range starting at
 * 'lstart' of length 'len' (both ends inclusive), 0 otherwise.
 */
static int
g_raid_bio_overlaps(const struct bio *bp, off_t lstart, off_t len)
{
	/*
	 * 5 cases:
	 * (1) bp entirely below NO
	 * (2) bp entirely above NO
	 * (3) bp start below, but end in range YES
	 * (4) bp entirely within YES
	 * (5) bp starts within, ends above YES
	 *
	 * lock range 10-19 (offset 10 length 10)
	 * (1) 1-5: first if kicks it out
	 * (2) 30-35: second if kicks it out
	 * (3) 5-15: passes both ifs
	 * (4) 12-14: passes both ifs
	 * (5) 19-20: passes both
	 */
	off_t lend = lstart + len - 1;
	off_t bstart = bp->bio_offset;
	off_t bend = bp->bio_offset + bp->bio_length - 1;

	if (bend < lstart)
		return (0);
	if (lend < bstart)
		return (0);
	return (1);
}

/* Return 1 if 'bp' overlaps any range on the volume's v_locks list. */
static int
g_raid_is_in_locked_range(struct g_raid_volume *vol, const struct bio *bp)
{
	struct g_raid_lock *lp;

	sx_assert(&vol->v_softc->sc_lock, SX_LOCKED);

	LIST_FOREACH(lp, &vol->v_locks, l_next) {
		if (g_raid_bio_overlaps(bp, lp->l_offset, lp->l_length))
			return (1);
	}
	return (0);
}

/*
 * Start processing a request taken from the node's queue: defer it if it
 * touches a locked range, otherwise account for it, put it on the
 * volume's in-flight queue and hand it to the transformation layer.
 */
static void
g_raid_start_request(struct bio *bp)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol;

	sc = bp->bio_to->geom->softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);
	vol = bp->bio_to->private;

	/*
	 * Check to see if this item is in a locked range.  If so,
	 * queue it to our locked queue and return.
We'll requeue
	 * it when the range is unlocked.  Internal I/O for the
	 * rebuild/rescan/recovery process is excluded from this
	 * check so we can actually do the recovery.
	 */
	if (!(bp->bio_cflags & G_RAID_BIO_FLAG_SPECIAL) &&
	    g_raid_is_in_locked_range(vol, bp)) {
		G_RAID_LOGREQ(3, bp, "Defer request.");
		bioq_insert_tail(&vol->v_locked, bp);
		return;
	}

	/*
	 * If we're actually going to do the write/delete, then
	 * update the idle stats for the volume.
	 */
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) {
		if (!vol->v_dirty)
			g_raid_dirty(vol);
		vol->v_writes++;
	}

	/*
	 * Put request onto inflight queue, so we can check if new
	 * synchronization requests don't collide with it.  Then tell
	 * the transformation layer to start the I/O.
	 */
	bioq_insert_tail(&vol->v_inflight, bp);
	G_RAID_LOGREQ(4, bp, "Request started");
	G_RAID_TR_IOSTART(vol->v_tr, bp);
}

/*
 * Re-evaluate every range lock that still has pending in-flight requests.
 * For each such lock the overlapping in-flight bios are recounted; locks
 * that reach zero are reported to the transformation via
 * G_RAID_TR_LOCKED, while v_pending_lock stays set if any lock is still
 * waiting.  The 'bp' argument is not used.
 */
static void
g_raid_finish_with_locked_ranges(struct g_raid_volume *vol, struct bio *bp)
{
	off_t off, len;
	struct bio *nbp;
	struct g_raid_lock *lp;

	vol->v_pending_lock = 0;
	LIST_FOREACH(lp, &vol->v_locks, l_next) {
		if (lp->l_pending) {
			off = lp->l_offset;
			len = lp->l_length;
			lp->l_pending = 0;
			TAILQ_FOREACH(nbp, &vol->v_inflight.queue, bio_queue) {
				if (g_raid_bio_overlaps(nbp, off, len))
					lp->l_pending++;
			}
			if (lp->l_pending) {
				vol->v_pending_lock = 1;
				G_RAID_DEBUG1(4, vol->v_softc,
				    "Deferred lock(%jd, %jd) has %d pending",
				    (intmax_t)off, (intmax_t)(off + len),
				    lp->l_pending);
				continue;
			}
			G_RAID_DEBUG1(4, vol->v_softc,
			    "Deferred lock of %jd to %jd completed",
			    (intmax_t)off, (intmax_t)(off + len));
			G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg);
		}
	}
}

/*
 * Complete a volume-level request: update write accounting, remove the bio
 * from the in-flight queue, re-check deferred range locks it may have been
 * blocking, record the completion time and deliver it with 'error'.
 */
void
g_raid_iodone(struct bio *bp, int error)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol;

	sc = bp->bio_to->geom->softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);
	vol = bp->bio_to->private;
	G_RAID_LOGREQ(3, bp, "Request done: %d.", error);

	/* Update stats if we done write/delete.
*/
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) {
		vol->v_writes--;
		vol->v_last_write = time_uptime;
	}

	bioq_remove(&vol->v_inflight, bp);
	if (vol->v_pending_lock && g_raid_is_in_locked_range(vol, bp))
		g_raid_finish_with_locked_ranges(vol, bp);
	getmicrouptime(&vol->v_last_done);
	g_io_deliver(bp, error);
}

/*
 * Register a lock on the range [off, off + len) of the volume.  The lock
 * record is always added to v_locks.  If in-flight requests other than
 * 'ignore' overlap the range, their count is stored in the lock,
 * v_pending_lock is set and EBUSY is returned; the G_RAID_TR_LOCKED
 * notification is then issued later from
 * g_raid_finish_with_locked_ranges().  Otherwise G_RAID_TR_LOCKED is
 * invoked immediately with 'argp' and 0 is returned.
 */
int
g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len,
    struct bio *ignore, void *argp)
{
	struct g_raid_softc *sc;
	struct g_raid_lock *lp;
	struct bio *bp;

	sc = vol->v_softc;
	lp = malloc(sizeof(*lp), M_RAID, M_WAITOK | M_ZERO);
	LIST_INSERT_HEAD(&vol->v_locks, lp, l_next);
	lp->l_offset = off;
	lp->l_length = len;
	lp->l_callback_arg = argp;

	lp->l_pending = 0;
	TAILQ_FOREACH(bp, &vol->v_inflight.queue, bio_queue) {
		if (bp != ignore && g_raid_bio_overlaps(bp, off, len))
			lp->l_pending++;
	}

	/*
	 * If there are any writes that are pending, we return EBUSY.  All
	 * callers will have to wait until all pending writes clear.
	 */
	if (lp->l_pending > 0) {
		vol->v_pending_lock = 1;
		G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd deferred %d pend",
		    (intmax_t)off, (intmax_t)(off+len), lp->l_pending);
		return (EBUSY);
	}
	G_RAID_DEBUG1(4, sc, "Locking range %jd to %jd",
	    (intmax_t)off, (intmax_t)(off+len));
	G_RAID_TR_LOCKED(vol->v_tr, lp->l_callback_arg);
	return (0);
}

/*
 * Drop the lock whose offset and length match exactly, move every request
 * deferred on v_locked back to the node's queue and free the lock record.
 * Returns 0 on success and EINVAL if no matching lock exists.
 */
int
g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len)
{
	struct g_raid_lock *lp;
	struct g_raid_softc *sc;
	struct bio *bp;

	sc = vol->v_softc;
	LIST_FOREACH(lp, &vol->v_locks, l_next) {
		if (lp->l_offset == off && lp->l_length == len) {
			LIST_REMOVE(lp, l_next);
			/* XXX
			 * Right now we just put them all back on the queue
			 * and hope for the best.  We hope this because any
			 * locked ranges will go right back on this list
			 * when the worker thread runs.
			 * XXX
			 */
			G_RAID_DEBUG1(4, sc, "Unlocked %jd to %jd",
			    (intmax_t)lp->l_offset,
			    (intmax_t)(lp->l_offset+lp->l_length));
			mtx_lock(&sc->sc_queue_mtx);
			while ((bp = bioq_takefirst(&vol->v_locked)) != NULL)
				bioq_insert_tail(&sc->sc_queue, bp);
			mtx_unlock(&sc->sc_queue_mtx);
			free(lp, M_RAID);
			return (0);
		}
	}
	return (EINVAL);
}

/*
 * Issue 'bp' to the disk backing subdisk 'sd'.  The subdisk is remembered
 * in bio_caller1.  A request to an absent disk, or to a disk that is
 * neither ACTIVE nor FAILED, is completed with ENXIO through
 * g_raid_disk_done() with bio_from/bio_to cleared.  Otherwise the
 * consumer's index counter is bumped, the per-disk load averages are
 * updated, and the bio is either written synchronously through the dump
 * path (while dumping) or sent with g_io_request() after adding the
 * subdisk's offset.
 */
void
g_raid_subdisk_iostart(struct g_raid_subdisk *sd, struct bio *bp)
{
	struct g_consumer *cp;
	struct g_raid_disk *disk, *tdisk;

	bp->bio_caller1 = sd;

	/*
	 * Make sure that the disk is present. Generally it is a task of
	 * transformation layers to not send requests to absent disks, but
	 * it is better to be safe and report situation then sorry.
	 */
	if (sd->sd_disk == NULL) {
		G_RAID_LOGREQ(0, bp, "Warning! I/O request to an absent disk!");
nodisk:
		bp->bio_from = NULL;
		bp->bio_to = NULL;
		bp->bio_error = ENXIO;
		g_raid_disk_done(bp);
		return;
	}
	disk = sd->sd_disk;
	if (disk->d_state != G_RAID_DISK_S_ACTIVE &&
	    disk->d_state != G_RAID_DISK_S_FAILED) {
		G_RAID_LOGREQ(0, bp, "Warning! I/O request to a disk in a "
		    "wrong state (%s)!", g_raid_disk_state2str(disk->d_state));
		goto nodisk;
	}

	cp = disk->d_consumer;
	bp->bio_from = cp;
	bp->bio_to = cp->provider;
	cp->index++;

	/* Update average disks load.
*/
	TAILQ_FOREACH(tdisk, &sd->sd_softc->sc_disks, d_next) {
		if (tdisk->d_consumer == NULL)
			tdisk->d_load = 0;
		else
			/* Weighted average: 7/8 old load, 1/8 scaled index. */
			tdisk->d_load = (tdisk->d_consumer->index *
			    G_RAID_SUBDISK_LOAD_SCALE + tdisk->d_load * 7) / 8;
	}

	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	if (dumping) {
		G_RAID_LOGREQ(3, bp, "Sending dumping request.");
		if (bp->bio_cmd == BIO_WRITE) {
			bp->bio_error = g_raid_subdisk_kerneldump(sd,
			    bp->bio_data, 0, bp->bio_offset, bp->bio_length);
		} else
			bp->bio_error = EOPNOTSUPP;
		g_raid_disk_done(bp);
	} else {
		bp->bio_done = g_raid_disk_done;
		bp->bio_offset += sd->sd_offset;
		G_RAID_LOGREQ(3, bp, "Sending request.");
		g_io_request(bp, cp);
	}
}

/*
 * Write dump data to the disk behind a subdisk using the dumper cached in
 * disk->d_kd.  Returns ENXIO if the subdisk has no disk, EOPNOTSUPP if the
 * disk has no dumper, else the result of dump_write().  The target offset
 * is the disk's dump media offset plus the subdisk offset plus 'offset'.
 */
int
g_raid_subdisk_kerneldump(struct g_raid_subdisk *sd,
    void *virtual, vm_offset_t physical, off_t offset, size_t length)
{

	if (sd->sd_disk == NULL)
		return (ENXIO);
	if (sd->sd_disk->d_kd.di.dumper == NULL)
		return (EOPNOTSUPP);
	return (dump_write(&sd->sd_disk->d_kd.di,
	    virtual, physical,
	    sd->sd_disk->d_kd.di.mediaoffset + sd->sd_offset + offset,
	    length));
}

/*
 * Completion callback for per-disk requests: put the bio on the node's
 * queue and, unless a dump is in progress, wake the thread sleeping on the
 * softc.
 */
static void
g_raid_disk_done(struct bio *bp)
{
	struct g_raid_softc *sc;
	struct g_raid_subdisk *sd;

	sd = bp->bio_caller1;
	sc = sd->sd_softc;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	if (!dumping)
		wakeup(sc);
}

/*
 * Process a completed per-disk request taken from the node's queue:
 * drop the consumer's index counter, kill the consumer if its disk has
 * already been detached (private == NULL), subtract the subdisk offset
 * again and pass the bio to the transformation's IODONE method.
 */
static void
g_raid_disk_done_request(struct bio *bp)
{
	struct g_raid_softc *sc;
	struct g_raid_disk *disk;
	struct g_raid_subdisk *sd;
	struct g_raid_volume *vol;

	g_topology_assert_not();

	G_RAID_LOGREQ(3, bp, "Disk request done: %d.", bp->bio_error);
	sd = bp->bio_caller1;
	sc = sd->sd_softc;
	vol = sd->sd_volume;
	if (bp->bio_from != NULL) {
		bp->bio_from->index--;
		disk = bp->bio_from->private;
		if (disk == NULL)
			g_raid_kill_consumer(sc, bp->bio_from);
	}
	bp->bio_offset -= sd->sd_offset;

	G_RAID_TR_IODONE(vol->v_tr, sd, bp);
}

/*
 * Dispatch one event to the update routine selected by its flags and store
 * the result in e_error.  Events sent without G_RAID_EVENT_WAIT are freed
 * here; for waited-for events the DONE flag is set and the sender woken.
 */
static void
g_raid_handle_event(struct g_raid_softc *sc, struct g_raid_event *ep)
{

	if ((ep->e_flags & G_RAID_EVENT_VOLUME) != 0)
		ep->e_error = g_raid_update_volume(ep->e_tgt, ep->e_event);
else if ((ep->e_flags & G_RAID_EVENT_DISK) != 0)
		ep->e_error = g_raid_update_disk(ep->e_tgt, ep->e_event);
	else if ((ep->e_flags & G_RAID_EVENT_SUBDISK) != 0)
		ep->e_error = g_raid_update_subdisk(ep->e_tgt, ep->e_event);
	else
		ep->e_error = g_raid_update_node(ep->e_tgt, ep->e_event);
	if ((ep->e_flags & G_RAID_EVENT_WAIT) == 0) {
		KASSERT(ep->e_error == 0,
		    ("Error cannot be handled."));
		g_raid_event_free(ep);
	} else {
		ep->e_flags |= G_RAID_EVENT_DONE;
		G_RAID_DEBUG1(4, sc, "Waking up %p.", ep);
		mtx_lock(&sc->sc_queue_mtx);
		wakeup(ep);
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Worker thread.
 */
/*
 * Main loop of a node: with sc_lock held exclusively it takes, in this
 * order of preference, one event or one queued bio and processes it.  When
 * both queues are empty it sleeps on the softc (dropping sc_lock) for what
 * is left of the idle threshold, or performs idle work when that time has
 * already passed.
 */
static void
g_raid_worker(void *arg)
{
	struct g_raid_softc *sc;
	struct g_raid_event *ep;
	struct g_raid_volume *vol;
	struct bio *bp;
	struct timeval now, t;
	int timeout, rv;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
	for (;;) {
		mtx_lock(&sc->sc_queue_mtx);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		bp = NULL;
		vol = NULL;
		rv = 0;
		ep = TAILQ_FIRST(&sc->sc_events);
		if (ep != NULL)
			TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		else if ((bp = bioq_takefirst(&sc->sc_queue)) != NULL)
			;
		else {
			/*
			 * Nothing to do: find the oldest v_last_done among
			 * volumes with no in-flight I/O and a transformation.
			 */
			getmicrouptime(&now);
			t = now;
			TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
				if (bioq_first(&vol->v_inflight) == NULL &&
				    vol->v_tr &&
				    timevalcmp(&vol->v_last_done, &t, < ))
					t = vol->v_last_done;
			}
			timevalsub(&t, &now);
			timeout = g_raid_idle_threshold +
			    t.tv_sec * 1000000 + t.tv_usec;
			if (timeout > 0) {
				/*
				 * Two steps to avoid overflows at HZ=1000
				 * and idle timeouts > 2.1s.  Some rounding
				 * errors can occur, but they are < 1tick,
				 * which is deemed to be close enough for
				 * this purpose.
*/ int micpertic = 1000000 / hz; timeout = (timeout + micpertic - 1) / micpertic; sx_xunlock(&sc->sc_lock); MSLEEP(rv, sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "-", timeout); sx_xlock(&sc->sc_lock); goto process; } else rv = EWOULDBLOCK; } mtx_unlock(&sc->sc_queue_mtx); process: if (ep != NULL) { g_raid_handle_event(sc, ep); } else if (bp != NULL) { if (bp->bio_to != NULL && bp->bio_to->geom == sc->sc_geom) g_raid_start_request(bp); else g_raid_disk_done_request(bp); } else if (rv == EWOULDBLOCK) { TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { g_raid_clean(vol, -1); if (bioq_first(&vol->v_inflight) == NULL && vol->v_tr) { t.tv_sec = g_raid_idle_threshold / 1000000; t.tv_usec = g_raid_idle_threshold % 1000000; timevaladd(&t, &vol->v_last_done); getmicrouptime(&now); if (timevalcmp(&t, &now, <= )) { G_RAID_TR_IDLE(vol->v_tr); vol->v_last_done = now; } } } } if (sc->sc_stopping == G_RAID_DESTROY_HARD) g_raid_destroy_node(sc, 1); /* May not return. */ } } static void g_raid_poll(struct g_raid_softc *sc) { struct g_raid_event *ep; struct bio *bp; sx_xlock(&sc->sc_lock); mtx_lock(&sc->sc_queue_mtx); /* * First take a look at events. * This is important to handle events before any I/O requests. */ ep = TAILQ_FIRST(&sc->sc_events); if (ep != NULL) { TAILQ_REMOVE(&sc->sc_events, ep, e_next); mtx_unlock(&sc->sc_queue_mtx); g_raid_handle_event(sc, ep); goto out; } bp = bioq_takefirst(&sc->sc_queue); if (bp != NULL) { mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_from == NULL || bp->bio_from->geom != sc->sc_geom) g_raid_start_request(bp); else g_raid_disk_done_request(bp); } out: sx_xunlock(&sc->sc_lock); } static void g_raid_launch_provider(struct g_raid_volume *vol) { struct g_raid_disk *disk; struct g_raid_subdisk *sd; struct g_raid_softc *sc; struct g_provider *pp; char name[G_RAID_MAX_VOLUMENAME]; off_t off; int i; sc = vol->v_softc; sx_assert(&sc->sc_lock, SX_LOCKED); g_topology_lock(); /* Try to name provider with volume name. 
*/ snprintf(name, sizeof(name), "raid/%s", vol->v_name); if (g_raid_name_format == 0 || vol->v_name[0] == 0 || g_provider_by_name(name) != NULL) { /* Otherwise use sequential volume number. */ snprintf(name, sizeof(name), "raid/r%d", vol->v_global_id); } pp = g_new_providerf(sc->sc_geom, "%s", name); pp->flags |= G_PF_DIRECT_RECEIVE; if (vol->v_tr->tro_class->trc_accept_unmapped) { pp->flags |= G_PF_ACCEPT_UNMAPPED; for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_state == G_RAID_SUBDISK_S_NONE) continue; if ((sd->sd_disk->d_consumer->provider->flags & G_PF_ACCEPT_UNMAPPED) == 0) pp->flags &= ~G_PF_ACCEPT_UNMAPPED; } } pp->private = vol; pp->mediasize = vol->v_mediasize; pp->sectorsize = vol->v_sectorsize; pp->stripesize = 0; pp->stripeoffset = 0; if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 || vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE || vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT) { if ((disk = vol->v_subdisks[0].sd_disk) != NULL && disk->d_consumer != NULL && disk->d_consumer->provider != NULL) { pp->stripesize = disk->d_consumer->provider->stripesize; off = disk->d_consumer->provider->stripeoffset; pp->stripeoffset = off + vol->v_subdisks[0].sd_offset; if (off > 0) pp->stripeoffset %= off; } if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3) { pp->stripesize *= (vol->v_disks_count - 1); pp->stripeoffset *= (vol->v_disks_count - 1); } } else pp->stripesize = vol->v_strip_size; vol->v_provider = pp; g_error_provider(pp, 0); g_topology_unlock(); G_RAID_DEBUG1(0, sc, "Provider %s for volume %s created.", pp->name, vol->v_name); } static void g_raid_destroy_provider(struct g_raid_volume *vol) { struct g_raid_softc *sc; struct g_provider *pp; struct bio *bp, *tmp; g_topology_assert_not(); sc = vol->v_softc; pp = vol->v_provider; KASSERT(pp != NULL, ("NULL provider (volume=%s).", vol->v_name)); g_topology_lock(); g_error_provider(pp, ENXIO); mtx_lock(&sc->sc_queue_mtx); 
TAILQ_FOREACH_SAFE(bp, &sc->sc_queue.queue, bio_queue, tmp) { if (bp->bio_to != pp) continue; bioq_remove(&sc->sc_queue, bp); g_io_deliver(bp, ENXIO); } mtx_unlock(&sc->sc_queue_mtx); G_RAID_DEBUG1(0, sc, "Provider %s for volume %s destroyed.", pp->name, vol->v_name); g_wither_provider(pp, ENXIO); g_topology_unlock(); vol->v_provider = NULL; } /* * Update device state. */ static int g_raid_update_volume(struct g_raid_volume *vol, u_int event) { struct g_raid_softc *sc; sc = vol->v_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID_DEBUG1(2, sc, "Event %s for volume %s.", g_raid_volume_event2str(event), vol->v_name); switch (event) { case G_RAID_VOLUME_E_DOWN: if (vol->v_provider != NULL) g_raid_destroy_provider(vol); break; case G_RAID_VOLUME_E_UP: if (vol->v_provider == NULL) g_raid_launch_provider(vol); break; case G_RAID_VOLUME_E_START: if (vol->v_tr) G_RAID_TR_START(vol->v_tr); return (0); default: if (sc->sc_md) G_RAID_MD_VOLUME_EVENT(sc->sc_md, vol, event); return (0); } /* Manage root mount release. */ if (vol->v_starting) { vol->v_starting = 0; G_RAID_DEBUG1(1, sc, "root_mount_rel %p", vol->v_rootmount); root_mount_rel(vol->v_rootmount); vol->v_rootmount = NULL; } if (vol->v_stopping && vol->v_provider_open == 0) g_raid_destroy_volume(vol); return (0); } /* * Update subdisk state. */ static int g_raid_update_subdisk(struct g_raid_subdisk *sd, u_int event) { struct g_raid_softc *sc; struct g_raid_volume *vol; sc = sd->sd_softc; vol = sd->sd_volume; sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID_DEBUG1(2, sc, "Event %s for subdisk %s:%d-%s.", g_raid_subdisk_event2str(event), vol->v_name, sd->sd_pos, sd->sd_disk ? g_raid_get_diskname(sd->sd_disk) : "[none]"); if (vol->v_tr) G_RAID_TR_EVENT(vol->v_tr, sd, event); return (0); } /* * Update disk state. 
*/ static int g_raid_update_disk(struct g_raid_disk *disk, u_int event) { struct g_raid_softc *sc; sc = disk->d_softc; sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID_DEBUG1(2, sc, "Event %s for disk %s.", g_raid_disk_event2str(event), g_raid_get_diskname(disk)); if (sc->sc_md) G_RAID_MD_EVENT(sc->sc_md, disk, event); return (0); } /* * Node event. */ static int g_raid_update_node(struct g_raid_softc *sc, u_int event) { sx_assert(&sc->sc_lock, SX_XLOCKED); G_RAID_DEBUG1(2, sc, "Event %s for the array.", g_raid_node_event2str(event)); if (event == G_RAID_NODE_E_WAKE) return (0); if (sc->sc_md) G_RAID_MD_EVENT(sc->sc_md, NULL, event); return (0); } static int g_raid_access(struct g_provider *pp, int acr, int acw, int ace) { struct g_raid_volume *vol; struct g_raid_softc *sc; int dcw, opens, error = 0; g_topology_assert(); sc = pp->geom->softc; vol = pp->private; KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); KASSERT(vol != NULL, ("NULL volume (provider=%s).", pp->name)); G_RAID_DEBUG1(2, sc, "Access request for %s: r%dw%de%d.", pp->name, acr, acw, ace); dcw = pp->acw + acw; g_topology_unlock(); sx_xlock(&sc->sc_lock); /* Deny new opens while dying. */ if (sc->sc_stopping != 0 && (acr > 0 || acw > 0 || ace > 0)) { error = ENXIO; goto out; } /* Deny write opens for read-only volumes. */ if (vol->v_read_only && acw > 0) { error = EROFS; goto out; } if (dcw == 0) g_raid_clean(vol, dcw); vol->v_provider_open += acr + acw + ace; /* Handle delayed node destruction. */ if (sc->sc_stopping == G_RAID_DESTROY_DELAYED && vol->v_provider_open == 0) { /* Count open volumes. */ opens = g_raid_nopens(sc); if (opens == 0) { sc->sc_stopping = G_RAID_DESTROY_HARD; /* Wake up worker to make it selfdestruct. */ g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0); } } /* Handle open volume destruction. 
*/ if (vol->v_stopping && vol->v_provider_open == 0) g_raid_destroy_volume(vol); out: sx_xunlock(&sc->sc_lock); g_topology_lock(); return (error); } struct g_raid_softc * g_raid_create_node(struct g_class *mp, const char *name, struct g_raid_md_object *md) { struct g_raid_softc *sc; struct g_geom *gp; int error; g_topology_assert(); G_RAID_DEBUG(1, "Creating array %s.", name); gp = g_new_geomf(mp, "%s", name); sc = malloc(sizeof(*sc), M_RAID, M_WAITOK | M_ZERO); gp->start = g_raid_start; gp->orphan = g_raid_orphan; gp->access = g_raid_access; gp->dumpconf = g_raid_dumpconf; sc->sc_md = md; sc->sc_geom = gp; sc->sc_flags = 0; TAILQ_INIT(&sc->sc_volumes); TAILQ_INIT(&sc->sc_disks); sx_init(&sc->sc_lock, "graid:lock"); mtx_init(&sc->sc_queue_mtx, "graid:queue", NULL, MTX_DEF); TAILQ_INIT(&sc->sc_events); bioq_init(&sc->sc_queue); gp->softc = sc; error = kproc_create(g_raid_worker, sc, &sc->sc_worker, 0, 0, "g_raid %s", name); if (error != 0) { G_RAID_DEBUG(0, "Cannot create kernel thread for %s.", name); mtx_destroy(&sc->sc_queue_mtx); sx_destroy(&sc->sc_lock); g_destroy_geom(sc->sc_geom); free(sc, M_RAID); return (NULL); } G_RAID_DEBUG1(0, sc, "Array %s created.", name); return (sc); } struct g_raid_volume * g_raid_create_volume(struct g_raid_softc *sc, const char *name, int id) { struct g_raid_volume *vol, *vol1; int i; G_RAID_DEBUG1(1, sc, "Creating volume %s.", name); vol = malloc(sizeof(*vol), M_RAID, M_WAITOK | M_ZERO); vol->v_softc = sc; strlcpy(vol->v_name, name, G_RAID_MAX_VOLUMENAME); vol->v_state = G_RAID_VOLUME_S_STARTING; vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_UNKNOWN; vol->v_rotate_parity = 1; bioq_init(&vol->v_inflight); bioq_init(&vol->v_locked); LIST_INIT(&vol->v_locks); for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) { vol->v_subdisks[i].sd_softc = sc; vol->v_subdisks[i].sd_volume = vol; vol->v_subdisks[i].sd_pos = i; vol->v_subdisks[i].sd_state = G_RAID_DISK_S_NONE; } /* Find free ID for this volume. 
*/ g_topology_lock(); vol1 = vol; if (id >= 0) { LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) { if (vol1->v_global_id == id) break; } } if (vol1 != NULL) { for (id = 0; ; id++) { LIST_FOREACH(vol1, &g_raid_volumes, v_global_next) { if (vol1->v_global_id == id) break; } if (vol1 == NULL) break; } } vol->v_global_id = id; LIST_INSERT_HEAD(&g_raid_volumes, vol, v_global_next); g_topology_unlock(); /* Delay root mounting. */ vol->v_rootmount = root_mount_hold("GRAID"); G_RAID_DEBUG1(1, sc, "root_mount_hold %p", vol->v_rootmount); vol->v_starting = 1; TAILQ_INSERT_TAIL(&sc->sc_volumes, vol, v_next); return (vol); } struct g_raid_disk * g_raid_create_disk(struct g_raid_softc *sc) { struct g_raid_disk *disk; G_RAID_DEBUG1(1, sc, "Creating disk."); disk = malloc(sizeof(*disk), M_RAID, M_WAITOK | M_ZERO); disk->d_softc = sc; disk->d_state = G_RAID_DISK_S_NONE; TAILQ_INIT(&disk->d_subdisks); TAILQ_INSERT_TAIL(&sc->sc_disks, disk, d_next); return (disk); } int g_raid_start_volume(struct g_raid_volume *vol) { struct g_raid_tr_class *class; struct g_raid_tr_object *obj; int status; G_RAID_DEBUG1(2, vol->v_softc, "Starting volume %s.", vol->v_name); LIST_FOREACH(class, &g_raid_tr_classes, trc_list) { if (!class->trc_enable) continue; G_RAID_DEBUG1(2, vol->v_softc, "Tasting volume %s for %s transformation.", vol->v_name, class->name); obj = (void *)kobj_create((kobj_class_t)class, M_RAID, M_WAITOK); obj->tro_class = class; obj->tro_volume = vol; status = G_RAID_TR_TASTE(obj, vol); if (status != G_RAID_TR_TASTE_FAIL) break; kobj_delete((kobj_t)obj, M_RAID); } if (class == NULL) { G_RAID_DEBUG1(0, vol->v_softc, "No transformation module found for %s.", vol->v_name); vol->v_tr = NULL; g_raid_change_volume_state(vol, G_RAID_VOLUME_S_UNSUPPORTED); g_raid_event_send(vol, G_RAID_VOLUME_E_DOWN, G_RAID_EVENT_VOLUME); return (-1); } G_RAID_DEBUG1(2, vol->v_softc, "Transformation module %s chosen for %s.", class->name, vol->v_name); vol->v_tr = obj; return (0); } int 
g_raid_destroy_node(struct g_raid_softc *sc, int worker) { struct g_raid_volume *vol, *tmpv; struct g_raid_disk *disk, *tmpd; int error = 0; sc->sc_stopping = G_RAID_DESTROY_HARD; TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tmpv) { if (g_raid_destroy_volume(vol)) error = EBUSY; } if (error) return (error); TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tmpd) { if (g_raid_destroy_disk(disk)) error = EBUSY; } if (error) return (error); if (sc->sc_md) { G_RAID_MD_FREE(sc->sc_md); kobj_delete((kobj_t)sc->sc_md, M_RAID); sc->sc_md = NULL; } if (sc->sc_geom != NULL) { G_RAID_DEBUG1(0, sc, "Array %s destroyed.", sc->sc_name); g_topology_lock(); sc->sc_geom->softc = NULL; g_wither_geom(sc->sc_geom, ENXIO); g_topology_unlock(); sc->sc_geom = NULL; } else G_RAID_DEBUG(1, "Array destroyed."); if (worker) { g_raid_event_cancel(sc, sc); mtx_destroy(&sc->sc_queue_mtx); sx_xunlock(&sc->sc_lock); sx_destroy(&sc->sc_lock); wakeup(&sc->sc_stopping); free(sc, M_RAID); curthread->td_pflags &= ~TDP_GEOM; G_RAID_DEBUG(1, "Thread exiting."); kproc_exit(0); } else { /* Wake up worker to make it selfdestruct. 
*/ g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0); } return (0); } int g_raid_destroy_volume(struct g_raid_volume *vol) { struct g_raid_softc *sc; struct g_raid_disk *disk; int i; sc = vol->v_softc; G_RAID_DEBUG1(2, sc, "Destroying volume %s.", vol->v_name); vol->v_stopping = 1; if (vol->v_state != G_RAID_VOLUME_S_STOPPED) { if (vol->v_tr) { G_RAID_TR_STOP(vol->v_tr); return (EBUSY); } else vol->v_state = G_RAID_VOLUME_S_STOPPED; } if (g_raid_event_check(sc, vol) != 0) return (EBUSY); if (vol->v_provider != NULL) return (EBUSY); if (vol->v_provider_open != 0) return (EBUSY); if (vol->v_tr) { G_RAID_TR_FREE(vol->v_tr); kobj_delete((kobj_t)vol->v_tr, M_RAID); vol->v_tr = NULL; } if (vol->v_rootmount) root_mount_rel(vol->v_rootmount); g_topology_lock(); LIST_REMOVE(vol, v_global_next); g_topology_unlock(); TAILQ_REMOVE(&sc->sc_volumes, vol, v_next); for (i = 0; i < G_RAID_MAX_SUBDISKS; i++) { g_raid_event_cancel(sc, &vol->v_subdisks[i]); disk = vol->v_subdisks[i].sd_disk; if (disk == NULL) continue; TAILQ_REMOVE(&disk->d_subdisks, &vol->v_subdisks[i], sd_next); } G_RAID_DEBUG1(2, sc, "Volume %s destroyed.", vol->v_name); if (sc->sc_md) G_RAID_MD_FREE_VOLUME(sc->sc_md, vol); g_raid_event_cancel(sc, vol); free(vol, M_RAID); if (sc->sc_stopping == G_RAID_DESTROY_HARD) { /* Wake up worker to let it selfdestruct. 
*/ g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0); } return (0); } int g_raid_destroy_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_subdisk *sd, *tmp; sc = disk->d_softc; G_RAID_DEBUG1(2, sc, "Destroying disk."); if (disk->d_consumer) { g_raid_kill_consumer(sc, disk->d_consumer); disk->d_consumer = NULL; } TAILQ_FOREACH_SAFE(sd, &disk->d_subdisks, sd_next, tmp) { g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_NONE); g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, G_RAID_EVENT_SUBDISK); TAILQ_REMOVE(&disk->d_subdisks, sd, sd_next); sd->sd_disk = NULL; } TAILQ_REMOVE(&sc->sc_disks, disk, d_next); if (sc->sc_md) G_RAID_MD_FREE_DISK(sc->sc_md, disk); g_raid_event_cancel(sc, disk); free(disk, M_RAID); return (0); } int g_raid_destroy(struct g_raid_softc *sc, int how) { int error, opens; g_topology_assert_not(); if (sc == NULL) return (ENXIO); sx_assert(&sc->sc_lock, SX_XLOCKED); /* Count open volumes. */ opens = g_raid_nopens(sc); /* React on some opened volumes. */ if (opens > 0) { switch (how) { case G_RAID_DESTROY_SOFT: G_RAID_DEBUG1(1, sc, "%d volumes are still open.", opens); sx_xunlock(&sc->sc_lock); return (EBUSY); case G_RAID_DESTROY_DELAYED: G_RAID_DEBUG1(1, sc, "Array will be destroyed on last close."); sc->sc_stopping = G_RAID_DESTROY_DELAYED; sx_xunlock(&sc->sc_lock); return (EBUSY); case G_RAID_DESTROY_HARD: G_RAID_DEBUG1(1, sc, "%d volumes are still open.", opens); } } /* Mark node for destruction. */ sc->sc_stopping = G_RAID_DESTROY_HARD; /* Wake up worker to let it selfdestruct. */ g_raid_event_send(sc, G_RAID_NODE_E_WAKE, 0); /* Sleep until node destroyed. */ error = sx_sleep(&sc->sc_stopping, &sc->sc_lock, PRIBIO | PDROP, "r:destroy", hz * 3); return (error == EWOULDBLOCK ? 
EBUSY : 0); } static void g_raid_taste_orphan(struct g_consumer *cp) { KASSERT(1 == 0, ("%s called while tasting %s.", __func__, cp->provider->name)); } static struct g_geom * g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_consumer *cp; struct g_geom *gp, *geom; struct g_raid_md_class *class; struct g_raid_md_object *obj; int status; g_topology_assert(); g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); if (!g_raid_enable) return (NULL); G_RAID_DEBUG(2, "Tasting provider %s.", pp->name); geom = NULL; status = G_RAID_MD_TASTE_FAIL; gp = g_new_geomf(mp, "raid:taste"); /* * This orphan function should be never called. */ gp->orphan = g_raid_taste_orphan; cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_RECEIVE; g_attach(cp, pp); if (g_access(cp, 1, 0, 0) != 0) goto ofail; LIST_FOREACH(class, &g_raid_md_classes, mdc_list) { if (!class->mdc_enable) continue; G_RAID_DEBUG(2, "Tasting provider %s for %s metadata.", pp->name, class->name); obj = (void *)kobj_create((kobj_class_t)class, M_RAID, M_WAITOK); obj->mdo_class = class; status = G_RAID_MD_TASTE(obj, mp, cp, &geom); if (status != G_RAID_MD_TASTE_NEW) kobj_delete((kobj_t)obj, M_RAID); if (status != G_RAID_MD_TASTE_FAIL) break; } if (status == G_RAID_MD_TASTE_FAIL) (void)g_access(cp, -1, 0, 0); ofail: g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); G_RAID_DEBUG(2, "Tasting provider %s done.", pp->name); return (geom); } int g_raid_create_node_format(const char *format, struct gctl_req *req, struct g_geom **gp) { struct g_raid_md_class *class; struct g_raid_md_object *obj; int status; G_RAID_DEBUG(2, "Creating array for %s metadata.", format); LIST_FOREACH(class, &g_raid_md_classes, mdc_list) { if (strcasecmp(class->name, format) == 0) break; } if (class == NULL) { G_RAID_DEBUG(1, "No support for %s metadata.", format); return (G_RAID_MD_TASTE_FAIL); } obj = (void *)kobj_create((kobj_class_t)class, M_RAID, M_WAITOK); obj->mdo_class = class; status = 
G_RAID_MD_CREATE_REQ(obj, &g_raid_class, req, gp); if (status != G_RAID_MD_TASTE_NEW) kobj_delete((kobj_t)obj, M_RAID); return (status); } static int g_raid_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_raid_softc *sc; int error; g_topology_unlock(); sc = gp->softc; sx_xlock(&sc->sc_lock); g_cancel_event(sc); error = g_raid_destroy(gp->softc, G_RAID_DESTROY_SOFT); g_topology_lock(); return (error); } void g_raid_write_metadata(struct g_raid_softc *sc, struct g_raid_volume *vol, struct g_raid_subdisk *sd, struct g_raid_disk *disk) { if (sc->sc_stopping == G_RAID_DESTROY_HARD) return; if (sc->sc_md) G_RAID_MD_WRITE(sc->sc_md, vol, sd, disk); } void g_raid_fail_disk(struct g_raid_softc *sc, struct g_raid_subdisk *sd, struct g_raid_disk *disk) { if (disk == NULL) disk = sd->sd_disk; if (disk == NULL) { G_RAID_DEBUG1(0, sc, "Warning! Fail request to an absent disk!"); return; } if (disk->d_state != G_RAID_DISK_S_ACTIVE) { G_RAID_DEBUG1(0, sc, "Warning! Fail request to a disk in a " "wrong state (%s)!", g_raid_disk_state2str(disk->d_state)); return; } if (sc->sc_md) G_RAID_MD_FAIL_DISK(sc->sc_md, sd, disk); } static void g_raid_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_raid_softc *sc; struct g_raid_volume *vol; struct g_raid_subdisk *sd; struct g_raid_disk *disk; int i, s; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { vol = pp->private; g_topology_unlock(); sx_xlock(&sc->sc_lock); sbuf_printf(sb, "%s%s %s volume\n", indent, sc->sc_md->mdo_class->name, g_raid_volume_level2str(vol->v_raid_level, vol->v_raid_level_qualifier)); sbuf_printf(sb, "%s\n", indent, vol->v_name); sbuf_printf(sb, "%s%s\n", indent, g_raid_volume_level2str(vol->v_raid_level, vol->v_raid_level_qualifier)); sbuf_printf(sb, "%s%s\n", indent, vol->v_tr ? 
vol->v_tr->tro_class->name : "NONE"); sbuf_printf(sb, "%s%u\n", indent, vol->v_disks_count); sbuf_printf(sb, "%s%u\n", indent, vol->v_strip_size); sbuf_printf(sb, "%s%s\n", indent, g_raid_volume_state2str(vol->v_state)); sbuf_printf(sb, "%s%s\n", indent, vol->v_dirty ? "Yes" : "No"); sbuf_printf(sb, "%s", indent); for (i = 0; i < vol->v_disks_count; i++) { sd = &vol->v_subdisks[i]; if (sd->sd_disk != NULL && sd->sd_disk->d_consumer != NULL) { sbuf_printf(sb, "%s ", g_raid_get_diskname(sd->sd_disk)); } else { sbuf_printf(sb, "NONE "); } sbuf_printf(sb, "(%s", g_raid_subdisk_state2str(sd->sd_state)); if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD || sd->sd_state == G_RAID_SUBDISK_S_RESYNC) { sbuf_printf(sb, " %d%%", (int)(sd->sd_rebuild_pos * 100 / sd->sd_size)); } sbuf_printf(sb, ")"); if (i + 1 < vol->v_disks_count) sbuf_printf(sb, ", "); } sbuf_printf(sb, "\n"); sx_xunlock(&sc->sc_lock); g_topology_lock(); } else if (cp != NULL) { disk = cp->private; if (disk == NULL) return; g_topology_unlock(); sx_xlock(&sc->sc_lock); sbuf_printf(sb, "%s%s", indent, g_raid_disk_state2str(disk->d_state)); if (!TAILQ_EMPTY(&disk->d_subdisks)) { sbuf_printf(sb, " ("); TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { sbuf_printf(sb, "%s", g_raid_subdisk_state2str(sd->sd_state)); if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD || sd->sd_state == G_RAID_SUBDISK_S_RESYNC) { sbuf_printf(sb, " %d%%", (int)(sd->sd_rebuild_pos * 100 / sd->sd_size)); } if (TAILQ_NEXT(sd, sd_next)) sbuf_printf(sb, ", "); } sbuf_printf(sb, ")"); } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s", indent); TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { sbuf_printf(sb, "r%d(%s):%d@%ju", sd->sd_volume->v_global_id, sd->sd_volume->v_name, - sd->sd_pos, sd->sd_offset); + sd->sd_pos, (uintmax_t)sd->sd_offset); if (TAILQ_NEXT(sd, sd_next)) sbuf_printf(sb, ", "); } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%s%d\n", indent, disk->d_read_errs); sx_xunlock(&sc->sc_lock); g_topology_lock(); } else { g_topology_unlock(); 
sx_xlock(&sc->sc_lock); if (sc->sc_md) { sbuf_printf(sb, "%s%s\n", indent, sc->sc_md->mdo_class->name); } if (!TAILQ_EMPTY(&sc->sc_volumes)) { s = 0xff; TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { if (vol->v_state < s) s = vol->v_state; } sbuf_printf(sb, "%s%s\n", indent, g_raid_volume_state2str(s)); } sx_xunlock(&sc->sc_lock); g_topology_lock(); } } static void g_raid_shutdown_post_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_raid_softc *sc; struct g_raid_volume *vol; mp = arg; g_topology_lock(); g_raid_shutdown = 1; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { if ((sc = gp->softc) == NULL) continue; g_topology_unlock(); sx_xlock(&sc->sc_lock); TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) g_raid_clean(vol, -1); g_cancel_event(sc); g_raid_destroy(sc, G_RAID_DESTROY_DELAYED); g_topology_lock(); } g_topology_unlock(); } static void g_raid_init(struct g_class *mp) { g_raid_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, g_raid_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); if (g_raid_post_sync == NULL) G_RAID_DEBUG(0, "Warning! 
Cannot register shutdown event."); g_raid_started = 1; } static void g_raid_fini(struct g_class *mp) { if (g_raid_post_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid_post_sync); g_raid_started = 0; } int g_raid_md_modevent(module_t mod, int type, void *arg) { struct g_raid_md_class *class, *c, *nc; int error; error = 0; class = arg; switch (type) { case MOD_LOAD: c = LIST_FIRST(&g_raid_md_classes); if (c == NULL || c->mdc_priority > class->mdc_priority) LIST_INSERT_HEAD(&g_raid_md_classes, class, mdc_list); else { while ((nc = LIST_NEXT(c, mdc_list)) != NULL && nc->mdc_priority < class->mdc_priority) c = nc; LIST_INSERT_AFTER(c, class, mdc_list); } if (g_raid_started) g_retaste(&g_raid_class); break; case MOD_UNLOAD: LIST_REMOVE(class, mdc_list); break; default: error = EOPNOTSUPP; break; } return (error); } int g_raid_tr_modevent(module_t mod, int type, void *arg) { struct g_raid_tr_class *class, *c, *nc; int error; error = 0; class = arg; switch (type) { case MOD_LOAD: c = LIST_FIRST(&g_raid_tr_classes); if (c == NULL || c->trc_priority > class->trc_priority) LIST_INSERT_HEAD(&g_raid_tr_classes, class, trc_list); else { while ((nc = LIST_NEXT(c, trc_list)) != NULL && nc->trc_priority < class->trc_priority) c = nc; LIST_INSERT_AFTER(c, class, trc_list); } break; case MOD_UNLOAD: LIST_REMOVE(class, trc_list); break; default: error = EOPNOTSUPP; break; } return (error); } /* * Use local implementation of DECLARE_GEOM_CLASS(g_raid_class, g_raid) * to reduce module priority, allowing submodules to register them first. 
*/ static moduledata_t g_raid_mod = { "g_raid", g_modevent, &g_raid_class }; DECLARE_MODULE(g_raid, g_raid_mod, SI_SUB_DRIVERS, SI_ORDER_THIRD); MODULE_VERSION(geom_raid, 0); Index: head/sys/geom/stripe/g_stripe.c =================================================================== --- head/sys/geom/stripe/g_stripe.c (revision 339814) +++ head/sys/geom/stripe/g_stripe.c (revision 339815) @@ -1,1273 +1,1274 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_stripe, "GEOM striping support"); static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); static uma_zone_t g_stripe_zone; static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_stripe_taste; static g_ctl_req_t g_stripe_config; static g_dumpconf_t g_stripe_dumpconf; static g_init_t g_stripe_init; static g_fini_t g_stripe_fini; struct g_class g_stripe_class = { .name = G_STRIPE_CLASS_NAME, .version = G_VERSION, .ctlreq = g_stripe_config, .taste = g_stripe_taste, .destroy_geom = g_stripe_destroy_geom, .init = g_stripe_init, .fini = g_stripe_fini }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff"); static u_int g_stripe_debug = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, "Debug level"); static int g_stripe_fast = 0; static int g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) { int error, fast; fast = g_stripe_fast; error = sysctl_handle_int(oidp, &fast, 0, req); if (error == 0 && req->newptr != NULL) g_stripe_fast = fast; return (error); } SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RWTUN, NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode"); static u_int g_stripe_maxmem = MAXPHYS * 100; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_stripe_maxmem, 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); static u_int g_stripe_fast_failed = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); /* * Greatest Common Divisor. 
*/ static u_int gcd(u_int a, u_int b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static u_int lcm(u_int a, u_int b) { return ((a * b) / gcd(a, b)); } static void g_stripe_init(struct g_class *mp __unused) { g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL, NULL, NULL, 0, 0); g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS; uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS); } static void g_stripe_fini(struct g_class *mp __unused) { uma_zdestroy(g_stripe_zone); } /* * Return the number of valid disks. */ static u_int g_stripe_nvalid(struct g_stripe_softc *sc) { u_int i, no; no = 0; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i] != NULL) no++; } return (no); } static void g_stripe_remove_disk(struct g_consumer *cp) { struct g_stripe_softc *sc; g_topology_assert(); KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); sc = (struct g_stripe_softc *)cp->geom->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); if (cp->private == NULL) { G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, sc->sc_name); cp->private = (void *)(uintptr_t)-1; } if (sc->sc_provider != NULL) { G_STRIPE_DEBUG(0, "Device %s deactivated.", sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) return; sc->sc_disks[cp->index] = NULL; cp->index = 0; g_detach(cp); g_destroy_consumer(cp); /* If there are no valid disks anymore, remove device. 
*/ if (LIST_EMPTY(&sc->sc_geom->consumer)) g_stripe_destroy(sc, 1); } static void g_stripe_orphan(struct g_consumer *cp) { struct g_stripe_softc *sc; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; g_stripe_remove_disk(cp); } static int g_stripe_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp1, *cp2, *tmp; struct g_stripe_softc *sc; struct g_geom *gp; int error; g_topology_assert(); gp = pp->geom; sc = gp->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); /* On first open, grab an extra "exclusive" bit */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... and let go of it on last close */ if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) de--; LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { error = g_access(cp1, dr, dw, de); if (error != 0) goto fail; if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && cp1->private != NULL) { g_stripe_remove_disk(cp1); /* May destroy geom. 
*/ } } return (0); fail: LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) break; g_access(cp2, -dr, -dw, -de); } return (error); } static void g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, off_t length, int mode) { - u_int stripesize; + off_t stripesize; size_t len; stripesize = sc->sc_stripesize; len = (size_t)(stripesize - (offset & (stripesize - 1))); do { bcopy(src, dst, len); if (mode) { dst += len + stripesize * (sc->sc_ndisks - 1); src += len; } else { dst += len; src += len + stripesize * (sc->sc_ndisks - 1); } length -= len; KASSERT(length >= 0, - ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).", - (size_t)stripesize, (intmax_t)offset, (intmax_t)length)); + ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).", + (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length)); if (length > stripesize) len = stripesize; else len = length; } while (length > 0); } static void g_stripe_done(struct bio *bp) { struct g_stripe_softc *sc; struct bio *pbp; pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, bp->bio_length, 1); bp->bio_data = bp->bio_caller1; bp->bio_caller1 = NULL; } mtx_lock(&sc->sc_lock); if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; pbp->bio_completed += bp->bio_completed; pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { mtx_unlock(&sc->sc_lock); if (pbp->bio_driver1 != NULL) uma_zfree(g_stripe_zone, pbp->bio_driver1); g_io_deliver(pbp, pbp->bio_error); } else mtx_unlock(&sc->sc_lock); g_destroy_bio(bp); } static int g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); - u_int nparts = 0, stripesize; struct g_stripe_softc *sc; char *addr, *data = NULL; struct bio *cbp; + off_t stripesize; + u_int nparts = 0; int error; sc = bp->bio_to->geom->softc; addr = bp->bio_data; 
stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; cbp->bio_length = length; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } if (nparts >= sc->sc_ndisks) { cbp = TAILQ_NEXT(cbp, bio_queue); if (cbp == NULL) cbp = TAILQ_FIRST(&queue); nparts++; /* * Update bio structure. */ /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length += MIN(stripesize, length); if (cbp->bio_caller1 == NULL) { cbp->bio_caller1 = cbp->bio_data; cbp->bio_data = NULL; if (data == NULL) { data = uma_zalloc(g_stripe_zone, M_NOWAIT); if (data == NULL) { error = ENOMEM; goto failure; } } } } else { cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); cbp->bio_caller2 = sc->sc_disks[no]; } } if (data != NULL) bp->bio_driver1 = data; /* * Fire off all allocated requests! 
*/ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; if (cbp->bio_caller1 != NULL) { cbp->bio_data = data; if (bp->bio_cmd == BIO_WRITE) { g_stripe_copy(sc, cbp->bio_caller1, data, cbp->bio_offset, cbp->bio_length, 0); } data += cbp->bio_length; } G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: if (data != NULL) uma_zfree(g_stripe_zone, data); while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); if (cbp->bio_caller1 != NULL) { cbp->bio_data = cbp->bio_caller1; cbp->bio_caller1 = NULL; } bp->bio_children--; g_destroy_bio(cbp); } return (error); } static int g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_stripe_softc *sc; - uint32_t stripesize; + off_t stripesize; struct bio *cbp; char *addr; int error; sc = bp->bio_to->geom->softc; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ if (bp->bio_length == length) cbp->bio_done = g_std_done; /* Optimized lockless case. 
*/ else cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_length = length; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE; addr = NULL; } else addr = bp->bio_data; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); if (bp->bio_cmd != BIO_DELETE) addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); if ((bp->bio_flags & BIO_UNMAPPED) != 0) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; cbp->bio_caller2 = sc->sc_disks[no]; if (bp->bio_cmd != BIO_DELETE) addr += stripesize; } /* * Fire off all allocated requests! 
*/ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); bp->bio_children--; g_destroy_bio(cbp); } return (error); } static void g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp) { struct bio_queue_head queue; struct g_consumer *cp; struct bio *cbp; u_int no; bioq_init(&queue); for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); cbp->bio_done = g_stripe_done; cbp->bio_caller2 = sc->sc_disks[no]; cbp->bio_to = sc->sc_disks[no]->provider; } for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); G_STRIPE_LOGREQ(cbp, "Sending request."); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; g_io_request(cbp, cp); } } static void g_stripe_start(struct bio *bp) { - off_t offset, start, length, nstripe; + off_t offset, start, length, nstripe, stripesize; struct g_stripe_softc *sc; - u_int no, stripesize; + u_int no; int error, fast = 0; sc = bp->bio_to->geom->softc; /* * If sc == NULL, provider's error should be set and g_stripe_start() * should not be called at all. */ KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_STRIPE_LOGREQ(bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_FLUSH: g_stripe_flush(sc, bp); return; case BIO_GETATTR: /* To which provider it should be delivered? 
*/ default: g_io_deliver(bp, EOPNOTSUPP); return; } stripesize = sc->sc_stripesize; /* * Calculations are quite messy, but fast I hope. */ /* Stripe number. */ /* nstripe = bp->bio_offset / stripesize; */ nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; /* Disk number. */ no = nstripe % sc->sc_ndisks; /* Start position in stripe. */ /* start = bp->bio_offset % stripesize; */ start = bp->bio_offset & (stripesize - 1); /* Start position in disk. */ /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; /* Length of data to operate. */ length = MIN(bp->bio_length, stripesize - start); /* * Do use "fast" mode when: * 1. "Fast" mode is ON. * and * 2. Request size is less than or equal to MAXPHYS, * which should always be true. * and * 3. Request size is bigger than stripesize * ndisks. If it isn't, * there will be no need to send more than one I/O request to * a provider, so there is nothing to optmize. * and * 4. Request is not unmapped. * and * 5. It is not a BIO_DELETE. */ if (g_stripe_fast && bp->bio_length <= MAXPHYS && bp->bio_length >= stripesize * sc->sc_ndisks && (bp->bio_flags & BIO_UNMAPPED) == 0 && bp->bio_cmd != BIO_DELETE) { fast = 1; } error = 0; if (fast) { error = g_stripe_start_fast(bp, no, offset, length); if (error != 0) g_stripe_fast_failed++; } /* * Do use "economic" when: * 1. "Economic" mode is ON. * or * 2. "Fast" mode failed. It can only fail if there is no memory. 
*/ if (!fast || error != 0) error = g_stripe_start_economic(bp, no, offset, length); if (error != 0) { if (bp->bio_error == 0) bp->bio_error = error; g_io_deliver(bp, bp->bio_error); } } static void g_stripe_check_and_run(struct g_stripe_softc *sc) { struct g_provider *dp; off_t mediasize, ms; u_int no, sectorsize = 0; g_topology_assert(); if (g_stripe_nvalid(sc) != sc->sc_ndisks) return; sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", sc->sc_name); sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (g_stripe_fast == 0) sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; /* * Find the smallest disk. */ mediasize = sc->sc_disks[0]->provider->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) mediasize -= sc->sc_disks[0]->provider->sectorsize; mediasize -= mediasize % sc->sc_stripesize; sectorsize = sc->sc_disks[0]->provider->sectorsize; for (no = 1; no < sc->sc_ndisks; no++) { dp = sc->sc_disks[no]->provider; ms = dp->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) ms -= dp->sectorsize; ms -= ms % sc->sc_stripesize; if (ms < mediasize) mediasize = ms; sectorsize = lcm(sectorsize, dp->sectorsize); /* A provider underneath us doesn't support unmapped */ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { G_STRIPE_DEBUG(1, "Cancelling unmapped " "because of %s.", dp->name); sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; } } sc->sc_provider->sectorsize = sectorsize; sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; sc->sc_provider->stripesize = sc->sc_stripesize; sc->sc_provider->stripeoffset = 0; g_error_provider(sc->sc_provider, 0); G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); } static int g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 
&error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ stripe_metadata_decode(buf, md); g_free(buf); return (0); } /* * Add disk to given device. */ static int g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) { struct g_consumer *cp, *fcp; struct g_geom *gp; int error; g_topology_assert(); /* Metadata corrupted? */ if (no >= sc->sc_ndisks) return (EINVAL); /* Check if disk is not already attached. */ if (sc->sc_disks[no] != NULL) return (EEXIST); gp = sc->sc_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; cp->private = NULL; cp->index = no; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { struct g_stripe_metadata md; /* Reread metadata. */ error = g_stripe_read_metadata(cp, &md); if (error != 0) goto fail; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); goto fail; } } sc->sc_disks[no] = cp; G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); g_stripe_check_and_run(sc); return (0); fail: if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); g_detach(cp); g_destroy_consumer(cp); return (error); } static struct g_geom * g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, u_int type) { struct g_stripe_softc *sc; struct g_geom *gp; u_int no; g_topology_assert(); G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* Two disks is minimum. 
*/ if (md->md_all < 2) { G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); return (NULL); } #if 0 /* Stripe size have to be grater than or equal to sector size. */ if (md->md_stripesize < sectorsize) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } #endif /* Stripe size have to be power of 2. */ if (!powerof2(md->md_stripesize)) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } /* Check for duplicate unit */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { G_STRIPE_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); gp->start = g_stripe_start; gp->spoiled = g_stripe_orphan; gp->orphan = g_stripe_orphan; gp->access = g_stripe_access; gp->dumpconf = g_stripe_dumpconf; sc->sc_id = md->md_id; sc->sc_stripesize = md->md_stripesize; sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); sc->sc_ndisks = md->md_all; sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, M_STRIPE, M_WAITOK | M_ZERO); for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no] = NULL; sc->sc_type = type; mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); return (gp); } static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) { struct g_provider *pp; struct g_consumer *cp, *cp1; struct g_geom *gp; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_STRIPE_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_STRIPE_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } gp = 
sc->sc_geom; LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { g_stripe_remove_disk(cp); if (cp1 == NULL) return (0); /* Recursion happened. */ } if (!LIST_EMPTY(&gp->consumer)) return (EINPROGRESS); gp->softc = NULL; KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", gp->name)); free(sc->sc_disks, M_STRIPE); mtx_destroy(&sc->sc_lock); free(sc, M_STRIPE); G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_stripe_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_stripe_softc *sc; sc = gp->softc; return (g_stripe_destroy(sc, 0)); } static struct g_geom * g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_stripe_metadata md; struct g_stripe_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "stripe:taste"); gp->start = g_stripe_start; gp->access = g_stripe_access; gp->orphan = g_stripe_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_stripe_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) return (NULL); if (md.md_version > G_STRIPE_VERSION) { printf("geom_stripe.ko module is too old to handle %s.\n", pp->name); return (NULL); } /* * Backward compatibility: */ /* There was no md_provider field in earlier versions of metadata. */ if (md.md_version < 2) bzero(md.md_provider, sizeof(md.md_provider)); /* There was no md_provsize field in earlier versions of metadata. 
*/ if (md.md_version < 3) md.md_provsize = pp->mediasize; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != pp->mediasize) return (NULL); /* * Let's check if device already exists. */ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) continue; break; } if (gp != NULL) { G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); return (NULL); } } else { gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); if (gp == NULL) { G_STRIPE_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); g_stripe_destroy(sc, 1); return (NULL); } } return (gp); } static void g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) { u_int attached, no; struct g_stripe_metadata md; struct g_provider *pp; struct g_stripe_softc *sc; struct g_geom *gp; struct sbuf *sb; - intmax_t *stripesize; + off_t *stripesize; const char *name; char param[16]; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 2) { gctl_error(req, "Too few arguments."); return; } strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); md.md_version = G_STRIPE_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_id = arc4random(); md.md_no = 
0; md.md_all = *nargs - 1; stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "No '%s' argument.", "stripesize"); return; } - md.md_stripesize = *stripesize; + md.md_stripesize = (uint32_t)*stripesize; bzero(md.md_provider, sizeof(md.md_provider)); /* This field is not important here. */ md.md_provsize = 0; /* Check all providers are valid */ for (no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); gctl_error(req, "Disk %s is invalid.", name); return; } } gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't configure %s.", md.md_name); return; } sc = gp->softc; sb = sbuf_new_auto(); sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); for (attached = 0, no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); continue; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); KASSERT(pp != NULL, ("Provider %s disappear?!", name)); if (g_stripe_add_disk(sc, pp, no - 1) != 0) { G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", no, pp->name, gp->name); sbuf_printf(sb, " %s", pp->name); continue; } attached++; } sbuf_finish(sb); if (md.md_all != attached) { g_stripe_destroy(gp->softc, 1); gctl_error(req, "%s", sbuf_data(sb)); } sbuf_delete(sb); } static struct g_stripe_softc * g_stripe_find_device(struct g_class *mp, const char *name) { struct g_stripe_softc *sc; struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(sc->sc_name, name) 
== 0) return (sc); } return (NULL); } static void g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_stripe_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_stripe_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_stripe_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } static void g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_STRIPE_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_stripe_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_stripe_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_stripe_softc *sc; sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. 
*/ } else if (cp != NULL) { sbuf_printf(sb, "%s%u\n", indent, (u_int)cp->index); } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); - sbuf_printf(sb, "%s%u\n", indent, - (u_int)sc->sc_stripesize); + sbuf_printf(sb, "%s%ju\n", indent, + (uintmax_t)sc->sc_stripesize); sbuf_printf(sb, "%s", indent); switch (sc->sc_type) { case G_STRIPE_TYPE_AUTOMATIC: sbuf_printf(sb, "AUTOMATIC"); break; case G_STRIPE_TYPE_MANUAL: sbuf_printf(sb, "MANUAL"); break; default: sbuf_printf(sb, "UNKNOWN"); break; } sbuf_printf(sb, "\n"); sbuf_printf(sb, "%sTotal=%u, Online=%u\n", indent, sc->sc_ndisks, g_stripe_nvalid(sc)); sbuf_printf(sb, "%s", indent); if (sc->sc_provider != NULL && sc->sc_provider->error == 0) sbuf_printf(sb, "UP"); else sbuf_printf(sb, "DOWN"); sbuf_printf(sb, "\n"); } } DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); MODULE_VERSION(geom_stripe, 0); Index: head/sys/geom/stripe/g_stripe.h =================================================================== --- head/sys/geom/stripe/g_stripe.h (revision 339814) +++ head/sys/geom/stripe/g_stripe.h (revision 339815) @@ -1,126 +1,126 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _G_STRIPE_H_ #define _G_STRIPE_H_ #include #define G_STRIPE_CLASS_NAME "STRIPE" #define G_STRIPE_MAGIC "GEOM::STRIPE" /* * Version history: * 0 - Initial version number. * 1 - Added 'stop' command for gstripe(8). * 2 - Added md_provider field to metadata and '-h' option for gstripe(8). * 3 - Added md_provsize field to metadata. */ #define G_STRIPE_VERSION 3 #ifdef _KERNEL #define G_STRIPE_TYPE_MANUAL 0 #define G_STRIPE_TYPE_AUTOMATIC 1 #define G_STRIPE_DEBUG(lvl, ...) do { \ if (g_stripe_debug >= (lvl)) { \ printf("GEOM_STRIPE"); \ if (g_stripe_debug > 0) \ printf("[%u]", lvl); \ printf(": "); \ printf(__VA_ARGS__); \ printf("\n"); \ } \ } while (0) #define G_STRIPE_LOGREQ(bp, ...) do { \ if (g_stripe_debug >= 2) { \ printf("GEOM_STRIPE[2]: "); \ printf(__VA_ARGS__); \ printf(" "); \ g_print_bio(bp); \ printf("\n"); \ } \ } while (0) struct g_stripe_softc { u_int sc_type; /* provider type */ struct g_geom *sc_geom; struct g_provider *sc_provider; uint32_t sc_id; /* stripe unique ID */ struct g_consumer **sc_disks; uint16_t sc_ndisks; - uint32_t sc_stripesize; + off_t sc_stripesize; uint32_t sc_stripebits; struct mtx sc_lock; }; #define sc_name sc_geom->name #endif /* _KERNEL */ struct g_stripe_metadata { char md_magic[16]; /* Magic value. */ uint32_t md_version; /* Version number. */ char md_name[16]; /* Stripe name. */ uint32_t md_id; /* Unique ID. */ uint16_t md_no; /* Disk number. */ uint16_t md_all; /* Number of all disks. 
*/ uint32_t md_stripesize; /* Stripe size. */ char md_provider[16]; /* Hardcoded provider. */ uint64_t md_provsize; /* Provider's size. */ }; static __inline void stripe_metadata_encode(const struct g_stripe_metadata *md, u_char *data) { bcopy(md->md_magic, data, sizeof(md->md_magic)); le32enc(data + 16, md->md_version); bcopy(md->md_name, data + 20, sizeof(md->md_name)); le32enc(data + 36, md->md_id); le16enc(data + 40, md->md_no); le16enc(data + 42, md->md_all); le32enc(data + 44, md->md_stripesize); bcopy(md->md_provider, data + 48, sizeof(md->md_provider)); le64enc(data + 64, md->md_provsize); } static __inline void stripe_metadata_decode(const u_char *data, struct g_stripe_metadata *md) { bcopy(data, md->md_magic, sizeof(md->md_magic)); md->md_version = le32dec(data + 16); bcopy(data + 20, md->md_name, sizeof(md->md_name)); md->md_id = le32dec(data + 36); md->md_no = le16dec(data + 40); md->md_all = le16dec(data + 42); md->md_stripesize = le32dec(data + 44); bcopy(data + 48, md->md_provider, sizeof(md->md_provider)); md->md_provsize = le64dec(data + 64); } #endif /* _G_STRIPE_H_ */ Index: head/sys/geom/uzip/g_uzip.c =================================================================== --- head/sys/geom/uzip/g_uzip.c (revision 339814) +++ head/sys/geom/uzip/g_uzip.c (revision 339815) @@ -1,925 +1,925 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Max Khon * Copyright (c) 2014 Juniper Networks, Inc. * Copyright (c) 2006-2016 Maxim Sobolev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_geom.h" MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures"); FEATURE(geom_uzip, "GEOM read-only compressed disks support"); struct g_uzip_blk { uint64_t offset; uint32_t blen; unsigned char last:1; unsigned char padded:1; #define BLEN_UNDEF UINT32_MAX }; #ifndef ABS #define ABS(a) ((a) < 0 ? 
-(a) : (a)) #endif #define BLK_IN_RANGE(mcn, bcn, ilen) \ (((bcn) != BLEN_UNDEF) && ( \ ((ilen) >= 0 && (mcn >= bcn) && (mcn <= ((intmax_t)(bcn) + (ilen)))) || \ ((ilen) < 0 && (mcn <= bcn) && (mcn >= ((intmax_t)(bcn) + (ilen)))) \ )) #ifdef GEOM_UZIP_DEBUG # define GEOM_UZIP_DBG_DEFAULT 3 #else # define GEOM_UZIP_DBG_DEFAULT 0 #endif #define GUZ_DBG_ERR 1 #define GUZ_DBG_INFO 2 #define GUZ_DBG_IO 3 #define GUZ_DBG_TOC 4 #define GUZ_DEV_SUFX ".uzip" #define GUZ_DEV_NAME(p) (p GUZ_DEV_SUFX) static char g_uzip_attach_to[MAXPATHLEN] = {"*"}; static char g_uzip_noattach_to[MAXPATHLEN] = {GUZ_DEV_NAME("*")}; TUNABLE_STR("kern.geom.uzip.attach_to", g_uzip_attach_to, sizeof(g_uzip_attach_to)); TUNABLE_STR("kern.geom.uzip.noattach_to", g_uzip_noattach_to, sizeof(g_uzip_noattach_to)); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, uzip, CTLFLAG_RW, 0, "GEOM_UZIP stuff"); static u_int g_uzip_debug = GEOM_UZIP_DBG_DEFAULT; SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug, CTLFLAG_RWTUN, &g_uzip_debug, 0, "Debug level (0-4)"); static u_int g_uzip_debug_block = BLEN_UNDEF; SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug_block, CTLFLAG_RWTUN, &g_uzip_debug_block, 0, "Debug operations around specific cluster#"); #define DPRINTF(lvl, a) \ if ((lvl) <= g_uzip_debug) { \ printf a; \ } #define DPRINTF_BLK(lvl, cn, a) \ if ((lvl) <= g_uzip_debug || \ BLK_IN_RANGE(cn, g_uzip_debug_block, 8) || \ BLK_IN_RANGE(cn, g_uzip_debug_block, -8)) { \ printf a; \ } #define DPRINTF_BRNG(lvl, bcn, ecn, a) \ KASSERT(bcn < ecn, ("DPRINTF_BRNG: invalid range (%ju, %ju)", \ (uintmax_t)bcn, (uintmax_t)ecn)); \ if (((lvl) <= g_uzip_debug) || \ BLK_IN_RANGE(g_uzip_debug_block, bcn, \ (intmax_t)ecn - (intmax_t)bcn)) { \ printf a; \ } #define UZIP_CLASS_NAME "UZIP" /* * Maximum allowed valid block size (to prevent foot-shooting) */ #define MAX_BLKSZ (MAXPHYS) static char CLOOP_MAGIC_START[] = "#!/bin/sh\n"; static void g_uzip_read_done(struct bio *bp); static void g_uzip_do(struct g_uzip_softc *, 
struct bio *bp); static void g_uzip_softc_free(struct g_uzip_softc *sc, struct g_geom *gp) { if (gp != NULL) { DPRINTF(GUZ_DBG_INFO, ("%s: %d requests, %d cached\n", gp->name, sc->req_total, sc->req_cached)); } mtx_lock(&sc->queue_mtx); sc->wrkthr_flags |= GUZ_SHUTDOWN; wakeup(sc); while (!(sc->wrkthr_flags & GUZ_EXITING)) { msleep(sc->procp, &sc->queue_mtx, PRIBIO, "guzfree", hz / 10); } mtx_unlock(&sc->queue_mtx); sc->dcp->free(sc->dcp); free(sc->toc, M_GEOM_UZIP); mtx_destroy(&sc->queue_mtx); mtx_destroy(&sc->last_mtx); free(sc->last_buf, M_GEOM_UZIP); free(sc, M_GEOM_UZIP); } static int g_uzip_cached(struct g_geom *gp, struct bio *bp) { struct g_uzip_softc *sc; off_t ofs; size_t blk, blkofs, usz; sc = gp->softc; ofs = bp->bio_offset + bp->bio_completed; blk = ofs / sc->blksz; mtx_lock(&sc->last_mtx); if (blk == sc->last_blk) { blkofs = ofs % sc->blksz; usz = sc->blksz - blkofs; if (bp->bio_resid < usz) usz = bp->bio_resid; memcpy(bp->bio_data + bp->bio_completed, sc->last_buf + blkofs, usz); sc->req_cached++; mtx_unlock(&sc->last_mtx); DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: offset=%jd: got %jd bytes " "from cache\n", __func__, gp->name, bp, (intmax_t)ofs, (intmax_t)usz)); bp->bio_completed += usz; bp->bio_resid -= usz; if (bp->bio_resid == 0) { g_io_deliver(bp, 0); return (1); } } else mtx_unlock(&sc->last_mtx); return (0); } #define BLK_ENDS(sc, bi) ((sc)->toc[(bi)].offset + \ (sc)->toc[(bi)].blen) #define BLK_IS_CONT(sc, bi) (BLK_ENDS((sc), (bi) - 1) == \ (sc)->toc[(bi)].offset) #define BLK_IS_NIL(sc, bi) ((sc)->toc[(bi)].blen == 0) #define TOFF_2_BOFF(sc, pp, bi) ((sc)->toc[(bi)].offset - \ (sc)->toc[(bi)].offset % (pp)->sectorsize) #define TLEN_2_BLEN(sc, pp, bp, ei) roundup(BLK_ENDS((sc), (ei)) - \ (bp)->bio_offset, (pp)->sectorsize) static int g_uzip_request(struct g_geom *gp, struct bio *bp) { struct g_uzip_softc *sc; struct bio *bp2; struct g_consumer *cp; struct g_provider *pp; off_t ofs, start_blk_ofs; size_t i, start_blk, end_blk, zsize; if 
(g_uzip_cached(gp, bp) != 0) return (1); sc = gp->softc; cp = LIST_FIRST(&gp->consumer); pp = cp->provider; ofs = bp->bio_offset + bp->bio_completed; start_blk = ofs / sc->blksz; KASSERT(start_blk < sc->nblocks, ("start_blk out of range")); end_blk = howmany(ofs + bp->bio_resid, sc->blksz); KASSERT(end_blk <= sc->nblocks, ("end_blk out of range")); for (; BLK_IS_NIL(sc, start_blk) && start_blk < end_blk; start_blk++) { /* Fill in any leading Nil blocks */ start_blk_ofs = ofs % sc->blksz; zsize = MIN(sc->blksz - start_blk_ofs, bp->bio_resid); DPRINTF_BLK(GUZ_DBG_IO, start_blk, ("%s/%s: %p/%ju: " "filling %ju zero bytes\n", __func__, gp->name, gp, (uintmax_t)bp->bio_completed, (uintmax_t)zsize)); bzero(bp->bio_data + bp->bio_completed, zsize); bp->bio_completed += zsize; bp->bio_resid -= zsize; ofs += zsize; } if (start_blk == end_blk) { KASSERT(bp->bio_resid == 0, ("bp->bio_resid is invalid")); /* * No non-Nil data is left, complete request immediately. */ DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: all done returning %ju " "bytes\n", __func__, gp->name, gp, (uintmax_t)bp->bio_completed)); g_io_deliver(bp, 0); return (1); } for (i = start_blk + 1; i < end_blk; i++) { /* Trim discontinuous areas if any */ if (!BLK_IS_CONT(sc, i)) { end_blk = i; break; } } DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: " "start=%u (%ju[%jd]), end=%u (%ju)\n", __func__, gp->name, bp, (u_int)start_blk, (uintmax_t)sc->toc[start_blk].offset, (intmax_t)sc->toc[start_blk].blen, (u_int)end_blk, (uintmax_t)BLK_ENDS(sc, end_blk - 1))); bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return (1); } bp2->bio_done = g_uzip_read_done; bp2->bio_offset = TOFF_2_BOFF(sc, pp, start_blk); while (1) { bp2->bio_length = TLEN_2_BLEN(sc, pp, bp2, end_blk - 1); if (bp2->bio_length <= MAXPHYS) { break; } if (end_blk == (start_blk + 1)) { break; } end_blk--; } DPRINTF(GUZ_DBG_IO, ("%s/%s: bp2->bio_length = %jd, " "bp2->bio_offset = %jd\n", __func__, gp->name, (intmax_t)bp2->bio_length, 
(intmax_t)bp2->bio_offset)); bp2->bio_data = malloc(bp2->bio_length, M_GEOM_UZIP, M_NOWAIT); if (bp2->bio_data == NULL) { g_destroy_bio(bp2); g_io_deliver(bp, ENOMEM); return (1); } DPRINTF_BRNG(GUZ_DBG_IO, start_blk, end_blk, ("%s/%s: %p: " "reading %jd bytes from offset %jd\n", __func__, gp->name, bp, (intmax_t)bp2->bio_length, (intmax_t)bp2->bio_offset)); g_io_request(bp2, cp); return (0); } static void g_uzip_read_done(struct bio *bp) { struct bio *bp2; struct g_geom *gp; struct g_uzip_softc *sc; bp2 = bp->bio_parent; gp = bp2->bio_to->geom; sc = gp->softc; mtx_lock(&sc->queue_mtx); bioq_disksort(&sc->bio_queue, bp); mtx_unlock(&sc->queue_mtx); wakeup(sc); } static int g_uzip_memvcmp(const void *memory, unsigned char val, size_t size) { const u_char *mm; mm = (const u_char *)memory; return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; } static void g_uzip_do(struct g_uzip_softc *sc, struct bio *bp) { struct bio *bp2; struct g_provider *pp; struct g_consumer *cp; struct g_geom *gp; char *data, *data2; off_t ofs; size_t blk, blkofs, len, ulen, firstblk; int err; bp2 = bp->bio_parent; gp = bp2->bio_to->geom; cp = LIST_FIRST(&gp->consumer); pp = cp->provider; bp2->bio_error = bp->bio_error; if (bp2->bio_error != 0) goto done; /* Make sure there's forward progress. 
*/ if (bp->bio_completed == 0) { bp2->bio_error = ECANCELED; goto done; } ofs = bp2->bio_offset + bp2->bio_completed; firstblk = blk = ofs / sc->blksz; blkofs = ofs % sc->blksz; data = bp->bio_data + sc->toc[blk].offset % pp->sectorsize; data2 = bp2->bio_data + bp2->bio_completed; while (bp->bio_completed && bp2->bio_resid) { if (blk > firstblk && !BLK_IS_CONT(sc, blk)) { DPRINTF_BLK(GUZ_DBG_IO, blk, ("%s/%s: %p: backref'ed " "cluster #%u requested, looping around\n", __func__, gp->name, bp2, (u_int)blk)); goto done; } ulen = MIN(sc->blksz - blkofs, bp2->bio_resid); len = sc->toc[blk].blen; DPRINTF(GUZ_DBG_IO, ("%s/%s: %p/%ju: data2=%p, ulen=%u, " "data=%p, len=%u\n", __func__, gp->name, gp, bp->bio_completed, data2, (u_int)ulen, data, (u_int)len)); if (len == 0) { /* All zero block: no cache update */ zero_block: bzero(data2, ulen); } else if (len <= bp->bio_completed) { mtx_lock(&sc->last_mtx); err = sc->dcp->decompress(sc->dcp, gp->name, data, len, sc->last_buf); if (err != 0 && sc->toc[blk].last != 0) { /* * Last block decompression has failed, check * if it's just zero padding. */ if (g_uzip_memvcmp(data, '\0', len) == 0) { sc->toc[blk].blen = 0; sc->last_blk = -1; mtx_unlock(&sc->last_mtx); len = 0; goto zero_block; } } if (err != 0) { sc->last_blk = -1; mtx_unlock(&sc->last_mtx); bp2->bio_error = EILSEQ; DPRINTF(GUZ_DBG_ERR, ("%s/%s: decompress" "(%p, %ju, %ju) failed\n", __func__, gp->name, sc->dcp, (uintmax_t)blk, (uintmax_t)len)); goto done; } sc->last_blk = blk; memcpy(data2, sc->last_buf + blkofs, ulen); mtx_unlock(&sc->last_mtx); err = sc->dcp->rewind(sc->dcp, gp->name); if (err != 0) { bp2->bio_error = EILSEQ; DPRINTF(GUZ_DBG_ERR, ("%s/%s: rewind(%p) " "failed\n", __func__, gp->name, sc->dcp)); goto done; } data += len; } else break; data2 += ulen; bp2->bio_completed += ulen; bp2->bio_resid -= ulen; bp->bio_completed -= len; blkofs = 0; blk++; } done: /* Finish processing the request. 
*/ free(bp->bio_data, M_GEOM_UZIP); g_destroy_bio(bp); if (bp2->bio_error != 0 || bp2->bio_resid == 0) g_io_deliver(bp2, bp2->bio_error); else g_uzip_request(gp, bp2); } static void g_uzip_start(struct bio *bp) { struct g_provider *pp; struct g_geom *gp; struct g_uzip_softc *sc; pp = bp->bio_to; gp = pp->geom; DPRINTF(GUZ_DBG_IO, ("%s/%s: %p: cmd=%d, offset=%jd, length=%jd, " "buffer=%p\n", __func__, gp->name, bp, bp->bio_cmd, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length, bp->bio_data)); sc = gp->softc; sc->req_total++; if (bp->bio_cmd == BIO_GETATTR) { struct bio *bp2; struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp; /* pass on MNT:* requests and ignore others */ if (strncmp(bp->bio_attribute, "MNT:", 4) == 0) { bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = g_std_done; pp = bp->bio_to; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); g_io_request(bp2, cp); return; } } if (bp->bio_cmd != BIO_READ) { g_io_deliver(bp, EOPNOTSUPP); return; } bp->bio_resid = bp->bio_length; bp->bio_completed = 0; g_uzip_request(gp, bp); } static void g_uzip_orphan(struct g_consumer *cp) { struct g_geom *gp; g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, cp->provider->name); g_topology_assert(); gp = cp->geom; g_uzip_softc_free(gp->softc, gp); gp->softc = NULL; g_wither_geom(gp, ENXIO); } static int g_uzip_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); KASSERT (cp != NULL, ("g_uzip_access but no consumer")); if (cp->acw + dw > 0) return (EROFS); return (g_access(cp, dr, dw, de)); } static void g_uzip_spoiled(struct g_consumer *cp) { struct g_geom *gp; G_VALID_CONSUMER(cp); gp = cp->geom; g_trace(G_T_TOPOLOGY, "%s(%p/%s)", __func__, cp, gp->name); g_topology_assert(); g_uzip_softc_free(gp->softc, gp); gp->softc = NULL; g_wither_geom(gp, ENXIO); } static int g_uzip_parse_toc(struct g_uzip_softc *sc, struct g_provider *pp, 
struct g_geom *gp) { uint32_t i, j, backref_to; uint64_t max_offset, min_offset; struct g_uzip_blk *last_blk; min_offset = sizeof(struct cloop_header) + (sc->nblocks + 1) * sizeof(uint64_t); max_offset = sc->toc[0].offset - 1; last_blk = &sc->toc[0]; for (i = 0; i < sc->nblocks; i++) { /* First do some bounds checking */ if ((sc->toc[i].offset < min_offset) || (sc->toc[i].offset > pp->mediasize)) { goto error_offset; } DPRINTF_BLK(GUZ_DBG_IO, i, ("%s: cluster #%u " "offset=%ju max_offset=%ju\n", gp->name, (u_int)i, (uintmax_t)sc->toc[i].offset, (uintmax_t)max_offset)); backref_to = BLEN_UNDEF; if (sc->toc[i].offset < max_offset) { /* * For the backref'ed blocks search already parsed * TOC entries for the matching offset and copy the * size from matched entry. */ for (j = 0; j <= i; j++) { if (sc->toc[j].offset == sc->toc[i].offset && !BLK_IS_NIL(sc, j)) { break; } if (j != i) { continue; } DPRINTF(GUZ_DBG_ERR, ("%s: cannot match " "backref'ed offset at cluster #%u\n", gp->name, i)); return (-1); } sc->toc[i].blen = sc->toc[j].blen; backref_to = j; } else { last_blk = &sc->toc[i]; /* * For the "normal blocks" seek forward until we hit * block whose offset is larger than ours and assume * it's going to be the next one. 
*/ for (j = i + 1; j < sc->nblocks; j++) { if (sc->toc[j].offset > max_offset) { break; } } sc->toc[i].blen = sc->toc[j].offset - sc->toc[i].offset; if (BLK_ENDS(sc, i) > pp->mediasize) { DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u " "extends past media boundary (%ju > %ju)\n", gp->name, (u_int)i, (uintmax_t)BLK_ENDS(sc, i), (intmax_t)pp->mediasize)); return (-1); } KASSERT(max_offset <= sc->toc[i].offset, ( "%s: max_offset is incorrect: %ju", gp->name, (uintmax_t)max_offset)); max_offset = BLK_ENDS(sc, i) - 1; } DPRINTF_BLK(GUZ_DBG_TOC, i, ("%s: cluster #%u, original %u " "bytes, in %u bytes", gp->name, i, sc->blksz, sc->toc[i].blen)); if (backref_to != BLEN_UNDEF) { DPRINTF_BLK(GUZ_DBG_TOC, i, (" (->#%u)", (u_int)backref_to)); } DPRINTF_BLK(GUZ_DBG_TOC, i, ("\n")); } last_blk->last = 1; /* Do a second pass to validate block lengths */ for (i = 0; i < sc->nblocks; i++) { if (sc->toc[i].blen > sc->dcp->max_blen) { if (sc->toc[i].last == 0) { DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u " "length (%ju) exceeds " "max_blen (%ju)\n", gp->name, i, (uintmax_t)sc->toc[i].blen, (uintmax_t)sc->dcp->max_blen)); return (-1); } DPRINTF(GUZ_DBG_INFO, ("%s: cluster #%u extra " "padding is detected, trimmed to %ju\n", gp->name, i, (uintmax_t)sc->dcp->max_blen)); sc->toc[i].blen = sc->dcp->max_blen; sc->toc[i].padded = 1; } } return (0); error_offset: DPRINTF(GUZ_DBG_ERR, ("%s: cluster #%u: invalid offset %ju, " "min_offset=%ju mediasize=%jd\n", gp->name, (u_int)i, sc->toc[i].offset, min_offset, pp->mediasize)); return (-1); } static struct g_geom * g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags) { int error; uint32_t i, total_offsets, offsets_read, blk; void *buf; struct cloop_header *header; struct g_consumer *cp; struct g_geom *gp; struct g_provider *pp2; struct g_uzip_softc *sc; enum { G_UZIP = 1, G_ULZMA } type; g_trace(G_T_TOPOLOGY, "%s(%s,%s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. 
*/ if (pp->acw > 0) return (NULL); if ((fnmatch(g_uzip_attach_to, pp->name, 0) != 0) || (fnmatch(g_uzip_noattach_to, pp->name, 0) == 0)) { DPRINTF(GUZ_DBG_INFO, ("%s(%s,%s), ignoring\n", __func__, mp->name, pp->name)); return (NULL); } buf = NULL; /* * Create geom instance. */ gp = g_new_geomf(mp, GUZ_DEV_NAME("%s"), pp->name); cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); if (error) { goto e1; } g_topology_unlock(); /* * Read cloop header, look for CLOOP magic, perform * other validity checks. */ DPRINTF(GUZ_DBG_INFO, ("%s: media sectorsize %u, mediasize %jd\n", gp->name, pp->sectorsize, (intmax_t)pp->mediasize)); buf = g_read_data(cp, 0, pp->sectorsize, NULL); if (buf == NULL) goto e2; header = (struct cloop_header *) buf; if (strncmp(header->magic, CLOOP_MAGIC_START, sizeof(CLOOP_MAGIC_START) - 1) != 0) { DPRINTF(GUZ_DBG_ERR, ("%s: no CLOOP magic\n", gp->name)); goto e3; } switch (header->magic[CLOOP_OFS_COMPR]) { case CLOOP_COMP_LZMA: case CLOOP_COMP_LZMA_DDP: type = G_ULZMA; if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_LZMA) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; } DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_LZMA image found\n", gp->name)); break; case CLOOP_COMP_LIBZ: case CLOOP_COMP_LIBZ_DDP: type = G_UZIP; if (header->magic[CLOOP_OFS_VERSN] < CLOOP_MINVER_ZLIB) { DPRINTF(GUZ_DBG_ERR, ("%s: image version too old\n", gp->name)); goto e3; } DPRINTF(GUZ_DBG_INFO, ("%s: GEOM_UZIP_ZLIB image found\n", gp->name)); break; default: DPRINTF(GUZ_DBG_ERR, ("%s: unsupported image type\n", gp->name)); goto e3; } /* * Initialize softc and read offsets. 
*/ sc = malloc(sizeof(*sc), M_GEOM_UZIP, M_WAITOK | M_ZERO); gp->softc = sc; sc->blksz = ntohl(header->blksz); sc->nblocks = ntohl(header->nblocks); if (sc->blksz % 512 != 0) { printf("%s: block size (%u) should be multiple of 512.\n", gp->name, sc->blksz); goto e4; } if (sc->blksz > MAX_BLKSZ) { printf("%s: block size (%u) should not be larger than %d.\n", gp->name, sc->blksz, MAX_BLKSZ); } total_offsets = sc->nblocks + 1; if (sizeof(struct cloop_header) + total_offsets * sizeof(uint64_t) > pp->mediasize) { printf("%s: media too small for %u blocks\n", gp->name, sc->nblocks); goto e4; } sc->toc = malloc(total_offsets * sizeof(struct g_uzip_blk), M_GEOM_UZIP, M_WAITOK | M_ZERO); offsets_read = MIN(total_offsets, (pp->sectorsize - sizeof(*header)) / sizeof(uint64_t)); for (i = 0; i < offsets_read; i++) { sc->toc[i].offset = be64toh(((uint64_t *) (header + 1))[i]); sc->toc[i].blen = BLEN_UNDEF; } DPRINTF(GUZ_DBG_INFO, ("%s: %u offsets in the first sector\n", gp->name, offsets_read)); for (blk = 1; offsets_read < total_offsets; blk++) { uint32_t nread; free(buf, M_GEOM); buf = g_read_data( cp, blk * pp->sectorsize, pp->sectorsize, NULL); if (buf == NULL) goto e5; nread = MIN(total_offsets - offsets_read, pp->sectorsize / sizeof(uint64_t)); DPRINTF(GUZ_DBG_TOC, ("%s: %u offsets read from sector %d\n", gp->name, nread, blk)); for (i = 0; i < nread; i++) { sc->toc[offsets_read + i].offset = be64toh(((uint64_t *) buf)[i]); sc->toc[offsets_read + i].blen = BLEN_UNDEF; } offsets_read += nread; } free(buf, M_GEOM); buf = NULL; offsets_read -= 1; DPRINTF(GUZ_DBG_INFO, ("%s: done reading %u block offsets from %u " "sectors\n", gp->name, offsets_read, blk)); if (sc->nblocks != offsets_read) { DPRINTF(GUZ_DBG_ERR, ("%s: read %s offsets than expected " "blocks\n", gp->name, sc->nblocks < offsets_read ? 
"more" : "less")); goto e5; } if (type == G_UZIP) { sc->dcp = g_uzip_zlib_ctor(sc->blksz); } else { sc->dcp = g_uzip_lzma_ctor(sc->blksz); } if (sc->dcp == NULL) { goto e5; } /* * "Fake" last+1 block, to make it easier for the TOC parser to * iterate without making the last element a special case. */ sc->toc[sc->nblocks].offset = pp->mediasize; /* Massage TOC (table of contents), make sure it is sound */ if (g_uzip_parse_toc(sc, pp, gp) != 0) { DPRINTF(GUZ_DBG_ERR, ("%s: TOC error\n", gp->name)); goto e6; } mtx_init(&sc->last_mtx, "geom_uzip cache", NULL, MTX_DEF); mtx_init(&sc->queue_mtx, "geom_uzip wrkthread", NULL, MTX_DEF); bioq_init(&sc->bio_queue); sc->last_blk = -1; sc->last_buf = malloc(sc->blksz, M_GEOM_UZIP, M_WAITOK); sc->req_total = 0; sc->req_cached = 0; sc->uzip_do = &g_uzip_do; error = kproc_create(g_uzip_wrkthr, sc, &sc->procp, 0, 0, "%s", gp->name); if (error != 0) { goto e7; } g_topology_lock(); pp2 = g_new_providerf(gp, "%s", gp->name); pp2->sectorsize = 512; pp2->mediasize = (off_t)sc->nblocks * sc->blksz; pp2->stripesize = pp->stripesize; pp2->stripeoffset = pp->stripeoffset; g_error_provider(pp2, 0); g_access(cp, -1, 0, 0); - DPRINTF(GUZ_DBG_INFO, ("%s: taste ok (%d, %jd), (%d, %d), %x\n", - gp->name, pp2->sectorsize, (intmax_t)pp2->mediasize, - pp2->stripeoffset, pp2->stripesize, pp2->flags)); + DPRINTF(GUZ_DBG_INFO, ("%s: taste ok (%d, %ju), (%ju, %ju), %x\n", + gp->name, pp2->sectorsize, (uintmax_t)pp2->mediasize, + (uintmax_t)pp2->stripeoffset, (uintmax_t)pp2->stripesize, pp2->flags)); DPRINTF(GUZ_DBG_INFO, ("%s: %u x %u blocks\n", gp->name, sc->nblocks, sc->blksz)); return (gp); e7: free(sc->last_buf, M_GEOM); mtx_destroy(&sc->queue_mtx); mtx_destroy(&sc->last_mtx); e6: sc->dcp->free(sc->dcp); e5: free(sc->toc, M_GEOM); e4: free(gp->softc, M_GEOM_UZIP); e3: if (buf != NULL) { free(buf, M_GEOM); } e2: g_topology_lock(); g_access(cp, -1, 0, 0); e1: g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } static int 
g_uzip_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_provider *pp; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, gp->name); g_topology_assert(); if (gp->softc == NULL) { DPRINTF(GUZ_DBG_ERR, ("%s(%s): gp->softc == NULL\n", __func__, gp->name)); return (ENXIO); } KASSERT(gp != NULL, ("NULL geom")); pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("NULL provider")); if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) return (EBUSY); g_uzip_softc_free(gp->softc, gp); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static struct g_class g_uzip_class = { .name = UZIP_CLASS_NAME, .version = G_VERSION, .taste = g_uzip_taste, .destroy_geom = g_uzip_destroy_geom, .start = g_uzip_start, .orphan = g_uzip_orphan, .access = g_uzip_access, .spoiled = g_uzip_spoiled, }; DECLARE_GEOM_CLASS(g_uzip_class, g_uzip); MODULE_DEPEND(g_uzip, zlib, 1, 1, 1); MODULE_VERSION(geom_uzip, 0);