Changeset View
Standalone View
sys/geom/union/g_union.c
- This file was added.
/*- | |||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD | |||||
* | |||||
* Copyright (c) 2021 Marshall Kirk McKusick <mckusick@mckusick.com> | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include <sys/param.h> | |||||
#include <sys/bio.h> | |||||
#include <sys/buf.h> | |||||
#include <sys/ctype.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/malloc.h> | |||||
#include <sys/module.h> | |||||
#include <sys/rwlock.h> | |||||
#include <sys/sbuf.h> | |||||
#include <sys/sysctl.h> | |||||
#include <geom/geom.h> | |||||
#include <geom/geom_dbg.h> | |||||
#include <geom/union/g_union.h> | |||||
SYSCTL_DECL(_kern_geom); | |||||
static SYSCTL_NODE(_kern_geom, OID_AUTO, union, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | |||||
"GEOM_UNION stuff"); | |||||
static u_int g_union_debug = 0; | |||||
SYSCTL_UINT(_kern_geom_union, OID_AUTO, debug, CTLFLAG_RW, &g_union_debug, 0, | |||||
"Debug level"); | |||||
static void g_union_config(struct gctl_req *req, struct g_class *mp, | |||||
const char *verb); | |||||
static g_access_t g_union_access; | |||||
static g_start_t g_union_start; | |||||
static g_dumpconf_t g_union_dumpconf; | |||||
static g_orphan_t g_union_orphan; | |||||
static int g_union_destroy_geom(struct gctl_req *req, struct g_class *mp, | |||||
struct g_geom *gp); | |||||
static g_provgone_t g_union_providergone; | |||||
static g_resize_t g_union_resize; | |||||
struct g_class g_union_class = { | |||||
.name = G_UNION_CLASS_NAME, | |||||
.version = G_VERSION, | |||||
.ctlreq = g_union_config, | |||||
.access = g_union_access, | |||||
.start = g_union_start, | |||||
.dumpconf = g_union_dumpconf, | |||||
.orphan = g_union_orphan, | |||||
.destroy_geom = g_union_destroy_geom, | |||||
.providergone = g_union_providergone, | |||||
.resize = g_union_resize, | |||||
}; | |||||
static void g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool); | |||||
static intmax_t g_union_fetcharg(struct gctl_req *req, const char *name); | |||||
static bool g_union_verify_nprefix(const char *name); | |||||
static void g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool); | |||||
static struct g_geom *g_union_find_geom(struct g_class *mp, const char *name); | |||||
static void g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool); | |||||
static void g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool); | |||||
static void g_union_revert(struct g_union_softc *sc); | |||||
static void g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool); | |||||
static void g_union_setmap(struct bio *bp, struct g_union_softc *sc); | |||||
static bool g_union_getmap(struct bio *bp, struct g_union_softc *sc, | |||||
off_t *len2read); | |||||
static void g_union_done(struct bio *bp); | |||||
static void g_union_kerneldump(struct bio *bp, struct g_union_softc *sc); | |||||
static int g_union_dumper(void *, void *, vm_offset_t, off_t, size_t); | |||||
static int g_union_destroy(struct g_geom *gp, bool force, bool verbose); | |||||
/* | |||||
* Operate on union-specific configuration commands. | |||||
*/ | |||||
static void | |||||
g_union_config(struct gctl_req *req, struct g_class *mp, const char *verb) | |||||
{ | |||||
uint32_t *version, *verbose; | |||||
g_topology_assert(); | |||||
version = gctl_get_paraml(req, "version", sizeof(*version)); | |||||
if (version == NULL) { | |||||
gctl_error(req, "No '%s' argument.", "version"); | |||||
return; | |||||
} | |||||
if (*version != G_UNION_VERSION) { | |||||
gctl_error(req, "Userland and kernel parts are out of sync."); | |||||
return; | |||||
} | |||||
verbose = gctl_get_paraml(req, "verbose", sizeof(*verbose)); | |||||
if (verbose == NULL) { | |||||
gctl_error(req, "No '%s' argument.", "verbose"); | |||||
return; | |||||
} | |||||
if (strcmp(verb, "create") == 0) { | |||||
g_union_ctl_create(req, mp, *verbose); | |||||
return; | |||||
} else if (strcmp(verb, "destroy") == 0) { | |||||
g_union_ctl_destroy(req, mp, *verbose); | |||||
return; | |||||
} else if (strcmp(verb, "reset") == 0) { | |||||
g_union_ctl_reset(req, mp, *verbose); | |||||
return; | |||||
} else if (strcmp(verb, "revert") == 0) { | |||||
g_union_ctl_revert(req, mp, *verbose); | |||||
return; | |||||
} else if (strcmp(verb, "commit") == 0) { | |||||
g_union_ctl_commit(req, mp, *verbose); | |||||
return; | |||||
} | |||||
gctl_error(req, "Unknown verb."); | |||||
} | |||||
/* | |||||
* Create a union device. | |||||
*/ | |||||
static void | |||||
g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose) | |||||
{ | |||||
struct g_provider *upperpp, *lowerpp, *newpp; | |||||
struct g_consumer *uppercp, *lowercp; | |||||
struct g_union_softc *sc; | |||||
struct g_geom_alias *gap; | |||||
struct g_geom *gp; | |||||
intmax_t offset, secsize, size, needed; | |||||
const char *gunionname; | |||||
int *nargs, error, i, n; | |||||
char name[64]; | |||||
g_topology_assert(); | |||||
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); | |||||
if (nargs == NULL) { | |||||
gctl_error(req, "No '%s' argument", "nargs"); | |||||
return; | |||||
} | |||||
if (*nargs < 2) { | |||||
gctl_error(req, "Missing device(s)."); | |||||
return; | |||||
} | |||||
if (*nargs > 2) { | |||||
gctl_error(req, "Extra device(s)."); | |||||
return; | |||||
} | |||||
offset = g_union_fetcharg(req, "offset"); | |||||
size = g_union_fetcharg(req, "size"); | |||||
secsize = g_union_fetcharg(req, "secsize"); | |||||
gunionname = gctl_get_asciiparam(req, "gunionname"); | |||||
upperpp = gctl_get_provider(req, "arg0"); | |||||
lowerpp = gctl_get_provider(req, "arg1"); | |||||
kib: Don't you want to set an error in this case as well? | |||||
Done Inline ActionsThe error message is provided by gctl_get_provider() so we just need to return. I added a comment to note this fact. mckusick: The error message is provided by gctl_get_provider() so we just need to return. I added a… | |||||
if (upperpp == NULL || lowerpp == NULL) | |||||
/* error message provided by gctl_get_provider() */ | |||||
return; | |||||
/* Create the union */ | |||||
if (secsize == 0) | |||||
secsize = lowerpp->sectorsize; | |||||
else if ((secsize % lowerpp->sectorsize) != 0) { | |||||
gctl_error(req, "Sector size %jd is not a multiple of lower " | |||||
"provider %s's %jd sector size.", (intmax_t)secsize, | |||||
lowerpp->name, (intmax_t)lowerpp->sectorsize); | |||||
return; | |||||
} | |||||
if (secsize > maxphys) { | |||||
gctl_error(req, "Too big secsize %jd for lower provider %s.", | |||||
(intmax_t)secsize, lowerpp->name); | |||||
return; | |||||
} | |||||
if (secsize % upperpp->sectorsize != 0) { | |||||
gctl_error(req, "Sector size %jd is not a multiple of upper " | |||||
"provider %s's %jd sector size.", (intmax_t)secsize, | |||||
upperpp->name, (intmax_t)upperpp->sectorsize); | |||||
return; | |||||
} | |||||
if ((offset % secsize) != 0) { | |||||
gctl_error(req, "Offset %jd is not a multiple of lower " | |||||
"provider %s's %jd sector size.", (intmax_t)offset, | |||||
lowerpp->name, (intmax_t)lowerpp->sectorsize); | |||||
return; | |||||
} | |||||
if (size == 0) | |||||
size = lowerpp->mediasize - offset; | |||||
else | |||||
size -= offset; | |||||
if ((size % secsize) != 0) { | |||||
gctl_error(req, "Size %jd is not a multiple of sector size " | |||||
"%jd.", (intmax_t)size, (intmax_t)secsize); | |||||
return; | |||||
} | |||||
if (offset + size < lowerpp->mediasize) { | |||||
gctl_error(req, "Size %jd is too small for lower provider %s, " | |||||
"needs %jd.", (intmax_t)(offset + size), lowerpp->name, | |||||
lowerpp->mediasize); | |||||
return; | |||||
} | |||||
if (size > upperpp->mediasize) { | |||||
gctl_error(req, "Upper provider %s size (%jd) is too small, " | |||||
"needs %jd.", upperpp->name, (intmax_t)upperpp->mediasize, | |||||
(intmax_t)size); | |||||
return; | |||||
} | |||||
if (gunionname != NULL && !g_union_verify_nprefix(gunionname)) { | |||||
gctl_error(req, "Gunion name %s must be alphanumeric.", | |||||
gunionname); | |||||
return; | |||||
} | |||||
if (gunionname != NULL) { | |||||
n = snprintf(name, sizeof(name), "%s%s", gunionname, | |||||
G_UNION_SUFFIX); | |||||
} else { | |||||
n = snprintf(name, sizeof(name), "%s-%s%s", upperpp->name, | |||||
lowerpp->name, G_UNION_SUFFIX); | |||||
} | |||||
if (n <= 0 || n >= sizeof(name)) { | |||||
gctl_error(req, "Invalid provider name."); | |||||
return; | |||||
} | |||||
LIST_FOREACH(gp, &mp->geom, geom) { | |||||
if (strcmp(gp->name, name) == 0) { | |||||
gctl_error(req, "Provider %s already exists.", name); | |||||
return; | |||||
} | |||||
} | |||||
gp = g_new_geomf(mp, "%s", name); | |||||
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); | |||||
rw_init(&sc->sc_rwlock, "gunion"); | |||||
sc->sc_offset = offset; | |||||
Done Inline Actionsphyspath != NULL kib: `physpath != NULL`
`strcmp() != 0` | |||||
Done Inline ActionsFixed. mckusick: Fixed. | |||||
sc->sc_size = size; | |||||
Done Inline ActionsWhy strndup? Can you handle non-null terminated string there? kib: Why strndup? Can you handle non-null terminated string there? | |||||
Done Inline ActionsFrom the strndup manual page: ``The strndup() function copies at most len characters from the string str always NUL terminating the copied string.'' So the pathname may be shortened but will always be null terminated. The string being saved with strndup is used in BIO_GETATTR. I just cut/pasted this from the nop GEOM. And looking at other GEOM providers it appears that nop is doing BIO_GETATTR differently than all the rest. So I have switched to using a more standard approach which gets rid of the need for strndup. mckusick: From the strndup manual page: ``The strndup() function copies at most len characters from the… | |||||
sc->sc_sectorsize = secsize; | |||||
sc->sc_reads = 0; | |||||
sc->sc_writes = 0; | |||||
sc->sc_deletes = 0; | |||||
sc->sc_getattrs = 0; | |||||
sc->sc_flushes = 0; | |||||
sc->sc_speedups = 0; | |||||
sc->sc_cmd0s = 0; | |||||
sc->sc_cmd1s = 0; | |||||
sc->sc_cmd2s = 0; | |||||
sc->sc_readbytes = 0; | |||||
sc->sc_wrotebytes = 0; | |||||
sc->sc_writemap_memory = 0; | |||||
gp->softc = sc; | |||||
newpp = g_new_providerf(gp, "%s", gp->name); | |||||
newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; | |||||
newpp->mediasize = size; | |||||
newpp->sectorsize = secsize; | |||||
LIST_FOREACH(gap, &upperpp->aliases, ga_next) | |||||
g_provider_add_alias(newpp, "%s%s", gap->ga_alias, | |||||
Done Inline ActionsThis looks very strange and is not style(9) compliant. kib: This looks very strange and is not style(9) compliant. | |||||
Done Inline ActionsLeft over from debugging, now cleaned up. mckusick: Left over from debugging, now cleaned up. | |||||
G_UNION_SUFFIX); | |||||
LIST_FOREACH(gap, &lowerpp->aliases, ga_next) | |||||
g_provider_add_alias(newpp, "%s%s", gap->ga_alias, | |||||
G_UNION_SUFFIX); | |||||
lowercp = g_new_consumer(gp); | |||||
lowercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; | |||||
if ((error = g_attach(lowercp, lowerpp)) != 0) { | |||||
gctl_error(req, "Error %d: cannot attach to provider %s.", | |||||
error, lowerpp->name); | |||||
goto fail1; | |||||
} | |||||
/* request read and exclusive access for lower */ | |||||
if ((error = g_access(lowercp, 1, 0, 1)) != 0) { | |||||
gctl_error(req, "Error %d: cannot obtain exclusive access to " | |||||
"%s.\n\tMust be unmounted or mounted read-only.", error, | |||||
lowerpp->name); | |||||
goto fail2; | |||||
} | |||||
uppercp = g_new_consumer(gp); | |||||
uppercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; | |||||
if ((error = g_attach(uppercp, upperpp)) != 0) { | |||||
gctl_error(req, "Error %d: cannot attach to provider %s.", | |||||
error, upperpp->name); | |||||
goto fail3; | |||||
} | |||||
/* request read, write, and exclusive access for upper */ | |||||
if ((error = g_access(uppercp, 1, 1, 1)) != 0) { | |||||
gctl_error(req, "Error %d: cannot obtain write access to %s.", | |||||
error, upperpp->name); | |||||
goto fail4; | |||||
} | |||||
sc->sc_uppercp = uppercp; | |||||
sc->sc_lowercp = lowercp; | |||||
newpp->flags |= (upperpp->flags & G_PF_ACCEPT_UNMAPPED) & | |||||
(lowerpp->flags & G_PF_ACCEPT_UNMAPPED); | |||||
g_error_provider(newpp, 0); | |||||
/* | |||||
* Allocate the map that tracks the sectors that have been written | |||||
* to the top layer. We use a 2-level hierarchy as that lets us | |||||
* map up to 1 petabyte using allocations of less than 33 Mb | |||||
* when using 4K byte sectors (or 268 Mb with 512 byte sectors). | |||||
* | |||||
* We totally populate the leaf nodes rather than allocating them | |||||
* as they are first used because their usage occurs in the | |||||
* g_union_start() routine that may be running in the g_down | |||||
* thread which cannot sleep. | |||||
*/ | |||||
sc->sc_map_size = roundup(size / secsize, BITS_PER_ENTRY); | |||||
needed = sc->sc_map_size / BITS_PER_ENTRY; | |||||
for (sc->sc_root_size = 1; | |||||
sc->sc_root_size * sc->sc_root_size < needed; | |||||
sc->sc_root_size++) | |||||
continue; | |||||
sc->sc_writemap_root = g_malloc(sc->sc_root_size * sizeof(uint64_t), | |||||
M_WAITOK | M_ZERO); | |||||
sc->sc_leaf_size = sc->sc_root_size; | |||||
sc->sc_bits_per_leaf = sc->sc_leaf_size * BITS_PER_ENTRY; | |||||
sc->sc_leafused = g_malloc(roundup(sc->sc_root_size, BITS_PER_ENTRY), | |||||
M_WAITOK | M_ZERO); | |||||
for (i = 0; i < sc->sc_root_size; i++) | |||||
sc->sc_writemap_root[i] = | |||||
g_malloc(sc->sc_leaf_size * sizeof(uint64_t), | |||||
M_WAITOK | M_ZERO); | |||||
sc->sc_writemap_memory = | |||||
(sc->sc_root_size + sc->sc_root_size * sc->sc_leaf_size) * | |||||
sizeof(uint64_t) + roundup(sc->sc_root_size, BITS_PER_ENTRY); | |||||
Not Done Inline ActionsIt might be better to use M_NOWAIT and error out if there is not enough memory. kib: It might be better to use M_NOWAIT and error out if there is not enough memory. | |||||
Not Done Inline ActionsI tried that initially, but on 2Tb disks it was consistently failing. The system just needed some time to get together the needed memory. Is there some way I can give the system some time to try to pull together the needed memory but avoid waiting forever or worse panic'ing? mckusick: I tried that initially, but on 2Tb disks it was consistently failing. The system just needed… | |||||
Not Done Inline Actions2TB means that you need to pre-allocate 512MB of bitsets. This is a lot, and probably makes it reasonable to re-evaluate the decision to do eager allocation of the leaf bitset pages. This is probably the best way around the problem. I think the only change would be to stop requiring direct send, but for g_union purposes it should be fine IMO. kib: 2TB means that you need to pre-allocate 512MB of bitsets. This is a lot, and probably makes it… | |||||
if (verbose) | |||||
printf("Device %s created with memory map size %jd.\n", | |||||
gp->name, sc->sc_writemap_memory); | |||||
G_UNION_DEBUG(1, "Device %s created with memory map size %jd.", | |||||
gp->name, sc->sc_writemap_memory); | |||||
return; | |||||
fail4: | |||||
g_detach(uppercp); | |||||
fail3: | |||||
g_destroy_consumer(uppercp); | |||||
g_access(lowercp, -1, 0, -1); | |||||
fail2: | |||||
g_detach(lowercp); | |||||
fail1: | |||||
g_destroy_consumer(lowercp); | |||||
g_destroy_provider(newpp); | |||||
g_destroy_geom(gp); | |||||
} | |||||
/* | |||||
* Fetch named option and verify that it is positive. | |||||
*/ | |||||
Done Inline ActionsThis return; is not needed. kib: This `return;` is not needed. | |||||
Done Inline ActionsDeleted. mckusick: Deleted. | |||||
static intmax_t | |||||
g_union_fetcharg(struct gctl_req *req, const char *name) | |||||
{ | |||||
intmax_t *val; | |||||
val = gctl_get_paraml_opt(req, name, sizeof(*val)); | |||||
if (val == NULL) | |||||
return (0); | |||||
if (*val >= 0) | |||||
return (*val); | |||||
gctl_error(req, "Invalid '%s': negative value, using default", name); | |||||
return (0); | |||||
} | |||||
/* | |||||
* Verify that a name is alphanumeric. | |||||
*/ | |||||
static bool | |||||
g_union_verify_nprefix(const char *name) | |||||
{ | |||||
int i; | |||||
for (i = 0; i < strlen(name); i++) { | |||||
if (isalpha(name[i]) == 0 && isdigit(name[i]) == 0) { | |||||
return (false); | |||||
} | |||||
} | |||||
return (true); | |||||
} | |||||
/* | |||||
* Destroy a union device. | |||||
*/ | |||||
static void | |||||
g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool verbose) | |||||
{ | |||||
int *nargs, *force, error, i; | |||||
struct g_geom *gp; | |||||
const char *name; | |||||
char param[16]; | |||||
g_topology_assert(); | |||||
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); | |||||
if (nargs == NULL) { | |||||
gctl_error(req, "No '%s' argument", "nargs"); | |||||
return; | |||||
} | |||||
if (*nargs <= 0) { | |||||
gctl_error(req, "Missing device(s)."); | |||||
return; | |||||
} | |||||
force = gctl_get_paraml(req, "force", sizeof(*force)); | |||||
if (force == NULL) { | |||||
gctl_error(req, "No 'force' argument"); | |||||
return; | |||||
} | |||||
for (i = 0; i < *nargs; i++) { | |||||
snprintf(param, sizeof(param), "arg%d", i); | |||||
name = gctl_get_asciiparam(req, param); | |||||
if (name == NULL) { | |||||
gctl_error(req, "No 'arg%d' argument", i); | |||||
return; | |||||
} | |||||
if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0) | |||||
name += strlen(_PATH_DEV); | |||||
gp = g_union_find_geom(mp, name); | |||||
if (gp == NULL) { | |||||
gctl_error(req, "Device %s is invalid.", name); | |||||
return; | |||||
} | |||||
if ((error = g_union_destroy(gp, *force, verbose) != 0)) { | |||||
gctl_error(req, "Error %d: cannot destroy device %s.", | |||||
error, gp->name); | |||||
return; | |||||
} | |||||
} | |||||
} | |||||
/* | |||||
* Find a union geom. | |||||
*/ | |||||
static struct g_geom * | |||||
g_union_find_geom(struct g_class *mp, const char *name) | |||||
{ | |||||
struct g_geom *gp; | |||||
LIST_FOREACH(gp, &mp->geom, geom) { | |||||
if (strcmp(gp->name, name) == 0) | |||||
return (gp); | |||||
} | |||||
return (NULL); | |||||
} | |||||
/* | |||||
* Zero out all the statistics associated with a union device. | |||||
*/ | |||||
static void | |||||
g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool verbose) | |||||
{ | |||||
struct g_union_softc *sc; | |||||
struct g_provider *pp; | |||||
struct g_geom *gp; | |||||
char param[16]; | |||||
int i, *nargs; | |||||
g_topology_assert(); | |||||
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); | |||||
if (nargs == NULL) { | |||||
gctl_error(req, "No '%s' argument", "nargs"); | |||||
return; | |||||
} | |||||
if (*nargs <= 0) { | |||||
gctl_error(req, "Missing device(s)."); | |||||
return; | |||||
} | |||||
for (i = 0; i < *nargs; i++) { | |||||
snprintf(param, sizeof(param), "arg%d", i); | |||||
pp = gctl_get_provider(req, param); | |||||
if (pp == NULL) | |||||
return; | |||||
gp = pp->geom; | |||||
if (gp->class != mp) { | |||||
gctl_error(req, "Provider %s is invalid.", pp->name); | |||||
return; | |||||
} | |||||
sc = gp->softc; | |||||
sc->sc_reads = 0; | |||||
sc->sc_writes = 0; | |||||
sc->sc_deletes = 0; | |||||
sc->sc_getattrs = 0; | |||||
sc->sc_flushes = 0; | |||||
sc->sc_speedups = 0; | |||||
sc->sc_cmd0s = 0; | |||||
sc->sc_cmd1s = 0; | |||||
sc->sc_cmd2s = 0; | |||||
sc->sc_readbytes = 0; | |||||
sc->sc_wrotebytes = 0; | |||||
if (verbose) | |||||
printf("Device %s has been reset.\n", gp->name); | |||||
G_UNION_DEBUG(1, "Device %s has been reset.", gp->name); | |||||
} | |||||
} | |||||
/* | |||||
* Revert all write requests made to the top layer of the union. | |||||
*/ | |||||
static void | |||||
g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool verbose) | |||||
{ | |||||
struct g_provider *pp; | |||||
struct g_geom *gp; | |||||
char param[16]; | |||||
int i, *nargs; | |||||
g_topology_assert(); | |||||
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); | |||||
if (nargs == NULL) { | |||||
gctl_error(req, "No '%s' argument", "nargs"); | |||||
return; | |||||
} | |||||
if (*nargs <= 0) { | |||||
gctl_error(req, "Missing device(s)."); | |||||
return; | |||||
} | |||||
for (i = 0; i < *nargs; i++) { | |||||
snprintf(param, sizeof(param), "arg%d", i); | |||||
pp = gctl_get_provider(req, param); | |||||
if (pp == NULL) | |||||
return; | |||||
gp = pp->geom; | |||||
if (gp->class != mp) { | |||||
gctl_error(req, "Provider %s is invalid.", pp->name); | |||||
return; | |||||
} | |||||
g_union_revert(gp->softc); | |||||
if (verbose) | |||||
printf("Device %s has been reverted.\n", gp->name); | |||||
G_UNION_DEBUG(1, "Device %s has been reverted.", gp->name); | |||||
} | |||||
} | |||||
/* | |||||
* Revert union writes by zero'ing out the writemap. | |||||
*/ | |||||
static void | |||||
g_union_revert(struct g_union_softc *sc) | |||||
{ | |||||
int i; | |||||
G_WLOCK(sc); | |||||
for (i = 0; i < sc->sc_root_size; i++) | |||||
memset(sc->sc_writemap_root[i], 0, | |||||
sc->sc_leaf_size * sizeof(uint64_t)); | |||||
memset(sc->sc_leafused, 0, roundup(sc->sc_root_size, BITS_PER_ENTRY)); | |||||
G_WUNLOCK(sc); | |||||
} | |||||
/* | |||||
* Commit all the writes made in the top layer to the lower layer. | |||||
*/ | |||||
static void | |||||
g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool verbose) | |||||
{ | |||||
struct g_union_softc *sc; | |||||
struct g_provider *pp, *lowerpp; | |||||
struct g_consumer *lowercp; | |||||
struct g_geom *gp; | |||||
struct bio *bp; | |||||
char param[16]; | |||||
off_t len2rd, len2wt, savelen; | |||||
int i, error, *nargs, *force; | |||||
g_topology_assert(); | |||||
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); | |||||
if (nargs == NULL) { | |||||
gctl_error(req, "No '%s' argument", "nargs"); | |||||
return; | |||||
} | |||||
if (*nargs <= 0) { | |||||
gctl_error(req, "Missing device(s)."); | |||||
return; | |||||
} | |||||
force = gctl_get_paraml(req, "force", sizeof(*force)); | |||||
if (force == NULL) { | |||||
gctl_error(req, "No 'force' argument"); | |||||
return; | |||||
} | |||||
/* Get a bio buffer to do our I/O */ | |||||
bp = g_alloc_bio(); | |||||
bp->bio_data = g_malloc(MAXBSIZE, M_WAITOK); | |||||
bp->bio_done = biodone; | |||||
for (i = 0; i < *nargs; i++) { | |||||
snprintf(param, sizeof(param), "arg%d", i); | |||||
pp = gctl_get_provider(req, param); | |||||
if (pp == NULL) | |||||
continue; | |||||
gp = pp->geom; | |||||
if (gp->class != mp) { | |||||
gctl_error(req, "Provider %s is invalid.", pp->name); | |||||
continue; | |||||
} | |||||
sc = gp->softc; | |||||
if (g_union_get_writelock(sc) != 0) { | |||||
kibUnsubmitted Not Done Inline ActionsWhy is this needed? kib: Why is this needed?
Isn't the fact that there is already a writer on the lower provider… | |||||
mckusickAuthorUnsubmitted Done Inline ActionsI put it in to give a more specific error message. You are correct that the access checks below will catch the error but give a less useful error message. mckusick: I put it in to give a more specific error message. You are correct that the access checks below… | |||||
gctl_error(req, "Commit already in progress for " | |||||
"provider %s.", pp->name); | |||||
continue; | |||||
} | |||||
/* upgrade to write access for lower */ | |||||
lowercp = sc->sc_lowercp; | |||||
lowerpp = lowercp->provider; | |||||
/* | |||||
* No mount or other use of union is allowed, unless the | |||||
* -f flag is given which allows read-only mount or usage. | |||||
*/ | |||||
if ((*force == false && pp->acr > 0) || pp->acw > 0 || | |||||
pp->ace > 0) { | |||||
gctl_error(req, "Unable to get exclusive access for " | |||||
"writing of %s.\n\tNote that %s cannot be mounted " | |||||
"or otherwise\n\topen during a commit unless the " | |||||
"-f flag is used.", pp->name, pp->name); | |||||
g_union_rel_writelock(sc); | |||||
continue; | |||||
} | |||||
/* | |||||
* No mount or other use of lower media is allowed, unless the | |||||
* -f flag is given which allows read-only mount or usage. | |||||
*/ | |||||
if ((*force == false && lowerpp->acr > lowercp->acr) || | |||||
lowerpp->acw > lowercp->acw || | |||||
lowerpp->ace > lowercp->ace) { | |||||
gctl_error(req, "provider %s is unable to get " | |||||
"exclusive access to %s\n\tfor writing. Note that " | |||||
"%s cannot be mounted or otherwise open\n\tduring " | |||||
"a commit unless the -f flag is used.", pp->name, | |||||
lowerpp->name, lowerpp->name); | |||||
g_union_rel_writelock(sc); | |||||
continue; | |||||
} | |||||
if ((error = g_access(lowercp, 0, 1, 0)) != 0) { | |||||
gctl_error(req, "Error %d: provider %s is unable to " | |||||
"access %s for writing.", error, pp->name, | |||||
lowerpp->name); | |||||
g_union_rel_writelock(sc); | |||||
continue; | |||||
} | |||||
/* Loop over write map copying across written blocks */ | |||||
bp->bio_offset = 0; | |||||
bp->bio_length = sc->sc_map_size * sc->sc_sectorsize; | |||||
G_RLOCK(sc); | |||||
while (bp->bio_length > 0) { | |||||
if (!g_union_getmap(bp, sc, &len2rd)) { | |||||
/* not written, so skip */ | |||||
bp->bio_offset += len2rd; | |||||
bp->bio_length -= len2rd; | |||||
continue; | |||||
} | |||||
G_RUNLOCK(sc); | |||||
/* need to read then write len2rd sectors */ | |||||
for ( ; len2rd > 0; len2rd -= len2wt) { | |||||
/* limit ourselves to MAXBSIZE size I/Os */ | |||||
len2wt = len2rd; | |||||
if (len2wt > MAXBSIZE) | |||||
len2wt = MAXBSIZE; | |||||
savelen = bp->bio_length; | |||||
bp->bio_length = len2wt; | |||||
bp->bio_cmd = BIO_READ; | |||||
g_io_request(bp, sc->sc_uppercp); | |||||
if ((error = biowait(bp, "rdunion")) != 0) { | |||||
gctl_error(req, "Commit read error %d " | |||||
"in provider %s, commit aborted.", | |||||
error, pp->name); | |||||
goto cleanup; | |||||
} | |||||
bp->bio_flags &= ~BIO_DONE; | |||||
bp->bio_cmd = BIO_WRITE; | |||||
g_io_request(bp, lowercp); | |||||
if ((error = biowait(bp, "wtunion")) != 0) { | |||||
gctl_error(req, "Commit write error %d " | |||||
"in provider %s, commit aborted.", | |||||
error, pp->name); | |||||
goto cleanup; | |||||
} | |||||
bp->bio_flags &= ~BIO_DONE; | |||||
bp->bio_offset += len2wt; | |||||
bp->bio_length = savelen - len2wt; | |||||
} | |||||
G_RLOCK(sc); | |||||
} | |||||
G_RUNLOCK(sc); | |||||
/* clear the write map */ | |||||
g_union_revert(sc); | |||||
cleanup: | |||||
/* return lower to previous access */ | |||||
if ((error = g_access(lowercp, 0, -1, 0)) != 0) { | |||||
G_UNION_DEBUG(2, "Error %d: device %s could not reset " | |||||
"access to %s (r=0 w=-1 e=0).", error, pp->name, | |||||
lowerpp->name); | |||||
} | |||||
g_union_rel_writelock(sc); | |||||
if (verbose) | |||||
printf("Device %s has been committed.\n", gp->name); | |||||
G_UNION_DEBUG(1, "Device %s has been committed.", gp->name); | |||||
} | |||||
g_free(bp->bio_data); | |||||
g_destroy_bio(bp); | |||||
} | |||||
/* | |||||
* Generally allow access unless a commit is in progress. | |||||
*/ | |||||
static int | |||||
g_union_access(struct g_provider *pp, int r, int w, int e) | |||||
{ | |||||
struct g_union_softc *sc; | |||||
sc = pp->geom->softc; | |||||
if (sc == NULL) { | |||||
if (r <= 0 && w <= 0 && e <= 0) | |||||
return (0); | |||||
return (ENXIO); | |||||
} | |||||
r += pp->acr; | |||||
w += pp->acw; | |||||
e += pp->ace; | |||||
if (g_union_get_writelock(sc) != 0) { | |||||
kibUnsubmitted Not Done Inline ActionsImagine that something gets writelock right after we rel-ed it. Then this check is basically nop. kib: Imagine that something gets writelock right after we rel-ed it. Then this check is basically… | |||||
mckusickAuthorUnsubmitted Done Inline ActionsThe goal is to block the opening of a device while a commit is in progress. So once a commit (write lock) starts we EBUSY anything other than a close. Can I just check the write flag without doing a full atomic lock/unlock here? mckusick: The goal is to block the opening of a device while a commit is in progress. So once a commit… | |||||
kibUnsubmitted Not Done Inline ActionsI do not see any additional protection that would the locked read get over unlocked. Basically the race is same, the condition we checked might become false right after unlock. kib: I do not see any additional protection that would the locked read get over unlocked. Basically… | |||||
if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) | |||||
return (0); | |||||
return (EBUSY); | |||||
} | |||||
g_union_rel_writelock(sc); | |||||
return (0); | |||||
} | |||||
/* | |||||
* Initiate an I/O operation on the union device. | |||||
*/ | |||||
static void | |||||
g_union_start(struct bio *bp) | |||||
{ | |||||
struct g_union_softc *sc; | |||||
struct g_consumer *cp, *firstcp; | |||||
struct bio *cbp, *firstbp; | |||||
struct g_union_iotrack *iotrackp; | |||||
off_t rdlen, len2rd, offset; | |||||
char *level; | |||||
int iocnt; | |||||
cbp = g_clone_bio(bp); | |||||
if (cbp == NULL) { | |||||
g_io_deliver(bp, ENOMEM); | |||||
return; | |||||
} | |||||
sc = bp->bio_to->geom->softc; | |||||
cbp->bio_offset += sc->sc_offset; | |||||
cbp->bio_done = g_std_done; | |||||
switch (bp->bio_cmd) { | |||||
case BIO_READ: | |||||
/* | |||||
* The usual read case is that we either read the top layer | |||||
* if the block has been previously written or the bottom layer | |||||
* if it has not been written. However, it is possible that | |||||
* only part of the block has been written, For example we may | |||||
* have written a UFS/FFS file fragment comprising several | |||||
* sectors out of an 8-sector block. Here, if the entire | |||||
* 8-sector block is read for example by a snapshot needing | |||||
* to copy the full block, then we need to read the written | |||||
* sectors from the upper level and the unwritten sectors from | |||||
* the lower level. We do this by alternately reading from the | |||||
* top and bottom layers until we complete the read. As | |||||
* requests for partially written blocks are uncommon, we | |||||
* make no attempt to optimize the code. | |||||
*/ | |||||
atomic_add_long(&sc->sc_reads, 1); | |||||
atomic_add_long(&sc->sc_readbytes, bp->bio_length); | |||||
rdlen = cbp->bio_length; | |||||
G_RLOCK(sc); | |||||
for (iocnt = 0; ; iocnt++) { | |||||
if (g_union_getmap(cbp, sc, &len2rd)) { | |||||
/* read top */ | |||||
cp = sc->sc_uppercp; | |||||
level = "upper"; | |||||
} else { | |||||
/* read bottom */ | |||||
cp = sc->sc_lowercp; | |||||
level = "lower"; | |||||
} | |||||
/* Check if only a single read is required */ | |||||
if (iocnt == 0 && rdlen == len2rd) { | |||||
G_UNION_LOGREQ(cbp, "Sending %jd byte read " | |||||
"request to %s level.", len2rd, level); | |||||
g_io_request(cbp, cp); | |||||
G_RUNLOCK(sc); | |||||
return; | |||||
} | |||||
if (iocnt == 0) { | |||||
iotrackp = g_malloc(sizeof(*iotrackp), | |||||
M_NOWAIT | M_ZERO); | |||||
if (iotrackp == NULL) { | |||||
cbp->bio_error = ENOMEM; | |||||
g_std_done(cbp); | |||||
G_RUNLOCK(sc); | |||||
return; | |||||
} | |||||
iotrackp->io_numios = 1; | |||||
} else { | |||||
atomic_add_long(&iotrackp->io_numios, 1); | |||||
} | |||||
cbp->bio_done = g_union_done; | |||||
cbp->bio_caller1 = iotrackp; | |||||
cbp->bio_length = len2rd; | |||||
offset = cbp->bio_offset + len2rd; | |||||
rdlen -= len2rd; | |||||
G_UNION_LOGREQ(cbp, "Sending %jd byte read " | |||||
"request to %s level.", len2rd, level); | |||||
/* | |||||
* To avoid prematurely notifying our consumer | |||||
* that their I/O has completed, we have to delay | |||||
* issuing our first I/O request until we have | |||||
* issued all the additional I/O requests. | |||||
*/ | |||||
if (iocnt > 0) { | |||||
g_io_request(cbp, cp); | |||||
} else { | |||||
firstbp = cbp; | |||||
firstcp = cp; | |||||
} | |||||
if (rdlen == len2rd) | |||||
break; | |||||
/* set up for next read */ | |||||
cbp = g_clone_bio(bp); | |||||
if (cbp == NULL) { | |||||
iotrackp->io_error = ENOMEM; | |||||
atomic_add_long(&iotrackp->io_numios, -1); | |||||
break; | |||||
} | |||||
cbp->bio_offset = offset; | |||||
cbp->bio_length = rdlen; | |||||
atomic_add_long(&sc->sc_reads, 1); | |||||
} | |||||
/* We have issued all our I/O, so start the first one */ | |||||
g_io_request(firstbp, firstcp); | |||||
G_RUNLOCK(sc); | |||||
return; | |||||
case BIO_WRITE: | |||||
/* | |||||
* Writes are always done to the top level. Here, we | |||||
* record the blocks that we are writing. | |||||
*/ | |||||
G_UNION_LOGREQ(cbp, "Sending %jd byte write request to upper " | |||||
"level.", cbp->bio_length); | |||||
atomic_add_long(&sc->sc_writes, 1); | |||||
atomic_add_long(&sc->sc_wrotebytes, bp->bio_length); | |||||
g_union_setmap(cbp, sc); | |||||
kibUnsubmitted Not Done Inline ActionsThere, you set the bit indicating that the upper layer has the copy of the block, and then drop the sc lock. Now, assume that a read comes in on other thread (it seems you allow this with G_PF_DIRECT flags) which is reading this block. Then, since it sees the bit set in bitmap, it reads garbage or old content from the upper layer. Am I missing something? kib: There, you set the bit indicating that the upper layer has the copy of the block, and then drop… | |||||
mckusickAuthorUnsubmitted Done Inline ActionsHere I assume that the client of the disk is managing the coordination of the reads and writes. For example, if UFS is running on this disk, then if there are two users of the filesystem with one writing a block and another reading it, then there will be a buffer associated with the block. That buffer will be exclusively locked while the write is in progress, so the reader will block until the write completes and the buffer is unlocked. I do not see any way to fix this if the above analysis is incorrect. I could delay setting the map until the I/O completes, but that just means the read could get stale data from before the write completed. Peter has gotten a crash that may be caused by this lack of coordination: https://people.freebsd.org/~pho/stress/log/log0198.txt But I have not been able to figure out what caused the failure in that crash. mckusick: Here I assume that the client of the disk is managing the coordination of the reads and writes. | |||||
kibUnsubmitted Not Done Inline ActionsWith current settings, reader gets so called out-of-thin-air data: the content of upper block was never written neither to lower nor to gunion geoms. I think returning lower block there is fine, the reader is racing with the writer, so returning lower block means that from our point of view, reader won the race. Or you could allocate two bits per block in the bitmap, instead of one, and use the other bit as an indicator of write in progress. Reader would sleep waiting for the in progress bit to clear. kib: With current settings, reader gets so called out-of-thin-air data: the content of upper block… | |||||
break; | |||||
case BIO_DELETE: | |||||
G_UNION_LOGREQ(bp, "Delete request received."); | |||||
atomic_add_long(&sc->sc_deletes, 1); | |||||
break; | |||||
case BIO_GETATTR: | |||||
G_UNION_LOGREQ(bp, "Getattr request received."); | |||||
atomic_add_long(&sc->sc_getattrs, 1); | |||||
if (strcmp(bp->bio_attribute, "GEOM::kerneldump") != 0) | |||||
/* forward the GETATTR to the lower-level device */ | |||||
break; | |||||
g_union_kerneldump(bp, sc); | |||||
return; | |||||
case BIO_FLUSH: | |||||
G_UNION_LOGREQ(bp, "Flush request received."); | |||||
atomic_add_long(&sc->sc_flushes, 1); | |||||
break; | |||||
case BIO_SPEEDUP: | |||||
G_UNION_LOGREQ(bp, "Speedup request received."); | |||||
atomic_add_long(&sc->sc_speedups, 1); | |||||
break; | |||||
case BIO_CMD0: | |||||
G_UNION_LOGREQ(bp, "Cmd0 request received."); | |||||
atomic_add_long(&sc->sc_cmd0s, 1); | |||||
break; | |||||
case BIO_CMD1: | |||||
G_UNION_LOGREQ(bp, "Cmd1 request received."); | |||||
atomic_add_long(&sc->sc_cmd1s, 1); | |||||
break; | |||||
case BIO_CMD2: | |||||
G_UNION_LOGREQ(bp, "Cmd2 request received."); | |||||
atomic_add_long(&sc->sc_cmd2s, 1); | |||||
break; | |||||
} | |||||
/* | |||||
* All commands other than write are passed through to the | |||||
* upper-level device since it is writable and thus able to | |||||
* respond to delete, flush, and speedup requests. | |||||
*/ | |||||
g_io_request(cbp, sc->sc_uppercp); | |||||
} | |||||
/* | |||||
* Used only when completing a BIO_READ operation. | |||||
*/ | |||||
static void | |||||
g_union_done(struct bio *bp) | |||||
{ | |||||
struct g_union_iotrack *iotrackp; | |||||
iotrackp = bp->bio_caller1; | |||||
if (iotrackp->io_error != 0 && bp->bio_error == 0) | |||||
bp->bio_error = iotrackp->io_error; | |||||
iotrackp->io_error = 0; | |||||
if (atomic_fetchadd_long(&iotrackp->io_numios, -1) == 0) | |||||
g_free(iotrackp); | |||||
g_std_done(bp); | |||||
} | |||||
/* | |||||
* Record blocks that have been written in the map. | |||||
*/ | |||||
static void | |||||
g_union_setmap(struct bio *bp, struct g_union_softc *sc) | |||||
{ | |||||
size_t root_idx; | |||||
uint64_t **leaf; | |||||
uint64_t *wordp; | |||||
off_t start, numsec; | |||||
KASSERT(bp->bio_offset % sc->sc_sectorsize == 0, | |||||
("g_union_setmap: offset not on sector boundry")); | |||||
KASSERT(bp->bio_length % sc->sc_sectorsize == 0, | |||||
("g_union_setmap: length not a multiple of sectors")); | |||||
start = bp->bio_offset / sc->sc_sectorsize; | |||||
numsec = bp->bio_length / sc->sc_sectorsize; | |||||
KASSERT(start + numsec < sc->sc_map_size, | |||||
("g_union_setmap: block %jd is out of range", start + numsec)); | |||||
G_WLOCK(sc); | |||||
for ( ; numsec > 0; numsec--, start++) { | |||||
root_idx = start / sc->sc_bits_per_leaf; | |||||
leaf = &sc->sc_writemap_root[root_idx]; | |||||
wordp = &(*leaf) | |||||
[(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY]; | |||||
*wordp |= 1ULL << (start % BITS_PER_ENTRY); | |||||
sc->sc_leafused[root_idx / BITS_PER_ENTRY] |= | |||||
1 << (root_idx % BITS_PER_ENTRY); | |||||
} | |||||
G_WUNLOCK(sc); | |||||
} | |||||
/* | |||||
* Check map to determine whether blocks have been written. | |||||
* | |||||
* Return true if they have been written so should be read from the top | |||||
* layer. Return false if they have not been written so should be read | |||||
* from the bottom layer. Return in len2read the bytes to be read. See | |||||
* the comment above the BIO_READ implementation in g_union_start() for | |||||
* an explantion of why len2read may be shorter than the buffer length. | |||||
*/ | |||||
static bool | |||||
g_union_getmap(struct bio *bp, struct g_union_softc *sc, off_t *len2read) | |||||
{ | |||||
off_t start, numsec, leafresid, bitloc; | |||||
bool first, maptype, retval; | |||||
uint64_t *leaf, word; | |||||
size_t root_idx; | |||||
KASSERT(bp->bio_offset % sc->sc_sectorsize == 0, | |||||
("g_union_getmap: offset not on sector boundry")); | |||||
KASSERT(bp->bio_length % sc->sc_sectorsize == 0, | |||||
("g_union_getmap: length not a multiple of sectors")); | |||||
start = bp->bio_offset / sc->sc_sectorsize; | |||||
numsec = bp->bio_length / sc->sc_sectorsize; | |||||
G_UNION_DEBUG(3, "g_union_getmap: check %jd sectors starting at %jd\n", | |||||
numsec, start); | |||||
KASSERT(start + numsec <= sc->sc_map_size, | |||||
("g_union_getmap: block %jd is out of range", start + numsec)); | |||||
root_idx = start / sc->sc_bits_per_leaf; | |||||
first = true; | |||||
while (numsec > 0) { | |||||
/* Check first if the leaf records any written sectors */ | |||||
root_idx = start / sc->sc_bits_per_leaf; | |||||
leafresid = sc->sc_bits_per_leaf - | |||||
(start % sc->sc_bits_per_leaf); | |||||
if (((sc->sc_leafused[root_idx / BITS_PER_ENTRY]) & | |||||
(1ULL << (root_idx % BITS_PER_ENTRY))) == 0) { | |||||
if (first) { | |||||
maptype = false; | |||||
first = false; | |||||
} | |||||
if (maptype) | |||||
break; | |||||
numsec -= leafresid; | |||||
start += leafresid; | |||||
continue; | |||||
} | |||||
/* Check up to a word boundry, then check word by word */ | |||||
leaf = sc->sc_writemap_root[root_idx]; | |||||
word = leaf[(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY]; | |||||
bitloc = start % BITS_PER_ENTRY; | |||||
if (bitloc == 0 && (word == 0 || word == ~0)) { | |||||
if (first) { | |||||
if (word == 0) | |||||
maptype = false; | |||||
else | |||||
maptype = true; | |||||
first = false; | |||||
} | |||||
if ((word == 0 && maptype) || | |||||
(word == ~0 && !maptype)) | |||||
break; | |||||
numsec -= BITS_PER_ENTRY; | |||||
start += BITS_PER_ENTRY; | |||||
continue; | |||||
} | |||||
for ( ; bitloc < BITS_PER_ENTRY; bitloc ++) { | |||||
retval = (word & (1ULL << bitloc)) != 0; | |||||
if (first) { | |||||
maptype = retval; | |||||
first = false; | |||||
} | |||||
if (maptype == retval) { | |||||
numsec--; | |||||
start++; | |||||
continue; | |||||
} | |||||
goto out; | |||||
} | |||||
} | |||||
out: | |||||
if (numsec < 0) | |||||
numsec = 0; | |||||
*len2read = bp->bio_length - (numsec * sc->sc_sectorsize); | |||||
G_UNION_DEBUG(3, "g_union_getmap: return maptype %swritten for %jd " | |||||
"sectors ending at %jd\n", maptype ? "" : "NOT ", | |||||
*len2read / sc->sc_sectorsize, start - 1); | |||||
return (maptype); | |||||
} | |||||
/* | |||||
* Fill in details for a BIO_GETATTR request. | |||||
*/ | |||||
static void | |||||
g_union_kerneldump(struct bio *bp, struct g_union_softc *sc) | |||||
{ | |||||
struct g_kerneldump *gkd; | |||||
struct g_geom *gp; | |||||
struct g_provider *pp; | |||||
gkd = (struct g_kerneldump *)bp->bio_data; | |||||
gp = bp->bio_to->geom; | |||||
g_trace(G_T_TOPOLOGY, "%s(%s, %jd, %jd)", __func__, gp->name, | |||||
(intmax_t)gkd->offset, (intmax_t)gkd->length); | |||||
pp = LIST_FIRST(&gp->provider); | |||||
gkd->di.dumper = g_union_dumper; | |||||
gkd->di.priv = sc; | |||||
gkd->di.blocksize = pp->sectorsize; | |||||
gkd->di.maxiosize = DFLTPHYS; | |||||
gkd->di.mediaoffset = sc->sc_offset + gkd->offset; | |||||
if (gkd->offset > sc->sc_size) { | |||||
g_io_deliver(bp, ENODEV); | |||||
return; | |||||
} | |||||
if (gkd->offset + gkd->length > sc->sc_size) | |||||
gkd->length = sc->sc_size - gkd->offset; | |||||
gkd->di.mediasize = gkd->length; | |||||
g_io_deliver(bp, 0); | |||||
} | |||||
/* | |||||
* Handler for g_union_kerneldump(). | |||||
*/ | |||||
static int | |||||
g_union_dumper(void *priv, void *virtual, vm_offset_t physical, off_t offset, | |||||
size_t length) | |||||
{ | |||||
return (0); | |||||
} | |||||
/* | |||||
* List union statistics. | |||||
*/ | |||||
static void | |||||
g_union_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, | |||||
struct g_consumer *cp, struct g_provider *pp) | |||||
{ | |||||
struct g_union_softc *sc; | |||||
if (pp != NULL || cp != NULL || gp->softc == NULL) | |||||
return; | |||||
sc = gp->softc; | |||||
sbuf_printf(sb, "%s<Reads>%ju</Reads>\n", indent, sc->sc_reads); | |||||
sbuf_printf(sb, "%s<Writes>%ju</Writes>\n", indent, sc->sc_writes); | |||||
sbuf_printf(sb, "%s<Deletes>%ju</Deletes>\n", indent, sc->sc_deletes); | |||||
sbuf_printf(sb, "%s<Getattrs>%ju</Getattrs>\n", indent, sc->sc_getattrs); | |||||
sbuf_printf(sb, "%s<Flushes>%ju</Flushes>\n", indent, sc->sc_flushes); | |||||
sbuf_printf(sb, "%s<Speedups>%ju</Speedups>\n", indent, sc->sc_speedups); | |||||
sbuf_printf(sb, "%s<Cmd0s>%ju</Cmd0s>\n", indent, sc->sc_cmd0s); | |||||
sbuf_printf(sb, "%s<Cmd1s>%ju</Cmd1s>\n", indent, sc->sc_cmd1s); | |||||
sbuf_printf(sb, "%s<Cmd2s>%ju</Cmd2s>\n", indent, sc->sc_cmd2s); | |||||
sbuf_printf(sb, "%s<ReadBytes>%ju</ReadBytes>\n", indent, | |||||
sc->sc_readbytes); | |||||
sbuf_printf(sb, "%s<WroteBytes>%ju</WroteBytes>\n", indent, | |||||
sc->sc_wrotebytes); | |||||
sbuf_printf(sb, "%s<WroteBytes>%ju</WroteBytes>\n", indent, | |||||
sc->sc_wrotebytes); | |||||
sbuf_printf(sb, "%s<Offset>%jd</Offset>\n", indent, | |||||
(intmax_t)sc->sc_offset); | |||||
} | |||||
/* | |||||
* Clean up an orphaned geom. | |||||
*/ | |||||
static void | |||||
g_union_orphan(struct g_consumer *cp) | |||||
{ | |||||
g_topology_assert(); | |||||
g_union_destroy(cp->geom, 1, false); | |||||
} | |||||
/* | |||||
* Clean up a union geom. | |||||
*/ | |||||
static int | |||||
g_union_destroy_geom(struct gctl_req *req, struct g_class *mp, | |||||
struct g_geom *gp) | |||||
{ | |||||
return (g_union_destroy(gp, 0, false)); | |||||
} | |||||
/* | |||||
* Clean up a union device. | |||||
*/ | |||||
static int | |||||
g_union_destroy(struct g_geom *gp, bool force, bool verbose) | |||||
{ | |||||
struct g_union_softc *sc; | |||||
struct g_provider *pp; | |||||
int error; | |||||
g_topology_assert(); | |||||
sc = gp->softc; | |||||
if (sc == NULL) | |||||
return (ENXIO); | |||||
pp = LIST_FIRST(&gp->provider); | |||||
if ((sc->sc_flags & DOING_COMMIT) != 0 || | |||||
(pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0))) { | |||||
if (force) { | |||||
if (verbose) | |||||
printf("Device %s is still in use, so " | |||||
"is being forcibly removed.\n", pp->name); | |||||
G_UNION_DEBUG(1, "Device %s is still in use, so " | |||||
"is being forcibly removed.", pp->name); | |||||
} else { | |||||
if (verbose) | |||||
printf("Device %s is still open " | |||||
"(r=%d w=%d e=%d).\n", pp->name, pp->acr, | |||||
pp->acw, pp->ace); | |||||
G_UNION_DEBUG(1, "Device %s is still open " | |||||
"(r=%d w=%d e=%d).", pp->name, pp->acr, | |||||
pp->acw, pp->ace); | |||||
return (EBUSY); | |||||
} | |||||
} else { | |||||
if (verbose) | |||||
printf("Device %s removed.\n", gp->name); | |||||
G_UNION_DEBUG(1, "Device %s removed.", gp->name); | |||||
} | |||||
/* Close consumers */ | |||||
if ((error = g_access(sc->sc_lowercp, -1, 0, -1)) != 0) | |||||
G_UNION_DEBUG(2, "Error %d: device %s could not reset access " | |||||
"to %s.", error, pp->name, sc->sc_lowercp->provider->name); | |||||
if ((error = g_access(sc->sc_uppercp, -1, -1, -1)) != 0) | |||||
G_UNION_DEBUG(2, "Error %d: device %s could not reset access " | |||||
"to %s.", error, pp->name, sc->sc_uppercp->provider->name); | |||||
g_wither_geom(gp, ENXIO); | |||||
return (0); | |||||
} | |||||
/* | |||||
* Clean up a union provider. | |||||
*/ | |||||
static void | |||||
g_union_providergone(struct g_provider *pp) | |||||
{ | |||||
struct g_geom *gp; | |||||
struct g_union_softc *sc; | |||||
size_t i; | |||||
gp = pp->geom; | |||||
sc = gp->softc; | |||||
gp->softc = NULL; | |||||
for (i = 0; i < sc->sc_root_size; i++) | |||||
g_free(sc->sc_writemap_root[i]); | |||||
g_free(sc->sc_writemap_root); | |||||
rw_destroy(&sc->sc_rwlock); | |||||
g_free(sc); | |||||
} | |||||
/* | |||||
* Respond to a resized provider. | |||||
*/ | |||||
static void | |||||
g_union_resize(struct g_consumer *cp) | |||||
{ | |||||
struct g_union_softc *sc; | |||||
struct g_geom *gp; | |||||
g_topology_assert(); | |||||
gp = cp->geom; | |||||
sc = gp->softc; | |||||
/* | |||||
* If size has gotten bigger, ignore it and just keep using | |||||
* the space we already had. Otherwise we are done. | |||||
*/ | |||||
if (sc->sc_size < cp->provider->mediasize - sc->sc_offset) | |||||
return; | |||||
g_union_destroy(gp, 1, false); | |||||
} | |||||
DECLARE_GEOM_CLASS(g_union_class, g_union); | |||||
MODULE_VERSION(geom_union, 0); |
Don't you want to set an error in this case as well?