Changeset View
Standalone View
sys/geom/mirror/g_mirror.c
Show First 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | |||||
static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data"); | static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data"); | ||||
SYSCTL_DECL(_kern_geom); | SYSCTL_DECL(_kern_geom); | ||||
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, | static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, | ||||
"GEOM_MIRROR stuff"); | "GEOM_MIRROR stuff"); | ||||
int g_mirror_debug = 0; | int g_mirror_debug = 0; | ||||
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0, | SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0, | ||||
"Debug level"); | "Debug level"); | ||||
bool g_launch_mirror_before_timeout = true; | |||||
SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout, | |||||
CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0, | |||||
"If false, force gmirror to wait out the full kern.geom.mirror.timeout " | |||||
"before launching mirrors"); | |||||
markj: "before launching mirrors" | |||||
static u_int g_mirror_timeout = 4; | static u_int g_mirror_timeout = 4; | ||||
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout, | SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout, | ||||
0, "Time to wait on all mirror components"); | 0, "Time to wait on all mirror components"); | ||||
static u_int g_mirror_idletime = 5; | static u_int g_mirror_idletime = 5; | ||||
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN, | SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN, | ||||
&g_mirror_idletime, 0, "Mark components as clean when idling"); | &g_mirror_idletime, 0, "Mark components as clean when idling"); | ||||
static u_int g_mirror_disconnect_on_failure = 1; | static u_int g_mirror_disconnect_on_failure = 1; | ||||
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN, | SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN, | ||||
Show All 35 Lines | |||||
}; | }; | ||||
static void g_mirror_destroy_provider(struct g_mirror_softc *sc); | static void g_mirror_destroy_provider(struct g_mirror_softc *sc); | ||||
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state); | static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state); | ||||
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force); | static void g_mirror_update_device(struct g_mirror_softc *sc, bool force); | ||||
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent, | static void g_mirror_dumpconf(struct sbuf *sb, const char *indent, | ||||
struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); | struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); | ||||
static int g_mirror_refresh_device(struct g_mirror_softc *sc, | |||||
const struct g_provider *pp, const struct g_mirror_metadata *md); | |||||
static void g_mirror_sync_reinit(const struct g_mirror_disk *disk, | static void g_mirror_sync_reinit(const struct g_mirror_disk *disk, | ||||
struct bio *bp, off_t offset); | struct bio *bp, off_t offset); | ||||
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type); | static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type); | ||||
static void g_mirror_register_request(struct g_mirror_softc *sc, | static void g_mirror_register_request(struct g_mirror_softc *sc, | ||||
struct bio *bp); | struct bio *bp); | ||||
static void g_mirror_sync_release(struct g_mirror_softc *sc); | static void g_mirror_sync_release(struct g_mirror_softc *sc); | ||||
▲ Show 20 Lines • Show All 346 Lines • ▼ Show 20 Lines | g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp, | ||||
if (md->md_provider[0] != '\0') | if (md->md_provider[0] != '\0') | ||||
disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED; | disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED; | ||||
disk->d_sync.ds_consumer = NULL; | disk->d_sync.ds_consumer = NULL; | ||||
disk->d_sync.ds_offset = md->md_sync_offset; | disk->d_sync.ds_offset = md->md_sync_offset; | ||||
disk->d_sync.ds_offset_done = md->md_sync_offset; | disk->d_sync.ds_offset_done = md->md_sync_offset; | ||||
disk->d_sync.ds_update_ts = time_uptime; | disk->d_sync.ds_update_ts = time_uptime; | ||||
disk->d_genid = md->md_genid; | disk->d_genid = md->md_genid; | ||||
disk->d_sync.ds_syncid = md->md_syncid; | disk->d_sync.ds_syncid = md->md_syncid; | ||||
disk->d_init_ndisks = md->md_all; | |||||
disk->d_init_slice = md->md_slice; | |||||
disk->d_init_balance = md->md_balance; | |||||
disk->d_init_mediasize = md->md_mediasize; | |||||
if (errorp != NULL) | if (errorp != NULL) | ||||
*errorp = 0; | *errorp = 0; | ||||
return (disk); | return (disk); | ||||
fail: | fail: | ||||
if (errorp != NULL) | if (errorp != NULL) | ||||
*errorp = error; | *errorp = error; | ||||
if (disk != NULL) | if (disk != NULL) | ||||
free(disk, M_MIRROR); | free(disk, M_MIRROR); | ||||
▲ Show 20 Lines • Show All 1,874 Lines • ▼ Show 20 Lines | g_mirror_update_device(struct g_mirror_softc *sc, bool force) | ||||
u_int state; | u_int state; | ||||
sx_assert(&sc->sc_lock, SX_XLOCKED); | sx_assert(&sc->sc_lock, SX_XLOCKED); | ||||
switch (sc->sc_state) { | switch (sc->sc_state) { | ||||
case G_MIRROR_DEVICE_STATE_STARTING: | case G_MIRROR_DEVICE_STATE_STARTING: | ||||
{ | { | ||||
struct g_mirror_disk *pdisk, *tdisk; | struct g_mirror_disk *pdisk, *tdisk; | ||||
u_int dirty, ndisks, genid, syncid; | const char *mismatch; | ||||
bool broken; | uintmax_t found, newest; | ||||
u_int dirty, ndisks; | |||||
/* Pre-flight checks */ | |||||
LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { | |||||
/* | |||||
* Confirm we already detected the newest genid. | |||||
*/ | |||||
KASSERT(sc->sc_genid >= disk->d_genid, | |||||
("%s: found newer genid %u (sc:%p had %u).", __func__, | |||||
disk->d_genid, sc, sc->sc_genid)); | |||||
/* Kick out any previously tasted stale components. */ | |||||
if (disk->d_genid < sc->sc_genid) { | |||||
G_MIRROR_DEBUG(0, "Stale 'genid' field on %s " | |||||
"(device %s) (component=%u latest=%u), skipping.", | |||||
g_mirror_get_diskname(disk), sc->sc_name, | |||||
disk->d_genid, sc->sc_genid); | |||||
g_mirror_destroy_disk(disk); | |||||
sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; | |||||
continue; | |||||
} | |||||
/* | |||||
* Confirm we already detected the newest syncid. | |||||
*/ | |||||
KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid, | |||||
("%s: found newer syncid %u (sc:%p had %u).", | |||||
__func__, disk->d_sync.ds_syncid, sc, | |||||
sc->sc_syncid)); | |||||
#define DETECT_MISMATCH(field, name) \ | |||||
if (mismatch == NULL && \ | |||||
disk->d_init_ ## field != sc->sc_ ## field) { \ | |||||
mismatch = name; \ | |||||
found = (intmax_t)disk->d_init_ ## field; \ | |||||
newest = (intmax_t)sc->sc_ ## field; \ | |||||
} | |||||
mismatch = NULL; | |||||
DETECT_MISMATCH(ndisks, "md_all"); | |||||
DETECT_MISMATCH(balance, "md_balance"); | |||||
DETECT_MISMATCH(slice, "md_slice"); | |||||
DETECT_MISMATCH(mediasize, "md_mediasize"); | |||||
#undef DETECT_MISMATCH | |||||
if (mismatch != NULL) { | |||||
G_MIRROR_DEBUG(0, "Found a mismatching '%s' " | |||||
"field on %s (device %s) (found=%ju " | |||||
"newest=%ju).", mismatch, | |||||
g_mirror_get_diskname(disk), sc->sc_name, | |||||
found, newest); | |||||
g_mirror_destroy_disk(disk); | |||||
sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; | |||||
continue; | |||||
} | |||||
} | |||||
KASSERT(sc->sc_provider == NULL, | KASSERT(sc->sc_provider == NULL, | ||||
("Non-NULL provider in STARTING state (%s).", sc->sc_name)); | ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); | ||||
/* | /* | ||||
* Are we ready? We are, if all disks are connected or | * Are we ready? If the timeout (force is true) has expired, and | ||||
* if we have any disks and 'force' is true. | * any disks are present, then yes. If we're permitted to launch | ||||
* before the timeout has expired and the expected number of | |||||
* current-generation mirror disks have been tasted, then yes. | |||||
*/ | */ | ||||
ndisks = g_mirror_ndisks(sc, -1); | ndisks = g_mirror_ndisks(sc, -1); | ||||
if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) { | if ((force && ndisks > 0) || | ||||
(g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) { | |||||
; | ; | ||||
} else if (ndisks == 0) { | } else if (ndisks == 0) { | ||||
/* | /* | ||||
* Disks went down in starting phase, so destroy | * Disks went down in starting phase, so destroy | ||||
* device. | * device. | ||||
*/ | */ | ||||
callout_drain(&sc->sc_callout); | callout_drain(&sc->sc_callout); | ||||
sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; | sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; | ||||
Show All 31 Lines | if (force) { | ||||
return; | return; | ||||
} | } | ||||
} else { | } else { | ||||
/* Cancel timeout. */ | /* Cancel timeout. */ | ||||
callout_drain(&sc->sc_callout); | callout_drain(&sc->sc_callout); | ||||
} | } | ||||
/* | /* | ||||
* Find the biggest genid. | |||||
*/ | |||||
genid = 0; | |||||
LIST_FOREACH(disk, &sc->sc_disks, d_next) { | |||||
if (disk->d_genid > genid) | |||||
genid = disk->d_genid; | |||||
} | |||||
sc->sc_genid = genid; | |||||
/* | |||||
* Remove all disks without the biggest genid. | |||||
*/ | |||||
broken = false; | |||||
LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { | |||||
if (disk->d_genid < genid) { | |||||
G_MIRROR_DEBUG(0, | |||||
"Component %s (device %s) broken, skipping.", | |||||
g_mirror_get_diskname(disk), sc->sc_name); | |||||
g_mirror_destroy_disk(disk); | |||||
/* | |||||
* Bump the syncid in case we discover a healthy | |||||
* replacement disk after starting the mirror. | |||||
*/ | |||||
broken = true; | |||||
} | |||||
} | |||||
/* | |||||
* Find the biggest syncid. | |||||
*/ | |||||
syncid = 0; | |||||
LIST_FOREACH(disk, &sc->sc_disks, d_next) { | |||||
if (disk->d_sync.ds_syncid > syncid) | |||||
syncid = disk->d_sync.ds_syncid; | |||||
} | |||||
/* | |||||
* Here we need to look for dirty disks and if all disks | * Here we need to look for dirty disks and if all disks | ||||
* with the biggest syncid are dirty, we have to choose | * with the biggest syncid are dirty, we have to choose | ||||
* one with the biggest priority and rebuild the rest. | * one with the biggest priority and rebuild the rest. | ||||
*/ | */ | ||||
/* | /* | ||||
* Find the number of dirty disks with the biggest syncid. | * Find the number of dirty disks with the biggest syncid. | ||||
* Find the number of disks with the biggest syncid. | * Find the number of disks with the biggest syncid. | ||||
* While here, find a disk with the biggest priority. | * While here, find a disk with the biggest priority. | ||||
*/ | */ | ||||
dirty = ndisks = 0; | dirty = ndisks = 0; | ||||
pdisk = NULL; | pdisk = NULL; | ||||
LIST_FOREACH(disk, &sc->sc_disks, d_next) { | LIST_FOREACH(disk, &sc->sc_disks, d_next) { | ||||
if (disk->d_sync.ds_syncid != syncid) | if (disk->d_sync.ds_syncid != sc->sc_syncid) | ||||
continue; | continue; | ||||
if ((disk->d_flags & | if ((disk->d_flags & | ||||
G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { | G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { | ||||
continue; | continue; | ||||
} | } | ||||
ndisks++; | ndisks++; | ||||
if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { | if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { | ||||
dirty++; | dirty++; | ||||
if (pdisk == NULL || | if (pdisk == NULL || | ||||
pdisk->d_priority < disk->d_priority) { | pdisk->d_priority < disk->d_priority) { | ||||
pdisk = disk; | pdisk = disk; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if (dirty == 0) { | if (dirty == 0) { | ||||
Done Inline ActionsExtra #if markj: Extra #if | |||||
Done Inline ActionsSure. I meant this bit to be a discussion point. If we got here, we have some number of disks, all with the same genid. The one with the latest syncid says sc_ndisks is N. If there are other disk(s) with the same genid and sc_ndisks != N, do we want to kick them out? That was the existing behavior (via g_mirror_check_metadata). I guess preserving the existing behavior for now makes some sense (smaller bite-size change) even if it isn't the right long-term behavior. I'm happy to drop the extraneous #if and comment(s). cem: Sure. I meant this bit to be a discussion point.
If we got here, we have some number of disks… | |||||
Done Inline ActionsI think the existing behaviour was reasonable. At least, I can't think of a scenario where it'd be harmful. But now we're being kind of inconsistent - shouldn't we do the same if, e.g. the balance algorithm is different? markj: I think the existing behaviour was reasonable. At least, I can't think of a scenario where it'd… | |||||
Done Inline ActionsSure, I guess we could reject mismatched balance and slice here as well. (And mediasize?) cem: Sure, I guess we could reject mismatched balance and slice here as well. (And mediasize?) | |||||
/* No dirty disks at all, great. */ | /* No dirty disks at all, great. */ | ||||
Done Inline ActionsWhy is the bump necessary? Shouldn't we be able to assume that the inconsistent disk already has a lower syncid? markj: Why is the bump necessary? Shouldn't we be able to assume that the inconsistent disk already… | |||||
Not Done Inline ActionsI added this bit b/c during testing I saw syncid did not get automatically bumped if a disk failed to add to a running mirror during the early part of a rebalance. If the node got rebooted at this very instant, the valid components and disconnected component would have the same genid and syncid. The 'forget' ioctl allows us to force gmirror to increment the genid from the userspace. The ioctl anyway updates sc->ndisks when a mirror is degraded, so it makes sense to update genid as well. cindy.yang_isilon.com: I added this bit b/c during testing I saw syncid did not get automatically bumped if a disk… | |||||
Done Inline ActionsI don't see any reason to assume the inconsistent disk has lower syncid, and the bump is pretty harmless (one-time write). If we can assume the inconsistent disk has lower syncid, we might as well KASSERT it. cem: I don't see any reason to assume the inconsistent disk has lower syncid, and the bump is pretty… | |||||
Not Done Inline ActionsThey have contradictory metadata. It shouldn't be possible for them to have the same syncid and genid unless something other than gmirror was messing with the metadata block. At least, if they do, then there is a bug elsewhere in gmirror. markj: They have contradictory metadata. It shouldn't be possible for them to have the same syncid and… | |||||
Done Inline ActionsI guess all of the metadata is encoded in a single disk block and locks are held such that the genid bump should happen simultaneously with metadata updates. Operative word being "should;" it wouldn't surprise me if there are bugs remaining in gmirror :-). I guess we should move this kick-out loop above auto-start, like the genid one (and add the other metadata fields that can change at runtime). Should we actually kick out disks with inconsistent metadata if they have a lower syncid, or just dirty them? cem: I guess all of the metadata is encoded in a single disk block and locks are held such that the… | |||||
} else if (dirty == ndisks) { | } else if (dirty == ndisks) { | ||||
/* | /* | ||||
* Force synchronization for all dirty disks except one | * Force synchronization for all dirty disks except one | ||||
* with the biggest priority. | * with the biggest priority. | ||||
*/ | */ | ||||
KASSERT(pdisk != NULL, ("pdisk == NULL")); | KASSERT(pdisk != NULL, ("pdisk == NULL")); | ||||
G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a " | G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a " | ||||
"master disk for synchronization.", | "master disk for synchronization.", | ||||
g_mirror_get_diskname(pdisk), sc->sc_name); | g_mirror_get_diskname(pdisk), sc->sc_name); | ||||
LIST_FOREACH(disk, &sc->sc_disks, d_next) { | LIST_FOREACH(disk, &sc->sc_disks, d_next) { | ||||
if (disk->d_sync.ds_syncid != syncid) | if (disk->d_sync.ds_syncid != sc->sc_syncid) | ||||
continue; | continue; | ||||
if ((disk->d_flags & | if ((disk->d_flags & | ||||
G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { | G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { | ||||
continue; | continue; | ||||
} | } | ||||
KASSERT((disk->d_flags & | KASSERT((disk->d_flags & | ||||
G_MIRROR_DISK_FLAG_DIRTY) != 0, | G_MIRROR_DISK_FLAG_DIRTY) != 0, | ||||
("Disk %s isn't marked as dirty.", | ("Disk %s isn't marked as dirty.", | ||||
g_mirror_get_diskname(disk))); | g_mirror_get_diskname(disk))); | ||||
/* Skip the disk with the biggest priority. */ | /* Skip the disk with the biggest priority. */ | ||||
if (disk == pdisk) | if (disk == pdisk) | ||||
continue; | continue; | ||||
disk->d_sync.ds_syncid = 0; | disk->d_sync.ds_syncid = 0; | ||||
} | } | ||||
} else if (dirty < ndisks) { | } else if (dirty < ndisks) { | ||||
/* | /* | ||||
* Force synchronization for all dirty disks. | * Force synchronization for all dirty disks. | ||||
* We have some non-dirty disks. | * We have some non-dirty disks. | ||||
*/ | */ | ||||
LIST_FOREACH(disk, &sc->sc_disks, d_next) { | LIST_FOREACH(disk, &sc->sc_disks, d_next) { | ||||
if (disk->d_sync.ds_syncid != syncid) | if (disk->d_sync.ds_syncid != sc->sc_syncid) | ||||
continue; | continue; | ||||
if ((disk->d_flags & | if ((disk->d_flags & | ||||
G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { | G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { | ||||
continue; | continue; | ||||
} | } | ||||
if ((disk->d_flags & | if ((disk->d_flags & | ||||
G_MIRROR_DISK_FLAG_DIRTY) == 0) { | G_MIRROR_DISK_FLAG_DIRTY) == 0) { | ||||
continue; | continue; | ||||
} | } | ||||
disk->d_sync.ds_syncid = 0; | disk->d_sync.ds_syncid = 0; | ||||
} | } | ||||
} | } | ||||
/* Reset hint. */ | /* Reset hint. */ | ||||
sc->sc_hint = NULL; | sc->sc_hint = NULL; | ||||
sc->sc_syncid = syncid; | if (force) { | ||||
Done Inline ActionsI believe this reintroduces the bug fixed in r306743. markj: I believe this reintroduces the bug fixed in r306743. | |||||
Done Inline ActionsI don't think it does — the kick-out of stale components, and marking for SYNCID bump were just relocated from this function to g_mirror_add_disk. cem: I don't think it does — the kick-out of stale components, and marking for SYNCID bump were just… | |||||
Done Inline ActionsSuppose g_launch_mirror_before_timeout is true. The g_mirror_event_send() call in g_mirror_add_disk() will start the mirror once a second mirror is found. The event_send is synchronous, so the mirror's state will be updated to RUNNING before we perform the genid check on the last discovered component. In particular, in a mirror with two components, we'll always end up adding the broken component to the mirror, so the bug being introduced is actually larger than the one from r306743. markj: Suppose g_launch_mirror_before_timeout is true. The g_mirror_event_send() call in… | |||||
Done Inline ActionsOh, I see. I think the sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING condition (line 2950~2967 or so in the current incarnation of the diff) needs to be removed from g_mirror_add_disk to prevent us from adding the broken m2 component after initially discovering m1 in the r306743 scenario. For the ordering where m2 is tasted before m1, we either need to run the kickout loop before event_send, or g_mirror_ndisks could be modified not to count stale components. My concern with the former option would be the mirror having zero components (ephemerally). I'm not sure there's any reason to be concerned about the latter option. I suppose a 3rd option would be to just temporarily bump sc_ndisks while the event_send and kickout loop runs in g_mirror_add_disk, and decrement it afterwards. cem: Oh, I see. I think the `sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING` condition (line… | |||||
Done Inline ActionsFWIW, I should mention that I introduced this bug (deleting the "Remove all disks without the biggest genid" from g_mirror_device_update) in my own modification to Cindy's patch — it's not present in the patch she submitted. (Blame where blame is due 🙂.) And I should also add that a 4th option would be to just restore the kickout loop to g_mirror_update_device, perhaps via common subroutine. cem: FWIW, I should mention that I introduced this bug (deleting the "Remove all disks without the… | |||||
Done Inline ActionsI'll move the kick-out logic back to g_mirror_device_update, but locate it prior to the auto-start logic. cem: I'll move the kick-out logic back to g_mirror_device_update, but locate it prior to the auto… | |||||
if (force || broken) { | |||||
/* Remember to bump syncid on first write. */ | /* Remember to bump syncid on first write. */ | ||||
sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; | sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; | ||||
} | } | ||||
state = G_MIRROR_DEVICE_STATE_RUNNING; | state = G_MIRROR_DEVICE_STATE_RUNNING; | ||||
G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.", | G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.", | ||||
sc->sc_name, g_mirror_device_state2str(sc->sc_state), | sc->sc_name, g_mirror_device_state2str(sc->sc_state), | ||||
g_mirror_device_state2str(state)); | g_mirror_device_state2str(state)); | ||||
sc->sc_state = state; | sc->sc_state = state; | ||||
▲ Show 20 Lines • Show All 320 Lines • ▼ Show 20 Lines | g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md) | ||||
return (0); | return (0); | ||||
} | } | ||||
static int | static int | ||||
g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp, | g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp, | ||||
struct g_mirror_metadata *md) | struct g_mirror_metadata *md) | ||||
{ | { | ||||
G_MIRROR_DEBUG(2, "%s: md_did 0x%u disk %s device %s md_all 0x%x " | |||||
"sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x " | |||||
"sc_balance 0x%x sc_mediasize 0x%lx pp_mediasize 0x%lx " | |||||
"md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%lx " | |||||
"md_dflags 0x%lx md_syncid 0x%x md_genid 0x%x md_priority 0x%x " | |||||
"sc_state 0x%x.", | |||||
__func__, md->md_did, pp->name, sc->sc_name, md->md_all, | |||||
sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance, | |||||
sc->sc_balance, sc->sc_mediasize, pp->mediasize, md->md_sectorsize, | |||||
sc->sc_sectorsize, md->md_mflags, md->md_dflags, md->md_syncid, | |||||
md->md_genid, md->md_priority, sc->sc_state); | |||||
if (g_mirror_id2disk(sc, md->md_did) != NULL) { | if (g_mirror_id2disk(sc, md->md_did) != NULL) { | ||||
G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.", | G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.", | ||||
pp->name, md->md_did); | pp->name, md->md_did); | ||||
return (EEXIST); | return (EEXIST); | ||||
} | } | ||||
if (md->md_all != sc->sc_ndisks) { | |||||
G_MIRROR_DEBUG(1, | |||||
"Invalid '%s' field on disk %s (device %s), skipping.", | |||||
"md_all", pp->name, sc->sc_name); | |||||
return (EINVAL); | |||||
} | |||||
if (md->md_slice != sc->sc_slice) { | |||||
G_MIRROR_DEBUG(1, | |||||
"Invalid '%s' field on disk %s (device %s), skipping.", | |||||
"md_slice", pp->name, sc->sc_name); | |||||
return (EINVAL); | |||||
} | |||||
if (md->md_balance != sc->sc_balance) { | |||||
G_MIRROR_DEBUG(1, | |||||
"Invalid '%s' field on disk %s (device %s), skipping.", | |||||
"md_balance", pp->name, sc->sc_name); | |||||
return (EINVAL); | |||||
} | |||||
#if 0 | |||||
if (md->md_mediasize != sc->sc_mediasize) { | |||||
G_MIRROR_DEBUG(1, | |||||
"Invalid '%s' field on disk %s (device %s), skipping.", | |||||
"md_mediasize", pp->name, sc->sc_name); | |||||
return (EINVAL); | |||||
} | |||||
#endif | |||||
if (sc->sc_mediasize > pp->mediasize) { | if (sc->sc_mediasize > pp->mediasize) { | ||||
G_MIRROR_DEBUG(1, | G_MIRROR_DEBUG(1, | ||||
"Invalid size of disk %s (device %s), skipping.", pp->name, | "Invalid size of disk %s (device %s), skipping.", pp->name, | ||||
sc->sc_name); | sc->sc_name); | ||||
return (EINVAL); | return (EINVAL); | ||||
} | } | ||||
if (md->md_sectorsize != sc->sc_sectorsize) { | if (md->md_sectorsize != sc->sc_sectorsize) { | ||||
G_MIRROR_DEBUG(1, | G_MIRROR_DEBUG(1, | ||||
Show All 30 Lines | g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp, | ||||
int error; | int error; | ||||
g_topology_assert_not(); | g_topology_assert_not(); | ||||
G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name); | G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name); | ||||
error = g_mirror_check_metadata(sc, pp, md); | error = g_mirror_check_metadata(sc, pp, md); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING && | |||||
md->md_genid < sc->sc_genid) { | if (md->md_genid < sc->sc_genid) { | ||||
G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.", | G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.", | ||||
pp->name, sc->sc_name); | pp->name, sc->sc_name); | ||||
return (EINVAL); | return (EINVAL); | ||||
} | } | ||||
/* | |||||
* If the component disk we're tasting has newer metadata than the | |||||
* STARTING gmirror device, refresh the device from the component. | |||||
*/ | |||||
error = g_mirror_refresh_device(sc, pp, md); | |||||
if (error != 0) | |||||
return (error); | |||||
disk = g_mirror_init_disk(sc, pp, md, &error); | disk = g_mirror_init_disk(sc, pp, md, &error); | ||||
if (disk == NULL) | if (disk == NULL) | ||||
return (error); | return (error); | ||||
error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW, | error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW, | ||||
G_MIRROR_EVENT_WAIT); | G_MIRROR_EVENT_WAIT); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
if (md->md_version < G_MIRROR_VERSION) { | if (md->md_version < G_MIRROR_VERSION) { | ||||
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines | if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 && | ||||
sc->sc_provider_open == 0) | sc->sc_provider_open == 0) | ||||
g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL); | g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL); | ||||
end: | end: | ||||
sx_xunlock(&sc->sc_lock); | sx_xunlock(&sc->sc_lock); | ||||
g_topology_lock(); | g_topology_lock(); | ||||
return (error); | return (error); | ||||
} | } | ||||
static void | |||||
g_mirror_reinit_from_metadata(struct g_mirror_softc *sc, | |||||
const struct g_mirror_metadata *md) | |||||
{ | |||||
sc->sc_genid = md->md_genid; | |||||
sc->sc_syncid = md->md_syncid; | |||||
sc->sc_slice = md->md_slice; | |||||
sc->sc_balance = md->md_balance; | |||||
sc->sc_mediasize = md->md_mediasize; | |||||
sc->sc_ndisks = md->md_all; | |||||
sc->sc_flags = md->md_mflags; | |||||
} | |||||
struct g_geom * | struct g_geom * | ||||
g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, | g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, | ||||
u_int type) | u_int type) | ||||
{ | { | ||||
struct g_mirror_softc *sc; | struct g_mirror_softc *sc; | ||||
struct g_geom *gp; | struct g_geom *gp; | ||||
int error, timeout; | int error, timeout; | ||||
Show All 11 Lines | g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, | ||||
sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO); | sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO); | ||||
gp->start = g_mirror_start; | gp->start = g_mirror_start; | ||||
gp->orphan = g_mirror_orphan; | gp->orphan = g_mirror_orphan; | ||||
gp->access = g_mirror_access; | gp->access = g_mirror_access; | ||||
gp->dumpconf = g_mirror_dumpconf; | gp->dumpconf = g_mirror_dumpconf; | ||||
sc->sc_type = type; | sc->sc_type = type; | ||||
sc->sc_id = md->md_mid; | sc->sc_id = md->md_mid; | ||||
sc->sc_slice = md->md_slice; | g_mirror_reinit_from_metadata(sc, md); | ||||
sc->sc_balance = md->md_balance; | |||||
sc->sc_mediasize = md->md_mediasize; | |||||
sc->sc_sectorsize = md->md_sectorsize; | sc->sc_sectorsize = md->md_sectorsize; | ||||
sc->sc_ndisks = md->md_all; | |||||
sc->sc_flags = md->md_mflags; | |||||
sc->sc_bump_id = 0; | sc->sc_bump_id = 0; | ||||
sc->sc_idle = 1; | sc->sc_idle = 1; | ||||
sc->sc_last_write = time_uptime; | sc->sc_last_write = time_uptime; | ||||
sc->sc_writes = 0; | sc->sc_writes = 0; | ||||
sc->sc_refcnt = 1; | sc->sc_refcnt = 1; | ||||
sx_init(&sc->sc_lock, "gmirror:lock"); | sx_init(&sc->sc_lock, "gmirror:lock"); | ||||
TAILQ_INIT(&sc->sc_queue); | TAILQ_INIT(&sc->sc_queue); | ||||
mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF); | mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF); | ||||
TAILQ_INIT(&sc->sc_regular_delayed); | TAILQ_INIT(&sc->sc_regular_delayed); | ||||
TAILQ_INIT(&sc->sc_inflight); | TAILQ_INIT(&sc->sc_inflight); | ||||
TAILQ_INIT(&sc->sc_sync_delayed); | TAILQ_INIT(&sc->sc_sync_delayed); | ||||
LIST_INIT(&sc->sc_disks); | LIST_INIT(&sc->sc_disks); | ||||
TAILQ_INIT(&sc->sc_events); | TAILQ_INIT(&sc->sc_events); | ||||
mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); | mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); | ||||
callout_init(&sc->sc_callout, 1); | callout_init(&sc->sc_callout, 1); | ||||
mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF); | mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF); | ||||
sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING; | sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING; | ||||
gp->softc = sc; | gp->softc = sc; | ||||
Done Inline ActionsWhy? Recomputing it is cheap. markj: Why? Recomputing it is cheap. | |||||
Done Inline ActionsIt just makes the code clearer; it's not about the computation cost. It could be replaced with some more complicated expression (LIST_FIRST(&sc->sc_disks)->d_genid) in g_mirror_refresh_device but that wouldn't really improve the code, as far as I can tell. cem: It just makes the code clearer; it's not about the computation cost. It could be replaced with… | |||||
Not Done Inline ActionsI mean, you can just loop over the disks each time, which is what we do in the INVARIANTS block anyway. Keeping it in the softc is just more state to keep track of, but I don't feel strongly about it. markj: I mean, you can just loop over the disks each time, which is what we do in the INVARIANTS block… | |||||
sc->sc_geom = gp; | sc->sc_geom = gp; | ||||
sc->sc_provider = NULL; | sc->sc_provider = NULL; | ||||
sc->sc_provider_open = 0; | sc->sc_provider_open = 0; | ||||
/* | /* | ||||
* Synchronization geom. | * Synchronization geom. | ||||
*/ | */ | ||||
gp = g_new_geomf(mp, "%s.sync", md->md_name); | gp = g_new_geomf(mp, "%s.sync", md->md_name); | ||||
gp->softc = sc; | gp->softc = sc; | ||||
▲ Show 20 Lines • Show All 386 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
static void | static void | ||||
g_mirror_fini(struct g_class *mp) | g_mirror_fini(struct g_class *mp) | ||||
{ | { | ||||
if (g_mirror_post_sync != NULL) | if (g_mirror_post_sync != NULL) | ||||
EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync); | EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync); | ||||
} | |||||
/* | |||||
* Refresh the mirror device's metadata when gmirror encounters a newer | |||||
* generation as the individual components are being added to the mirror set. | |||||
*/ | |||||
static int | |||||
g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp, | |||||
const struct g_mirror_metadata *md) | |||||
{ | |||||
g_topology_assert_not(); | |||||
sx_assert(&sc->sc_lock, SX_XLOCKED); | |||||
KASSERT(sc->sc_genid <= md->md_genid, | |||||
("%s: attempted to refresh from stale component %s (device %s) " | |||||
Done Inline ActionsIndentation is off. markj: Indentation is off. | |||||
Done Inline ActionsWill fix cem: Will fix | |||||
"(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid, | |||||
sc->sc_genid)); | |||||
if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid && | |||||
sc->sc_syncid >= md->md_syncid)) | |||||
return (0); | |||||
Done Inline ActionsExtra newline. markj: Extra newline. | |||||
G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u " | |||||
Done Inline ActionsIt'd find it a bit neater to write: if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid && sc->sc_syncid >= md->md_syncid)) return (0); markj: It'd find it a bit neater to write:
```
if (sc->sc_genid > md->md_genid || (sc->sc_genid == md… | |||||
Done Inline ActionsYeah, me too (I'm not the original author of the patch). I tried to avoid making too stylistic changes (other than minimal style(9) stuff) to the Isilon patch when uploading to avoid adding conflict when we merge this back. I'm happy to make the change if you want, though. cem: Yeah, me too (I'm not the original author of the patch). I tried to avoid making too stylistic… | |||||
Done Inline ActionsI'd prefer to have the suggested change; in these situations I'd just do the merge back myself while the change is still fresh, rather than waiting for the tip of the merge branch to catch up. markj: I'd prefer to have the suggested change; in these situations I'd just do the merge back myself… | |||||
"new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; " | |||||
"provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid, | |||||
sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name); | |||||
if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) { | |||||
/* Probable data corruption detected */ | |||||
G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state " | |||||
"(device=%s genid=%u). A stale mirror device was launched.", | |||||
g_mirror_device_state2str(sc->sc_state), sc->sc_name, | |||||
sc->sc_genid); | |||||
return (EINVAL); | |||||
} | |||||
Done Inline ActionsI think this and g_mirror_create() should use a common subroutine to (re-)initialize sc fields from a disk. markj: I think this and g_mirror_create() should use a common subroutine to (re-)initialize sc fields… | |||||
Done Inline ActionsI agree and suggested it in the internal code review ;-). It just didn't happen. I'll make the change. cem: I agree and suggested it in the internal code review ;-). It just didn't happen. I'll make… | |||||
/* Update softc */ | |||||
g_mirror_reinit_from_metadata(sc, md); | |||||
G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s " | |||||
"(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid, | |||||
Done Inline ActionsWhy is sectorsize omitted? markj: Why is sectorsize omitted? | |||||
Done Inline ActionsIt can't be changed during the lifetime of a gmirror; the rest of these can. cem: It can't be changed during the lifetime of a gmirror; the rest of these can. | |||||
Done Inline ActionsHow can mediasize change? markj: How can mediasize change? | |||||
Done Inline Actionsg_mirror_ctl_resize (r258357). It's why the mediasize check was disabled in g_mirror_check_metadata (although I don't understand why it wasn't just deleted). cem: `g_mirror_ctl_resize` (r258357). It's why the mediasize check was disabled in… | |||||
g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid, | |||||
md->md_syncid, (unsigned)md->md_all); | |||||
return (0); | |||||
} | } | ||||
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); | DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); | ||||
MODULE_VERSION(geom_mirror, 0); | MODULE_VERSION(geom_mirror, 0); |
"before launching mirrors"