Page MenuHomeFreeBSD

D51913.id160631.diff
No OneTemporary

D51913.id160631.diff

diff --git a/stand/libsa/zfs/zfsimpl.c b/stand/libsa/zfs/zfsimpl.c
--- a/stand/libsa/zfs/zfsimpl.c
+++ b/stand/libsa/zfs/zfsimpl.c
@@ -833,6 +833,14 @@
return (kid->v_read(kid, bp, buf, offset, bytes));
}
+/*
+ * List of vdevs that were fully initialized from their own label, but later a
+ * newer label was found that obsoleted the stale label, freeing its
+ * configuration tree. We keep those vdevs around, since a new configuration
+ * may include them.
+ */
+static vdev_list_t orphans = STAILQ_HEAD_INITIALIZER(orphans);
+
static vdev_t *
vdev_find(vdev_list_t *list, uint64_t guid)
{
@@ -854,6 +862,11 @@
vdev_t *vdev;
vdev_indirect_config_t *vic;
+ if ((vdev = vdev_find(&orphans, guid))) {
+ STAILQ_REMOVE(&orphans, vdev, vdev, v_childlink);
+ return (vdev);
+ }
+
vdev = calloc(1, sizeof(vdev_t));
if (vdev != NULL) {
STAILQ_INIT(&vdev->v_children);
@@ -1101,8 +1114,8 @@
}
static int
-vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t txg,
- const nvlist_t *nvlist)
+vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t label_guid,
+ uint64_t txg, const nvlist_t *nvlist)
{
vdev_t *top_vdev, *vdev;
nvlist_t **kids = NULL;
@@ -1116,6 +1129,7 @@
return (rc);
top_vdev->v_spa = spa;
top_vdev->v_top = top_vdev;
+ top_vdev->v_label = label_guid;
top_vdev->v_txg = txg;
(void )vdev_insert(spa->spa_root_vdev, top_vdev);
}
@@ -1160,12 +1174,14 @@
static int
vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist)
{
- uint64_t top_guid, txg;
+ uint64_t top_guid, label_guid, txg;
nvlist_t *vdevs;
int rc;
if (nvlist_find(nvlist, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64,
NULL, &top_guid, NULL) ||
+ nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
+ NULL, &label_guid, NULL) != 0 ||
nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64,
NULL, &txg, NULL) != 0 ||
nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
@@ -1174,7 +1190,7 @@
return (ENOENT);
}
- rc = vdev_from_nvlist(spa, top_guid, txg, vdevs);
+ rc = vdev_from_nvlist(spa, top_guid, label_guid, txg, vdevs);
nvlist_destroy(vdevs);
return (rc);
}
@@ -1271,7 +1287,10 @@
STAILQ_FOREACH_SAFE(kid, &vdev->v_children, v_childlink, safe)
vdev_free(kid);
- free(vdev);
+ if (vdev->v_phys_read != NULL)
+ STAILQ_INSERT_HEAD(&orphans, vdev, v_childlink);
+ else
+ free(vdev);
}
static int
@@ -1323,7 +1342,7 @@
* XXXGL: how can this happen?
*/
if (vdev == NULL)
- rc = vdev_from_nvlist(spa, guid, 0, kids[i]);
+ rc = vdev_from_nvlist(spa, guid, 0, 0, kids[i]);
else
rc = vdev_update_from_nvlist(spa->spa_root_vdev, guid,
kids[i]);
@@ -1344,6 +1363,53 @@
return (rc);
}
+static bool
+nvlist_find_child_guid(const nvlist_t *nvlist, uint64_t guid)
+{
+ nvlist_t **kids = NULL;
+ int nkids, i;
+ bool rv = false;
+
+ if (nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY,
+ &nkids, &kids, NULL) != 0)
+ nkids = 0;
+
+ for (i = 0; i < nkids; i++) {
+ uint64_t kid_guid;
+
+ if (nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
+ NULL, &kid_guid, NULL) != 0)
+ break;
+ if (kid_guid == guid)
+ rv = true;
+ else
+ rv = nvlist_find_child_guid(kids[i], guid);
+ if (rv)
+ break;
+ }
+
+ for (i = 0; i < nkids; i++)
+ nvlist_destroy(kids[i]);
+ free(kids);
+
+ return (rv);
+}
+
+static bool
+nvlist_find_vdev_guid(const nvlist_t *nvlist, uint64_t guid)
+{
+ nvlist_t *vdevs;
+ bool rv;
+
+ if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, NULL,
+ &vdevs, NULL) != 0)
+ return (false);
+ rv = nvlist_find_child_guid(vdevs, guid);
+ nvlist_destroy(vdevs);
+
+ return (rv);
+}
+
static spa_t *
spa_find_by_guid(uint64_t guid)
{
@@ -2012,7 +2078,7 @@
{
vdev_t vtmp;
spa_t *spa;
- vdev_t *vdev;
+ vdev_t *vdev, *top;
nvlist_t *nvl;
uint64_t val;
uint64_t guid, pool_guid, top_guid, txg;
@@ -2109,22 +2175,47 @@
nvlist_destroy(nvl);
return (ENOMEM);
}
- } else {
- struct vdev *kid;
-
- STAILQ_FOREACH(kid, &spa->spa_root_vdev->v_children,
- v_childlink)
- if (kid->v_guid == top_guid && kid->v_txg < txg) {
- printf("ZFS: pool %s vdev %s ignoring stale "
- "label from txg 0x%jx, using 0x%jx@0x%jx\n",
- spa->spa_name, kid->v_name,
- kid->v_txg, guid, txg);
+ }
+
+ /*
+ * Check if configuration is already known. If configuration is known
+ * and txg numbers don't match, we got 2x2 scenarios here. First, is
+ * the label being read right now _newer_ than the one read before.
+ * Second, is the vdev that provided the stale label _present_ in the
+ * newer configuration. If neither is true, we completely ignore the
+ * label.
+ */
+ STAILQ_FOREACH(top, &spa->spa_root_vdev->v_children, v_childlink)
+ if (top->v_guid == top_guid) {
+ bool newer, present;
+
+ if (top->v_txg == txg)
+ break;
+ newer = (top->v_txg < txg);
+ present = newer ?
+ nvlist_find_vdev_guid(nvl, top->v_label) :
+ (vdev_find(&top->v_children, guid) != NULL);
+ printf("ZFS: pool %s vdev %s %s stale label from "
+ "0x%jx@0x%jx, %s 0x%jx@0x%jx\n",
+ spa->spa_name, top->v_name,
+ present ? "using" : "ignoring",
+ newer ? top->v_label : guid,
+ newer ? top->v_txg : txg,
+ present ? "referred by" : "using",
+ newer ? guid : top->v_label,
+ newer ? txg : top->v_txg);
+ if (newer) {
STAILQ_REMOVE(&spa->spa_root_vdev->v_children,
- kid, vdev, v_childlink);
- vdev_free(kid);
+ top, vdev, v_childlink);
+ vdev_free(top);
break;
+ } else if (present) {
+ break;
+ } else {
+ nvlist_destroy(nvl);
+ return (EIO);
}
- }
+ }
/*
* Get the vdev tree and create our in-core copy of it.
diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h
--- a/sys/cddl/boot/zfs/zfsimpl.h
+++ b/sys/cddl/boot/zfs/zfsimpl.h
@@ -2024,7 +2024,8 @@
vdev_list_t v_children; /* children of this vdev */
const char *v_name; /* vdev name */
uint64_t v_guid; /* vdev guid */
- uint64_t v_txg; /* most recent transaction */
+ uint64_t v_label; /* label instantiated from (top vdev) */
+ uint64_t v_txg; /* most recent transaction (top vdev) */
uint64_t v_id; /* index in parent */
uint64_t v_psize; /* physical device capacity */
int v_ashift; /* offset to block shift */

File Metadata

Mime Type
text/plain
Expires
Tue, Apr 7, 4:23 AM (4 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
30974190
Default Alt Text
D51913.id160631.diff (6 KB)

Event Timeline