Index: sys/boot/efi/boot1/zfs_module.c =================================================================== --- sys/boot/efi/boot1/zfs_module.c +++ sys/boot/efi/boot1/zfs_module.c @@ -40,6 +40,15 @@ static dev_info_t *devices; +uint64_t +ldi_get_size(void *priv) +{ + dev_info_t *devinfo = priv; + + return (devinfo->dev->Media->BlockSize * + (devinfo->dev->Media->LastBlock + 1)); +} + static int vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) { Index: sys/boot/efi/loader/main.c =================================================================== --- sys/boot/efi/loader/main.c +++ sys/boot/efi/loader/main.c @@ -28,6 +28,7 @@ #include __FBSDID("$FreeBSD$"); +#include #include #include #include @@ -858,4 +859,14 @@ (void) zfs_probe_dev(devname, NULL); } } + +uint64_t +ldi_get_size(void *priv) +{ + int fd = (uintptr_t) priv; + uint64_t size; + + ioctl(fd, DIOCGMEDIASIZE, &size); + return (size); +} #endif Index: sys/boot/i386/common/drv.h =================================================================== --- sys/boot/i386/common/drv.h +++ sys/boot/i386/common/drv.h @@ -36,7 +36,7 @@ unsigned int slice; int part; daddr_t start; - int init; + uint64_t size; }; int drvread(struct dsk *dskp, void *buf, daddr_t lba, unsigned nblk); Index: sys/boot/i386/loader/main.c =================================================================== --- sys/boot/i386/loader/main.c +++ sys/boot/i386/loader/main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "bootstrap.h" @@ -454,4 +455,14 @@ zfs_probe_dev(devname, NULL); } } + +uint64_t +ldi_get_size(void *priv) +{ + int fd = (uintptr_t) priv; + uint64_t size; + + ioctl(fd, DIOCGMEDIASIZE, &size); + return (size); +} #endif Index: sys/boot/i386/zfsboot/zfsboot.c =================================================================== --- sys/boot/i386/zfsboot/zfsboot.c +++ sys/boot/i386/zfsboot/zfsboot.c @@ -467,6 +467,23 @@ return (newdsk); } +/* + * The "layered" ioctl to read disk/partition size. Unfortunately + * the zfsboot case is hardest, because we do not have full software + * stack available, so we need to do some manual work here. + */ +uint64_t +ldi_get_size(void *priv) +{ + struct dsk *dskp = priv; + uint64_t size = dskp->size; + + if (dskp->start == 0) + size = drvsize(dskp); + + return (size * DEV_BSIZE); +} + static void probe_drive(struct dsk *dsk) { @@ -547,6 +564,7 @@ if (memcmp(&ent->ent_type, &freebsd_zfs_uuid, sizeof(uuid_t)) == 0) { dsk->start = ent->ent_lba_start; + dsk->size = ent->ent_lba_end - ent->ent_lba_start + 1; dsk->slice = part + 1; dsk->part = 255; if (vdev_probe(vdev_read, dsk, NULL) == 0) { @@ -590,6 +608,7 @@ if (!dp[i].dp_typ) continue; dsk->start = dp[i].dp_start; + dsk->size = dp[i].dp_size; dsk->slice = i + 1; if (vdev_probe(vdev_read, dsk, NULL) == 0) { dsk = copy_dsk(dsk); @@ -644,7 +663,7 @@ dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1; dsk->part = 0; dsk->start = 0; - dsk->init = 0; + dsk->size = 0; bootinfo.bi_version = BOOTINFO_VERSION; bootinfo.bi_size = sizeof(bootinfo); @@ -695,7 +714,7 @@ dsk->slice = 0; dsk->part = 0; dsk->start = 0; - dsk->init = 0; + dsk->size = 0; probe_drive(dsk); } Index: sys/boot/zfs/libzfs.h =================================================================== --- sys/boot/zfs/libzfs.h +++ sys/boot/zfs/libzfs.h @@ -63,6 +63,7 @@ char *zfs_fmtdev(void *vdev); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); +uint64_t ldi_get_size(void *); void init_zfs_bootenv(char *currdev); int zfs_bootenv(const char *name); int zfs_belist_add(const char *name, uint64_t __unused); Index: sys/boot/zfs/zfsimpl.c =================================================================== --- sys/boot/zfs/zfsimpl.c +++ sys/boot/zfs/zfsimpl.c @@ -68,7 +68,7 @@ */ static spa_list_t zfs_pools; -static const dnode_phys_t *dnode_cache_obj = NULL; +static const dnode_phys_t *dnode_cache_obj; static uint64_t dnode_cache_bn; static char *dnode_cache_buf; static char *zap_scratch; @@ -523,12 +523,11 @@ int nkids, i, is_new; uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present; - if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, - DATA_TYPE_UINT64, 0, &guid) - || nvlist_find(nvlist, ZPOOL_CONFIG_ID, - DATA_TYPE_UINT64, 0, &id) - || nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, - DATA_TYPE_STRING, 0, &type)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, + NULL, &guid) + || nvlist_find(nvlist, ZPOOL_CONFIG_ID, DATA_TYPE_UINT64, NULL, &id) + || nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, + NULL, &type)) { printf("ZFS: can't find vdev details\n"); return (ENOENT); } @@ -546,15 +545,15 @@ is_offline = is_removed = is_faulted = is_degraded = isnt_present = 0; - nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, NULL, &is_offline); - nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, NULL, &is_removed); - nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, NULL, &is_faulted); - nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, NULL, &is_degraded); - nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64, 0, + nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64, NULL, &isnt_present); vdev = vdev_find(guid); @@ -573,17 +572,19 @@ vdev->v_id = id; vdev->v_top = pvdev != NULL ? pvdev : vdev; if (nvlist_find(nvlist, ZPOOL_CONFIG_ASHIFT, - DATA_TYPE_UINT64, 0, &ashift) == 0) + DATA_TYPE_UINT64, NULL, &ashift) == 0) { vdev->v_ashift = ashift; - else + } else { vdev->v_ashift = 0; + } if (nvlist_find(nvlist, ZPOOL_CONFIG_NPARITY, - DATA_TYPE_UINT64, 0, &nparity) == 0) + DATA_TYPE_UINT64, NULL, &nparity) == 0) { vdev->v_nparity = nparity; - else + } else { vdev->v_nparity = 0; + } if (nvlist_find(nvlist, ZPOOL_CONFIG_PATH, - DATA_TYPE_STRING, 0, &path) == 0) { + DATA_TYPE_STRING, NULL, &path) == 0) { if (strncmp(path, "/dev/", 5) == 0) path += 5; vdev->v_name = strdup(path); @@ -625,8 +626,8 @@ vdev->v_state = VDEV_STATE_CANT_OPEN; } - rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, - DATA_TYPE_NVLIST_ARRAY, &nkids, &kids); + rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, + &nkids, &kids); /* * Its ok if we don't have any kids. */ @@ -744,12 +745,17 @@ #endif static spa_t * -spa_create(uint64_t guid) +spa_create(uint64_t guid, const char *name) { spa_t *spa; - spa = malloc(sizeof(spa_t)); + if ((spa = malloc(sizeof(spa_t))) == NULL) + return (NULL); memset(spa, 0, sizeof(spa_t)); + if ((spa->spa_name = strdup(name)) == NULL) { + free(spa); + return (NULL); + } STAILQ_INIT(&spa->spa_vdevs); spa->spa_guid = guid; STAILQ_INSERT_TAIL(&zfs_pools, spa, spa_link); @@ -905,24 +911,39 @@ return (ret); } +uint64_t +vdev_label_offset(uint64_t psize, int l, uint64_t offset) +{ + uint64_t label_offset; + + if (l < VDEV_LABELS / 2) + label_offset = 0; + else + label_offset = psize - VDEV_LABELS * sizeof (vdev_label_t); + + return (offset + l * sizeof (vdev_label_t) + label_offset); +} + static int vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap) { vdev_t vtmp; vdev_phys_t *vdev_label = (vdev_phys_t *) zap_scratch; + vdev_phys_t *tmp_label = zfs_alloc(sizeof(vdev_phys_t)); spa_t *spa; vdev_t *vdev, *top_vdev, *pool_vdev; off_t off; blkptr_t bp; - const unsigned char *nvlist; + const unsigned char *nvlist = NULL; uint64_t val; uint64_t guid; + uint64_t best_txg = 0; uint64_t pool_txg, pool_guid; - uint64_t is_log; + uint64_t psize; const char *pool_name; const unsigned char *vdevs; const unsigned char *features; - int i, rc, is_newer; + int i, l, rc, is_newer; char *upbuf; const struct uberblock *up; @@ -933,26 +954,47 @@ memset(&vtmp, 0, sizeof(vtmp)); vtmp.v_phys_read = _read; vtmp.v_read_priv = read_priv; - off = offsetof(vdev_label_t, vl_vdev_phys); - BP_ZERO(&bp); - BP_SET_LSIZE(&bp, sizeof(vdev_phys_t)); - BP_SET_PSIZE(&bp, sizeof(vdev_phys_t)); - BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); - BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); - DVA_SET_OFFSET(BP_IDENTITY(&bp), off); - ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); - if (vdev_read_phys(&vtmp, &bp, vdev_label, off, 0)) - return (EIO); + psize = P2ALIGN(ldi_get_size(read_priv), + (uint64_t)sizeof (vdev_label_t)); - if (vdev_label->vp_nvlist[0] != NV_ENCODE_XDR) { - return (EIO); + for (l = 0; l < VDEV_LABELS; l++) { + off = vdev_label_offset(psize, l, + offsetof(vdev_label_t, vl_vdev_phys)); + + BP_ZERO(&bp); + BP_SET_LSIZE(&bp, sizeof(vdev_phys_t)); + BP_SET_PSIZE(&bp, sizeof(vdev_phys_t)); + BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); + BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); + DVA_SET_OFFSET(BP_IDENTITY(&bp), off); + ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); + + if (vdev_read_phys(&vtmp, &bp, tmp_label, off, 0)) + continue; + + if (tmp_label->vp_nvlist[0] != NV_ENCODE_XDR) + continue; + + nvlist = (const unsigned char *) tmp_label->vp_nvlist + 4; + if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, + DATA_TYPE_UINT64, NULL, &pool_txg) != 0) + continue; + + if (best_txg <= pool_txg) { + best_txg = pool_txg; + memcpy(vdev_label, tmp_label, sizeof (vdev_phys_t)); + } } + zfs_free(tmp_label, sizeof (vdev_phys_t)); + + if (vdev_label->vp_nvlist[0] != NV_ENCODE_XDR) + return (EIO); + nvlist = (const unsigned char *) vdev_label->vp_nvlist + 4; - if (nvlist_find(nvlist, - ZPOOL_CONFIG_VERSION, - DATA_TYPE_UINT64, 0, &val)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64, + NULL, &val) != 0) { return (EIO); } @@ -963,15 +1005,14 @@ } /* Check ZFS features for read */ - if (nvlist_find(nvlist, - ZPOOL_CONFIG_FEATURES_FOR_READ, - DATA_TYPE_NVLIST, 0, &features) == 0 - && nvlist_check_features_for_read(features) != 0) + if (nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, + DATA_TYPE_NVLIST, NULL, &features) == 0 && + nvlist_check_features_for_read(features) != 0) { return (EIO); + } - if (nvlist_find(nvlist, - ZPOOL_CONFIG_POOL_STATE, - DATA_TYPE_UINT64, 0, &val)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64, + NULL, &val) != 0) { return (EIO); } @@ -980,15 +1021,12 @@ return (EIO); } - if (nvlist_find(nvlist, - ZPOOL_CONFIG_POOL_TXG, - DATA_TYPE_UINT64, 0, &pool_txg) - || nvlist_find(nvlist, - ZPOOL_CONFIG_POOL_GUID, - DATA_TYPE_UINT64, 0, &pool_guid) - || nvlist_find(nvlist, - ZPOOL_CONFIG_POOL_NAME, - DATA_TYPE_STRING, 0, &pool_name)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, + NULL, &pool_txg) != 0 || + nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, + NULL, &pool_guid) != 0 || + nvlist_find(nvlist, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING, + NULL, &pool_name) != 0) { /* * Cache and spare devices end up here - just ignore * them. @@ -997,25 +1035,26 @@ return (EIO); } - is_log = 0; - (void) nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, 0, - &is_log); - if (is_log) + if (nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, + NULL, &val) == 0 && val != 0) { return (EIO); + } /* * Create the pool if this is the first time we've seen it. */ spa = spa_find_by_guid(pool_guid); - if (!spa) { - spa = spa_create(pool_guid); - spa->spa_name = strdup(pool_name); + if (spa == NULL) { + spa = spa_create(pool_guid, pool_name); + if (spa == NULL) + return (ENOMEM); } if (pool_txg > spa->spa_txg) { spa->spa_txg = pool_txg; is_newer = 1; - } else + } else { is_newer = 0; + } /* * Get the vdev tree and create our in-core copy of it. @@ -1023,23 +1062,21 @@ * be some kind of alias (overlapping slices, dangerously dedicated * disks etc). */ - if (nvlist_find(nvlist, - ZPOOL_CONFIG_GUID, - DATA_TYPE_UINT64, 0, &guid)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, + NULL, &guid) != 0) { return (EIO); } vdev = vdev_find(guid); if (vdev && vdev->v_phys_read) /* Has this vdev already been inited? */ return (EIO); - if (nvlist_find(nvlist, - ZPOOL_CONFIG_VDEV_TREE, - DATA_TYPE_NVLIST, 0, &vdevs)) { + if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, + NULL, &vdevs)) { return (EIO); } rc = vdev_init_from_nvlist(vdevs, NULL, &top_vdev, is_newer); - if (rc) + if (rc != 0) return (rc); /* @@ -1079,36 +1116,37 @@ */ upbuf = zfs_alloc(VDEV_UBERBLOCK_SIZE(vdev)); up = (const struct uberblock *)upbuf; - for (i = 0; - i < VDEV_UBERBLOCK_COUNT(vdev); - i++) { - off = VDEV_UBERBLOCK_OFFSET(vdev, i); - BP_ZERO(&bp); - DVA_SET_OFFSET(&bp.blk_dva[0], off); - BP_SET_LSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); - BP_SET_PSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); - BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); - BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); - ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); + for (l = 0; l < VDEV_LABELS; l++) { + for (i = 0; i < VDEV_UBERBLOCK_COUNT(vdev); i++) { + off = vdev_label_offset(psize, l, + VDEV_UBERBLOCK_OFFSET(vdev, i)); + BP_ZERO(&bp); + DVA_SET_OFFSET(&bp.blk_dva[0], off); + BP_SET_LSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); + BP_SET_PSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); + BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); + BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); + ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); - if (vdev_read_phys(vdev, &bp, upbuf, off, 0)) - continue; + if (vdev_read_phys(vdev, &bp, upbuf, off, 0)) + continue; - if (up->ub_magic != UBERBLOCK_MAGIC) - continue; - if (up->ub_txg < spa->spa_txg) - continue; - if (up->ub_txg > spa->spa_uberblock.ub_txg) { - spa->spa_uberblock = *up; - } else if (up->ub_txg == spa->spa_uberblock.ub_txg) { - if (up->ub_timestamp > spa->spa_uberblock.ub_timestamp) + if (up->ub_magic != UBERBLOCK_MAGIC) + continue; + if (up->ub_txg < spa->spa_txg) + continue; + if (up->ub_txg > spa->spa_uberblock.ub_txg || + (up->ub_txg == spa->spa_uberblock.ub_txg && + up->ub_timestamp > + spa->spa_uberblock.ub_timestamp)) { spa->spa_uberblock = *up; + } } } zfs_free(upbuf, VDEV_UBERBLOCK_SIZE(vdev)); vdev->spa = spa; - if (spap) + if (spap != NULL) *spap = spa; return (0); }