Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -213,6 +213,7 @@ extern const char *zpool_state_to_name(vdev_state_t, vdev_aux_t); extern const char *zpool_pool_state_to_name(pool_state_t); extern void zpool_free_handles(libzfs_handle_t *); +extern int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t, const char *); /* * Iterate over all active pools in the system. Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c @@ -4126,3 +4126,25 @@ libzfs_fini(hdl); return (ret); } + +int +zpool_nextboot(libzfs_handle_t *hdl, uint64_t pool_guid, uint64_t dev_guid, + const char *command) +{ + zfs_cmd_t zc = { 0 }; + nvlist_t *args; + char *packed; + size_t size; + int error; + + args = fnvlist_alloc(); + fnvlist_add_uint64(args, ZPOOL_CONFIG_POOL_GUID, pool_guid); + fnvlist_add_uint64(args, ZPOOL_CONFIG_GUID, dev_guid); + fnvlist_add_string(args, "command", command); + error = zcmd_write_src_nvlist(hdl, &zc, args); + if (error == 0) + error = ioctl(hdl->libzfs_fd, ZFS_IOC_NEXTBOOT, &zc); + zcmd_free_nvlists(&zc); + nvlist_free(args); + return (error); +} Index: sbin/Makefile =================================================================== --- sbin/Makefile +++ sbin/Makefile @@ -87,6 +87,7 @@ SUBDIR.${MK_PF}+= pflogd SUBDIR.${MK_QUOTAS}+= quotacheck SUBDIR.${MK_ROUTED}+= routed +SUBDIR.${MK_ZFS}+= zfsbootcfg SUBDIR.${MK_TESTS}+= tests Index: sbin/zfsbootcfg/Makefile =================================================================== --- /dev/null +++ sbin/zfsbootcfg/Makefile @@ -0,0 +1,27 @@ +# @(#)Makefile 8.4 (Berkeley) 6/22/95 +# $FreeBSD$ + +PROG= zfsbootcfg +WARNS?= 1 +MAN= zfsbootcfg.8 + +LIBADD+=zfs +LIBADD+=nvpair +LIBADD+=umem +LIBADD+=uutil +LIBADD+=geom + +CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include +CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs_core/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair +CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris +CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs +CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/head + +CFLAGS+= -DNEED_SOLARIS_BOOLEAN + +.include Index: sbin/zfsbootcfg/zfsbootcfg.8 =================================================================== --- /dev/null +++ sbin/zfsbootcfg/zfsbootcfg.8 @@ -0,0 +1,113 @@ +.\" Copyright (c) 2016 Andriy Gapon +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 7, 2016 +.Dt ZFSBOOTCFG 8 +.Os +.Sh NAME +.Nm zfsbootcfg +.Nd "specify zfsboot options for the next reboot" +.Sh SYNOPSIS +.Nm +.Ao Ar options Ac +.Sh DESCRIPTION +.Nm +is used to set +.Xr boot.config 5 +style options to be used by +.Xr zfsboot 8 +or +.Xr gptzfsboot 8 +the next time the machine is booted. +Once +.Xr zfsboot 8 +or +.Xr gptzfsboot 8 +reads the information, it is deleted. +So, in case of a boot failure, +the machine will automatically revert to its previous +boot configuration after a reboot. +The information is stored in a special reserved area of a ZFS pool. +.Xr zfsboot 8 +or +.Xr gptzfsboot 8 +read the boot options information from a first found disk of a first +found ZFS pool. +.Sh ENVIRONMENT +.Bl -tag -width vfs.zfs.boot.primary_pool -compact +.It Ev vfs.zfs.boot.primary_pool +The +.Xr kenv 1 +variable that identifies a pool for which the options are written. +.It Ev vfs.zfs.boot.primary_vdev +The +.Xr kenv 1 +variable that identifies a disk within the pool where the options +are written. +.El +.Sh EXAMPLES +Try to boot to a new +.Em boot environment +without changing the +.Cm bootfs +property of a pool: +.Pp +.Dl "zfsbootcfg ""zfs:tank/ROOT/newbe:"" +.Pp +To clear the boot options: +.Pp +.Dl "zfsbootcfg """" +.Sh SEE ALSO +.Xr boot.config 5 , +.Xr gptzfsboot 8 , +.Xr zfsboot 8 +.Sh HISTORY +.Nm +appeared in +.Fx 12.0 . +.Sh AUTHORS +This manual page was written by +.An Andriy Gapon Aq Mt avg@FreeBSD.org . +.Sh CAVEATS +At the moment +.Nm +uses the +.Ev vfs.zfs.boot.primary_pool +and +.Ev vfs.zfs.boot.primary_vdev +.Xr kenv 1 +variables to determine a ZFS pool and a disk in it where the options +are to be stored. +The variables are set by the ZFS boot chain, so there is an assumption +that the same boot disk is going to be used for the next reboot. +There is no +.Nm +option to specify a different pool or a different disk. +.Pp +.Nm +should be extended to be able to install new +.Xr zfsboot 8 +blocks in a ZFS pool. Index: sbin/zfsbootcfg/zfsbootcfg.c =================================================================== --- /dev/null +++ sbin/zfsbootcfg/zfsbootcfg.c @@ -0,0 +1,71 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Keep in sync with zfsboot.c. */ +#define MAX_COMMAND_LEN 512 + +int main(int argc, const char * const *argv) +{ + char buf[32]; + libzfs_handle_t *hdl; + uint64_t pool_guid; + uint64_t vdev_guid; + int zfs_fd; + int len; + + if (argc != 2) { + fprintf(stderr, "usage: zfsbootcfg \n"); + return (1); + } + + len = strlen(argv[1]); + if (len >= MAX_COMMAND_LEN) { + fprintf(stderr, "options string is too long\n"); + return (1); + } + + if (kenv(KENV_GET, "vfs.zfs.boot.primary_pool", buf, sizeof(buf)) <= 0) { + perror("can't get vfs.zfs.boot.primary_pool"); + return (1); + } + pool_guid = strtoumax(buf, NULL, 10); + if (pool_guid == 0) { + perror("can't parse vfs.zfs.boot.primary_pool"); + return (1); + } + + if (kenv(KENV_GET, "vfs.zfs.boot.primary_vdev", buf, sizeof(buf)) <= 0) { + perror("can't get vfs.zfs.boot.primary_vdev"); + return (1); + } + vdev_guid = strtoumax(buf, NULL, 10); + if (vdev_guid == 0) { + perror("can't parse vfs.zfs.boot.primary_vdev"); + return (1); + } + + if ((hdl = libzfs_init()) == NULL) { + (void) fprintf(stderr, "internal error: failed to " + "initialize ZFS library\n"); + return (1); + } + + if (zpool_nextboot(hdl, pool_guid, vdev_guid, argv[1]) != 0) { + perror("ZFS_IOC_NEXTBOOT failed"); + libzfs_fini(hdl); + return (1); + } + + libzfs_fini(hdl); + printf("zfs next boot options are successfully written\n"); + return (0); +} Index: sys/boot/i386/common/drv.h =================================================================== --- sys/boot/i386/common/drv.h +++ sys/boot/i386/common/drv.h @@ -40,9 +40,9 @@ }; int drvread(struct dsk *dskp, void *buf, daddr_t lba, unsigned nblk); -#ifdef GPT +#if defined(GPT) || defined(ZFS) int drvwrite(struct dsk *dskp, void *buf, daddr_t lba, unsigned nblk); -#endif /* GPT */ +#endif /* GPT || ZFS */ uint64_t drvsize(struct dsk *dskp); #endif /* !_DRV_H_ */ Index: sys/boot/i386/common/drv.c =================================================================== --- sys/boot/i386/common/drv.c +++ sys/boot/i386/common/drv.c @@ -91,7 +91,7 @@ return (0); } -#ifdef GPT +#if defined(GPT) || defined(ZFS) int drvwrite(struct dsk *dskp, void *buf, daddr_t lba, unsigned nblk) { @@ -114,4 +114,4 @@ } return (0); } -#endif /* GPT */ +#endif /* GPT || ZFS */ Index: sys/boot/i386/gptzfsboot/Makefile =================================================================== --- sys/boot/i386/gptzfsboot/Makefile +++ sys/boot/i386/gptzfsboot/Makefile @@ -19,7 +19,7 @@ CFLAGS= -DBOOTPROG=\"gptzfsboot\" \ -O1 \ - -DGPT -DBOOT2 \ + -DGPT -DZFS -DBOOT2 \ -DSIOPRT=${BOOT_COMCONSOLE_PORT} \ -DSIOFMT=${B2SIOFMT} \ -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \ Index: sys/boot/i386/zfsboot/Makefile =================================================================== --- sys/boot/i386/zfsboot/Makefile +++ sys/boot/i386/zfsboot/Makefile @@ -18,7 +18,7 @@ CFLAGS= -DBOOTPROG=\"zfsboot\" \ -O1 \ - -DBOOT2 \ + -DZFS -DBOOT2 \ -DSIOPRT=${BOOT_COMCONSOLE_PORT} \ -DSIOFMT=${B2SIOFMT} \ -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \ Index: sys/boot/i386/zfsboot/zfsboot.c =================================================================== --- sys/boot/i386/zfsboot/zfsboot.c +++ sys/boot/i386/zfsboot/zfsboot.c @@ -119,6 +119,7 @@ static struct dmadat *dmadat; void exit(int); +void reboot(void); static void load(void); static int parse(void); static void bios_getmem(void); @@ -175,7 +176,7 @@ n = size; if (*offp + n > zp->zp_size) n = zp->zp_size - *offp; - + rc = dnode_read(spa, dnode, *offp, start, n); if (rc) return (-1); @@ -260,6 +261,35 @@ } static int +vdev_write(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) +{ + char *p; + daddr_t lba; + unsigned int nb; + struct dsk *dsk = (struct dsk *) priv; + + if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) + return -1; + + p = buf; + lba = off / DEV_BSIZE; + lba += dsk->start; + while (bytes > 0) { + nb = bytes / DEV_BSIZE; + if (nb > READ_BUF_SIZE / DEV_BSIZE) + nb = READ_BUF_SIZE / DEV_BSIZE; + memcpy(dmadat->rdbuf, p, nb * DEV_BSIZE); + if (drvwrite(dsk, dmadat->rdbuf, lba, nb)) + return -1; + p += nb * DEV_BSIZE; + lba += nb; + bytes -= nb * DEV_BSIZE; + } + + return 0; +} + +static int xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte) { if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) { @@ -269,6 +299,52 @@ return 0; } +/* + * Read Pad2 (formerly "Boot Block Header") area of the first + * vdev label of the given vdev. + */ +static int +vdev_read_pad2(vdev_t *vdev, char *buf, size_t size) +{ + blkptr_t bp; + char *tmp = zap_scratch; + off_t off = offsetof(vdev_label_t, vl_pad2); + + if (size > VDEV_PAD_SIZE) + size = VDEV_PAD_SIZE; + + BP_ZERO(&bp); + BP_SET_LSIZE(&bp, VDEV_PAD_SIZE); + BP_SET_PSIZE(&bp, VDEV_PAD_SIZE); + BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); + BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); + DVA_SET_OFFSET(BP_IDENTITY(&bp), off); + if (vdev_read_phys(vdev, &bp, tmp, off, 0)) + return (EIO); + memcpy(buf, tmp, size); + return (0); +} + +static int +vdev_clear_pad2(vdev_t *vdev) +{ + char *zeroes = zap_scratch; + uint64_t *end; + off_t off = offsetof(vdev_label_t, vl_pad2); + + memset(zeroes, 0, VDEV_PAD_SIZE); + end = (uint64_t *)(zeroes + VDEV_PAD_SIZE); + /* ZIO_CHECKSUM_LABEL magic and pre-calcualted checksum for all zeros */ + end[-5] = 0x0210da7ab10c7a11; + end[-4] = 0x97f48f807f6e2a3f; + end[-3] = 0xaf909f1658aacefc; + end[-2] = 0xcbd1ea57ff6db48b; + end[-1] = 0x6ec692db0d465fab; + if (vdev_write(vdev, vdev->v_read_priv, off, zeroes, VDEV_PAD_SIZE)) + return (EIO); + return (0); +} + static void bios_getmem(void) { @@ -542,10 +618,12 @@ int main(void) { - int autoboot, i; dnode_phys_t dn; off_t off; struct dsk *dsk; + int autoboot, i; + int nextboot; + int rc; dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base); @@ -634,7 +712,39 @@ primary_spa = spa; primary_vdev = spa_get_primary_vdev(spa); - if (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0) { + nextboot = 0; + rc = vdev_read_pad2(primary_vdev, cmd, sizeof(cmd)); + if (vdev_clear_pad2(primary_vdev)) + printf("failed to clear pad2 area of primary vdev\n"); + if (rc == 0) { + if (*cmd) { + /* + * We could find an old-style ZFS Boot Block header here. + * Simply ignore it. + */ + if (*(uint64_t *)cmd != 0x2f5b007b10c) { + /* + * Note that parse() is destructive to cmd[] and we also want + * to honor RBX_QUIET option that could be present in cmd[]. + */ + nextboot = 1; + memcpy(cmddup, cmd, sizeof(cmd)); + if (parse()) { + printf("failed to parse pad2 area of primary vdev\n"); + reboot(); + } + if (!OPT_CHECK(RBX_QUIET)) + printf("zfs nextboot: %s\n", cmddup); + } + /* Do not process this command twice */ + *cmd = 0; + } + } else + printf("failed to read pad2 area of primary vdev\n"); + + /* Mount ZFS only if it's not already mounted via nextboot parsing. */ + if (zfsmount.spa == NULL && + (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0)) { printf("%s: failed to mount default pool %s\n", BOOTPROG, spa->spa_name); autoboot = 0; @@ -658,6 +768,10 @@ *cmd = 0; } + /* Do not risk waiting at the prompt forever. */ + if (nextboot && !autoboot) + reboot(); + /* * Try to exec /boot/loader. If interrupted by a keypress, * or in case of failure, try to load a kernel directly instead. @@ -707,6 +821,13 @@ void exit(int x) { + __exit(x); +} + +void +reboot(void) +{ + __exit(0); } static void Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h @@ -167,6 +167,8 @@ extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason); +extern int vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size); + #ifdef __cplusplus } #endif Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c @@ -871,6 +871,44 @@ return (error); } +int +vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size) +{ + spa_t *spa = vd->vdev_spa; + zio_t *zio; + char *pad2; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + int error; + + if (size > VDEV_PAD_SIZE) + return (EINVAL); + + if (!vd->vdev_ops->vdev_op_leaf) + return (ENODEV); + if (vdev_is_dead(vd)) + return (ENXIO); + + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + pad2 = zio_buf_alloc(VDEV_PAD_SIZE); + bzero(pad2, VDEV_PAD_SIZE); + memcpy(pad2, buf, size); + +retry: + zio = zio_root(spa, NULL, NULL, flags); + vdev_label_write(zio, vd, 0, pad2, + offsetof(vdev_label_t, vl_pad2), + VDEV_PAD_SIZE, NULL, NULL, flags); + error = zio_wait(zio); + if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { + flags |= ZIO_FLAG_TRYHARD; + goto retry; + } + + zio_buf_free(pad2, VDEV_PAD_SIZE); + return (error); +} + /* * ========================================================================== * uberblock load/sync Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -3471,6 +3471,53 @@ return (error); } +#ifdef __FreeBSD__ +static int +zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +{ + char name[MAXNAMELEN]; + spa_t *spa; + vdev_t *vd; + char *command; + uint64_t pool_guid; + uint64_t vdev_guid; + int error; + + if (nvlist_lookup_uint64(innvl, + ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) + return (EINVAL); + if (nvlist_lookup_uint64(innvl, + ZPOOL_CONFIG_GUID, &vdev_guid) != 0) + return (EINVAL); + if (nvlist_lookup_string(innvl, + "command", &command) != 0) + return (EINVAL); + + mutex_enter(&spa_namespace_lock); + spa = spa_by_guid(pool_guid, vdev_guid); + if (spa != NULL) + strcpy(name, spa_name(spa)); + mutex_exit(&spa_namespace_lock); + if (spa == NULL) + return (ENOENT); + + if ((error = spa_open(name, &spa, FTAG)) != 0) + return (error); + spa_vdev_state_enter(spa, SCL_ALL); + vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE); + if (vd == NULL) { + (void) spa_vdev_state_exit(spa, NULL, ENXIO); + spa_close(spa, FTAG); + return (ENODEV); + } + error = vdev_label_write_pad2(vd, command, strlen(command)); + (void) spa_vdev_state_exit(spa, NULL, 0); + txg_wait_synced(spa->spa_dsl_pool, 0); + spa_close(spa, FTAG); + return (error); +} +#endif + /* * The dp_config_rwlock must not be held when calling this, because the * unmount may need to write out data. @@ -6033,6 +6080,9 @@ zfs_secpolicy_config, POOL_CHECK_NONE); zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail, zfs_secpolicy_config, POOL_CHECK_NONE); + zfs_ioctl_register("fbsd_nextboot", ZFS_IOC_NEXTBOOT, + zfs_ioc_nextboot, zfs_secpolicy_config, NO_NAME, + POOL_CHECK_NONE, B_FALSE, B_FALSE); #endif } Index: sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h +++ sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h @@ -891,6 +891,7 @@ ZFS_IOC_BOOKMARK, ZFS_IOC_GET_BOOKMARKS, ZFS_IOC_DESTROY_BOOKMARKS, + ZFS_IOC_NEXTBOOT, ZFS_IOC_LAST } zfs_ioc_t;