Index: Makefile.inc1 =================================================================== --- Makefile.inc1 +++ Makefile.inc1 @@ -2734,7 +2734,8 @@ lib/libufs \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_secure_lib_libssl} \ - ${_lib_libldns} ${_secure_lib_libssh} + ${_lib_libldns} ${_secure_lib_libssh} \ + lib/libzfsbootenv .if ${MK_DIALOG} != "no" _prebuild_libs+= gnu/lib/libdialog Index: cddl/contrib/opensolaris/cmd/zinject/translate.c =================================================================== --- cddl/contrib/opensolaris/cmd/zinject/translate.c +++ cddl/contrib/opensolaris/cmd/zinject/translate.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, 2020 by Delphix. All rights reserved. */ #include @@ -484,7 +484,7 @@ record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; break; case TYPE_LABEL_PAD2: - record->zi_start = offsetof(vdev_label_t, vl_pad2); + record->zi_start = offsetof(vdev_label_t, vl_be); record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; break; } Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -836,6 +836,8 @@ extern int zpool_read_label(int, nvlist_t **); extern int zpool_read_all_labels(int, nvlist_t **); extern int zpool_clear_label(int); +extern int zpool_set_bootenv(zpool_handle_t *, const nvlist_t *); +extern int zpool_get_bootenv(zpool_handle_t *, nvlist_t **); /* is this zvol valid for use as a dump device? 
*/ extern int zvol_check_dump_config(char *); Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright 2016 Nexenta Systems, Inc. * Copyright 2016 Igor Kozhukhov @@ -395,7 +395,7 @@ * Assuming bootfs is a valid dataset name. */ static boolean_t -bootfs_name_valid(const char *pool, char *bootfs) +bootfs_name_valid(const char *pool, const char *bootfs) { int len = strlen(pool); @@ -4229,6 +4229,38 @@ (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj); } free(mntpnt); +} + +int +zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap) +{ + int error = lzc_set_bootenv(zhp->zpool_name, envmap); + if (error != 0) { + (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, + dgettext(TEXT_DOMAIN, + "error setting bootenv in pool '%s'"), zhp->zpool_name); + } + + return (error); +} + +int +zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp) +{ + nvlist_t *nvl; + int error; + + nvl = NULL; + error = lzc_get_bootenv(zhp->zpool_name, &nvl); + if (error != 0) { + (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, + dgettext(TEXT_DOMAIN, + "error getting bootenv in pool '%s'"), zhp->zpool_name); + } else { + *nvlp = nvl; + } + + return (error); } #ifdef illumos Index: cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h =================================================================== --- cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h +++ cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2012, 2016 
by Delphix. All rights reserved. + * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2013 by Martin Matuska . All rights reserved. * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Datto Inc. @@ -105,6 +105,8 @@ int lzc_pool_checkpoint(const char *); int lzc_pool_checkpoint_discard(const char *); +int lzc_set_bootenv(const char *, const nvlist_t *); +int lzc_get_bootenv(const char *, nvlist_t **); #ifdef __cplusplus } #endif Index: cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c +++ cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 RackTop Systems. @@ -1209,4 +1209,22 @@ fnvlist_free(args); return (error); +} + +/* + * Set the bootenv contents for the given pool. + */ +int +lzc_set_bootenv(const char *pool, const nvlist_t *env) +{ + return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL)); +} + +/* + * Get the contents of the bootenv of the given pool. 
+ */ +int +lzc_get_bootenv(const char *pool, nvlist_t **outnvl) +{ + return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl)); } Index: lib/libbe/Makefile =================================================================== --- lib/libbe/Makefile +++ lib/libbe/Makefile @@ -17,7 +17,9 @@ LIBADD+= zfs LIBADD+= nvpair +LIBADD+= zfsbootenv +CFLAGS+= -I${SRCTOP}/lib/libzfsbootenv CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include Index: lib/libbe/be.h =================================================================== --- lib/libbe/be.h +++ lib/libbe/be.h @@ -81,6 +81,7 @@ void be_prop_list_free(nvlist_t *be_list); int be_activate(libbe_handle_t *, const char *, bool); +int be_deactivate(libbe_handle_t *, const char *, bool); bool be_is_auto_snapshot_name(libbe_handle_t *, const char *); Index: lib/libbe/be.c =================================================================== --- lib/libbe/be.c +++ lib/libbe/be.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "be.h" #include "be_impl.h" @@ -1219,43 +1220,20 @@ } #endif /* SOON */ -static int -be_set_nextboot(libbe_handle_t *lbh, nvlist_t *config, uint64_t pool_guid, - const char *zfsdev) -{ - nvlist_t **child; - uint64_t vdev_guid; - int c, children; - - if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &child, - &children) == 0) { - for (c = 0; c < children; ++c) - if (be_set_nextboot(lbh, child[c], pool_guid, zfsdev) != 0) - return (1); - return (0); - } - - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, - &vdev_guid) != 0) { - return (1); - } - - if (zpool_nextboot(lbh->lzh, pool_guid, vdev_guid, zfsdev) != 0) { - perror("ZFS_IOC_NEXTBOOT failed"); - return (1); - } - - return (0); -} - /* - * Deactivate old BE dataset; currently just sets canmount=noauto + * Deactivate old BE dataset; currently just sets canmount=noauto or + * resets boot once configuration. 
*/ -static int -be_deactivate(libbe_handle_t *lbh, const char *ds) +int +be_deactivate(libbe_handle_t *lbh, const char *ds, bool temporary) { zfs_handle_t *zfs; + if (temporary) { + return (lzbe_set_boot_device( + zpool_get_name(lbh->active_phandle), NULL)); + } + if ((zfs = zfs_open(lbh->lzh, ds, ZFS_TYPE_DATASET)) == NULL) return (1); if (zfs_prop_set(zfs, "canmount", "noauto") != 0) @@ -1268,10 +1246,8 @@ be_activate(libbe_handle_t *lbh, const char *bootenv, bool temporary) { char be_path[BE_MAXPATHLEN]; - char buf[BE_MAXPATHLEN]; - nvlist_t *config, *dsprops, *vdevs; + nvlist_t *dsprops; char *origin; - uint64_t pool_guid; zfs_handle_t *zhp; int err; @@ -1282,27 +1258,10 @@ return (set_error(lbh, err)); if (temporary) { - config = zpool_get_config(lbh->active_phandle, NULL); - if (config == NULL) - /* config should be fetchable... */ - return (set_error(lbh, BE_ERR_UNKNOWN)); - - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &pool_guid) != 0) - /* Similarly, it shouldn't be possible */ - return (set_error(lbh, BE_ERR_UNKNOWN)); - - /* Expected format according to zfsbootcfg(8) man */ - snprintf(buf, sizeof(buf), "zfs:%s:", be_path); - - /* We have no config tree */ - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &vdevs) != 0) - return (set_error(lbh, BE_ERR_NOPOOL)); - - return (be_set_nextboot(lbh, vdevs, pool_guid, buf)); + return (lzbe_set_boot_device( + zpool_get_name(lbh->active_phandle), be_path)); } else { - if (be_deactivate(lbh, lbh->bootfs) != 0) + if (be_deactivate(lbh, lbh->bootfs, false) != 0) return (-1); /* Obtain bootenv zpool */ Index: lib/libbe/be_impl.h =================================================================== --- lib/libbe/be_impl.h +++ lib/libbe/be_impl.h @@ -63,6 +63,7 @@ nvlist_t *list; libbe_handle_t *lbh; bool single_object; /* list will contain props directly */ + char *bootonce; } prop_data_t; int prop_list_builder_cb(zfs_handle_t *, void *); Index: lib/libbe/be_info.c 
=================================================================== --- lib/libbe/be_info.c +++ lib/libbe/be_info.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include "be.h" #include "be_impl.h" @@ -108,6 +109,7 @@ data.lbh = lbh; data.list = dsnvl; data.single_object = false; + data.bootonce = NULL; return (be_proplist_update(&data)); } @@ -121,6 +123,7 @@ data.lbh = lbh; data.list = props; data.single_object = true; + data.bootonce = NULL; if ((snap_hdl = zfs_open(lbh->lzh, name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT)) == NULL) return (BE_ERR_ZFSOPEN); @@ -140,6 +143,7 @@ data.lbh = lbh; data.list = props; data.single_object = false; + data.bootonce = NULL; if ((ds_hdl = zfs_open(lbh->lzh, name, ZFS_TYPE_FILESYSTEM)) == NULL) return (BE_ERR_ZFSOPEN); @@ -179,6 +183,10 @@ dataset = zfs_get_name(zfs_hdl); nvlist_add_string(props, "dataset", dataset); + if (data->bootonce != NULL && + strcmp(dataset, data->bootonce) == 0) + nvlist_add_boolean_value(props, "bootonce", true); + name = strrchr(dataset, '/') + 1; nvlist_add_string(props, "name", name); @@ -245,6 +253,9 @@ if ((root_hdl = zfs_open(data->lbh->lzh, data->lbh->root, ZFS_TYPE_FILESYSTEM)) == NULL) return (BE_ERR_ZFSOPEN); + + (void) lzbe_get_boot_device(zpool_get_name(data->lbh->active_phandle), + &data->bootonce); /* XXX TODO: some error checking here */ zfs_iter_filesystems(root_hdl, prop_list_builder_cb, data); Index: lib/libbe/libbe.3 =================================================================== --- lib/libbe/libbe.3 +++ lib/libbe/libbe.3 @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 16, 2019 +.Dd July 22, 2020 .Dt LIBBE 3 .Os .Sh NAME @@ -78,7 +78,11 @@ .Pp .Ft int .Fn be_activate "libbe_handle_t *hdl" "const char *be_name" "bool temporary" +.Pp .Ft int +.Fn be_deactivate "libbe_handle_t *hdl" "const char *be_name" "bool temporary" +.Pp +.Ft int .Fn be_destroy "libbe_handle_t *hdl" "const char *be_name" "int options" .Pp .Ft void @@ -270,8 +274,24 @@ .Fa temporary flag 
is set, then it will be active for the next boot only, as done by .Xr zfsbootcfg 8 . -Next boot functionality is currently only available when booting in x86 BIOS -mode. +.Pp +The +.Fn be_deactivate +function deactivates a boot environment. +If the +.Fa temporary +flag is set, then it will cause removal of boot once configuration, set by +.Fn be_activate +function or by +.Xr zfsbootcfg 8 . +If the +.Fa temporary +flag is not set, +.Fn be_deactivate +function will set zfs +.Dv canmount +property to +.Dv noauto . .Pp The .Fn be_destroy Index: lib/libzfsbootenv/Makefile =================================================================== --- /dev/null +++ lib/libzfsbootenv/Makefile @@ -0,0 +1,30 @@ +#! $FreeBSD$ + +.include + +PACKAGE= runtime +LIB= zfsbootenv +SHLIB_MAJOR= 0 +WARNS?= 1 + +LIBADD+= zfs +LIBADD+= nvpair + +INCS= libzfsbootenv.h +SRCS= lzbe_device.c lzbe_util.c lzbe_pair.c + +CFLAGS+= -I${.CURDIR} +CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include +CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs_core/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair +CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris +CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs +CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/head + +CFLAGS+= -DNEED_SOLARIS_BOOLEAN + +.include Index: lib/libzfsbootenv/libzfsbootenv.h =================================================================== --- /dev/null +++ lib/libzfsbootenv/libzfsbootenv.h @@ -0,0 +1,47 @@ +/*- + * Copyright 2020 Toomas Soome + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _LIBZFSBOOTENV_H +#define _LIBZFSBOOTENV_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern int lzbe_set_pair(const char *, const char *, const char *, + const char *); +extern int lzbe_set_boot_device(const char *, const char *); +extern int lzbe_get_boot_device(const char *, char **); +extern int lzbe_bootenv_print(const char *, FILE *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBZFSBOOTENV_H */ Index: lib/libzfsbootenv/lzbe_device.c =================================================================== --- /dev/null +++ lib/libzfsbootenv/lzbe_device.c @@ -0,0 +1,145 @@ +/*- + * Copyright 2020 Toomas Soome + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +/* + * Store device name to zpool label bootenv area. + */ +int +lzbe_set_boot_device(const char *pool, const char *device) +{ + libzfs_handle_t *hdl; + zpool_handle_t *zphdl; + nvlist_t *nv; + char *descriptor; + int rv = -1; + + if (pool == NULL || *pool == '\0') + return (rv); + + if ((hdl = libzfs_init()) == NULL) { + return (rv); + } + + zphdl = zpool_open(hdl, pool); + if (zphdl == NULL) { + libzfs_fini(hdl); + return (rv); + } + + if (zpool_get_bootenv(zphdl, &nv) != 0) + nv = fnvlist_alloc(); + rv = 0; /* a missing bootenv is not an error: start from an empty list */ + + /* + * If device name is empty, remove boot device configuration.
+ */ + if (device == NULL || *device == '\0') { + if (nvlist_exists(nv, "command")) + fnvlist_remove(nv, "command"); + } else { + /* + * Use device name directly if it does start with + * prefix "zfs:". Otherwise, add prefix and suffix. + */ + if (strncmp(device, "zfs:", 4) == 0) { + fnvlist_add_string(nv, "command", device); + } else { + descriptor = NULL; + if (asprintf(&descriptor, "zfs:%s:", device) > 0) + fnvlist_add_string(nv, "command", descriptor); + else + rv = ENOMEM; + free(descriptor); + } + } + if (rv == 0) + rv = zpool_set_bootenv(zphdl, nv); + fnvlist_free(nv); + zpool_close(zphdl); + libzfs_fini(hdl); + return (rv); +} + +/* + * Return boot device name from bootenv, if set; caller frees *device. + */ +int +lzbe_get_boot_device(const char *pool, char **device) +{ + libzfs_handle_t *hdl; + zpool_handle_t *zphdl; + nvlist_t *nv = NULL; + char *val; + int rv = -1; + + if (pool == NULL || *pool == '\0' || device == NULL) + return (rv); + + if ((hdl = libzfs_init()) == NULL) { + return (rv); + } + + zphdl = zpool_open(hdl, pool); + if (zphdl == NULL) { + libzfs_fini(hdl); + return (rv); + } + + rv = zpool_get_bootenv(zphdl, &nv); + if (rv == 0) { + rv = nvlist_lookup_string(nv, "command", &val); + if (rv == 0) { + /* + * zfs device descriptor is in form of "zfs:dataset:", + * we only do need dataset name. + */ + if (strncmp(val, "zfs:", 4) == 0) + val += 4; + val = strdup(val); + if (val != NULL) { + size_t len = strlen(val); + + if (len > 0 && val[len - 1] == ':') + val[len - 1] = '\0'; + *device = val; + } else { + rv = ENOMEM; + } + } + } + + nvlist_free(nv); + zpool_close(zphdl); + libzfs_fini(hdl); + return (rv); +} Index: lib/libzfsbootenv/lzbe_pair.c =================================================================== --- /dev/null +++ lib/libzfsbootenv/lzbe_pair.c @@ -0,0 +1,76 @@ +/*- + * Copyright 2020 Toomas Soome + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +/* + * Store pair defined by key, type and value. 
+ */ +int +lzbe_set_pair(const char *pool, const char *key, const char *type, + const char *value) +{ + libzfs_handle_t *hdl; + zpool_handle_t *zphdl; + nvlist_t *nv = NULL; + int rv = -1; + + if (pool == NULL || *pool == '\0' || key == NULL || type == NULL) + return (rv); + + if ((hdl = libzfs_init()) == NULL) { + return (rv); + } + + zphdl = zpool_open(hdl, pool); + if (zphdl == NULL) { + libzfs_fini(hdl); + return (rv); + } + + rv = zpool_get_bootenv(zphdl, &nv); + if (rv == 0) { + if (strcmp(type, "DATA_TYPE_STRING") == 0) { + if (value == NULL || *value == '\0') { + if (nvlist_exists(nv, key)) + fnvlist_remove(nv, key); + } else { + fnvlist_add_string(nv, key, value); + } + } + rv = zpool_set_bootenv(zphdl, nv); + } + + nvlist_free(nv); + zpool_close(zphdl); + libzfs_fini(hdl); + return (rv); +} Index: lib/libzfsbootenv/lzbe_util.c =================================================================== --- /dev/null +++ lib/libzfsbootenv/lzbe_util.c @@ -0,0 +1,66 @@ +/*- + * Copyright 2020 Toomas Soome + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +/* + * Output bootenv information. + */ +int +lzbe_bootenv_print(const char *pool, FILE *of) +{ + libzfs_handle_t *hdl; + zpool_handle_t *zphdl; + nvlist_t *nv = NULL; + int rv = -1; + + if (pool == NULL || *pool == '\0' || of == NULL) + return (rv); + + if ((hdl = libzfs_init()) == NULL) { + return (rv); + } + + zphdl = zpool_open(hdl, pool); + if (zphdl == NULL) { + libzfs_fini(hdl); + return (rv); + } + + rv = zpool_get_bootenv(zphdl, &nv); + if (rv == 0) + nvlist_print(of, nv); + + nvlist_free(nv); + zpool_close(zphdl); + libzfs_fini(hdl); + return (rv); +} Index: rescue/rescue/Makefile =================================================================== --- rescue/rescue/Makefile +++ rescue/rescue/Makefile @@ -129,7 +129,7 @@ CRUNCH_LIBS+= -l80211 -lalias -lcam -lncursesw -ldevstat -lipsec -llzma .if ${MK_ZFS} != "no" CRUNCH_LIBS+= -lavl -lzpool -lzfs_core -lzfs -lnvpair -lpthread -luutil -lumem -CRUNCH_LIBS+= -lbe +CRUNCH_LIBS+= -lbe -lzfsbootenv .else # liblzma needs pthread CRUNCH_LIBS+= -lpthread Index: sbin/bectl/Makefile =================================================================== --- sbin/bectl/Makefile +++ sbin/bectl/Makefile @@ -11,7 +11,9 @@ LIBADD+= jail LIBADD+= nvpair LIBADD+= util +LIBADD+= zfsbootenv +CFLAGS+= -I${SRCTOP}/lib/libzfsbootenv CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common CFLAGS+=
-I${SRCTOP}/sys/cddl/compat/opensolaris CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common Index: sbin/bectl/bectl.8 =================================================================== --- sbin/bectl/bectl.8 +++ sbin/bectl/bectl.8 @@ -17,7 +17,7 @@ .\" .\" $FreeBSD$ .\" -.Dd April 18, 2020 +.Dd July 18, 2020 .Dt BECTL 8 .Os .Sh NAME @@ -26,7 +26,7 @@ .Sh SYNOPSIS .Nm .Cm activate -.Op Fl t +.Op Fl t | Fl T .Ar beName .Nm .Cm check @@ -95,15 +95,22 @@ .Bl -tag -width activate .It Xo .Cm activate -.Op Fl t +.Op Fl t | Fl T .Ar beName .Xc Activate the given .Ar beName as the default boot filesystem. If the -.Op Fl t +.Fl t flag is given, this takes effect only for the next boot. +Flag +.Fl T +removes temporary boot once configuration. +Without temporary configuration, the next boot will use zfs dataset specified +in boot pool +.Ar bootfs +property. .It Xo .Cm check .Xc @@ -260,8 +267,10 @@ .Pq Em \&N ; active on reboot .Pq Em \&R ; -or both -.Pq Em \&NR . +is used on next boot once +.Pq Em \&T ; +or combination of +.Pq Em \&NRT . 
.Pp .Bl -tag -width indent .It Fl a Index: sbin/bectl/bectl.c =================================================================== --- sbin/bectl/bectl.c +++ sbin/bectl/bectl.c @@ -72,6 +72,7 @@ "\tbectl add (path)*\n" #endif "\tbectl activate [-t] beName\n" + "\tbectl activate [-T]\n" "\tbectl check\n" "\tbectl create [-r] [-e {nonActiveBe | beName@snapshot}] beName\n" "\tbectl create [-r] beName@snapshot\n" @@ -139,14 +140,22 @@ bectl_cmd_activate(int argc, char *argv[]) { int err, opt; - bool temp; + bool temp, reset; temp = false; - while ((opt = getopt(argc, argv, "t")) != -1) { + reset = false; + while ((opt = getopt(argc, argv, "tT")) != -1) { switch (opt) { case 't': + if (reset) + return (usage(false)); temp = true; break; + case 'T': + if (temp) + return (usage(false)); + reset = true; + break; default: fprintf(stderr, "bectl activate: unknown option '-%c'\n", optopt); @@ -157,11 +166,18 @@ argc -= optind; argv += optind; - if (argc != 1) { + if (argc != 1 && (!reset || argc != 0)) { fprintf(stderr, "bectl activate: wrong number of arguments\n"); return (usage(false)); } + if (reset) { + if ((err = be_deactivate(be, NULL, reset)) == 0) + printf("Temporary activation removed\n"); + else + printf("Failed to remove temporary activation\n"); + return (err); + } /* activate logic goes here */ if ((err = be_activate(be, argv[0], temp)) != 0) Index: sbin/bectl/bectl_list.c =================================================================== --- sbin/bectl/bectl_list.c +++ sbin/bectl/bectl_list.c @@ -182,7 +182,7 @@ const char *oname; char *dsname, *propstr; int active_colsz; - boolean_t active_now, active_reboot; + boolean_t active_now, active_reboot, bootonce; dsname = NULL; originprops = NULL; @@ -228,6 +228,11 @@ if (nvlist_lookup_boolean_value(dsprops, "nextboot", &active_reboot) == 0 && active_reboot) { printf("R"); + active_colsz--; + } + if (nvlist_lookup_boolean_value(dsprops, "bootonce", + &bootonce) == 0 && bootonce) { + printf("T"); active_colsz--; } if
(active_colsz == pc->active_colsz_def) { Index: sbin/zfsbootcfg/Makefile =================================================================== --- sbin/zfsbootcfg/Makefile +++ sbin/zfsbootcfg/Makefile @@ -2,26 +2,10 @@ # $FreeBSD$ PROG= zfsbootcfg -WARNS?= 1 MAN= zfsbootcfg.8 -LIBADD+=zfs -LIBADD+=nvpair -LIBADD+=umem -LIBADD+=uutil -LIBADD+=geom +LIBADD+=zfsbootenv -CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include -CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs_core/common -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair -CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris -CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs -CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/head - -CFLAGS+= -DNEED_SOLARIS_BOOLEAN +CFLAGS+=-I${SRCTOP}/lib/libzfsbootenv .include Index: sbin/zfsbootcfg/zfsbootcfg.c =================================================================== --- sbin/zfsbootcfg/zfsbootcfg.c +++ sbin/zfsbootcfg/zfsbootcfg.c @@ -32,115 +32,105 @@ #include #include #include +#include #include #include +#include -#include +#include -/* Keep in sync with zfsboot.c. 
*/ -#define MAX_COMMAND_LEN 512 +#ifndef ZFS_MAXNAMELEN +#define ZFS_MAXNAMELEN 256 +#endif int -install_bootonce(libzfs_handle_t *hdl, uint64_t pool_guid, nvlist_t *nv, - const char * const data) +main(int argc, char * const *argv) { - nvlist_t **child; - uint_t children = 0; - uint64_t guid; + char buf[ZFS_MAXNAMELEN], *name; + const char *key, *value, *type; int rv; + bool print; - (void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, - &children); - - for (int c = 0; c < children; c++) { - rv = install_bootonce(hdl, pool_guid, child[c], data); + name = NULL; + key = NULL; + type = NULL; + value = NULL; + print = false; + while ((rv = getopt(argc, argv, "k:pt:v:z:")) != -1) { + switch (rv) { + case 'k': + key = optarg; + break; + case 'p': + print = true; + break; + case 't': + type = optarg; + break; + case 'v': + value = optarg; + break; + case 'z': + name = optarg; + break; + } } - if (children > 0) - return (rv); + argc -= optind; + argv += optind; - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) { - perror("can't get vdev guid"); - return (1); - } - if (zpool_nextboot(hdl, pool_guid, guid, data) != 0) { - perror("ZFS_IOC_NEXTBOOT failed"); - return (1); - } - return (0); -} + if (argc == 1) + value = argv[0]; -int main(int argc, const char * const *argv) -{ - char buf[32], *name; - libzfs_handle_t *hdl; - zpool_handle_t *zphdl; - uint64_t pool_guid; - nvlist_t *nv, *config; - int rv; - int len; - - if (argc != 2) { + if (argc > 1) { fprintf(stderr, "usage: zfsbootcfg \n"); return (1); } - len = strlen(argv[1]); - if (len >= MAX_COMMAND_LEN) { - fprintf(stderr, "options string is too long\n"); - return (1); - } + if (name == NULL) { + rv = kenv(KENV_GET, "vfs.root.mountfrom", buf, sizeof(buf)); + if (rv <= 0) { + perror("can't get vfs.root.mountfrom"); + return (1); + } - if (kenv(KENV_GET, "vfs.root.mountfrom", buf, sizeof(buf)) <= 0) { - perror("can't get vfs.root.mountfrom"); - return (1); + if (strncmp(buf, "zfs:", 4) == 0) { 
+ name = strchr(buf + 4, '/'); + if (name != NULL) + *name = '\0'; + name = buf + 4; + } else { + perror("not a zfs root"); + return (1); + } } - if (strncmp(buf, "zfs:", 4) == 0) { - name = strchr(buf + 4, '/'); - if (name != NULL) - *name = '\0'; - name = buf + 4; - } else { - perror("not a zfs root"); - return (1); - } - - if ((hdl = libzfs_init()) == NULL) { - (void) fprintf(stderr, "internal error: failed to " - "initialize ZFS library\n"); - return (1); - } + rv = 0; + if (key != NULL || value != NULL) { + if (type == NULL) + type = "DATA_TYPE_STRING"; - zphdl = zpool_open(hdl, name); - if (zphdl == NULL) { - perror("can't open pool"); - libzfs_fini(hdl); - return (1); - } + if (key == NULL || strcmp(key, "command") == 0) + rv = lzbe_set_boot_device(name, value); + else + rv = lzbe_set_pair(name, key, type, value); - pool_guid = zpool_get_prop_int(zphdl, ZPOOL_PROP_GUID, NULL); + if (rv == 0) + printf("zfs bootenv is successfully written\n"); + else + printf("error: %d\n", rv); + } else { + char *ptr; - config = zpool_get_config(zphdl, NULL); - if (config == NULL) { - perror("can't get pool config"); - zpool_close(zphdl); - libzfs_fini(hdl); - return (1); + if (lzbe_get_boot_device(name, &ptr) == 0) { + printf("zfs:%s:\n", ptr); + free(ptr); + } } - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) != 0) { - perror("failed to get vdev tree"); - zpool_close(zphdl); - libzfs_fini(hdl); - return (1); + if (print) { + rv = lzbe_bootenv_print(name, stdout); } - rv = install_bootonce(hdl, pool_guid, nv, argv[1]); - - zpool_close(zphdl); - libzfs_fini(hdl); - if (rv == 0) - printf("zfs next boot options are successfully written\n"); return (rv); } Index: share/mk/bsd.libnames.mk =================================================================== --- share/mk/bsd.libnames.mk +++ share/mk/bsd.libnames.mk @@ -165,6 +165,7 @@ LIBZ?= ${LIBDESTDIR}${LIBDIR_BASE}/libz.a LIBZFS?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfs.a LIBZFS_CORE?= 
${LIBDESTDIR}${LIBDIR_BASE}/libzfs_core.a +LIBZFSBOOTENV?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfsbootenv.a LIBZPOOL?= ${LIBDESTDIR}${LIBDIR_BASE}/libzpool.a # enforce the 2 -lpthread and -lc to always be the last in that exact order Index: share/mk/src.libnames.mk =================================================================== --- share/mk/src.libnames.mk +++ share/mk/src.libnames.mk @@ -197,6 +197,7 @@ z \ zfs_core \ zfs \ + zfsbootenv \ zpool \ .if ${MK_BLACKLIST} != "no" @@ -376,9 +377,10 @@ _DP_ipf= kvm _DP_zfs= md pthread umem util uutil m nvpair avl bsdxml geom nvpair z \ zfs_core +_DP_zfsbootenv= zfs nvpair _DP_zfs_core= nvpair _DP_zpool= md pthread z nvpair avl umem -_DP_be= zfs nvpair +_DP_be= zfs nvpair zfsbootenv # OFED support .if ${MK_OFED} != "no" @@ -587,6 +589,7 @@ LIBUUTILDIR= ${OBJTOP}/cddl/lib/libuutil LIBZFSDIR= ${OBJTOP}/cddl/lib/libzfs LIBZFS_COREDIR= ${OBJTOP}/cddl/lib/libzfs_core +LIBZFSBOOTENVDIR= ${OBJTOP}/cddl/lib/libzfsbootenv LIBZPOOLDIR= ${OBJTOP}/cddl/lib/libzpool # OFED support Index: stand/efi/boot1/zfs_module.c =================================================================== --- stand/efi/boot1/zfs_module.c +++ stand/efi/boot1/zfs_module.c @@ -124,7 +124,7 @@ } memcpy(tdev, dev, sizeof(*dev)); - if (vdev_probe(vdev_read, tdev, &spa) != 0) { + if (vdev_probe(vdev_read, NULL, tdev, &spa) != 0) { free(tdev); return (EFI_UNSUPPORTED); } Index: stand/efi/loader/main.c =================================================================== --- stand/efi/loader/main.c +++ stand/efi/loader/main.c @@ -275,9 +275,11 @@ if (rv) { buf = malloc(VDEV_PAD_SIZE); if (buf != NULL) { - if (zfs_nextboot(&currdev, buf, VDEV_PAD_SIZE) == 0) { + if (zfs_get_bootonce(&currdev, "command", buf, + VDEV_PAD_SIZE) == 0) { printf("zfs nextboot: %s\n", buf); set_currdev(buf); + setenv("zfs-bootonce", buf, 1); } free(buf); } Index: stand/i386/loader/main.c =================================================================== --- stand/i386/loader/main.c +++ 
stand/i386/loader/main.c @@ -274,6 +274,7 @@ struct i386_devdesc new_currdev; #ifdef LOADER_ZFS_SUPPORT char buf[20]; + char *bootonce; #endif int biosdev = -1; @@ -321,6 +322,15 @@ new_currdev.d_kind.zfs.root_guid = 0; } new_currdev.dd.d_dev = &zfs_dev; + + if ((bootonce = malloc(VDEV_PAD_SIZE)) != NULL) { + if (zfs_get_bootonce(&new_currdev, "bootonce-used", + bootonce, VDEV_PAD_SIZE) == 0) { + setenv("zfs-bootonce", bootonce, 1); + } + free(bootonce); + } + #endif } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) { /* The passed-in boot device is bad */ Index: stand/i386/zfsboot/zfsboot.c =================================================================== --- stand/i386/zfsboot/zfsboot.c +++ stand/i386/zfsboot/zfsboot.c @@ -218,7 +218,9 @@ if (bdev != NULL && bdev->dd.d_dev->dv_type == DEVT_ZFS) { /* set up proper device name string for ZFS */ strncpy(boot_devname, zfs_fmtdev(bdev), sizeof (boot_devname)); - if (zfs_nextboot(bdev, cmd, sizeof(cmd)) == 0) { + if (zfs_get_bootonce(bdev, "command", cmd, sizeof(cmd)) == 0) { + nvlist_t *benv; + nextboot = 1; memcpy(cmddup, cmd, sizeof(cmd)); if (parse_cmd()) { @@ -228,6 +230,12 @@ } if (!OPT_CHECK(RBX_QUIET)) printf("zfs nextboot: %s\n", cmddup); + + if (zfs_get_bootenv(bdev, &benv) == 0) { + nvlist_add_string(benv, "bootonce-used", + cmddup); + zfs_set_bootenv(bdev, benv); + } /* Do not process this command twice */ *cmd = 0; } Index: stand/libsa/zfs/libzfs.h =================================================================== --- stand/libsa/zfs/libzfs.h +++ stand/libsa/zfs/libzfs.h @@ -26,15 +26,15 @@ * $FreeBSD$ */ +#ifndef _BOOT_LIBZFS_H_ +#define _BOOT_LIBZFS_H_ + #include #ifdef LOADER_GELI_SUPPORT #include #endif -#ifndef _BOOT_LIBZFS_H_ -#define _BOOT_LIBZFS_H_ - #define ZFS_MAXNAMELEN 256 /* @@ -54,6 +54,7 @@ #define NV_UNIQUE_NAME_TYPE 0x2 #define NV_ALIGN4(x) (((x) + 3) & ~3) +#define NV_ALIGN(x) (((x) + 7) & ~7) /* * nvlist header. 
@@ -109,17 +110,21 @@ nvlist_t *nvlist_create(int); void nvlist_destroy(nvlist_t *); -nvlist_t *nvlist_import(const uint8_t *, char, char); +nvlist_t *nvlist_import(const char *, char, char); +int nvlist_export(nvlist_t *); int nvlist_remove(nvlist_t *, const char *, data_type_t); void nvlist_print(nvlist_t *, unsigned int); int nvlist_find(const nvlist_t *, const char *, data_type_t, int *, void *, int *); int nvlist_next(nvlist_t *); +int nvlist_add_string(nvlist_t *, const char *, const char *); int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path); char *zfs_fmtdev(void *vdev); -int zfs_nextboot(void *vdev, char *buf, size_t size); +int zfs_get_bootonce(void *, const char *, char *, size_t); +int zfs_get_bootenv(void *, nvlist_t **); +int zfs_set_bootenv(void *, nvlist_t *); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); uint64_t ldi_get_size(void *); @@ -127,6 +132,8 @@ int zfs_bootenv(const char *name); int zfs_belist_add(const char *name, uint64_t __unused); int zfs_set_env(void); + +nvlist_t *vdev_read_bootenv(vdev_t *); extern struct devsw zfs_dev; extern struct fs_ops zfs_fsops; Index: stand/libsa/zfs/nvlist.c =================================================================== --- stand/libsa/zfs/nvlist.c +++ stand/libsa/zfs/nvlist.c @@ -28,128 +28,234 @@ #include #include +#include #include #include "libzfs.h" +enum xdr_op { + XDR_OP_ENCODE = 1, + XDR_OP_DECODE = 2 +}; + typedef struct xdr { - int (*xdr_getint)(const struct xdr *, const void *, int *); + enum xdr_op xdr_op; + int (*xdr_getint)(const void *, int *); + int (*xdr_putint)(void *, int); + int (*xdr_getuint)(const void *, unsigned *); + int (*xdr_putuint)(void *, unsigned); } xdr_t; -static int xdr_int(const xdr_t *, const void *, int *); -static int mem_int(const xdr_t *, const void *, int *); -static void nvlist_decode_nvlist(const xdr_t *, nvlist_t *); +static int nvlist_xdr_nvlist(const xdr_t *, nvlist_t *); static 
int nvlist_size(const xdr_t *, const uint8_t *); +static int xdr_int(const xdr_t *, void *, int *); +static int xdr_u_int(const xdr_t *, void *, unsigned *); -/* - * transform data from network to host. - */ -xdr_t ntoh = { - .xdr_getint = xdr_int -}; +/* Basic primitives for XDR translation operations, getint and putint. */ +static int +_getint(const void *buf, int *ip) +{ + *ip = be32dec(buf); + return (sizeof(int)); +} +static int +_putint(void *buf, int i) +{ + int *ip = buf; + + *ip = htobe32(i); + return (sizeof(int)); +} + +static int +_getuint(const void *buf, unsigned *ip) +{ + *ip = be32dec(buf); + return (sizeof(unsigned)); +} + +static int +_putuint(void *buf, unsigned i) +{ + unsigned *up = buf; + + *up = htobe32(i); + return (sizeof(int)); +} + /* - * transform data from host to host. + * read native data without translation. */ -xdr_t native = { - .xdr_getint = mem_int -}; +static int +mem_int(const void *buf, int *i) +{ + *i = *(int *)buf; + return (sizeof(int)); +} +static int +mem_uint(const void *buf, unsigned *u) +{ + *u = *(int *)buf; + return (sizeof(int)); +} + /* - * transform data from host to network. + * XDR data translations. 
*/ -xdr_t hton = { - .xdr_getint = xdr_int -}; - static int -xdr_short(const xdr_t *xdr, const uint8_t *buf, short *ip) +xdr_short(const xdr_t *xdr, uint8_t *buf, short *ip) { - int i, rv; + int i, rv = 0; - rv = xdr->xdr_getint(xdr, buf, &i); - *ip = i; + i = *ip; + rv = xdr_int(xdr, buf, &i); + if (xdr->xdr_op == XDR_OP_DECODE) { + *ip = i; + } return (rv); } static int -xdr_u_short(const xdr_t *xdr, const uint8_t *buf, unsigned short *ip) +xdr_u_short(const xdr_t *xdr, uint8_t *buf, unsigned short *ip) { unsigned u; int rv; - rv = xdr->xdr_getint(xdr, buf, &u); - *ip = u; + u = *ip; + rv = xdr_u_int(xdr, buf, &u); + if (xdr->xdr_op == XDR_OP_DECODE) { + *ip = u; + } return (rv); } static int -xdr_int(const xdr_t *xdr __unused, const void *buf, int *ip) +xdr_int(const xdr_t *xdr, void *buf, int *ip) { - *ip = be32dec(buf); - return (sizeof(int)); + int rv = 0; + int *i = buf; + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + rv = xdr->xdr_putint(buf, *ip); + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + rv = xdr->xdr_getint(buf, i); + *ip = *i; + break; + } + return (rv); } static int -xdr_u_int(const xdr_t *xdr __unused, const void *buf, unsigned *ip) +xdr_u_int(const xdr_t *xdr, void *buf, unsigned *ip) { - *ip = be32dec(buf); - return (sizeof(unsigned)); + int rv = 0; + unsigned *u = buf; + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + rv = xdr->xdr_putuint(buf, *ip); + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + rv = xdr->xdr_getuint(buf, u); + *ip = *u; + break; + } + return (rv); } static int xdr_string(const xdr_t *xdr, const void *buf, nv_string_t *s) { - int size; + int size = 0; - size = xdr->xdr_getint(xdr, buf, &s->nv_size); - size = NV_ALIGN4(size + s->nv_size); + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + size = s->nv_size; + size += xdr->xdr_putuint(&s->nv_size, s->nv_size); + size = NV_ALIGN4(size); + break; 
+ + case XDR_OP_DECODE: + size = xdr->xdr_getuint(buf, &s->nv_size); + size = NV_ALIGN4(size + s->nv_size); + break; + } return (size); } static int -xdr_int64(const xdr_t *xdr, const uint8_t *buf, int64_t *lp) +xdr_int64(const xdr_t *xdr, uint8_t *buf, int64_t *lp) { - int hi, rv; + int hi, rv = 0; unsigned lo; - rv = xdr->xdr_getint(xdr, buf, &hi); - rv += xdr->xdr_getint(xdr, buf + rv, &lo); - *lp = (((int64_t)hi) << 32) | lo; + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *lp, store to buf */ + hi = *lp >> 32; + lo = *lp & UINT32_MAX; + rv = xdr->xdr_putint(buf, hi); + rv += xdr->xdr_putint(buf + rv, lo); + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + rv = xdr->xdr_getint(buf, &hi); + rv += xdr->xdr_getuint(buf + rv, &lo); + *lp = (((int64_t)hi) << 32) | lo; + } return (rv); } static int -xdr_uint64(const xdr_t *xdr, const uint8_t *buf, uint64_t *lp) +xdr_uint64(const xdr_t *xdr, uint8_t *buf, uint64_t *lp) { unsigned hi, lo; - int rv; + int rv = 0; - rv = xdr->xdr_getint(xdr, buf, &hi); - rv += xdr->xdr_getint(xdr, buf + rv, &lo); - *lp = (((int64_t)hi) << 32) | lo; + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + hi = *lp >> 32; + lo = *lp & UINT32_MAX; + rv = xdr->xdr_putint(buf, hi); + rv += xdr->xdr_putint(buf + rv, lo); + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + rv = xdr->xdr_getuint(buf, &hi); + rv += xdr->xdr_getuint(buf + rv, &lo); + *lp = (((uint64_t)hi) << 32) | lo; + } return (rv); } static int -xdr_char(const xdr_t *xdr, const uint8_t *buf, char *cp) +xdr_char(const xdr_t *xdr, uint8_t *buf, char *cp) { - int i, rv; + int i, rv = 0; - rv = xdr->xdr_getint(xdr, buf, &i); - *cp = i; + i = *cp; + rv = xdr_int(xdr, buf, &i); + if (xdr->xdr_op == XDR_OP_DECODE) { + *cp = i; + } return (rv); } /* - * read native data. + * nvlist management functions. 
*/ -static int -mem_int(const xdr_t *xdr, const void *buf, int *i) -{ - *i = *(int *)buf; - return (sizeof(int)); -} - void nvlist_destroy(nvlist_t *nvl) { @@ -205,30 +311,42 @@ return (nvl); } -static void -nvlist_nvp_decode(const xdr_t *xdr, nvlist_t *nvl, nvp_header_t *nvph) +static int +nvlist_xdr_nvp(const xdr_t *xdr, nvlist_t *nvl) { nv_string_t *nv_string; nv_pair_data_t *nvp_data; nvlist_t nvlist; + unsigned type, nelem; + xdr_t xdrmem = { + .xdr_op = XDR_OP_DECODE, + .xdr_getint = mem_int, + .xdr_getuint = mem_uint + }; nv_string = (nv_string_t *)nvl->nv_idx; nvl->nv_idx += xdr_string(xdr, &nv_string->nv_size, nv_string); nvp_data = (nv_pair_data_t *)nvl->nv_idx; - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_type, &nvp_data->nv_type); - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_nelem, &nvp_data->nv_nelem); + type = nvp_data->nv_type; + nelem = nvp_data->nv_nelem; + nvl->nv_idx += xdr_u_int(xdr, nvl->nv_idx, &type); + nvl->nv_idx += xdr_u_int(xdr, nvl->nv_idx, &nelem); - switch (nvp_data->nv_type) { + switch (type) { case DATA_TYPE_NVLIST: case DATA_TYPE_NVLIST_ARRAY: - bzero(&nvlist, sizeof (nvlist)); + bzero(&nvlist, sizeof(nvlist)); nvlist.nv_data = &nvp_data->nv_data[0]; nvlist.nv_idx = nvlist.nv_data; - for (int i = 0; i < nvp_data->nv_nelem; i++) { - nvlist.nv_asize = - nvlist_size(xdr, nvlist.nv_data); - nvlist_decode_nvlist(xdr, &nvlist); + for (unsigned i = 0; i < nelem; i++) { + if (xdr->xdr_op == XDR_OP_ENCODE) + nvlist.nv_asize = + nvlist_size(&xdrmem, nvlist.nv_data); + else + nvlist.nv_asize = + nvlist_size(xdr, nvlist.nv_data); + nvlist_xdr_nvlist(xdr, &nvlist); nvl->nv_idx = nvlist.nv_idx; nvlist.nv_data = nvlist.nv_idx; } @@ -282,35 +400,86 @@ break; } + return (0); } -static void -nvlist_decode_nvlist(const xdr_t *xdr, nvlist_t *nvl) +static int +nvlist_xdr_nvlist(const xdr_t *xdr, nvlist_t *nvl) { nvp_header_t *nvph; - nvs_data_t *nvs = (nvs_data_t *)nvl->nv_data; + nvs_data_t *nvs; + unsigned encoded_size, decoded_size; + int rv; 
nvl->nv_idx = nvl->nv_data; - nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_version, - &nvs->nvl_version); - nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_nvflag, - &nvs->nvl_nvflag); - + nvs = (nvs_data_t *)nvl->nv_data; nvph = &nvs->nvl_pair; - nvl->nv_idx += xdr->xdr_getint(xdr, - (const uint8_t *)&nvph->encoded_size, &nvph->encoded_size); - nvl->nv_idx += xdr->xdr_getint(xdr, - (const uint8_t *)&nvph->decoded_size, &nvph->decoded_size); - while (nvph->encoded_size && nvph->decoded_size) { - nvlist_nvp_decode(xdr, nvl, nvph); + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + nvs->nvl_version); + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + nvs->nvl_nvflag); - nvph = (nvp_header_t *)(nvl->nv_idx); - nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->encoded_size, + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + encoded_size); + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + decoded_size); + break; + + case XDR_OP_DECODE: + nvl->nv_idx += xdr->xdr_getuint(nvl->nv_idx, + &nvs->nvl_version); + nvl->nv_idx += xdr->xdr_getuint(nvl->nv_idx, + &nvs->nvl_nvflag); + + nvl->nv_idx += xdr->xdr_getuint(nvl->nv_idx, &nvph->encoded_size); - nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->decoded_size, + nvl->nv_idx += xdr->xdr_getuint(nvl->nv_idx, &nvph->decoded_size); + + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + break; + + default: + return (EINVAL); } + + rv = 0; + while (encoded_size && decoded_size) { + rv = nvlist_xdr_nvp(xdr, nvl); + if (rv != 0) + return (rv); + + nvph = (nvp_header_t *)(nvl->nv_idx); + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + encoded_size); + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + decoded_size); + break; + + case XDR_OP_DECODE: + nvl->nv_idx 
+= xdr->xdr_getuint(&nvph->encoded_size, + &nvph->encoded_size); + nvl->nv_idx += xdr->xdr_getuint(&nvph->decoded_size, + &nvph->decoded_size); + + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + break; + } + } + return (rv); } static int @@ -323,26 +492,53 @@ p += 2 * sizeof(unsigned); pair = p; - p += xdr->xdr_getint(xdr, p, &encoded_size); - p += xdr->xdr_getint(xdr, p, &decoded_size); + p += xdr->xdr_getuint(p, &encoded_size); + p += xdr->xdr_getuint(p, &decoded_size); while (encoded_size && decoded_size) { p = pair + encoded_size; pair = p; - p += xdr->xdr_getint(xdr, p, &encoded_size); - p += xdr->xdr_getint(xdr, p, &decoded_size); + p += xdr->xdr_getuint(p, &encoded_size); + p += xdr->xdr_getuint(p, &decoded_size); } return (p - stream); } /* + * Export nvlist to byte stream format. + */ +int +nvlist_export(nvlist_t *nvl) +{ + int rv; + xdr_t xdr = { + .xdr_op = XDR_OP_ENCODE, + .xdr_putint = _putint, + .xdr_putuint = _putuint + }; + + if (nvl->nv_header.nvh_encoding != NV_ENCODE_XDR) + return (ENOTSUP); + + nvl->nv_idx = nvl->nv_data; + rv = nvlist_xdr_nvlist(&xdr, nvl); + + return (rv); +} + +/* * Import nvlist from byte stream. * Determine the stream size and allocate private copy. * Then translate the data. 
*/ nvlist_t * -nvlist_import(const uint8_t *stream, char encoding, char endian) +nvlist_import(const char *stream, char encoding, char endian) { nvlist_t *nvl; + xdr_t xdr = { + .xdr_op = XDR_OP_DECODE, + .xdr_getint = _getint, + .xdr_getuint = _getuint + }; if (encoding != NV_ENCODE_XDR) return (NULL); @@ -351,7 +547,11 @@ if (nvl == NULL) return (nvl); - nvl->nv_asize = nvl->nv_size = nvlist_size(&ntoh, stream); + nvl->nv_header.nvh_encoding = encoding; + nvl->nv_header.nvh_endian = endian; + nvl->nv_header.nvh_reserved1 = nvl->nv_header.nvh_reserved2 = 0; + nvl->nv_asize = nvl->nv_size = nvlist_size(&xdr, + (const uint8_t *)stream); nvl->nv_data = malloc(nvl->nv_asize); if (nvl->nv_data == NULL) { free(nvl); @@ -360,8 +560,14 @@ nvl->nv_idx = nvl->nv_data; bcopy(stream, nvl->nv_data, nvl->nv_asize); - nvlist_decode_nvlist(&ntoh, nvl); - nvl->nv_idx = nvl->nv_data; + if (nvlist_xdr_nvlist(&xdr, nvl) == 0) { + nvl->nv_idx = nvl->nv_data; + } else { + free(nvl->nv_data); + free(nvl); + nvl = NULL; + } + return (nvl); } @@ -432,7 +638,6 @@ while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { nvp_name = (nv_string_t *)((uint8_t *)nvp + sizeof(*nvp)); - nvp_data = (nv_pair_data_t *) NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); @@ -443,7 +648,7 @@ *elementsp = nvp_data->nv_nelem; switch (nvp_data->nv_type) { case DATA_TYPE_UINT64: - *(uint64_t *)valuep = + *(uint64_t *)valuep = *(uint64_t *)nvp_data->nv_data; return (0); case DATA_TYPE_STRING: @@ -476,7 +681,55 @@ return (ENOENT); } -/* +int +nvlist_add_string(nvlist_t *nvl, const char *name, const char *value) +{ + nvs_data_t *nvs; + nvp_header_t head, *hp; + uint8_t *ptr; + size_t namelen, valuelen; + + nvs = (nvs_data_t *)nvl->nv_data; + if (nvs->nvl_nvflag & NV_UNIQUE_NAME) + (void) nvlist_remove(nvl, name, DATA_TYPE_STRING); + + namelen = strlen(name); + valuelen = strlen(value); + head.encoded_size = 4 + 4 + 4 + NV_ALIGN4(namelen) + 4 + 4 + + 4 + NV_ALIGN(valuelen + 1); + 
head.decoded_size = NV_ALIGN(4 * 4 + namelen + 1) + + NV_ALIGN(valuelen + 1); + + if (nvl->nv_asize - nvl->nv_size < head.encoded_size + 8) { + ptr = realloc(nvl->nv_data, nvl->nv_asize + head.encoded_size); + if (ptr == NULL) + return (ENOMEM); + nvl->nv_data = ptr; + nvl->nv_asize += head.encoded_size; + } + nvl->nv_idx = nvl->nv_data + nvl->nv_size - sizeof(*hp); + bzero(nvl->nv_idx, head.encoded_size + 8); + hp = (nvp_header_t *)nvl->nv_idx; + *hp = head; + nvl->nv_idx += sizeof(*hp); + *(unsigned *)nvl->nv_idx = namelen; + nvl->nv_idx += sizeof(unsigned); + strlcpy((char *)nvl->nv_idx, name, namelen + 1); + nvl->nv_idx += NV_ALIGN4(namelen); + *(unsigned *)nvl->nv_idx = DATA_TYPE_STRING; + nvl->nv_idx += sizeof(unsigned); + *(unsigned *)nvl->nv_idx = 1; + nvl->nv_idx += sizeof(unsigned); + *(unsigned *)nvl->nv_idx = valuelen; + nvl->nv_idx += sizeof(unsigned); + strlcpy((char *)nvl->nv_idx, value, valuelen + 1); + nvl->nv_idx += NV_ALIGN4(valuelen); + nvl->nv_size += head.encoded_size; + + return (0); +} + +/* * Return the next nvlist in an nvlist array. 
*/ int @@ -535,7 +788,12 @@ nv_string_t *nvp_name; nv_pair_data_t *nvp_data; nvlist_t nvlist; - int i, j; + unsigned i, j; + xdr_t xdr = { + .xdr_op = XDR_OP_DECODE, + .xdr_getint = mem_int, + .xdr_getuint = mem_uint + }; data = (nvs_data_t *)nvl->nv_data; nvp = &data->nvl_pair; /* first pair in nvlist */ @@ -545,7 +803,7 @@ NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); - for (int i = 0; i < indent; i++) + for (i = 0; i < indent; i++) printf(" "); printf("%s [%d] %.*s", typenames[nvp_data->nv_type], @@ -563,7 +821,7 @@ case DATA_TYPE_STRING: { nvp_name = (nv_string_t *)&nvp_data->nv_data[0]; printf(" = \"%.*s\"\n", nvp_name->nv_size, - nvp_name->nv_data ); + nvp_name->nv_data); break; } @@ -588,7 +846,7 @@ nvp_name->nv_data); } nvlist.nv_data = (uint8_t *)data + - nvlist_size(&native, nvlist.nv_data); + nvlist_size(&xdr, nvlist.nv_data); } break; Index: stand/libsa/zfs/zfs.c =================================================================== --- stand/libsa/zfs/zfs.c +++ stand/libsa/zfs/zfs.c @@ -483,8 +483,7 @@ } static int -vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, - size_t bytes) +vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes) { int fd, ret; size_t head, tail, total_size, full_sec_size; @@ -493,8 +492,8 @@ ssize_t res; char *outbuf, *bouncebuf; - fd = (uintptr_t)priv; - outbuf = (char *) buf; + fd = (uintptr_t)vdev->v_priv; + outbuf = (char *)buf; bouncebuf = NULL; ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); @@ -529,14 +528,14 @@ /* Partial data for first sector */ if (head > 0) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes)); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } @@ -552,20 +551,20 @@ if (full_sec_size > 0) { if (bytes < full_sec_size) { res = read(fd, bouncebuf, 
secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, bytes); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } } else { res = write(fd, outbuf, full_sec_size); - if (res != full_sec_size) { + if ((unsigned)res != full_sec_size) { ret = EIO; goto error; } @@ -576,14 +575,14 @@ /* Partial data write to last sector */ if (do_tail_write) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, secsz - tail); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } @@ -595,102 +594,6 @@ return (ret); } -static void -vdev_clear_pad2(vdev_t *vdev) -{ - vdev_t *kid; - vdev_boot_envblock_t *be; - off_t off = offsetof(vdev_label_t, vl_be); - zio_checksum_info_t *ci; - zio_cksum_t cksum; - - STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { - if (kid->v_state != VDEV_STATE_HEALTHY) - continue; - vdev_clear_pad2(kid); - } - - if (!STAILQ_EMPTY(&vdev->v_children)) - return; - - be = calloc(1, sizeof (*be)); - if (be == NULL) { - printf("failed to clear be area: out of memory\n"); - return; - } - - ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; - be->vbe_zbt.zec_magic = ZEC_MAGIC; - zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); - ci->ci_func[0](be, sizeof (*be), NULL, &cksum); - be->vbe_zbt.zec_cksum = cksum; - - if (vdev_write(vdev, vdev->v_read_priv, off, be, VDEV_PAD_SIZE)) { - printf("failed to clear be area of primary vdev: %d\n", - errno); - } - free(be); -} - -/* - * Read the next boot command from pad2. - * If any instance of pad2 is set to empty string, or the returned string - * values are not the same, we consider next boot not to be set. 
- */ -static char * -vdev_read_pad2(vdev_t *vdev) -{ - vdev_t *kid; - char *tmp, *result = NULL; - vdev_boot_envblock_t *be; - off_t off = offsetof(vdev_label_t, vl_be); - - STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { - if (kid->v_state != VDEV_STATE_HEALTHY) - continue; - tmp = vdev_read_pad2(kid); - if (tmp == NULL) - continue; - - /* The next boot is not set, we are done. */ - if (*tmp == '\0') { - free(result); - return (tmp); - } - if (result == NULL) { - result = tmp; - continue; - } - /* Are the next boot strings different? */ - if (strcmp(result, tmp) != 0) { - free(tmp); - *result = '\0'; - break; - } - free(tmp); - } - if (result != NULL) - return (result); - - be = malloc(sizeof (*be)); - if (be == NULL) - return (NULL); - - if (vdev_read(vdev, vdev->v_read_priv, off, be, sizeof (*be))) { - return (NULL); - } - - switch (be->vbe_version) { - case VB_RAW: - case VB_NVLIST: - result = strdup(be->vbe_bootenv); - default: - /* Backward compatibility with initial nextboot feaure. */ - result = strdup((char *)be); - } - return (result); -} - static int zfs_dev_init(void) { @@ -743,7 +646,7 @@ int ret; spa = NULL; - ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); + ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa); if (ret == 0 && pool_guid != NULL) *pool_guid = spa->spa_guid; return (ret); @@ -766,7 +669,7 @@ ppa = (struct zfs_probe_args *)arg; strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); devname[strlen(ppa->devname) - 1] = '\0'; - sprintf(devname, "%s%s:", devname, partname); + snprintf(devname, sizeof(devname), "%s%s:", devname, partname); pa.fd = open(devname, O_RDWR); if (pa.fd == -1) return (0); @@ -789,58 +692,98 @@ return (0); } +/* + * Return bootenv nvlist from pool label. 
+ */ int -zfs_nextboot(void *vdev, char *buf, size_t size) +zfs_get_bootenv(void *vdev, nvlist_t **benvp) { struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; - spa_t *spa; + nvlist_t *benv = NULL; vdev_t *vd; - char *result = NULL; + spa_t *spa; if (dev->dd.d_dev->dv_type != DEVT_ZFS) - return (1); + return (ENOTSUP); - if (dev->pool_guid == 0) - spa = STAILQ_FIRST(&zfs_pools); - else - spa = spa_find_by_guid(dev->pool_guid); + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); - if (spa == NULL) { - printf("ZFS: can't find pool by guid\n"); - return (1); - } + if (spa->spa_bootenv == NULL) { + STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, + v_childlink) { + benv = vdev_read_bootenv(vd); - STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { - char *tmp = vdev_read_pad2(vd); - - /* Continue on error. */ - if (tmp == NULL) - continue; - /* Nextboot is not set. */ - if (*tmp == '\0') { - free(result); - free(tmp); - return (1); + if (benv != NULL) + break; } - if (result == NULL) { - result = tmp; - continue; - } - free(tmp); + spa->spa_bootenv = benv; + } else { + benv = spa->spa_bootenv; } - if (result == NULL) - return (1); + if (benv == NULL) + return (ENOENT); + + *benvp = benv; + return (0); +} + +/* + * Store nvlist to pool label bootenv area. Also updates cached pointer in spa. + */ +int +zfs_set_bootenv(void *vdev, nvlist_t *benv) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + vdev_t *vd; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { - vdev_clear_pad2(vd); + vdev_write_bootenv(vd, benv); } - strlcpy(buf, result, size); - free(result); + spa->spa_bootenv = benv; return (0); } +/* + * Get bootonce value by key. The bootonce pair is removed + * from the bootenv nvlist and the remaining nvlist is committed back to disk. 
+ */ int +zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size) +{ + nvlist_t *benv; + char *result = NULL; + int result_size, rv; + + if ((rv = zfs_get_bootenv(vdev, &benv)) != 0) + return (rv); + + if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL, + &result, &result_size)) == 0) { + if (result_size == 0) { + /* ignore empty string */ + rv = ENOENT; + } else { + size = MIN((size_t)result_size + 1, size); + strlcpy(buf, result, size); + } + (void) nvlist_remove(benv, key, DATA_TYPE_STRING); + (void) zfs_set_bootenv(vdev, benv); + } + + return (rv); +} + +int zfs_probe_dev(const char *devname, uint64_t *pool_guid) { struct disk_devdesc *dev; @@ -936,12 +879,9 @@ dev = va_arg(args, struct zfs_devdesc *); va_end(args); - if (dev->pool_guid == 0) - spa = STAILQ_FIRST(&zfs_pools); - else - spa = spa_find_by_guid(dev->pool_guid); - if (!spa) + if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); + mount = malloc(sizeof(*mount)); if (mount == NULL) rv = ENOMEM; @@ -1070,10 +1010,11 @@ } if (rootname[0] == '\0') - sprintf(buf, "%s:%s:", dev->dd.d_dev->dv_name, spa->spa_name); + snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name, + spa->spa_name); else - sprintf(buf, "%s:%s/%s:", dev->dd.d_dev->dv_name, spa->spa_name, - rootname); + snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name, + spa->spa_name, rootname); return (buf); } Index: stand/libsa/zfs/zfsimpl.c =================================================================== --- stand/libsa/zfs/zfsimpl.c +++ stand/libsa/zfs/zfsimpl.c @@ -31,6 +31,7 @@ * Stand-alone ZFS file reader. 
 */ +#include #include #include #include @@ -219,8 +220,8 @@ size_t psize; int rc; - if (!vdev->v_phys_read) - return (EIO); + if (vdev->v_phys_read == NULL) + return (ENOTSUP); if (bp) { psize = BP_GET_PSIZE(bp); @@ -228,7 +229,7 @@ psize = size; } - rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize); + rc = vdev->v_phys_read(vdev, vdev->v_priv, offset, buf, psize); if (rc == 0) { if (bp != NULL) rc = zio_checksum_verify(vdev->v_spa, bp, buf); @@ -237,6 +238,15 @@ return (rc); } +static int +vdev_write_phys(vdev_t *vdev, void *buf, off_t offset, size_t size) +{ + if (vdev->v_phys_write == NULL) + return (ENOTSUP); + + return (vdev->v_phys_write(vdev, offset, buf, size)); +} + typedef struct remap_segment { vdev_t *rs_vd; uint64_t rs_offset; @@ -1337,6 +1347,19 @@ } static spa_t * +spa_find_by_dev(struct zfs_devdesc *dev) +{ + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (NULL); + + if (dev->pool_guid == 0) + return (STAILQ_FIRST(&zfs_pools)); + + return (spa_find_by_guid(dev->pool_guid)); +} + +static spa_t * spa_create(uint64_t guid, const char *name) { spa_t *spa; @@ -1586,6 +1609,193 @@ return (vdev_read_phys(vd, &bp, buf, off, size)); } +/* + * We need to be sure we write to the correct location. + * Our vdev label consists of 4 fields: + * pad1 (8k), reserved. + * bootenv (8k), checksummed, previously reserved, may contain garbage. + * vdev_phys (112k), checksummed + * uberblock ring (128k), checksummed. + * + * Since bootenv area may contain garbage, we cannot reliably read it, as + * we can get checksum errors. + * Next best thing is vdev_phys - it is just after bootenv. It still may + * be corrupted, but in such case we will miss this one write. 
+ */ +static int +vdev_label_write_validate(vdev_t *vd, int l, uint64_t offset) +{ + uint64_t off, o_phys; + void *buf; + size_t size = VDEV_PHYS_SIZE; + int rc; + + o_phys = offsetof(vdev_label_t, vl_vdev_phys); + off = vdev_label_offset(vd->v_psize, l, o_phys); + + /* off should be 8K from bootenv */ + if (vdev_label_offset(vd->v_psize, l, offset) + VDEV_PAD_SIZE != off) + return (EINVAL); + + buf = malloc(size); + if (buf == NULL) + return (ENOMEM); + + /* Read vdev_phys */ + rc = vdev_label_read(vd, l, buf, o_phys, size); + free(buf); + return (rc); +} + +static int +vdev_label_write(vdev_t *vd, int l, vdev_boot_envblock_t *be, uint64_t offset) +{ + zio_checksum_info_t *ci; + zio_cksum_t cksum; + off_t off; + size_t size = VDEV_PAD_SIZE; + int rc; + + if (vd->v_phys_write == NULL) + return (ENOTSUP); + + off = vdev_label_offset(vd->v_psize, l, offset); + + rc = vdev_label_write_validate(vd, l, offset); + if (rc != 0) { + return (rc); + } + + ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; + be->vbe_zbt.zec_magic = ZEC_MAGIC; + zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); + ci->ci_func[0](be, size, NULL, &cksum); + be->vbe_zbt.zec_cksum = cksum; + + return (vdev_write_phys(vd, be, off, size)); +} + +static int +vdev_write_bootenv_impl(vdev_t *vdev, vdev_boot_envblock_t *be) +{ + vdev_t *kid; + int rv = 0, rc; + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + rc = vdev_write_bootenv_impl(kid, be); + if (rv == 0) + rv = rc; + } + + /* + * Non-leaf vdevs do not have v_phys_write. + */ + if (vdev->v_phys_write == NULL) + return (rv); + + for (int l = 0; l < VDEV_LABELS; l++) { + rc = vdev_label_write(vdev, l, be, + offsetof(vdev_label_t, vl_be)); + if (rc != 0) { + printf("failed to write bootenv to %s label %d: %d\n", + vdev->v_name ? 
vdev->v_name : "unknown", l, rc); + rv = rc; + } + } + return (rv); +} + +int +vdev_write_bootenv(vdev_t *vdev, nvlist_t *nvl) +{ + vdev_boot_envblock_t *be; + nvlist_t nv; + int rv; + + if (nvl->nv_size > sizeof(be->vbe_nvlist)) + return (E2BIG); + + be = calloc(1, sizeof(*be)); + if (be == NULL) + return (ENOMEM); + + nv.nv_header = nvl->nv_header; + nv.nv_asize = nvl->nv_asize; + nv.nv_size = nvl->nv_size; + + *(nvs_header_t *)be->vbe_nvlist = nv.nv_header; + nv.nv_data = be->vbe_nvlist + sizeof(nvs_header_t); + bcopy(nvl->nv_data, nv.nv_data, nv.nv_size); + rv = nvlist_export(&nv); + if (rv == 0) { + rv = vdev_write_bootenv_impl(vdev, be); + } + free(be); + return (rv); +} + +/* + * Read the bootenv area from pool label, return the nvlist from it. + * We return from first successful read. + */ +nvlist_t * +vdev_read_bootenv(vdev_t *vdev) +{ + vdev_t *kid; + nvlist_t *benv; + vdev_boot_envblock_t *be; + int rv; + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + + benv = vdev_read_bootenv(kid); + if (benv != NULL) + return (benv); + } + + be = malloc(sizeof (*be)); + if (be == NULL) + return (NULL); + + rv = 0; + for (int l = 0; l < VDEV_LABELS; l++) { + rv = vdev_label_read(vdev, l, be, + offsetof(vdev_label_t, vl_be), + sizeof (*be)); + if (rv == 0) + break; + } + if (rv != 0) { + free(be); + return (NULL); + } + benv = nvlist_import(be->vbe_nvlist + 4, be->vbe_nvlist[0], + be->vbe_nvlist[1]); + if (benv == NULL) { + char *command = (char *)be; + bool ok = false; + + /* Check for legacy zfsbootcfg command string */ + for (int i = 0; command[i] != '\0'; i++) { + if (iscntrl(command[i])) { + ok = false; + break; + } else { + ok = true; + } + } + benv = nvlist_create(NV_UNIQUE_NAME); + if (ok) + nvlist_add_string(benv, "command", command); + } + free(be); + return (benv); +} + static uint64_t vdev_get_label_asize(nvlist_t *nvl) { @@ -1652,15 +1862,13 @@ return (NULL); for (int l = 0; l < VDEV_LABELS; 
l++) { - const unsigned char *nvlist; - if (vdev_label_read(vd, l, label, offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t))) continue; - nvlist = (const unsigned char *) label->vp_nvlist; - tmp = nvlist_import(nvlist + 4, nvlist[0], nvlist[1]); + tmp = nvlist_import(label->vp_nvlist + 4, + label->vp_nvlist[0], label->vp_nvlist[1]); if (tmp == NULL) continue; @@ -1725,7 +1933,8 @@ } static int -vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap) +vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, + spa_t **spap) { vdev_t vtmp; spa_t *spa; @@ -1743,8 +1952,9 @@ */ memset(&vtmp, 0, sizeof(vtmp)); vtmp.v_phys_read = _read; - vtmp.v_read_priv = read_priv; - vtmp.v_psize = P2ALIGN(ldi_get_size(read_priv), + vtmp.v_phys_write = _write; + vtmp.v_priv = priv; + vtmp.v_psize = P2ALIGN(ldi_get_size(priv), (uint64_t)sizeof (vdev_label_t)); /* Test for minimum device size. */ @@ -1858,7 +2068,8 @@ vdev = vdev_find(guid); if (vdev != NULL) { vdev->v_phys_read = _read; - vdev->v_read_priv = read_priv; + vdev->v_phys_write = _write; + vdev->v_priv = priv; vdev->v_psize = vtmp.v_psize; /* * If no other state is set, mark vdev healthy. 
@@ -3128,7 +3339,7 @@ dnode_phys_t dir; size_t size; int rc; - unsigned char *nv; + char *nv; *value = NULL; if ((rc = objset_get_dnode(spa, &spa->spa_mos, obj, &dir)) != 0) Index: stand/userboot/test/test.c =================================================================== --- stand/userboot/test/test.c +++ stand/userboot/test/test.c @@ -261,6 +261,21 @@ } int +test_diskwrite(void *arg, int unit, uint64_t offset, void *src, size_t size, + size_t *resid_return) +{ + ssize_t n; + + if (unit > disk_index || disk_fd[unit] == -1) + return (EIO); + n = pwrite(disk_fd[unit], src, size, offset); + if (n < 0) + return (errno); + *resid_return = size - n; + return (0); +} + +int test_diskioctl(void *arg, int unit, u_long cmd, void *data) { struct stat sb; @@ -399,6 +414,7 @@ .stat = test_stat, .diskread = test_diskread, + .diskwrite = test_diskwrite, .diskioctl = test_diskioctl, .copyin = test_copyin, @@ -431,8 +447,9 @@ void (*func)(struct loader_callbacks *, void *, int, int) __dead2; int opt; const char *userboot_obj = "/boot/userboot.so"; + int oflag = O_RDONLY; - while ((opt = getopt(argc, argv, "b:d:h:")) != -1) { + while ((opt = getopt(argc, argv, "wb:d:h:")) != -1) { switch (opt) { case 'b': userboot_obj = optarg; @@ -442,13 +459,17 @@ disk_index++; disk_fd = reallocarray(disk_fd, disk_index + 1, sizeof (int)); - disk_fd[disk_index] = open(optarg, O_RDONLY); + disk_fd[disk_index] = open(optarg, oflag); if (disk_fd[disk_index] < 0) err(1, "Can't open disk image '%s'", optarg); break; case 'h': host_base = optarg; + break; + + case 'w': + oflag = O_RDWR; break; case '?': Index: stand/userboot/userboot.h =================================================================== --- stand/userboot/userboot.h +++ stand/userboot/userboot.h @@ -131,6 +131,12 @@ int (*diskread)(void *arg, int unit, uint64_t offset, void *dst, size_t size, size_t *resid_return); + /* + * Write to a disk image at the given offset + */ + int (*diskwrite)(void *arg, int unit, uint64_t offset, + void 
*src, size_t size, size_t *resid_return); + /* * Guest virtual machine i/o */ Index: stand/userboot/userboot/main.c =================================================================== --- stand/userboot/userboot/main.c +++ stand/userboot/userboot/main.c @@ -214,6 +214,16 @@ exit(0); } +static void +set_currdev(const char *devname) +{ + + env_setenv("currdev", EV_VOLATILE, devname, + userboot_setcurrdev, env_nounset); + env_setenv("loaddev", EV_VOLATILE, devname, + env_noset, env_nounset); +} + /* * Set the 'current device' by (if possible) recovering the boot device as * supplied by the initial bootstrap. @@ -225,6 +235,7 @@ struct devdesc *dd; #if defined(USERBOOT_ZFS_SUPPORT) struct zfs_devdesc zdev; + char *buf = NULL; if (userboot_zfs_found) { @@ -257,10 +268,18 @@ dd = &dev.dd; } - env_setenv("currdev", EV_VOLATILE, userboot_fmtdev(dd), - userboot_setcurrdev, env_nounset); - env_setenv("loaddev", EV_VOLATILE, userboot_fmtdev(dd), - env_noset, env_nounset); + set_currdev(userboot_fmtdev(dd)); + +#if defined(USERBOOT_ZFS_SUPPORT) + buf = malloc(VDEV_PAD_SIZE); + if (buf != NULL) { + if (zfs_nextboot(&zdev, buf, VDEV_PAD_SIZE) == 0) { + printf("zfs nextboot: %s\n", buf); + set_currdev(buf); + } + free(buf); + } +#endif } #if defined(USERBOOT_ZFS_SUPPORT) Index: stand/userboot/userboot/userboot_disk.c =================================================================== --- stand/userboot/userboot/userboot_disk.c +++ stand/userboot/userboot/userboot_disk.c @@ -211,15 +211,21 @@ size_t resid; int rc; - rw &= F_MASK; - if (rw == F_WRITE) - return (EROFS); - if (rw != F_READ) - return (EINVAL); if (rsize) *rsize = 0; off = dblk * ud_info[dev->dd.d_unit].sectorsize; - rc = CALLBACK(diskread, dev->dd.d_unit, off, buf, size, &resid); + switch (rw & F_MASK) { + case F_READ: + rc = CALLBACK(diskread, dev->dd.d_unit, off, buf, size, &resid); + break; + case F_WRITE: + rc = CALLBACK(diskwrite, dev->dd.d_unit, off, buf, size, + &resid); + break; + default: + rc = EINVAL; + 
break; + } if (rc) return (rc); if (rsize) Index: sys/cddl/boot/zfs/zfsimpl.h =================================================================== --- sys/cddl/boot/zfs/zfsimpl.h +++ sys/cddl/boot/zfs/zfsimpl.h @@ -526,21 +526,8 @@ zio_eck_t vp_zbt; } vdev_phys_t; -typedef enum vbe_vers { - /* The bootenv file is stored as ascii text in the envblock */ - VB_RAW = 0, - - /* - * The bootenv file is converted to an nvlist and then packed into the - * envblock. - */ - VB_NVLIST = 1 -} vbe_vers_t; - typedef struct vdev_boot_envblock { - uint64_t vbe_version; - char vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) - - sizeof (zio_eck_t)]; + char vbe_nvlist[VDEV_PAD_SIZE - sizeof (zio_eck_t)]; zio_eck_t vbe_zbt; } vdev_boot_envblock_t; @@ -1662,10 +1649,9 @@ */ struct vdev; struct spa; -typedef int vdev_phys_read_t(struct vdev *vdev, void *priv, - off_t offset, void *buf, size_t bytes); -typedef int vdev_read_t(struct vdev *vdev, const blkptr_t *bp, - void *buf, off_t offset, size_t bytes); +typedef int vdev_phys_read_t(struct vdev *, void *, off_t, void *, size_t); +typedef int vdev_phys_write_t(struct vdev *, off_t, void *, size_t); +typedef int vdev_read_t(struct vdev *, const blkptr_t *, void *, off_t, size_t); typedef STAILQ_HEAD(vdev_list, vdev) vdev_list_t; @@ -1793,8 +1779,9 @@ size_t v_nchildren; /* # children */ vdev_state_t v_state; /* current state */ vdev_phys_read_t *v_phys_read; /* read from raw leaf vdev */ + vdev_phys_write_t *v_phys_write; /* write to raw leaf vdev */ vdev_read_t *v_read; /* read from vdev */ - void *v_read_priv; /* private data for read function */ + void *v_priv; /* data for read/write function */ boolean_t v_islog; struct spa *v_spa; /* link to spa */ /* @@ -1820,6 +1807,7 @@ zio_cksum_salt_t spa_cksum_salt; /* secret salt for cksum */ void *spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS]; boolean_t spa_with_log; /* this pool has log */ + void *spa_bootenv; /* bootenv from pool label */ } spa_t; /* IO related arguments. 
*/ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -173,6 +173,8 @@ extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **); extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, uint64_t size, zio_done_func_t *done, void *priv, int flags); +extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *); +extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *); typedef enum { VDEV_LABEL_CREATE, /* create/add a new device */ @@ -184,8 +186,6 @@ } vdev_labeltype_t; extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason); - -extern int vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size); #ifdef __cplusplus } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. 
*/ @@ -392,7 +392,7 @@ #define VDEV_RAIDZ_MAXPARITY 3 #define VDEV_PAD_SIZE (8 << 10) -/* 2 padding areas (vl_pad1 and vl_pad2) to skip */ +/* 2 padding areas (vl_pad1 and vl_be) to skip */ #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) @@ -419,9 +419,16 @@ zio_eck_t vp_zbt; } vdev_phys_t; +typedef struct vdev_boot_envblock { + char vbe_nvlist[VDEV_PAD_SIZE - sizeof (zio_eck_t)]; + zio_eck_t vbe_zbt; +} vdev_boot_envblock_t; + +CTASSERT(sizeof (vdev_boot_envblock_t) == VDEV_PAD_SIZE); + typedef struct vdev_label { char vl_pad1[VDEV_PAD_SIZE]; /* 8K */ - char vl_pad2[VDEV_PAD_SIZE]; /* 8K */ + vdev_boot_envblock_t vl_be; /* 8K */ vdev_phys_t vl_vdev_phys; /* 112K */ char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ } vdev_label_t; /* 256K total */ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c @@ -1566,7 +1566,7 @@ for (int l = 1; l < VDEV_LABELS; l++) { zio_nowait(zio_read_phys(pio, vd, vdev_label_offset(vd->vdev_psize, l, - offsetof(vdev_label_t, vl_pad2)), VDEV_PAD_SIZE, + offsetof(vdev_label_t, vl_be)), VDEV_PAD_SIZE, abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE), ZIO_CHECKSUM_OFF, vdev_probe_done, vps, ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE)); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. * Copyright 2019 Joyent, Inc. 
*/ @@ -781,7 +781,7 @@ nvlist_t *label; vdev_phys_t *vp; abd_t *vp_abd; - abd_t *pad2; + abd_t *bootenv; uberblock_t *ub; abd_t *ub_abd; zio_t *zio; @@ -956,8 +956,8 @@ ub->ub_txg = 0; /* Initialize the 2nd padding area. */ - pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); - abd_zero(pad2, VDEV_PAD_SIZE); + bootenv = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); + abd_zero(bootenv, VDEV_PAD_SIZE); /* * Write everything in parallel. @@ -976,8 +976,8 @@ * Zero out the 2nd padding area where it might have * left over data from previous filesystem format. */ - vdev_label_write(zio, vd, l, pad2, - offsetof(vdev_label_t, vl_pad2), + vdev_label_write(zio, vd, l, bootenv, + offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE, NULL, NULL, flags); vdev_label_write(zio, vd, l, ub_abd, @@ -993,7 +993,7 @@ } nvlist_free(label); - abd_free(pad2); + abd_free(bootenv); abd_free(ub_abd); abd_free(vp_abd); @@ -1016,41 +1016,181 @@ return (error); } +/* + * Done callback for vdev_label_read_bootenv_impl. If this is the first + * callback to finish, store our abd in the callback pointer. Otherwise, we + * just free our abd and return. + */ +static void +vdev_label_read_bootenv_done(zio_t *zio) +{ + zio_t *rio = zio->io_private; + abd_t **cbp = rio->io_private; + + ASSERT3U(zio->io_size, ==, VDEV_PAD_SIZE); + + if (zio->io_error == 0) { + mutex_enter(&rio->io_lock); + if (*cbp == NULL) { + /* Will free this buffer in vdev_label_read_bootenv. */ + *cbp = zio->io_abd; + } else { + abd_free(zio->io_abd); + } + mutex_exit(&rio->io_lock); + } else { + abd_free(zio->io_abd); + } +} + +static void +vdev_label_read_bootenv_impl(zio_t *zio, vdev_t *vd, int flags) +{ + for (int c = 0; c < vd->vdev_children; c++) + vdev_label_read_bootenv_impl(zio, vd->vdev_child[c], flags); + + /* + * We just use the first label that has a correct checksum; the + * bootloader should have rewritten them all to be the same on boot, + * and any changes we made since boot have been the same across all + * labels. 
+ */ + if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { + for (int l = 0; l < VDEV_LABELS; l++) { + vdev_label_read(zio, vd, l, + abd_alloc_linear(VDEV_PAD_SIZE, B_FALSE), + offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE, + vdev_label_read_bootenv_done, zio, flags); + } + } +} + int -vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size) +vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *bootenv) { - spa_t *spa = vd->vdev_spa; + nvlist_t *config; + spa_t *spa = rvd->vdev_spa; + abd_t *abd = NULL; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; + + ASSERT(bootenv); + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + zio_t *zio = zio_root(spa, NULL, &abd, flags); + vdev_label_read_bootenv_impl(zio, rvd, flags); + int err = zio_wait(zio); + + if (abd != NULL) { + vdev_boot_envblock_t *vbe = abd_to_buf(abd); + + err = nvlist_unpack(vbe->vbe_nvlist, + sizeof (vbe->vbe_nvlist), &config, 0); + + if (err != 0) { + char *buf; + + vbe->vbe_nvlist[sizeof (vbe->vbe_nvlist) - 1] = '\0'; + /* We have unstructured data, treat it as string. */ + buf = abd_to_buf(abd); + + /* + * We can have zeroed block (no data); + * or FreeBSD zfs bootnext command string; + * or uint64_t 0 and envmap string. + */ + if (*(uint64_t *)buf == 0) { + fnvlist_add_string(bootenv, "envmap", buf + 8); + } else { + fnvlist_add_string(bootenv, "command", buf); + } + } else { + fnvlist_merge(bootenv, config); + nvlist_free(config); + } + + /* + * abd was allocated in vdev_label_read_bootenv_impl() + */ + abd_free(abd); + /* + * If we managed to read any successfully, + * return success. 
+ */ + return (0); + } + return (err); +} + +int +vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env) +{ zio_t *zio; - abd_t *pad2; + spa_t *spa = vd->vdev_spa; + vdev_boot_envblock_t *bootenv; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; int error; + size_t nvsize; + char *nvbuf; - if (size > VDEV_PAD_SIZE) - return (EINVAL); + error = nvlist_size(env, &nvsize, NV_ENCODE_XDR); + if (error != 0) + return (SET_ERROR(error)); - if (!vd->vdev_ops->vdev_op_leaf) - return (ENODEV); - if (vdev_is_dead(vd)) - return (ENXIO); + if (nvsize >= sizeof (bootenv->vbe_nvlist)) { + return (SET_ERROR(E2BIG)); + } ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); - abd_zero(pad2, VDEV_PAD_SIZE); - abd_copy_from_buf(pad2, buf, size); + error = ENXIO; + for (int c = 0; c < vd->vdev_children; c++) { + int child_err; + child_err = vdev_label_write_bootenv(vd->vdev_child[c], env); + /* + * As long as any of the disks managed to write all of their + * labels successfully, return success. 
+ */ + if (child_err == 0) + error = child_err; + } + + if (!vd->vdev_ops->vdev_op_leaf || vdev_is_dead(vd) || + !vdev_writeable(vd)) { + return (error); + } + ASSERT3U(sizeof (*bootenv), ==, VDEV_PAD_SIZE); + abd_t *abd = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); + abd_zero(abd, VDEV_PAD_SIZE); + + bootenv = abd_borrow_buf_copy(abd, VDEV_PAD_SIZE); + nvbuf = bootenv->vbe_nvlist; + nvsize = sizeof (bootenv->vbe_nvlist); + + error = nvlist_pack(env, &nvbuf, &nvsize, NV_ENCODE_XDR, KM_SLEEP); + if (error == 0) { + abd_return_buf_copy(abd, bootenv, VDEV_PAD_SIZE); + } else { + abd_free(abd); + return (SET_ERROR(error)); + } + retry: zio = zio_root(spa, NULL, NULL, flags); - vdev_label_write(zio, vd, 0, pad2, - offsetof(vdev_label_t, vl_pad2), - VDEV_PAD_SIZE, NULL, NULL, flags); + for (int l = 0; l < VDEV_LABELS; l++) { + vdev_label_write(zio, vd, l, abd, + offsetof(vdev_label_t, vl_be), + VDEV_PAD_SIZE, NULL, NULL, flags); + } + error = zio_wait(zio); if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { flags |= ZIO_FLAG_TRYHARD; goto retry; } - abd_free(pad2); + abd_free(abd); return (error); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -3654,50 +3654,71 @@ return (error); } +/* + * This ioctl is used to set the bootenv configuration on the current + * pool. This configuration is stored in the second padding area of the label, + * and it is used by the bootloader(s) to store bootloader and/or system + * specific data. + * The data is stored as nvlist data stream, and is protected by + * an embedded checksum. 
+ */ +/* ARGSUSED */ +static const zfs_ioc_key_t zfs_keys_set_bootenv[] = { + {"", DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST}, +}; + +static int +zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl) +{ + int error; + spa_t *spa; + + if ((error = spa_open(name, &spa, FTAG)) != 0) + return (error); + spa_vdev_state_enter(spa, SCL_ALL); + error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl); + (void) spa_vdev_state_exit(spa, NULL, 0); + spa_close(spa, FTAG); + return (error); +} + +static const zfs_ioc_key_t zfs_keys_get_bootenv[] = { + /* no nvl keys */ +}; + + /* ARGSUSED */ +static int +zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl) +{ + spa_t *spa; + int error; + + if ((error = spa_open(name, &spa, FTAG)) != 0) + return (error); + spa_vdev_state_enter(spa, SCL_ALL); + error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl); + (void) spa_vdev_state_exit(spa, NULL, 0); + spa_close(spa, FTAG); + return (error); +} + #ifdef __FreeBSD__ static const zfs_ioc_key_t zfs_keys_nextboot[] = { {"command", DATA_TYPE_STRING, 0}, }; static int -zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +zfs_ioc_nextboot(const char *name, nvlist_t *innvl, nvlist_t *outnvl) { - char name[MAXNAMELEN]; spa_t *spa; - vdev_t *vd; - char *command; - uint64_t pool_guid; - uint64_t vdev_guid; int error; - if (nvlist_lookup_uint64(innvl, - ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) - return (EINVAL); - if (nvlist_lookup_uint64(innvl, - ZPOOL_CONFIG_GUID, &vdev_guid) != 0) - return (EINVAL); - command = fnvlist_lookup_string(innvl, "command"); - - mutex_enter(&spa_namespace_lock); - spa = spa_by_guid(pool_guid, vdev_guid); - if (spa != NULL) - strcpy(name, spa_name(spa)); - mutex_exit(&spa_namespace_lock); - if (spa == NULL) - return (ENOENT); - if ((error = spa_open(name, &spa, FTAG)) != 0) return (error); + spa_vdev_state_enter(spa, SCL_ALL); - vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE); - if (vd == NULL) { - 
(void) spa_vdev_state_exit(spa, NULL, ENXIO); - spa_close(spa, FTAG); - return (ENODEV); - } - error = vdev_label_write_pad2(vd, command, strlen(command)); + error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl); (void) spa_vdev_state_exit(spa, NULL, 0); - txg_wait_synced(spa->spa_dsl_pool, 0); spa_close(spa, FTAG); return (error); } @@ -6564,6 +6585,16 @@ zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen)); + + zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV, + zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE, + zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv)); + + zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV, + zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE, + zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv)); /* IOCTLS that use the legacy function signature */ Index: sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h +++ sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, Martin Matuska . All rights reserved. 
* Copyright (c) 2014 Integros [integros.com] @@ -1057,6 +1057,8 @@ ZFS_IOC_POOL_DISCARD_CHECKPOINT, ZFS_IOC_POOL_INITIALIZE, ZFS_IOC_POOL_SYNC, + ZFS_IOC_SET_BOOTENV, + ZFS_IOC_GET_BOOTENV, ZFS_IOC_LAST } zfs_ioc_t; Index: tools/tools/zfsboottest/zfsboottest.c =================================================================== --- tools/tools/zfsboottest/zfsboottest.c +++ tools/tools/zfsboottest/zfsboottest.c @@ -147,7 +147,7 @@ warn("open(%s) failed", argv[i]); continue; } - if (vdev_probe(vdev_read, &fd[i - 1], NULL) != 0) { + if (vdev_probe(vdev_read, NULL, &fd[i - 1], NULL) != 0) { warnx("vdev_probe(%s) failed", argv[i]); close(fd[i - 1]); }