Index: Makefile.inc1 =================================================================== --- Makefile.inc1 +++ Makefile.inc1 @@ -2753,7 +2753,7 @@ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} ${_cddl_lib_libzfs} \ - ${_cddl_lib_libctf} \ + ${_cddl_lib_libctf} ${_cddl_lib_libzfsbootenv} \ lib/libufs \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_secure_lib_libssl} \ @@ -2825,6 +2825,7 @@ .if ${MK_ZFS} != "no" _cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib_libzfs= cddl/lib/libzfs +_cddl_lib_libzfsbootenv= cddl/lib/libzfsbootenv cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L @@ -2832,7 +2833,8 @@ cddl/lib/libzfs__L: lib/libthr__L lib/libmd__L lib/libz__L cddl/lib/libumem__L cddl/lib/libzfs__L: cddl/lib/libuutil__L cddl/lib/libavl__L lib/libgeom__L -lib/libbe__L: cddl/lib/libzfs__L +cddl/lib/libzfsbootenv__L: cddl/lib/libzfs__L +lib/libbe__L: cddl/lib/libzfs__L cddl/lib/libzfsbootenv__L .endif _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -837,8 +837,8 @@ extern int zpool_read_label(int, nvlist_t **); extern int zpool_read_all_labels(int, nvlist_t **); extern int zpool_clear_label(int); -extern int zpool_set_bootenv(zpool_handle_t *, const char *); -extern int zpool_get_bootenv(zpool_handle_t *, char *, size_t, off_t); +extern int zpool_set_bootenv(zpool_handle_t *, const nvlist_t *); +extern int zpool_get_bootenv(zpool_handle_t *, nvlist_t **); /* is this zvol valid for use as a dump device? */ extern int zvol_check_dump_config(char *); Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c @@ -4234,7 +4234,7 @@ } int -zpool_set_bootenv(zpool_handle_t *zhp, const char *envmap) +zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap) { int error = lzc_set_bootenv(zhp->zpool_name, envmap); if (error != 0) { @@ -4247,26 +4247,22 @@ } int -zpool_get_bootenv(zpool_handle_t *zhp, char *outbuf, size_t size, off_t offset) +zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp) { nvlist_t *nvl; - int error = lzc_get_bootenv(zhp->zpool_name, &nvl);; + int error; + + nvl = NULL; + error = lzc_get_bootenv(zhp->zpool_name, &nvl); if (error != 0) { (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, dgettext(TEXT_DOMAIN, "error getting bootenv in pool '%s'"), zhp->zpool_name); - return (-1); + } else { + *nvlp = nvl; } - char *envmap = fnvlist_lookup_string(nvl, "envmap"); - if (offset >= strlen(envmap)) { - fnvlist_free(nvl); - return (0); - } - strlcpy(outbuf, envmap + offset, size); - int bytes = MIN(strlen(envmap + offset), size); - fnvlist_free(nvl); - return (bytes); + return (error); } #ifdef illumos Index: cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h =================================================================== --- cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h +++ cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h @@ -105,7 +105,7 @@ int lzc_pool_checkpoint(const char *); int lzc_pool_checkpoint_discard(const char *); -int lzc_set_bootenv(const char *, const char *); +int lzc_set_bootenv(const char *, const nvlist_t *); int lzc_get_bootenv(const char *, nvlist_t **); #ifdef __cplusplus } Index: cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c +++ cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c @@ -1215,13 +1215,9 @@ * Set the bootenv contents for the given pool. */ int -lzc_set_bootenv(const char *pool, const char *env) +lzc_set_bootenv(const char *pool, const nvlist_t *env) { - nvlist_t *args = fnvlist_alloc(); - fnvlist_add_string(args, "envmap", env); - int error = lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, args, NULL); - fnvlist_free(args); - return (error); + return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL)); } /* Index: cddl/contrib/opensolaris/lib/libzfsbootenv/common/libzfsbootenv.h =================================================================== --- /dev/null +++ cddl/contrib/opensolaris/lib/libzfsbootenv/common/libzfsbootenv.h @@ -0,0 +1,33 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Toomas Soome + */ + +#ifndef _LIBZFSBOOTENV_H +#define _LIBZFSBOOTENV_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern int lzbe_set_pair(const char *, const char *, const char *, + const char *); +extern int lzbe_set_boot_device(const char *, const char *); +extern int lzbe_get_boot_device(const char *, char **); +extern int lzbe_bootenv_print(const char *, FILE *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBZFSBOOTENV_H */ Index: cddl/contrib/opensolaris/lib/libzfsbootenv/common/lzbe_device.c =================================================================== --- /dev/null +++ cddl/contrib/opensolaris/lib/libzfsbootenv/common/lzbe_device.c @@ -0,0 +1,140 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ +/* + * Copyright 2020 Toomas Soome + */ + +#include +#include +#include +#include +#include +#include + +/* + * Store device name to zpool label bootenv area. + */ +int +lzbe_set_boot_device(const char *pool, const char *device) +{ + libzfs_handle_t *hdl; + zpool_handle_t *zphdl; + nvlist_t *nv; + char *descriptor; + int rv = -1; + + if (pool == NULL || *pool == '\0') + return (rv); + + if ((hdl = libzfs_init()) == NULL) { + return (rv); + } + + zphdl = zpool_open(hdl, pool); + if (zphdl == NULL) { + libzfs_fini(hdl); + return (rv); + } + + rv = zpool_get_bootenv(zphdl, &nv); + if (rv != 0) + nv = fnvlist_alloc(); + + /* version is mandatory */ + if (!nvlist_exists(nv, BOOTENV_VERSION)) + fnvlist_add_uint64(nv, BOOTENV_VERSION, VB_NVLIST); + + /* + * If device name is empty, remove boot device configuration. + */ + if ((device == NULL || *device == '\0')) { + if (nvlist_exists(nv, OS_BOOTONCE)) + fnvlist_remove(nv, OS_BOOTONCE); + } else { + /* + * Use device name directly if it does start with + * prefix "zfs:". Otherwise, add prefix and sufix. + */ + if (strncmp(device, "zfs:", 4) == 0) { + fnvlist_add_string(nv, OS_BOOTONCE, device); + } else { + descriptor = NULL; + if (asprintf(&descriptor, "zfs:%s:", device) > 0) + fnvlist_add_string(nv, OS_BOOTONCE, descriptor); + else + rv = ENOMEM; + free(descriptor); + } + } + + rv = zpool_set_bootenv(zphdl, nv); + if (rv != 0) + fprintf(stderr, "%s\n", libzfs_error_description(hdl)); + + fnvlist_free(nv); + zpool_close(zphdl); + libzfs_fini(hdl); + return (rv); +} + +/* + * Return boot device name from bootenv, if set. + */ +int +lzbe_get_boot_device(const char *pool, char **device) +{ + libzfs_handle_t *hdl; + zpool_handle_t *zphdl; + nvlist_t *nv; + char *val; + int rv = -1; + + if (pool == NULL || *pool == '\0' || device == NULL) + return (rv); + + if ((hdl = libzfs_init()) == NULL) { + return (rv); + } + + zphdl = zpool_open(hdl, pool); + if (zphdl == NULL) { + libzfs_fini(hdl); + return (rv); + } + + rv = zpool_get_bootenv(zphdl, &nv); + if (rv == 0) { + rv = nvlist_lookup_string(nv, OS_BOOTONCE, &val); + if (rv == 0) { + /* + * zfs device descriptor is in form of "zfs:dataset:", + * we only do need dataset name. + */ + if (strncmp(val, "zfs:", 4) == 0) + val += 4; + val = strdup(val); + if (val != NULL) { + size_t len = strlen(val); + + if (val[len - 1] == ':') + val[len - 1] = '\0'; + *device = val; + } else { + rv = ENOMEM; + } + } + nvlist_free(nv); + } + + zpool_close(zphdl); + libzfs_fini(hdl); + return (rv); +} Index: cddl/contrib/opensolaris/lib/libzfsbootenv/common/lzbe_pair.c =================================================================== --- /dev/null +++ cddl/contrib/opensolaris/lib/libzfsbootenv/common/lzbe_pair.c @@ -0,0 +1,62 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ +/* + * Copyright 2020 Toomas Soome + */ + +#include +#include +#include +#include + +/* + * Store pair defined by key, type and value. + */ +int +lzbe_set_pair(const char *pool, const char *key, const char *type, + const char *value) +{ + libzfs_handle_t *hdl; + zpool_handle_t *zphdl; + nvlist_t *nv; + int rv = -1; + + if (pool == NULL || *pool == '\0' || type == NULL) + return (rv); + + if ((hdl = libzfs_init()) == NULL) { + return (rv); + } + + zphdl = zpool_open(hdl, pool); + if (zphdl == NULL) { + libzfs_fini(hdl); + return (rv); + } + + rv = zpool_get_bootenv(zphdl, &nv); + if (rv == 0) { + if (strcmp(type, "DATA_TYPE_STRING") == 0) { + if ((value == NULL || *value == '\0') && + nvlist_exists(nv, key)) { + fnvlist_remove(nv, key); + } else { + fnvlist_add_string(nv, key, value); + } + } + rv = zpool_set_bootenv(zphdl, nv); + nvlist_free(nv); + } + + zpool_close(zphdl); + libzfs_fini(hdl); + return (rv); +} Index: cddl/contrib/opensolaris/lib/libzfsbootenv/common/lzbe_util.c =================================================================== --- /dev/null +++ cddl/contrib/opensolaris/lib/libzfsbootenv/common/lzbe_util.c @@ -0,0 +1,53 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ +/* + * Copyright 2020 Toomas Soome + */ + +#include +#include +#include +#include + +/* + * Output bootenv information. + */ +int +lzbe_bootenv_print(const char *pool, FILE *of) +{ + libzfs_handle_t *hdl; + zpool_handle_t *zphdl; + nvlist_t *nv; + int rv = -1; + + if (pool == NULL || *pool == '\0' || of == NULL) + return (rv); + + if ((hdl = libzfs_init()) == NULL) { + return (rv); + } + + zphdl = zpool_open(hdl, pool); + if (zphdl == NULL) { + libzfs_fini(hdl); + return (rv); + } + + rv = zpool_get_bootenv(zphdl, &nv); + if (rv == 0) { + nvlist_print(of, nv); + nvlist_free(nv); + } + + zpool_close(zphdl); + libzfs_fini(hdl); + return (rv); +} Index: cddl/lib/Makefile =================================================================== --- cddl/lib/Makefile +++ cddl/lib/Makefile @@ -11,6 +11,7 @@ libuutil \ ${_libzfs_core} \ ${_libzfs} \ + ${_libzfsbootenv} \ ${_libzpool} \ SUBDIR.${MK_TESTS}+= tests @@ -18,6 +19,7 @@ .if ${MK_ZFS} != "no" _libzfs_core= libzfs_core _libzfs= libzfs +_libzfsbootenv= libzfsbootenv .if ${MK_LIBTHR} != "no" _libzpool= libzpool .endif @@ -27,6 +29,7 @@ SUBDIR_DEPEND_libzfs_core= libnvpair SUBDIR_DEPEND_libzfs= libavl libnvpair libumem libuutil libzfs_core SUBDIR_DEPEND_libzpool= libavl libnvpair libumem +SUBDIR_DEPEND_libzfsbootenv= libzfs libnvpair SUBDIR_PARALLEL= Index: cddl/lib/libzfsbootenv/Makefile =================================================================== --- /dev/null +++ cddl/lib/libzfsbootenv/Makefile @@ -0,0 +1,31 @@ +#! $FreeBSD$ + +.PATH: ${SRCTOP}/cddl/contrib/opensolaris/lib/libzfsbootenv/common + +PACKAGE= runtime +LIB= zfsbootenv +SHLIB_MAJOR= 1 +WARNS?= 1 + +LIBADD+= zfs +LIBADD+= nvpair + +INCS= libzfsbootenv.h +SRCS= lzbe_device.c lzbe_util.c lzbe_pair.c + +CFLAGS+= -I${.CURDIR} +CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include +CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfsbootenv/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs_core/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair +CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris +CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs +CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/head + +CFLAGS+= -DNEED_SOLARIS_BOOLEAN + +.include Index: lib/libbe/Makefile =================================================================== --- lib/libbe/Makefile +++ lib/libbe/Makefile @@ -17,8 +17,10 @@ LIBADD+= zfs LIBADD+= nvpair +LIBADD+= zfsbootenv CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfsbootenv/common CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem Index: lib/libbe/be.h =================================================================== --- lib/libbe/be.h +++ lib/libbe/be.h @@ -81,6 +81,7 @@ void be_prop_list_free(nvlist_t *be_list); int be_activate(libbe_handle_t *, const char *, bool); +int be_deactivate(libbe_handle_t *, const char *, bool); bool be_is_auto_snapshot_name(libbe_handle_t *, const char *); Index: lib/libbe/be.c =================================================================== --- lib/libbe/be.c +++ lib/libbe/be.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "be.h" #include "be_impl.h" @@ -1219,43 +1220,20 @@ } #endif /* SOON */ -static int -be_set_nextboot(libbe_handle_t *lbh, nvlist_t *config, uint64_t pool_guid, - const char *zfsdev) -{ - nvlist_t **child; - uint64_t vdev_guid; - int c, children; - - if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &child, - &children) == 0) { - for (c = 0; c < children; ++c) - if (be_set_nextboot(lbh, child[c], pool_guid, zfsdev) != 0) - return (1); - return (0); - } - - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, - &vdev_guid) != 0) { - return (1); - } - - if (zpool_nextboot(lbh->lzh, pool_guid, vdev_guid, zfsdev) != 0) { - perror("ZFS_IOC_NEXTBOOT failed"); - return (1); - } - - return (0); -} - /* - * Deactivate old BE dataset; currently just sets canmount=noauto + * Deactivate old BE dataset; currently just sets canmount=noauto or + * resets boot once configuration. */ -static int -be_deactivate(libbe_handle_t *lbh, const char *ds) +int +be_deactivate(libbe_handle_t *lbh, const char *ds, bool temporary) { zfs_handle_t *zfs; + if (temporary) { + return (lzbe_set_boot_device( + zpool_get_name(lbh->active_phandle), NULL)); + } + if ((zfs = zfs_open(lbh->lzh, ds, ZFS_TYPE_DATASET)) == NULL) return (1); if (zfs_prop_set(zfs, "canmount", "noauto") != 0) @@ -1268,10 +1246,8 @@ be_activate(libbe_handle_t *lbh, const char *bootenv, bool temporary) { char be_path[BE_MAXPATHLEN]; - char buf[BE_MAXPATHLEN]; - nvlist_t *config, *dsprops, *vdevs; + nvlist_t *dsprops; char *origin; - uint64_t pool_guid; zfs_handle_t *zhp; int err; @@ -1282,27 +1258,10 @@ return (set_error(lbh, err)); if (temporary) { - config = zpool_get_config(lbh->active_phandle, NULL); - if (config == NULL) - /* config should be fetchable... */ - return (set_error(lbh, BE_ERR_UNKNOWN)); - - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &pool_guid) != 0) - /* Similarly, it shouldn't be possible */ - return (set_error(lbh, BE_ERR_UNKNOWN)); - - /* Expected format according to zfsbootcfg(8) man */ - snprintf(buf, sizeof(buf), "zfs:%s:", be_path); - - /* We have no config tree */ - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &vdevs) != 0) - return (set_error(lbh, BE_ERR_NOPOOL)); - - return (be_set_nextboot(lbh, vdevs, pool_guid, buf)); + return (lzbe_set_boot_device( + zpool_get_name(lbh->active_phandle), be_path)); } else { - if (be_deactivate(lbh, lbh->bootfs) != 0) + if (be_deactivate(lbh, lbh->bootfs, false) != 0) return (-1); /* Obtain bootenv zpool */ Index: lib/libbe/be_impl.h =================================================================== --- lib/libbe/be_impl.h +++ lib/libbe/be_impl.h @@ -63,6 +63,7 @@ nvlist_t *list; libbe_handle_t *lbh; bool single_object; /* list will contain props directly */ + char *bootonce; } prop_data_t; int prop_list_builder_cb(zfs_handle_t *, void *); Index: lib/libbe/be_info.c =================================================================== --- lib/libbe/be_info.c +++ lib/libbe/be_info.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include "be.h" #include "be_impl.h" @@ -108,6 +109,7 @@ data.lbh = lbh; data.list = dsnvl; data.single_object = false; + data.bootonce = NULL; return (be_proplist_update(&data)); } @@ -121,6 +123,7 @@ data.lbh = lbh; data.list = props; data.single_object = true; + data.bootonce = NULL; if ((snap_hdl = zfs_open(lbh->lzh, name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT)) == NULL) return (BE_ERR_ZFSOPEN); @@ -140,6 +143,7 @@ data.lbh = lbh; data.list = props; data.single_object = false; + data.bootonce = NULL; if ((ds_hdl = zfs_open(lbh->lzh, name, ZFS_TYPE_FILESYSTEM)) == NULL) return (BE_ERR_ZFSOPEN); @@ -179,6 +183,10 @@ dataset = zfs_get_name(zfs_hdl); nvlist_add_string(props, "dataset", dataset); + if (data->bootonce != NULL && + strcmp(dataset, data->bootonce) == 0) + nvlist_add_boolean_value(props, "bootonce", true); + name = strrchr(dataset, '/') + 1; nvlist_add_string(props, "name", name); @@ -245,6 +253,9 @@ if ((root_hdl = zfs_open(data->lbh->lzh, data->lbh->root, ZFS_TYPE_FILESYSTEM)) == NULL) return (BE_ERR_ZFSOPEN); + + (void) lzbe_get_boot_device(zpool_get_name(data->lbh->active_phandle), + &data->bootonce); /* XXX TODO: some error checking here */ zfs_iter_filesystems(root_hdl, prop_list_builder_cb, data); Index: lib/libbe/libbe.3 =================================================================== --- lib/libbe/libbe.3 +++ lib/libbe/libbe.3 @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 16, 2019 +.Dd July 22, 2020 .Dt LIBBE 3 .Os .Sh NAME @@ -78,7 +78,11 @@ .Pp .Ft int .Fn be_activate "libbe_handle_t *hdl" "const char *be_name" "bool temporary" +.Pp .Ft int +.Fn be_deactivate "libbe_handle_t *hdl" "const char *be_name" "bool temporary" +.Pp +.Ft int .Fn be_destroy "libbe_handle_t *hdl" "const char *be_name" "int options" .Pp .Ft void @@ -270,8 +274,24 @@ .Fa temporary flag is set, then it will be active for the next boot only, as done by .Xr zfsbootcfg 8 . -Next boot functionality is currently only available when booting in x86 BIOS -mode. +.Pp +The +.Fn be_deactivate +function deactivates a boot environment. +If the +.Fa temporary +flag is set, then it will cause removal of boot once configuration, set by +.Fn be_activate +function or by +.Xr zfsbootcfg 8 . +If the +.Fa temporary +flag is not set, +.Fn be_deactivate +function will set zfs +.Dv canmount +property to +.Dv noauto . .Pp The .Fn be_destroy Index: libexec/rc/rc.conf =================================================================== --- libexec/rc/rc.conf +++ libexec/rc/rc.conf @@ -64,6 +64,7 @@ # ZFS support zfs_enable="NO" # Set to YES to automatically mount ZFS file systems +zfs_bootonce_activate="NO" # Set YES to make successful bootonce BE permanent # ZFSD support zfsd_enable="NO" # Set to YES to automatically start the ZFS fault Index: libexec/rc/rc.d/zfsbe =================================================================== --- libexec/rc/rc.d/zfsbe +++ libexec/rc/rc.d/zfsbe @@ -48,6 +48,21 @@ done } +activate_bootonce() +{ + local _dev + local _bootonce + local _be + + _dev=$1 + _be=${_dev##*/} + + _bootonce="`kenv -q zfs-bootonce`" + if [ "$_bootonce" = "zfs:${_dev}:" ] ; then + bectl activate $_be + fi +} + be_start() { if [ `$SYSCTL_N security.jail.jailed` -eq 1 ]; then @@ -57,6 +72,9 @@ [ $_mp = "/" ] || continue if [ $_type = "zfs" ] ; then mount_subordinate $_dev + if checkyesno zfs_bootonce_activate; then + activate_bootonce $_dev + fi fi break done Index: rescue/rescue/Makefile =================================================================== --- rescue/rescue/Makefile +++ rescue/rescue/Makefile @@ -129,7 +129,7 @@ CRUNCH_LIBS+= -l80211 -lalias -lcam -lncursesw -ldevstat -lipsec -llzma .if ${MK_ZFS} != "no" CRUNCH_LIBS+= -lavl -lzpool -lzfs_core -lzfs -lnvpair -lpthread -luutil -lumem -CRUNCH_LIBS+= -lbe +CRUNCH_LIBS+= -lbe -lzfsbootenv .else # liblzma needs pthread CRUNCH_LIBS+= -lpthread Index: sbin/bectl/Makefile =================================================================== --- sbin/bectl/Makefile +++ sbin/bectl/Makefile @@ -11,7 +11,9 @@ LIBADD+= jail LIBADD+= nvpair LIBADD+= util +LIBADD+= zfsbootenv +CFLAGS+= -I${SRCTOP}/lib/libzfsbootenv CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common Index: sbin/bectl/bectl.8 =================================================================== --- sbin/bectl/bectl.8 +++ sbin/bectl/bectl.8 @@ -17,7 +17,7 @@ .\" .\" $FreeBSD$ .\" -.Dd August 17, 2020 +.Dd August 19, 2020 .Dt BECTL 8 .Os .Sh NAME @@ -26,7 +26,7 @@ .Sh SYNOPSIS .Nm .Cm activate -.Op Fl t +.Op Fl t | Fl T .Ar beName .Nm .Cm check @@ -95,7 +95,7 @@ .Bl -tag -width activate .It Xo .Cm activate -.Op Fl t +.Op Fl t | Fl T .Ar beName .Xc Activate the given @@ -104,6 +104,13 @@ If the .Fl t flag is given, this takes effect only for the next boot. +Flag +.Fl T +removes temporary boot once configuration. +Without temporary configuration, the next boot will use zfs dataset specified +in boot pool +.Ar bootfs +property. .It Xo .Cm check .Xc @@ -260,8 +267,10 @@ .Pq Em \&N ; active on reboot .Pq Em \&R ; -or both -.Pq Em \&NR . +is used on next boot once +.Pq Em \&T ; +or combination of +.Pq Em \&NRT . .Pp .Bl -tag -width indent .It Fl a Index: sbin/bectl/bectl.c =================================================================== --- sbin/bectl/bectl.c +++ sbin/bectl/bectl.c @@ -72,6 +72,7 @@ "\tbectl add (path)*\n" #endif "\tbectl activate [-t] beName\n" + "\tbectl activate [-T]\n" "\tbectl check\n" "\tbectl create [-r] [-e {nonActiveBe | beName@snapshot}] beName\n" "\tbectl create [-r] beName@snapshot\n" @@ -139,14 +140,22 @@ bectl_cmd_activate(int argc, char *argv[]) { int err, opt; - bool temp; + bool temp, reset; temp = false; - while ((opt = getopt(argc, argv, "t")) != -1) { + reset = false; + while ((opt = getopt(argc, argv, "tT")) != -1) { switch (opt) { case 't': + if (reset) + return (usage(false)); temp = true; break; + case 'T': + if (temp) + return (usage(false)); + reset = true; + break; default: fprintf(stderr, "bectl activate: unknown option '-%c'\n", optopt); @@ -157,11 +166,18 @@ argc -= optind; argv += optind; - if (argc != 1) { + if (argc != 1 && (!reset || argc != 0)) { fprintf(stderr, "bectl activate: wrong number of arguments\n"); return (usage(false)); } + if (reset) { + if ((err = be_deactivate(be, NULL, reset)) == 0) + printf("Temporary activation removed\n"); + else + printf("Failed to remove temporary activation\n"); + return (err); + } /* activate logic goes here */ if ((err = be_activate(be, argv[0], temp)) != 0) Index: sbin/bectl/bectl_list.c =================================================================== --- sbin/bectl/bectl_list.c +++ sbin/bectl/bectl_list.c @@ -182,7 +182,7 @@ const char *oname; char *dsname, *propstr; int active_colsz; - boolean_t active_now, active_reboot; + boolean_t active_now, active_reboot, bootonce; dsname = NULL; originprops = NULL; @@ -228,6 +228,11 @@ if (nvlist_lookup_boolean_value(dsprops, "nextboot", &active_reboot) == 0 && active_reboot) { printf("R"); + active_colsz--; + } + if (nvlist_lookup_boolean_value(dsprops, "bootonce", + &bootonce) == 0 && bootonce) { + printf("T"); active_colsz--; } if (active_colsz == pc->active_colsz_def) { Index: sbin/zfsbootcfg/Makefile =================================================================== --- sbin/zfsbootcfg/Makefile +++ sbin/zfsbootcfg/Makefile @@ -2,26 +2,10 @@ # $FreeBSD$ PROG= zfsbootcfg -WARNS?= 1 MAN= zfsbootcfg.8 -LIBADD+=zfs -LIBADD+=nvpair -LIBADD+=umem -LIBADD+=uutil -LIBADD+=geom +LIBADD+=zfsbootenv -CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include -CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs_core/common -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair -CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris -CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs -CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common -CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/head - -CFLAGS+= -DNEED_SOLARIS_BOOLEAN +CFLAGS+=-I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfsbootenv/common .include Index: sbin/zfsbootcfg/zfsbootcfg.8 =================================================================== --- sbin/zfsbootcfg/zfsbootcfg.8 +++ sbin/zfsbootcfg/zfsbootcfg.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 24, 2017 +.Dd July 22, 2020 .Dt ZFSBOOTCFG 8 .Os .Sh NAME @@ -33,39 +33,69 @@ .Sh SYNOPSIS .Nm .Ao Ar options Ac +.Nm +.Op Fl k Ar key +.Op Fl p +.Op Fl t Ar type +.Op Fl v Ar value +.Op Fl z Ar pool +.Nm .Sh DESCRIPTION .Nm is used to set .Xr boot.config 5 Ns -style options to be used by -.Xr zfsboot 8 -or +.Xr zfsboot 8 , .Xr gptzfsboot 8 +or +.Xr loader 8 the next time the machine is booted. Once .Xr zfsboot 8 or .Xr gptzfsboot 8 +or +.Xr loader 8 reads the information, it is deleted. If booting fails, the machine automatically reverts to the previous boot configuration. -The information is stored in a special reserved area of a ZFS pool. -.Xr zfsboot 8 -or -.Xr gptzfsboot 8 -read the boot option information from the first disk found in the first -ZFS pool found. +The information is stored in a special boot environment area of a ZFS pool. +.Pp +If used without arguments, +.Nm +will output the current boot configuration, if set. +.Pp +The following options are supported by +.Nm : +.Bl -tag -width indent +.It Fl k Ar key +Define key for +.Ao key , value Ac +pair. +.It Fl p +Print all information stored in ZFS pool bootenv area. +.It Fl t Ar type +Set type of +.Ar value +used in +.Ao key , value Ac +pair. +Currently, the only supported type is +.Ar DATA_TYPE_STRING . +.It Fl v Ar value +Define value for +.Ao key , value Ac +pair. +.It Fl z Ar pool +Operate on +.Ar pool . +.El .Sh ENVIRONMENT -.Bl -tag -width vfs.zfs.boot.primary_pool -compact -.It Ev vfs.zfs.boot.primary_pool +.Bl -tag -width vfs.root.mountfrom -compact +.It Ev vfs.root.mountfrom The .Xr kenv 1 variable that identifies a pool for which the options are written. -.It Ev vfs.zfs.boot.primary_vdev -The -.Xr kenv 1 -variable that identifies a disk within the pool where the options -are written. .El .Sh EXAMPLES Try to boot to a new @@ -81,7 +111,9 @@ .Dl "zfsbootcfg """" .Sh SEE ALSO .Xr boot.config 5 , +.Xr bectl 8 , .Xr gptzfsboot 8 , +.Xr loader 8 , .Xr zfsboot 8 .Sh HISTORY .Nm @@ -90,23 +122,3 @@ .Sh AUTHORS This manual page was written by .An Andriy Gapon Aq Mt avg@FreeBSD.org . -.Sh CAVEATS -At the moment, -.Nm -uses the -.Ev vfs.zfs.boot.primary_pool -and -.Ev vfs.zfs.boot.primary_vdev -.Xr kenv 1 -variables to determine a ZFS pool and a disk in it where the options -are to be stored. -The variables are set by the ZFS boot chain, so there is an assumption -that the same boot disk is going to be used for the next reboot. -There is no -.Nm -option to specify a different pool or a different disk. -.Pp -.Nm -should be extended to install new -.Xr zfsboot 8 -blocks in a ZFS pool. Index: sbin/zfsbootcfg/zfsbootcfg.c =================================================================== --- sbin/zfsbootcfg/zfsbootcfg.c +++ sbin/zfsbootcfg/zfsbootcfg.c @@ -32,115 +32,105 @@ #include #include #include +#include #include #include +#include -#include +#include -/* Keep in sync with zfsboot.c. */ -#define MAX_COMMAND_LEN 512 +#ifndef ZFS_MAXNAMELEN +#define ZFS_MAXNAMELEN 256 +#endif int -install_bootonce(libzfs_handle_t *hdl, uint64_t pool_guid, nvlist_t *nv, - const char * const data) +main(int argc, char * const *argv) { - nvlist_t **child; - uint_t children = 0; - uint64_t guid; + char buf[ZFS_MAXNAMELEN], *name; + const char *key, *value, *type; int rv; + bool print; - (void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, - &children); - - for (int c = 0; c < children; c++) { - rv = install_bootonce(hdl, pool_guid, child[c], data); + name = NULL; + key = NULL; + type = NULL; + value = NULL; + print = false; + while ((rv = getopt(argc, argv, "k:pt:v:z:")) != -1) { + switch (rv) { + case 'k': + key = optarg; + break; + case 'p': + print = true; + break; + case 't': + type = optarg; + break; + case 'v': + value = optarg; + break; + case 'z': + name = optarg; + break; + } } - if (children > 0) - return (rv); + argc -= optind; + argv += optind; - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) { - perror("can't get vdev guid"); - return (1); - } - if (zpool_nextboot(hdl, pool_guid, guid, data) != 0) { - perror("ZFS_IOC_NEXTBOOT failed"); - return (1); - } - return (0); -} + if (argc == 1) + value = argv[0]; -int main(int argc, const char * const *argv) -{ - char buf[32], *name; - libzfs_handle_t *hdl; - zpool_handle_t *zphdl; - uint64_t pool_guid; - nvlist_t *nv, *config; - int rv; - int len; - - if (argc != 2) { + if (argc > 1) { fprintf(stderr, "usage: zfsbootcfg \n"); return (1); } - len = strlen(argv[1]); - if (len >= MAX_COMMAND_LEN) { - fprintf(stderr, "options string is too long\n"); - return (1); - } + if (name == NULL) { + rv = kenv(KENV_GET, "vfs.root.mountfrom", buf, sizeof(buf)); + if (rv <= 0) { + perror("can't get vfs.root.mountfrom"); + return (1); + } - if (kenv(KENV_GET, "vfs.root.mountfrom", buf, sizeof(buf)) <= 0) { - perror("can't get vfs.root.mountfrom"); - return (1); + if (strncmp(buf, "zfs:", 4) == 0) { + name = strchr(buf + 4, '/'); + if (name != NULL) + *name = '\0'; + name = buf + 4; + } else { + perror("not a zfs root"); + return (1); + } } - if (strncmp(buf, "zfs:", 4) == 0) { - name = strchr(buf + 4, '/'); - if (name != NULL) - *name = '\0'; - name = buf + 4; - } else { - perror("not a zfs root"); - return (1); - } - - if ((hdl = libzfs_init()) == NULL) { - (void) fprintf(stderr, "internal error: failed to " - "initialize ZFS library\n"); - return (1); - } + rv = 0; + if (key != NULL || value != NULL) { + if (type == NULL) + type = "DATA_TYPE_STRING"; - zphdl = zpool_open(hdl, name); - if (zphdl == NULL) { - perror("can't open pool"); - libzfs_fini(hdl); - return (1); - } + if (key == NULL || strcmp(key, "command") == 0) + rv = lzbe_set_boot_device(name, value); + else + rv = lzbe_set_pair(name, key, type, value); - pool_guid = zpool_get_prop_int(zphdl, ZPOOL_PROP_GUID, NULL); + if (rv == 0) + printf("zfs bootenv is successfully written\n"); + else + printf("error: %d\n", rv); + } else if (!print) { + char *ptr; - config = zpool_get_config(zphdl, NULL); - if (config == NULL) { - perror("can't get pool config"); - zpool_close(zphdl); - libzfs_fini(hdl); - return (1); + if (lzbe_get_boot_device(name, &ptr) == 0) { + printf("zfs:%s:\n", ptr); + free(ptr); + } } - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) != 0) { - perror("failed to get vdev tree"); - zpool_close(zphdl); - libzfs_fini(hdl); - return (1); + if (print) { + rv = lzbe_bootenv_print(name, stdout); } - rv = install_bootonce(hdl, pool_guid, nv, argv[1]); - - zpool_close(zphdl); - libzfs_fini(hdl); - if (rv == 0) - printf("zfs next boot options are successfully written\n"); return (rv); } Index: share/mk/bsd.libnames.mk =================================================================== --- share/mk/bsd.libnames.mk +++ share/mk/bsd.libnames.mk @@ -165,6 +165,7 @@ LIBZ?= ${LIBDESTDIR}${LIBDIR_BASE}/libz.a LIBZFS?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfs.a LIBZFS_CORE?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfs_core.a +LIBZFSBOOTENV?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfsbootenv.a LIBZPOOL?= ${LIBDESTDIR}${LIBDIR_BASE}/libzpool.a # enforce the 2 -lpthread and -lc to always be the last in that exact order Index: share/mk/src.libnames.mk =================================================================== --- share/mk/src.libnames.mk +++ share/mk/src.libnames.mk @@ -198,6 +198,7 @@ z \ zfs_core \ zfs \ + zfsbootenv \ zpool \ .if ${MK_BLACKLIST} != "no" @@ -377,9 +378,10 @@ _DP_ipf= kvm _DP_zfs= md pthread umem util uutil m nvpair avl bsdxml geom nvpair z \ zfs_core +_DP_zfsbootenv= zfs nvpair _DP_zfs_core= nvpair _DP_zpool= md pthread z nvpair avl umem -_DP_be= zfs nvpair +_DP_be= zfs nvpair zfsbootenv # OFED support .if ${MK_OFED} != "no" @@ -588,6 +590,7 @@ LIBUUTILDIR= ${OBJTOP}/cddl/lib/libuutil LIBZFSDIR= ${OBJTOP}/cddl/lib/libzfs LIBZFS_COREDIR= ${OBJTOP}/cddl/lib/libzfs_core +LIBZFSBOOTENVDIR= ${OBJTOP}/cddl/lib/libzfsbootenv LIBZPOOLDIR= ${OBJTOP}/cddl/lib/libzpool # OFED support Index: stand/common/bootstrap.h =================================================================== --- stand/common/bootstrap.h +++ stand/common/bootstrap.h @@ -345,6 +345,26 @@ void dev_cleanup(void); +/* + * nvstore API. + */ +typedef int (nvstore_getter_cb_t)(void *, const char *, void **); +typedef int (nvstore_setter_cb_t)(void *, const char *, const void *, size_t); +typedef int (nvstore_unset_cb_t)(void *, const char *); +typedef int (nvstore_print_cb_t)(void *, void *); +typedef int (nvstore_iterate_cb_t)(void *, int (*)(void *, void *)); + +typedef struct nvs_callbacks { + nvstore_getter_cb_t *nvs_getter; + nvstore_setter_cb_t *nvs_setter; + nvstore_unset_cb_t *nvs_unset; + nvstore_print_cb_t *nvs_print; + nvstore_iterate_cb_t *nvs_iterate; +} nvs_callbacks_t; + +int nvstore_init(const char *, nvs_callbacks_t *, void *); +int nvstore_fini(const char *); + #ifndef CTASSERT #define CTASSERT(x) _Static_assert(x, "compile-time assertion failed") #endif Index: stand/common/nvstore.c =================================================================== --- /dev/null +++ stand/common/nvstore.c @@ -0,0 +1,291 @@ +/*- + * Copyright 2020 Toomas Soome + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Big Theory Statement. + * + * nvstore is abstraction layer to implement data read/write to different + * types of non-volatile storage. + * + * User interfaces: + * Provide mapping via environment: setenv/unsetenv/putenv. Access via + * environment functions/commands is available once nvstore has + * attached the backend and stored textual data is mapped to environment. + * + * Provide command "nvstore" to create new data instances. + * + * API: TBD. + * nvstore_init(): attach new backend and create the environment mapping. + * nvstore_fini: detach backend and unmap the related environment. + * + * The disk based storage, such as UFS file or ZFS bootenv label area, is + * only accessible after root file system is set. Root file system change + * will switch the back end storage. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include "stand.h" + +typedef struct nvstore { + char *nvs_name; + void *nvs_data; + nvs_callbacks_t *nvs_cb; + STAILQ_ENTRY(nvstore) nvs_next; +} nvstore_t; + +typedef STAILQ_HEAD(store_list, nvstore) nvstore_list_t; + +nvstore_list_t stores = STAILQ_HEAD_INITIALIZER(stores); + +static nvstore_t * +nvstore_get_store(const char *name) +{ + nvstore_t *st; + + st = NULL; + + STAILQ_FOREACH(st, &stores, nvs_next) { + if (strcmp(name, st->nvs_name) == 0) + break; + } + + return (st); +} + +int +nvstore_init(const char *name, nvs_callbacks_t *cb, void *data) +{ + nvstore_t *st; + + st = nvstore_get_store(name); + if (st != NULL) + return (EEXIST); + + if ((st = malloc(sizeof (*st))) == NULL) + return (ENOMEM); + + if ((st->nvs_name = strdup(name)) == NULL) { + free(st); + return (ENOMEM); + } + + st->nvs_data = data; + st->nvs_cb = cb; + + STAILQ_INSERT_TAIL(&stores, st, nvs_next); + return (0); +} + +int +nvstore_fini(const char *name) +{ + nvstore_t *st; + + st = nvstore_get_store(name); + if (st == NULL) + return (ENOENT); + + STAILQ_REMOVE(&stores, st, nvstore, nvs_next); + + free(st->nvs_name); + free(st->nvs_data); + free(st); + return (0); +} + +int +nvstore_print(nvstore_t *st) +{ + + return (st->nvs_cb->nvs_iterate(st->nvs_data, st->nvs_cb->nvs_print)); +} + +int +nvstore_get_var(nvstore_t *st, const char *name, void **data) +{ + + return (st->nvs_cb->nvs_getter(st->nvs_data, name, data)); +} + +int +nvstore_set_var(nvstore_t *st, const char *name, void *data, size_t size) +{ + + return (st->nvs_cb->nvs_setter(st->nvs_data, name, data, size)); +} + +int +nvstore_unset_var(nvstore_t *st, const char *name) +{ + + return (st->nvs_cb->nvs_unset(st->nvs_data, name)); +} + +COMMAND_SET(nvstore, "nvstore", "manage non-volatile data", command_nvstore); + +static void +nvstore_usage(const char *me) +{ + printf("Usage:\t%s -l\n", me); + printf("\t%s store -l\n", me); + printf("\t%s store [-t type] key=value\n", me); + printf("\t%s store -g key\n", me); + printf("\t%s store -d key\n", me); +} + +/* + * Usage: nvstore -l # list stores + * nvstore store -l # list data in store + * nvstore store [-t type] key=value + * nvstore store -g key # get value + * nvstore store -d key # delete key + */ +static int +command_nvstore(int argc, char *argv[]) +{ + int c; + bool list, get, delete; + nvstore_t *st; + char *me, *name; + + me = argv[0]; + optind = 1; + optreset = 1; + + list = false; + while ((c = getopt(argc, argv, "l")) != -1) { + switch (c) { + case 'l': + list = true; + break; + case '?': + default: + return (CMD_ERROR); + } + } + + argc -= optind; + argv += optind; + + if (argc == 0) { + if (list) { + if (STAILQ_EMPTY(&stores)) { + printf("No configured nvstores\n"); + return (CMD_OK); + } + printf("List of configured nvstores:\n"); + STAILQ_FOREACH(st, &stores, nvs_next) { + printf("\t%s\n", st->nvs_name); + } + return (CMD_OK); + } + nvstore_usage(me); + return (CMD_ERROR); + } + + if (argc == 0 || (argc != 0 && list)) { + nvstore_usage(me); + return (CMD_ERROR); + } + + st = nvstore_get_store(argv[0]); + if (st == NULL) { + nvstore_usage(me); + return (CMD_ERROR); + } + + optind = 1; + optreset = 1; + name = NULL; + get = delete = false; + + while ((c = getopt(argc, argv, "d:g:l")) != -1) { + switch (c) { + case 'd': + if (list || get) { + nvstore_usage(me); + return (CMD_ERROR); + } + name = optarg; + delete = true; + break; + case 'g': + if (delete || list) { + nvstore_usage(me); + return (CMD_ERROR); + } + name = optarg; + get = true; + break; + case 'l': + if (delete || get) { + nvstore_usage(me); + return (CMD_ERROR); + } + list = true; + break; + case '?': + default: + return (CMD_ERROR); + } + } + + argc -= optind; + argv += optind; + + if (list) { + (void) nvstore_print(st); + return (CMD_OK); + } + + if (delete && name != NULL) { + (void) nvstore_unset_var(st, name); + return (CMD_OK); + } + + if (get && name != NULL) { + char *ptr = NULL; + + if (nvstore_get_var(st, name, (void **)&ptr) == 0) + printf("%s = %s\n", name, ptr); + return (CMD_OK); + } + + if (argc == 2) { + c = nvstore_set_var(st, argv[0], argv[1], + strlen(argv[1]) + 1); + if (c != 0) + printf("error: %d\n", c); + return (CMD_OK); + } + + nvstore_usage(me); + return (CMD_OK); +} Index: stand/efi/boot1/Makefile =================================================================== --- stand/efi/boot1/Makefile +++ stand/efi/boot1/Makefile @@ -37,6 +37,7 @@ CFLAGS.zfs_module.c+= -I${SYSDIR}/cddl/boot/zfs CFLAGS.zfs_module.c+= -I${SYSDIR}/crypto/skein CFLAGS.zfs_module.c+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common +CFLAGS.zfs_module.c+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/fs/zfs CFLAGS.zfs_module.c+= -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 CFLAGS+= -DEFI_ZFS_BOOT Index: stand/efi/boot1/zfs_module.c =================================================================== --- stand/efi/boot1/zfs_module.c +++ stand/efi/boot1/zfs_module.c @@ -124,7 +124,7 @@ } memcpy(tdev, dev, sizeof(*dev)); - if (vdev_probe(vdev_read, tdev, &spa) != 0) { + if (vdev_probe(vdev_read, NULL, tdev, &spa) != 0) { free(tdev); return (EFI_UNSUPPORTED); } Index: stand/efi/loader/Makefile =================================================================== --- stand/efi/loader/Makefile +++ stand/efi/loader/Makefile @@ -27,6 +27,7 @@ CFLAGS+= -I${.CURDIR}/../loader .if ${MK_LOADER_ZFS} != "no" CFLAGS+= -I${ZFSSRC} +CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/fs/zfs CFLAGS+= -DEFI_ZFS_BOOT HAVE_ZFS= yes .endif Index: stand/efi/loader/main.c =================================================================== --- stand/efi/loader/main.c +++ stand/efi/loader/main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -260,7 +261,7 @@ { char *devname; struct zfs_devdesc currdev; - char *buf = NULL; + char *bootonce; bool rv; currdev.dd.d_dev = &zfs_dev; @@ -273,13 +274,16 @@ rv = sanity_check_currdev(); if (rv) { - buf = malloc(VDEV_PAD_SIZE); - if (buf != NULL) { - if (zfs_nextboot(&currdev, buf, VDEV_PAD_SIZE) == 0) { - printf("zfs nextboot: %s\n", buf); - set_currdev(buf); + bootonce = malloc(VDEV_PAD_SIZE); + if (bootonce != NULL) { + if (zfs_get_bootonce(&currdev, OS_BOOTONCE, bootonce, + VDEV_PAD_SIZE) == 0) { + printf("zfs bootonce: %s\n", bootonce); + set_currdev(bootonce); + setenv("zfs-bootonce", bootonce, 1); } - free(buf); + free(bootonce); + (void) zfs_attach_nvstore(&currdev); } } return (rv); Index: stand/i386/gptzfsboot/Makefile =================================================================== --- stand/i386/gptzfsboot/Makefile +++ stand/i386/gptzfsboot/Makefile @@ -33,6 +33,7 @@ -I${SYSDIR}/crypto/skein \ -I${SYSDIR}/cddl/boot/zfs \ -I${SYSDIR}/cddl/contrib/opensolaris/uts/common \ + -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/fs/zfs \ -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \ -I${BOOTSRC}/i386/btx/lib \ -I${BOOTSRC}/i386/boot2 \ Index: stand/i386/loader/Makefile =================================================================== --- stand/i386/loader/Makefile +++ stand/i386/loader/Makefile @@ -37,6 +37,10 @@ LIBFIREWIRE= ${BOOTOBJ}/i386/libfirewire/libfirewire.a .endif +.if ${MK_LOADER_ZFS} == "yes" +CFLAGS.main.c+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/fs/zfs +.endif + .if exists(${.CURDIR}/help.i386) HELP_FILES= ${.CURDIR}/help.i386 .endif Index: stand/i386/loader/main.c =================================================================== --- stand/i386/loader/main.c +++ stand/i386/loader/main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include "bootstrap.h" @@ -274,6 +275,7 @@ struct i386_devdesc new_currdev; #ifdef LOADER_ZFS_SUPPORT char buf[20]; + char *bootonce; #endif int biosdev = -1; @@ -321,6 +323,16 @@ new_currdev.d_kind.zfs.root_guid = 0; } new_currdev.dd.d_dev = &zfs_dev; + + if ((bootonce = malloc(VDEV_PAD_SIZE)) != NULL) { + if (zfs_get_bootonce(&new_currdev, OS_BOOTONCE_USED, + bootonce, VDEV_PAD_SIZE) == 0) { + setenv("zfs-bootonce", bootonce, 1); + } + free(bootonce); + (void) zfs_attach_nvstore(&new_currdev); + } + #endif } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) { /* The passed-in boot device is bad */ Index: stand/i386/zfsboot/Makefile =================================================================== --- stand/i386/zfsboot/Makefile +++ stand/i386/zfsboot/Makefile @@ -32,6 +32,7 @@ -I${SYSDIR}/crypto/skein \ -I${SYSDIR}/cddl/boot/zfs \ -I${SYSDIR}/cddl/contrib/opensolaris/uts/common \ + -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/fs/zfs \ -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \ -I${BOOTSRC}/i386/boot2 \ -Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \ Index: stand/i386/zfsboot/zfsboot.c =================================================================== --- stand/i386/zfsboot/zfsboot.c +++ stand/i386/zfsboot/zfsboot.c @@ -26,6 +26,7 @@ #endif #include #include +#include #include #include @@ -218,16 +219,26 @@ if (bdev != NULL && bdev->dd.d_dev->dv_type == DEVT_ZFS) { /* set up proper device name string for ZFS */ strncpy(boot_devname, zfs_fmtdev(bdev), sizeof (boot_devname)); - if (zfs_nextboot(bdev, cmd, sizeof(cmd)) == 0) { + if (zfs_get_bootonce(bdev, OS_BOOTONCE, cmd, + sizeof(cmd)) == 0) { + nvlist_t *benv; + nextboot = 1; memcpy(cmddup, cmd, sizeof(cmd)); if (parse_cmd()) { if (!OPT_CHECK(RBX_QUIET)) - printf("failed to parse pad2 area\n"); + printf("failed to parse bootonce " + "command\n"); exit(0); } if (!OPT_CHECK(RBX_QUIET)) - printf("zfs nextboot: %s\n", cmddup); + printf("zfs bootonce: %s\n", cmddup); + + if (zfs_get_bootenv(bdev, &benv) == 0) { + nvlist_add_string(benv, OS_BOOTONCE_USED, + cmddup); + zfs_set_bootenv(bdev, benv); + } /* Do not process this command twice */ *cmd = 0; } Index: stand/libsa/zfs/Makefile.inc =================================================================== --- stand/libsa/zfs/Makefile.inc +++ stand/libsa/zfs/Makefile.inc @@ -10,8 +10,9 @@ CFLAGS+= -I${LDRSRC} CFLAGS+= -I${SYSDIR}/cddl/boot/zfs CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common +CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/fs/zfs CFLAGS+= -I${SYSDIR}/crypto/skein -CFLAGS.zfs.c+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4 +CFLAGS.zfs.c+= -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 CFLAGS+= -Wformat -Wall Index: stand/libsa/zfs/libzfs.h =================================================================== --- stand/libsa/zfs/libzfs.h +++ stand/libsa/zfs/libzfs.h @@ -26,15 +26,15 @@ * $FreeBSD$ */ +#ifndef _BOOT_LIBZFS_H_ +#define _BOOT_LIBZFS_H_ + #include #ifdef LOADER_GELI_SUPPORT #include #endif -#ifndef _BOOT_LIBZFS_H_ -#define _BOOT_LIBZFS_H_ - #define ZFS_MAXNAMELEN 256 /* @@ -54,6 +54,7 @@ #define NV_UNIQUE_NAME_TYPE 0x2 #define NV_ALIGN4(x) (((x) + 3) & ~3) +#define NV_ALIGN(x) (((x) + 7) & ~7) /* * nvlist header. @@ -109,24 +110,60 @@ nvlist_t *nvlist_create(int); void nvlist_destroy(nvlist_t *); -nvlist_t *nvlist_import(const uint8_t *, char, char); +nvlist_t *nvlist_import(const char *, size_t); +int nvlist_export(nvlist_t *); int nvlist_remove(nvlist_t *, const char *, data_type_t); -void nvlist_print(nvlist_t *, unsigned int); +void nvpair_print(nvp_header_t *, unsigned int); +void nvlist_print(const nvlist_t *, unsigned int); +char *nvstring_get(nv_string_t *); int nvlist_find(const nvlist_t *, const char *, data_type_t, int *, void *, int *); -int nvlist_next(nvlist_t *); +nvp_header_t *nvlist_next_nvpair(nvlist_t *, nvp_header_t *); +int nvlist_add_boolean(nvlist_t *, const char *); +int nvlist_add_boolean_value(nvlist_t *, const char *, boolean_t); +int nvlist_add_byte(nvlist_t *, const char *, uint8_t); +int nvlist_add_int8(nvlist_t *, const char *, int8_t); +int nvlist_add_uint8(nvlist_t *, const char *, uint8_t); +int nvlist_add_int16(nvlist_t *, const char *, int16_t); +int nvlist_add_uint16(nvlist_t *, const char *, uint16_t); +int nvlist_add_int32(nvlist_t *, const char *, int32_t); +int nvlist_add_uint32(nvlist_t *, const char *, uint32_t); +int nvlist_add_int64(nvlist_t *, const char *, int64_t); +int nvlist_add_uint64(nvlist_t *, const char *, uint64_t); +int nvlist_add_string(nvlist_t *, const char *, const char *); +int nvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint32_t); +int nvlist_add_byte_array(nvlist_t *, const char *, uint8_t *, uint32_t); +int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint32_t); +int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint32_t); +int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint32_t); +int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint32_t); +int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint32_t); +int nvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint32_t); +int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint32_t); +int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint32_t); +int nvlist_add_string_array(nvlist_t *, const char *, char * const *, uint32_t); +int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *); +int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint32_t); + int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path); char *zfs_fmtdev(void *vdev); -int zfs_nextboot(void *vdev, char *buf, size_t size); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); +int zfs_get_bootonce(void *, const char *, char *, size_t); +int zfs_get_bootenv(void *, nvlist_t **); +int zfs_set_bootenv(void *, nvlist_t *); +int zfs_attach_nvstore(void *); uint64_t ldi_get_size(void *); + void init_zfs_boot_options(const char *currdev); int zfs_bootenv(const char *name); +int zfs_attach_nvstore(void *); int zfs_belist_add(const char *name, uint64_t __unused); int zfs_set_env(void); + +nvlist_t *vdev_read_bootenv(vdev_t *); extern struct devsw zfs_dev; extern struct fs_ops zfs_fsops; Index: stand/libsa/zfs/nvlist.c =================================================================== --- stand/libsa/zfs/nvlist.c +++ stand/libsa/zfs/nvlist.c @@ -27,129 +27,278 @@ __FBSDID("$FreeBSD$"); #include +#include #include +#include #include #include "libzfs.h" +enum xdr_op { + XDR_OP_ENCODE = 1, + XDR_OP_DECODE = 2 +}; + typedef struct xdr { - int (*xdr_getint)(const struct xdr *, const void *, int *); + enum xdr_op xdr_op; + int (*xdr_getint)(struct xdr *, int *); + int (*xdr_putint)(struct xdr *, int); + int (*xdr_getuint)(struct xdr *, unsigned *); + int (*xdr_putuint)(struct xdr *, unsigned); + const uint8_t *xdr_buf; + uint8_t *xdr_idx; + size_t xdr_buf_size; } xdr_t; -static int xdr_int(const xdr_t *, const void *, int *); -static int mem_int(const xdr_t *, const void *, int *); -static void nvlist_decode_nvlist(const xdr_t *, nvlist_t *); -static int nvlist_size(const xdr_t *, const uint8_t *); +static int nvlist_xdr_nvlist(xdr_t *, nvlist_t *); +static bool nvlist_size_xdr(xdr_t *, size_t *); +static bool nvlist_size_native(xdr_t *, size_t *); +static bool xdr_int(xdr_t *, int *); +static bool xdr_u_int(xdr_t *, unsigned *); -/* - * transform data from network to host. - */ -xdr_t ntoh = { - .xdr_getint = xdr_int -}; +typedef int (*xdrproc_t)(xdr_t *, void *); -/* - * transform data from host to host. - */ -xdr_t native = { - .xdr_getint = mem_int -}; +/* Basic primitives for XDR translation operations, getint and putint. */ +static int +_getint(struct xdr *xdr, int *ip) +{ + *ip = be32dec(xdr->xdr_idx); + return (sizeof (int)); +} -/* - * transform data from host to network. - */ -xdr_t hton = { - .xdr_getint = xdr_int -}; - static int -xdr_short(const xdr_t *xdr, const uint8_t *buf, short *ip) +_putint(struct xdr *xdr, int i) { - int i, rv; + int *ip = (int *)xdr->xdr_idx; - rv = xdr->xdr_getint(xdr, buf, &i); - *ip = i; - return (rv); + *ip = htobe32(i); + return (sizeof (int)); } static int -xdr_u_short(const xdr_t *xdr, const uint8_t *buf, unsigned short *ip) +_getuint(struct xdr *xdr, unsigned *ip) { - unsigned u; - int rv; + *ip = be32dec(xdr->xdr_idx); + return (sizeof (unsigned)); +} - rv = xdr->xdr_getint(xdr, buf, &u); - *ip = u; +static int +_putuint(struct xdr *xdr, unsigned i) +{ + unsigned *up = (unsigned *)xdr->xdr_idx; + + *up = htobe32(i); + return (sizeof (int)); +} + +/* + * XDR data translations. + */ +static bool +xdr_short(xdr_t *xdr, short *ip) +{ + int i; + bool rv; + + i = *ip; + if ((rv = xdr_int(xdr, &i))) { + if (xdr->xdr_op == XDR_OP_DECODE) + *ip = i; + } return (rv); } -static int -xdr_int(const xdr_t *xdr __unused, const void *buf, int *ip) +static bool +xdr_u_short(xdr_t *xdr, unsigned short *ip) { - *ip = be32dec(buf); - return (sizeof(int)); + unsigned u; + bool rv; + + u = *ip; + if ((rv = xdr_u_int(xdr, &u))) { + if (xdr->xdr_op == XDR_OP_DECODE) + *ip = u; + } + return (rv); } -static int -xdr_u_int(const xdr_t *xdr __unused, const void *buf, unsigned *ip) +/* + * translate xdr->xdr_idx, increment it by size of int. + */ +static bool +xdr_int(xdr_t *xdr, int *ip) { - *ip = be32dec(buf); - return (sizeof(unsigned)); + bool rv = false; + int *i = (int *)xdr->xdr_idx; + + if (xdr->xdr_idx + sizeof (int) > xdr->xdr_buf + xdr->xdr_buf_size) + return (rv); + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + xdr->xdr_idx += xdr->xdr_putint(xdr, *ip); + rv = true; + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + xdr->xdr_idx += xdr->xdr_getint(xdr, i); + *ip = *i; + rv = true; + break; + } + return (rv); } -static int -xdr_string(const xdr_t *xdr, const void *buf, nv_string_t *s) +/* + * translate xdr->xdr_idx, increment it by size of unsigned int. + */ +static bool +xdr_u_int(xdr_t *xdr, unsigned *ip) { - int size; + bool rv = false; + unsigned *u = (unsigned *)xdr->xdr_idx; - size = xdr->xdr_getint(xdr, buf, &s->nv_size); - size = NV_ALIGN4(size + s->nv_size); - return (size); + if (xdr->xdr_idx + sizeof (unsigned) > xdr->xdr_buf + xdr->xdr_buf_size) + return (rv); + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + xdr->xdr_idx += xdr->xdr_putuint(xdr, *ip); + rv = true; + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + xdr->xdr_idx += xdr->xdr_getuint(xdr, u); + *ip = *u; + rv = true; + break; + } + return (rv); } -static int -xdr_int64(const xdr_t *xdr, const uint8_t *buf, int64_t *lp) +static bool +xdr_int64(xdr_t *xdr, int64_t *lp) { - int hi, rv; + int hi; unsigned lo; + bool rv = false; - rv = xdr->xdr_getint(xdr, buf, &hi); - rv += xdr->xdr_getint(xdr, buf + rv, &lo); - *lp = (((int64_t)hi) << 32) | lo; + if (xdr->xdr_idx + sizeof (int64_t) > xdr->xdr_buf + xdr->xdr_buf_size) + return (rv); + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *lp, store to buf */ + hi = *lp >> 32; + lo = *lp & UINT32_MAX; + xdr->xdr_idx += xdr->xdr_putint(xdr, hi); + xdr->xdr_idx += xdr->xdr_putint(xdr, lo); + rv = true; + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + xdr->xdr_idx += xdr->xdr_getint(xdr, &hi); + xdr->xdr_idx += xdr->xdr_getuint(xdr, &lo); + *lp = (((int64_t)hi) << 32) | lo; + rv = true; + } return (rv); } -static int -xdr_uint64(const xdr_t *xdr, const uint8_t *buf, uint64_t *lp) +static bool +xdr_uint64(xdr_t *xdr, uint64_t *lp) { unsigned hi, lo; - int rv; + bool rv = false; - rv = xdr->xdr_getint(xdr, buf, &hi); - rv += xdr->xdr_getint(xdr, buf + rv, &lo); - *lp = (((int64_t)hi) << 32) | lo; + if (xdr->xdr_idx + sizeof (uint64_t) > xdr->xdr_buf + xdr->xdr_buf_size) + return (rv); + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + hi = *lp >> 32; + lo = *lp & UINT32_MAX; + xdr->xdr_idx += xdr->xdr_putint(xdr, hi); + xdr->xdr_idx += xdr->xdr_putint(xdr, lo); + rv = true; + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + xdr->xdr_idx += xdr->xdr_getuint(xdr, &hi); + xdr->xdr_idx += xdr->xdr_getuint(xdr, &lo); + *lp = (((uint64_t)hi) << 32) | lo; + rv = true; + } return (rv); } -static int -xdr_char(const xdr_t *xdr, const uint8_t *buf, char *cp) +static bool +xdr_char(xdr_t *xdr, char *cp) { - int i, rv; + int i; + bool rv = false; - rv = xdr->xdr_getint(xdr, buf, &i); - *cp = i; + i = *cp; + if ((rv = xdr_int(xdr, &i))) { + if (xdr->xdr_op == XDR_OP_DECODE) + *cp = i; + } return (rv); } -/* - * read native data. - */ -static int -mem_int(const xdr_t *xdr, const void *buf, int *i) +static bool +xdr_string(xdr_t *xdr, nv_string_t *s) { - *i = *(int *)buf; - return (sizeof(int)); + int size = 0; + bool rv = false; + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + size = s->nv_size; + if (xdr->xdr_idx + sizeof (unsigned) + NV_ALIGN4(size) > + xdr->xdr_buf + xdr->xdr_buf_size) + break; + xdr->xdr_idx += xdr->xdr_putuint(xdr, s->nv_size); + xdr->xdr_idx += NV_ALIGN4(size); + rv = true; + break; + + case XDR_OP_DECODE: + if (xdr->xdr_idx + sizeof (unsigned) > + xdr->xdr_buf + xdr->xdr_buf_size) + break; + size = xdr->xdr_getuint(xdr, &s->nv_size); + size = NV_ALIGN4(size + s->nv_size); + if (xdr->xdr_idx + size > xdr->xdr_buf + xdr->xdr_buf_size) + break; + xdr->xdr_idx += size; + rv = true; + break; + } + return (rv); } +static bool +xdr_array(xdr_t *xdr, const unsigned nelem, const xdrproc_t elproc) +{ + bool rv = true; + + for (unsigned i = 0; i < nelem; i++) { + if (!elproc(xdr, xdr->xdr_idx)) + return (false); + } + return (rv); +} + +/* + * nvlist management functions. + */ void nvlist_destroy(nvlist_t *nvl) { @@ -184,14 +333,14 @@ nvlist_t *nvl; nvs_data_t *nvs; - nvl = calloc(1, sizeof(*nvl)); + nvl = calloc(1, sizeof (*nvl)); if (nvl == NULL) return (nvl); nvl->nv_header.nvh_encoding = NV_ENCODE_XDR; nvl->nv_header.nvh_endian = _BYTE_ORDER == _LITTLE_ENDIAN; - nvl->nv_asize = nvl->nv_size = sizeof(*nvs); + nvl->nv_asize = nvl->nv_size = sizeof (*nvs); nvs = calloc(1, nvl->nv_asize); if (nvs == NULL) { free(nvl); @@ -205,32 +354,59 @@ return (nvl); } -static void -nvlist_nvp_decode(const xdr_t *xdr, nvlist_t *nvl, nvp_header_t *nvph) +static bool +nvlist_xdr_nvp(xdr_t *xdr, nvlist_t *nvl) { nv_string_t *nv_string; nv_pair_data_t *nvp_data; nvlist_t nvlist; + unsigned type, nelem; + xdr_t nv_xdr; - nv_string = (nv_string_t *)nvl->nv_idx; - nvl->nv_idx += xdr_string(xdr, &nv_string->nv_size, nv_string); - nvp_data = (nv_pair_data_t *)nvl->nv_idx; + nv_string = (nv_string_t *)xdr->xdr_idx; + if (!xdr_string(xdr, nv_string)) { + return (false); + } + nvp_data = (nv_pair_data_t *)xdr->xdr_idx; - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_type, &nvp_data->nv_type); - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_nelem, &nvp_data->nv_nelem); + type = nvp_data->nv_type; + nelem = nvp_data->nv_nelem; + if (!xdr_u_int(xdr, &type) || !xdr_u_int(xdr, &nelem)) + return (false); - switch (nvp_data->nv_type) { + switch (type) { case DATA_TYPE_NVLIST: case DATA_TYPE_NVLIST_ARRAY: bzero(&nvlist, sizeof (nvlist)); - nvlist.nv_data = &nvp_data->nv_data[0]; + nvlist.nv_data = xdr->xdr_idx; nvlist.nv_idx = nvlist.nv_data; - for (int i = 0; i < nvp_data->nv_nelem; i++) { - nvlist.nv_asize = - nvlist_size(xdr, nvlist.nv_data); - nvlist_decode_nvlist(xdr, &nvlist); - nvl->nv_idx = nvlist.nv_idx; - nvlist.nv_data = nvlist.nv_idx; + + /* Set up xdr for this nvlist. */ + nv_xdr = *xdr; + nv_xdr.xdr_buf = nvlist.nv_data; + nv_xdr.xdr_idx = nvlist.nv_data; + nv_xdr.xdr_buf_size = + nvl->nv_data + nvl->nv_size - nvlist.nv_data; + + for (unsigned i = 0; i < nelem; i++) { + if (xdr->xdr_op == XDR_OP_ENCODE) { + if (!nvlist_size_native(&nv_xdr, + &nvlist.nv_size)) + return (false); + } else { + if (!nvlist_size_xdr(&nv_xdr, + &nvlist.nv_size)) + return (false); + } + if (nvlist_xdr_nvlist(xdr, &nvlist) != 0) + return (false); + + nvlist.nv_data = nv_xdr.xdr_idx; + nvlist.nv_idx = nv_xdr.xdr_idx; + + nv_xdr.xdr_buf = nv_xdr.xdr_idx; + nv_xdr.xdr_buf_size = + nvl->nv_data + nvl->nv_size - nvlist.nv_data; } break; @@ -240,128 +416,331 @@ case DATA_TYPE_BYTE: case DATA_TYPE_INT8: case DATA_TYPE_UINT8: - nvl->nv_idx += xdr_char(xdr, &nvp_data->nv_data[0], - (char *)&nvp_data->nv_data[0]); + if (!xdr_char(xdr, (char *)&nvp_data->nv_data[0])) + return (false); break; case DATA_TYPE_INT16: - nvl->nv_idx += xdr_short(xdr, &nvp_data->nv_data[0], - (short *)&nvp_data->nv_data[0]); + if (!xdr_short(xdr, (short *)&nvp_data->nv_data[0])) + return (false); break; case DATA_TYPE_UINT16: - nvl->nv_idx += xdr_u_short(xdr, &nvp_data->nv_data[0], - (unsigned short *)&nvp_data->nv_data[0]); + if (!xdr_u_short(xdr, (unsigned short *)&nvp_data->nv_data[0])) + return (false); break; case DATA_TYPE_BOOLEAN_VALUE: case DATA_TYPE_INT32: - nvl->nv_idx += xdr_int(xdr, &nvp_data->nv_data[0], - (int *)&nvp_data->nv_data[0]); + if (!xdr_int(xdr, (int *)&nvp_data->nv_data[0])) + return (false); break; case DATA_TYPE_UINT32: - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_data[0], - (unsigned *)&nvp_data->nv_data[0]); + if (!xdr_u_int(xdr, (unsigned *)&nvp_data->nv_data[0])) + return (false); break; + case DATA_TYPE_HRTIME: case DATA_TYPE_INT64: - nvl->nv_idx += xdr_int64(xdr, &nvp_data->nv_data[0], - (int64_t *)&nvp_data->nv_data[0]); + if (!xdr_int64(xdr, (int64_t *)&nvp_data->nv_data[0])) + return (false); break; case DATA_TYPE_UINT64: - nvl->nv_idx += xdr_uint64(xdr, &nvp_data->nv_data[0], - (uint64_t *)&nvp_data->nv_data[0]); + if (!xdr_uint64(xdr, (uint64_t *)&nvp_data->nv_data[0])) + return (false); break; + case DATA_TYPE_BYTE_ARRAY: case DATA_TYPE_STRING: nv_string = (nv_string_t *)&nvp_data->nv_data[0]; - nvl->nv_idx += xdr_string(xdr, &nvp_data->nv_data[0], - nv_string); + if (!xdr_string(xdr, nv_string)) + return (false); + break; + case DATA_TYPE_STRING_ARRAY: + nv_string = (nv_string_t *)&nvp_data->nv_data[0]; + for (unsigned i = 0; i < nelem; i++) { + if (!xdr_string(xdr, nv_string)) + return (false); + nv_string = (nv_string_t *)xdr->xdr_idx; + } break; + + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + if (!xdr_array(xdr, nelem, (xdrproc_t)xdr_u_int)) + return (false); + break; + + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + if (!xdr_array(xdr, nelem, (xdrproc_t)xdr_uint64)) + return (false); + break; } + return (true); } -static void -nvlist_decode_nvlist(const xdr_t *xdr, nvlist_t *nvl) +static int +nvlist_xdr_nvlist(xdr_t *xdr, nvlist_t *nvl) { nvp_header_t *nvph; - nvs_data_t *nvs = (nvs_data_t *)nvl->nv_data; + nvs_data_t *nvs; + unsigned encoded_size, decoded_size; + int rv; - nvl->nv_idx = nvl->nv_data; - nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_version, - &nvs->nvl_version); - nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_nvflag, - &nvs->nvl_nvflag); - + nvs = (nvs_data_t *)xdr->xdr_idx; nvph = &nvs->nvl_pair; - nvl->nv_idx += xdr->xdr_getint(xdr, - (const uint8_t *)&nvph->encoded_size, &nvph->encoded_size); - nvl->nv_idx += xdr->xdr_getint(xdr, - (const uint8_t *)&nvph->decoded_size, &nvph->decoded_size); - while (nvph->encoded_size && nvph->decoded_size) { - nvlist_nvp_decode(xdr, nvl, nvph); + if (!xdr_u_int(xdr, &nvs->nvl_version)) + return (EINVAL); + if (!xdr_u_int(xdr, &nvs->nvl_nvflag)) + return (EINVAL); - nvph = (nvp_header_t *)(nvl->nv_idx); - nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->encoded_size, - &nvph->encoded_size); - nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->decoded_size, - &nvph->decoded_size); + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + + if (xdr->xdr_op == XDR_OP_ENCODE) { + if (!xdr_u_int(xdr, &nvph->encoded_size)) + return (EINVAL); + if (!xdr_u_int(xdr, &nvph->decoded_size)) + return (EINVAL); + } else { + xdr->xdr_idx += 2 * sizeof (unsigned); } + + rv = 0; + while (encoded_size && decoded_size) { + if (!nvlist_xdr_nvp(xdr, nvl)) + return (EINVAL); + + nvph = (nvp_header_t *)(xdr->xdr_idx); + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + if (xdr->xdr_op == XDR_OP_ENCODE) { + if (!xdr_u_int(xdr, &nvph->encoded_size)) + return (EINVAL); + if (!xdr_u_int(xdr, &nvph->decoded_size)) + return (EINVAL); + } else { + xdr->xdr_idx += 2 * sizeof (unsigned); + } + } + return (rv); } -static int -nvlist_size(const xdr_t *xdr, const uint8_t *stream) +/* + * Calculate nvlist size, translating encoded_size and decoded_size. + */ +static bool +nvlist_size_xdr(xdr_t *xdr, size_t *size) { - const uint8_t *p, *pair; + uint8_t *pair; unsigned encoded_size, decoded_size; - p = stream; - p += 2 * sizeof(unsigned); + xdr->xdr_idx += 2 * sizeof (unsigned); - pair = p; - p += xdr->xdr_getint(xdr, p, &encoded_size); - p += xdr->xdr_getint(xdr, p, &decoded_size); + pair = xdr->xdr_idx; + if (!xdr_u_int(xdr, &encoded_size) || !xdr_u_int(xdr, &decoded_size)) + return (false); + while (encoded_size && decoded_size) { - p = pair + encoded_size; - pair = p; - p += xdr->xdr_getint(xdr, p, &encoded_size); - p += xdr->xdr_getint(xdr, p, &decoded_size); + xdr->xdr_idx = pair + encoded_size; + pair = xdr->xdr_idx; + if (!xdr_u_int(xdr, &encoded_size) || + !xdr_u_int(xdr, &decoded_size)) + return (false); } - return (p - stream); + *size = xdr->xdr_idx - xdr->xdr_buf; + + return (true); } +nvp_header_t * +nvlist_next_nvpair(nvlist_t *nvl, nvp_header_t *nvh) +{ + uint8_t *pair; + unsigned encoded_size, decoded_size; + xdr_t xdr; + + xdr.xdr_buf = nvl->nv_data; + xdr.xdr_idx = nvl->nv_data; + xdr.xdr_buf_size = nvl->nv_size; + + xdr.xdr_idx += 2 * sizeof (unsigned); + + /* Skip tp current pair */ + if (nvh != NULL) { + xdr.xdr_idx = (uint8_t *)nvh; + } + + pair = xdr.xdr_idx; + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + encoded_size = *(unsigned *)xdr.xdr_idx; + xdr.xdr_idx += sizeof (unsigned); + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + decoded_size = *(unsigned *)xdr.xdr_idx; + xdr.xdr_idx += sizeof (unsigned); + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + while (encoded_size && decoded_size) { + if (nvh == NULL) + return ((nvp_header_t *)pair); + + xdr.xdr_idx = pair + encoded_size; + nvh = (nvp_header_t *)xdr.xdr_idx; + + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + encoded_size = *(unsigned *)xdr.xdr_idx; + xdr.xdr_idx += sizeof (unsigned); + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + decoded_size = *(unsigned *)xdr.xdr_idx; + xdr.xdr_idx += sizeof (unsigned); + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + if (encoded_size != 0 && decoded_size != 0) { + return (nvh); + } + } + return (NULL); +} + /* + * Calculate nvlist size by walking in memory data. + */ +static bool +nvlist_size_native(xdr_t *xdr, size_t *size) +{ + uint8_t *pair; + unsigned encoded_size, decoded_size; + + xdr->xdr_idx += 2 * sizeof (unsigned); + + pair = xdr->xdr_idx; + if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) + return (false); + + encoded_size = *(unsigned *)xdr->xdr_idx; + xdr->xdr_idx += sizeof (unsigned); + if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) + return (false); + decoded_size = *(unsigned *)xdr->xdr_idx; + xdr->xdr_idx += sizeof (unsigned); + while (encoded_size && decoded_size) { + xdr->xdr_idx = pair + encoded_size; + pair = xdr->xdr_idx; + if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) + return (false); + encoded_size = *(unsigned *)xdr->xdr_idx; + xdr->xdr_idx += sizeof (unsigned); + if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) + return (false); + decoded_size = *(unsigned *)xdr->xdr_idx; + xdr->xdr_idx += sizeof (unsigned); + } + *size = xdr->xdr_idx - xdr->xdr_buf; + + return (true); +} + +/* + * Export nvlist to byte stream format. + */ +int +nvlist_export(nvlist_t *nvl) +{ + int rv; + xdr_t xdr = { + .xdr_op = XDR_OP_ENCODE, + .xdr_putint = _putint, + .xdr_putuint = _putuint, + .xdr_buf = nvl->nv_data, + .xdr_idx = nvl->nv_data, + .xdr_buf_size = nvl->nv_size + }; + + if (nvl->nv_header.nvh_encoding != NV_ENCODE_XDR) + return (ENOTSUP); + + nvl->nv_idx = nvl->nv_data; + rv = nvlist_xdr_nvlist(&xdr, nvl); + + return (rv); +} + +/* * Import nvlist from byte stream. * Determine the stream size and allocate private copy. * Then translate the data. */ nvlist_t * -nvlist_import(const uint8_t *stream, char encoding, char endian) +nvlist_import(const char *stream, size_t size) { nvlist_t *nvl; + xdr_t xdr = { + .xdr_op = XDR_OP_DECODE, + .xdr_getint = _getint, + .xdr_getuint = _getuint + }; - if (encoding != NV_ENCODE_XDR) + /* Check the nvlist head. */ + if (stream[0] != NV_ENCODE_XDR || + (stream[1] != '\0' && stream[1] != '\1') || + stream[2] != '\0' || stream[3] != '\0' || + be32toh(*(uint32_t *)(stream + 4)) != NV_VERSION || + be32toh(*(uint32_t *)(stream + 8)) != NV_UNIQUE_NAME) return (NULL); - nvl = malloc(sizeof(*nvl)); + nvl = malloc(sizeof (*nvl)); if (nvl == NULL) return (nvl); - nvl->nv_asize = nvl->nv_size = nvlist_size(&ntoh, stream); + nvl->nv_header.nvh_encoding = stream[0]; + nvl->nv_header.nvh_endian = stream[1]; + nvl->nv_header.nvh_reserved1 = stream[2]; + nvl->nv_header.nvh_reserved2 = stream[3]; + + xdr.xdr_buf = xdr.xdr_idx = (uint8_t *)stream + 4; + xdr.xdr_buf_size = size - 4; + + if (!nvlist_size_xdr(&xdr, &nvl->nv_asize)) { + free(nvl); + return (NULL); + } + nvl->nv_size = nvl->nv_asize; nvl->nv_data = malloc(nvl->nv_asize); if (nvl->nv_data == NULL) { free(nvl); return (NULL); } nvl->nv_idx = nvl->nv_data; - bcopy(stream, nvl->nv_data, nvl->nv_asize); + bcopy(stream + 4, nvl->nv_data, nvl->nv_asize); - nvlist_decode_nvlist(&ntoh, nvl); - nvl->nv_idx = nvl->nv_data; + xdr.xdr_buf = xdr.xdr_idx = nvl->nv_data; + xdr.xdr_buf_size = nvl->nv_asize; + + if (nvlist_xdr_nvlist(&xdr, nvl) != 0) { + free(nvl->nv_data); + free(nvl); + nvl = NULL; + } + return (nvl); } @@ -377,30 +756,37 @@ nv_string_t *nvp_name; nv_pair_data_t *nvp_data; size_t size; + xdr_t xdr; if (nvl == NULL || nvl->nv_data == NULL || name == NULL) return (EINVAL); - head = nvl->nv_data; - data = (nvs_data_t *)head; + /* Make sure the nvlist size is set correct */ + xdr.xdr_idx = nvl->nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = nvl->nv_size; + if (!nvlist_size_native(&xdr, &nvl->nv_size)) + return (EINVAL); + + data = (nvs_data_t *)nvl->nv_data; nvp = &data->nvl_pair; /* first pair in nvlist */ head = (uint8_t *)nvp; while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { - nvp_name = (nv_string_t *)(head + sizeof(*nvp)); + nvp_name = (nv_string_t *)(nvp + 1); - nvp_data = (nv_pair_data_t *) - NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + - nvp_name->nv_size); + nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] + + NV_ALIGN4(nvp_name->nv_size)); - if (memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && - nvp_data->nv_type == type) { + if (strlen(name) == nvp_name->nv_size && + memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && + (nvp_data->nv_type == type || type == DATA_TYPE_UNKNOWN)) { /* * set tail to point to next nvpair and size * is the length of the tail. */ tail = head + nvp->encoded_size; - size = nvl->nv_data + nvl->nv_size - tail; + size = nvl->nv_size - (tail - nvl->nv_data); /* adjust the size of the nvlist. */ nvl->nv_size -= nvp->encoded_size; @@ -414,6 +800,51 @@ return (ENOENT); } +static int +clone_nvlist(const nvlist_t *nvl, const uint8_t *ptr, unsigned size, + nvlist_t **nvlist) +{ + nvlist_t *nv; + + nv = calloc(1, sizeof (*nv)); + if (nv == NULL) + return (ENOMEM); + + nv->nv_header = nvl->nv_header; + nv->nv_asize = size; + nv->nv_size = size; + nv->nv_data = malloc(nv->nv_asize); + if (nv->nv_data == NULL) { + free(nv); + return (ENOMEM); + } + + bcopy(ptr, nv->nv_data, nv->nv_asize); + *nvlist = nv; + return (0); +} + +/* + * Return the next nvlist in an nvlist array. + */ +static uint8_t * +nvlist_next(const uint8_t *ptr) +{ + nvs_data_t *data; + nvp_header_t *nvp; + + data = (nvs_data_t *)ptr; + nvp = &data->nvl_pair; /* first pair in nvlist */ + + while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { + nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); + } + return ((uint8_t *)nvp + sizeof (*nvp)); +} + +/* + * Note: nvlist and nvlist array must be freed by caller. + */ int nvlist_find(const nvlist_t *nvl, const char *name, data_type_t type, int *elementsp, void *valuep, int *sizep) @@ -422,7 +853,9 @@ nvp_header_t *nvp; nv_string_t *nvp_name; nv_pair_data_t *nvp_data; - nvlist_t *nvlist; + nvlist_t **nvlist, *nv; + uint8_t *ptr; + int rv; if (nvl == NULL || nvl->nv_data == NULL || name == NULL) return (EINVAL); @@ -431,19 +864,23 @@ nvp = &data->nvl_pair; /* first pair in nvlist */ while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { - nvp_name = (nv_string_t *)((uint8_t *)nvp + sizeof(*nvp)); + nvp_name = (nv_string_t *)((uint8_t *)nvp + sizeof (*nvp)); + if (nvl->nv_data + nvl->nv_size < + nvp_name->nv_data + nvp_name->nv_size) + return (EIO); nvp_data = (nv_pair_data_t *) NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); - if (memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && - nvp_data->nv_type == type) { + if (strlen(name) == nvp_name->nv_size && + memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && + (nvp_data->nv_type == type || type == DATA_TYPE_UNKNOWN)) { if (elementsp != NULL) *elementsp = nvp_data->nv_nelem; switch (nvp_data->nv_type) { case DATA_TYPE_UINT64: - *(uint64_t *)valuep = + *(uint64_t *)valuep = *(uint64_t *)nvp_data->nv_data; return (0); case DATA_TYPE_STRING: @@ -455,146 +892,631 @@ &nvp_name->nv_data[0]; return (0); case DATA_TYPE_NVLIST: + ptr = &nvp_data->nv_data[0]; + rv = clone_nvlist(nvl, ptr, + nvlist_next(ptr) - ptr, &nv); + if (rv == 0) { + *(nvlist_t **)valuep = nv; + } + return (rv); + case DATA_TYPE_NVLIST_ARRAY: - nvlist = malloc(sizeof(*nvlist)); - if (nvlist != NULL) { - nvlist->nv_header = nvl->nv_header; - nvlist->nv_asize = 0; - nvlist->nv_size = 0; - nvlist->nv_idx = NULL; - nvlist->nv_data = &nvp_data->nv_data[0]; - *(nvlist_t **)valuep = nvlist; - return (0); + nvlist = calloc(nvp_data->nv_nelem, + sizeof (nvlist_t *)); + if (nvlist == NULL) + return (ENOMEM); + ptr = &nvp_data->nv_data[0]; + rv = 0; + for (unsigned i = 0; i < nvp_data->nv_nelem; + i++) { + rv = clone_nvlist(nvl, ptr, + nvlist_next(ptr) - ptr, &nvlist[i]); + if (rv != 0) + goto error; + ptr = nvlist_next(ptr); } - return (ENOMEM); + *(nvlist_t ***)valuep = nvlist; + return (rv); } return (EIO); } /* Not our pair, skip to next. */ nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); + if (nvl->nv_data + nvl->nv_size < (uint8_t *)nvp) + return (EIO); } return (ENOENT); +error: + for (unsigned i = 0; i < nvp_data->nv_nelem; i++) { + free(nvlist[i]->nv_data); + free(nvlist[i]); + } + free(nvlist); + return (rv); } -/* - * Return the next nvlist in an nvlist array. - */ -int -nvlist_next(nvlist_t *nvl) +static int +get_value_size(data_type_t type, const void *data, uint32_t nelem) { - nvs_data_t *data; - nvp_header_t *nvp; + uint64_t value_sz = 0; - if (nvl == NULL || nvl->nv_data == NULL || nvl->nv_asize != 0) + switch (type) { + case DATA_TYPE_BOOLEAN: + value_sz = 0; + break; + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + /* Our smallest data unit is 32-bit */ + value_sz = sizeof (uint32_t); + break; + case DATA_TYPE_HRTIME: + case DATA_TYPE_INT64: + value_sz = sizeof (int64_t); + break; + case DATA_TYPE_UINT64: + value_sz = sizeof (uint64_t); + break; + case DATA_TYPE_STRING: + if (data == NULL) + value_sz = 0; + else + value_sz = strlen(data) + 1; + break; + case DATA_TYPE_BYTE_ARRAY: + value_sz = nelem * sizeof (uint8_t); + break; + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint32_t); + break; + case DATA_TYPE_INT64_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int64_t); + break; + case DATA_TYPE_UINT64_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t); + break; + case DATA_TYPE_STRING_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t); + + if (data != NULL) { + char *const *strs = data; + uint32_t i; + + for (i = 0; i < nelem; i++) { + if (strs[i] == NULL) + return (-1); + value_sz += strlen(strs[i]) + 1; + } + } + break; + case DATA_TYPE_NVLIST: + /* + * The decoded size of nvlist is constant. + */ + value_sz = NV_ALIGN(6 * 4); /* sizeof nvlist_t */ + break; + case DATA_TYPE_NVLIST_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t) + + (uint64_t)nelem * NV_ALIGN(6 * 4); /* sizeof nvlist_t */ + break; + default: + return (-1); + } + + return (value_sz > INT32_MAX ? -1 : (int)value_sz); +} + +static int +get_nvp_data_size(data_type_t type, const void *data, uint32_t nelem) +{ + uint64_t value_sz = 0; + xdr_t xdr; + size_t size; + + switch (type) { + case DATA_TYPE_BOOLEAN: + value_sz = 0; + break; + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + /* Our smallest data unit is 32-bit */ + value_sz = sizeof (uint32_t); + break; + case DATA_TYPE_HRTIME: + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + value_sz = sizeof (uint64_t); + break; + case DATA_TYPE_STRING: + value_sz = 4 + NV_ALIGN4(strlen(data)); + break; + case DATA_TYPE_BYTE_ARRAY: + value_sz = NV_ALIGN4(nelem); + break; + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + value_sz = 4 + (uint64_t)nelem * sizeof (uint32_t); + break; + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + value_sz = 4 + (uint64_t)nelem * sizeof (uint64_t); + break; + case DATA_TYPE_STRING_ARRAY: + if (data != NULL) { + char *const *strs = data; + uint32_t i; + + for (i = 0; i < nelem; i++) { + value_sz += 4 + NV_ALIGN4(strlen(strs[i])); + } + } + break; + case DATA_TYPE_NVLIST: + xdr.xdr_idx = ((nvlist_t *)data)->nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = ((nvlist_t *)data)->nv_size; + + if (!nvlist_size_native(&xdr, &size)) + return (-1); + + value_sz = size; + break; + case DATA_TYPE_NVLIST_ARRAY: + value_sz = 0; + for (uint32_t i = 0; i < nelem; i++) { + xdr.xdr_idx = ((nvlist_t **)data)[i]->nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = ((nvlist_t **)data)[i]->nv_size; + + if (!nvlist_size_native(&xdr, &size)) + return (-1); + value_sz += size; + } + break; + default: + return (-1); + } + + return (value_sz > INT32_MAX ? -1 : (int)value_sz); +} + +#define NVPE_SIZE(name_len, data_len) \ + (4 + 4 + 4 + NV_ALIGN4(name_len) + 4 + 4 + data_len) +#define NVP_SIZE(name_len, data_len) \ + (NV_ALIGN((4 * 4) + (name_len)) + NV_ALIGN(data_len)) + +static int +nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type, + uint32_t nelem, const void *data) +{ + nvs_data_t *nvs; + nvp_header_t head, *hp; + uint8_t *ptr; + size_t namelen; + int decoded_size, encoded_size; + xdr_t xdr; + + nvs = (nvs_data_t *)nvl->nv_data; + if (nvs->nvl_nvflag & NV_UNIQUE_NAME) + (void) nvlist_remove(nvl, name, type); + + xdr.xdr_buf = nvl->nv_data; + xdr.xdr_idx = nvl->nv_data; + xdr.xdr_buf_size = nvl->nv_size; + if (!nvlist_size_native(&xdr, &nvl->nv_size)) return (EINVAL); - data = (nvs_data_t *)nvl->nv_data; - nvp = &data->nvl_pair; /* first pair in nvlist */ + namelen = strlen(name); + if ((decoded_size = get_value_size(type, data, nelem)) < 0) + return (EINVAL); + if ((encoded_size = get_nvp_data_size(type, data, nelem)) < 0) + return (EINVAL); - while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { - nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); + /* + * The encoded size is calculated as: + * encode_size (4) + decode_size (4) + + * name string size (4 + NV_ALIGN4(namelen) + + * data type (4) + nelem size (4) + datalen + * + * The decoded size is calculated as: + * Note: namelen is with terminating 0. + * NV_ALIGN(sizeof (nvpair_t) (4 * 4) + namelen + 1) + + * NV_ALIGN(data_len) + */ + + head.encoded_size = NVPE_SIZE(namelen, encoded_size); + head.decoded_size = NVP_SIZE(namelen + 1, decoded_size); + + if (nvl->nv_asize - nvl->nv_size < head.encoded_size + 8) { + ptr = realloc(nvl->nv_data, nvl->nv_asize + head.encoded_size); + if (ptr == NULL) + return (ENOMEM); + nvl->nv_data = ptr; + nvl->nv_asize += head.encoded_size; } - nvl->nv_data = (uint8_t *)nvp + sizeof(*nvp); + nvl->nv_idx = nvl->nv_data + nvl->nv_size - sizeof (*hp); + bzero(nvl->nv_idx, head.encoded_size + 8); + hp = (nvp_header_t *)nvl->nv_idx; + *hp = head; + nvl->nv_idx += sizeof (*hp); + *(unsigned *)nvl->nv_idx = namelen; + nvl->nv_idx += sizeof (unsigned); + strlcpy((char *)nvl->nv_idx, name, namelen + 1); + nvl->nv_idx += NV_ALIGN4(namelen); + *(unsigned *)nvl->nv_idx = type; + nvl->nv_idx += sizeof (unsigned); + *(unsigned *)nvl->nv_idx = nelem; + nvl->nv_idx += sizeof (unsigned); + + switch (type) { + case DATA_TYPE_BOOLEAN: + break; + case DATA_TYPE_BYTE_ARRAY: + *(unsigned *)nvl->nv_idx = encoded_size; + nvl->nv_idx += sizeof (unsigned); + bcopy(data, nvl->nv_idx, nelem); + nvl->nv_idx += encoded_size; + break; + case DATA_TYPE_STRING: + encoded_size = strlen(data); + *(unsigned *)nvl->nv_idx = encoded_size; + nvl->nv_idx += sizeof (unsigned); + strlcpy((char *)nvl->nv_idx, data, encoded_size + 1); + nvl->nv_idx += NV_ALIGN4(encoded_size); + break; + case DATA_TYPE_STRING_ARRAY: + for (uint32_t i = 0; i < nelem; i++) { + encoded_size = strlen(((char **)data)[i]); + *(unsigned *)nvl->nv_idx = encoded_size; + nvl->nv_idx += sizeof (unsigned); + strlcpy((char *)nvl->nv_idx, ((char **)data)[i], + encoded_size + 1); + nvl->nv_idx += NV_ALIGN4(encoded_size); + } + break; + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + for (uint32_t i = 0; i < nelem; i++) { + *(unsigned *)nvl->nv_idx = ((uint8_t *)data)[i]; + nvl->nv_idx += sizeof (unsigned); + } + break; + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + for (uint32_t i = 0; i < nelem; i++) { + *(unsigned *)nvl->nv_idx = ((uint16_t *)data)[i]; + nvl->nv_idx += sizeof (unsigned); + } + break; + case DATA_TYPE_NVLIST: + bcopy(((nvlist_t *)data)->nv_data, nvl->nv_idx, encoded_size); + break; + case DATA_TYPE_NVLIST_ARRAY: { + uint8_t *buf = nvl->nv_idx; + size_t size; + xdr_t xdr; + + for (uint32_t i = 0; i < nelem; i++) { + xdr.xdr_idx = ((nvlist_t **)data)[i]->nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = ((nvlist_t **)data)[i]->nv_size; + + if (!nvlist_size_native(&xdr, &size)) + return (EINVAL); + + bcopy(((nvlist_t **)data)[i]->nv_data, buf, size); + buf += size; + } + break; + } + default: + bcopy(data, nvl->nv_idx, encoded_size); + } + + nvl->nv_size += head.encoded_size; + return (0); } +int +nvlist_add_boolean(nvlist_t *nvl, const char *name) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN, 0, NULL)); +} + +int +nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, + &value)); +} + +int +nvlist_add_byte(nvlist_t *nvl, const char *name, uint8_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &value)); +} + +int +nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &value)); +} + +int +nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &value)); +} + +int +nvlist_add_int16(nvlist_t *nvl, const char *name, int16_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &value)); +} + +int +nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &value)); +} + +int +nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT32, 1, &value)); +} + +int +nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &value)); +} + +int +nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &value)); +} + +int +nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &value)); +} + +int +nvlist_add_string(nvlist_t *nvl, const char *name, const char *value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, value)); +} + +int +nvlist_add_boolean_array(nvlist_t *nvl, const char *name, + boolean_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a)); +} + +int +nvlist_add_byte_array(nvlist_t *nvl, const char *name, uint8_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); +} + +int +nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); +} + +int +nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); +} + +int +nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); +} + +int +nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, + uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); +} + +int +nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); +} + +int +nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, + uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); +} + +int +nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); +} + +int +nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, + uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); +} + +int +nvlist_add_string_array(nvlist_t *nvl, const char *name, + char * const *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); +} + +int +nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val)); +} + +int +nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, + uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); +} + +static const char *typenames[] = { + "DATA_TYPE_UNKNOWN", + "DATA_TYPE_BOOLEAN", + "DATA_TYPE_BYTE", + "DATA_TYPE_INT16", + "DATA_TYPE_UINT16", + "DATA_TYPE_INT32", + "DATA_TYPE_UINT32", + "DATA_TYPE_INT64", + "DATA_TYPE_UINT64", + "DATA_TYPE_STRING", + "DATA_TYPE_BYTE_ARRAY", + "DATA_TYPE_INT16_ARRAY", + "DATA_TYPE_UINT16_ARRAY", + "DATA_TYPE_INT32_ARRAY", + "DATA_TYPE_UINT32_ARRAY", + "DATA_TYPE_INT64_ARRAY", + "DATA_TYPE_UINT64_ARRAY", + "DATA_TYPE_STRING_ARRAY", + "DATA_TYPE_HRTIME", + "DATA_TYPE_NVLIST", + "DATA_TYPE_NVLIST_ARRAY", + "DATA_TYPE_BOOLEAN_VALUE", + "DATA_TYPE_INT8", + "DATA_TYPE_UINT8", + "DATA_TYPE_BOOLEAN_ARRAY", + "DATA_TYPE_INT8_ARRAY", + "DATA_TYPE_UINT8_ARRAY" +}; + void -nvlist_print(nvlist_t *nvl, unsigned int indent) +nvpair_print(nvp_header_t *nvp, unsigned int indent) { - static const char *typenames[] = { - "DATA_TYPE_UNKNOWN", - "DATA_TYPE_BOOLEAN", - "DATA_TYPE_BYTE", - "DATA_TYPE_INT16", - "DATA_TYPE_UINT16", - "DATA_TYPE_INT32", - "DATA_TYPE_UINT32", - "DATA_TYPE_INT64", - "DATA_TYPE_UINT64", - "DATA_TYPE_STRING", - "DATA_TYPE_BYTE_ARRAY", - "DATA_TYPE_INT16_ARRAY", - "DATA_TYPE_UINT16_ARRAY", - "DATA_TYPE_INT32_ARRAY", - "DATA_TYPE_UINT32_ARRAY", - "DATA_TYPE_INT64_ARRAY", - "DATA_TYPE_UINT64_ARRAY", - "DATA_TYPE_STRING_ARRAY", - "DATA_TYPE_HRTIME", - "DATA_TYPE_NVLIST", - "DATA_TYPE_NVLIST_ARRAY", - "DATA_TYPE_BOOLEAN_VALUE", - "DATA_TYPE_INT8", - "DATA_TYPE_UINT8", - "DATA_TYPE_BOOLEAN_ARRAY", - "DATA_TYPE_INT8_ARRAY", - "DATA_TYPE_UINT8_ARRAY" - }; - nvs_data_t *data; - nvp_header_t *nvp; nv_string_t *nvp_name; nv_pair_data_t *nvp_data; nvlist_t nvlist; - int i, j; + xdr_t xdr; + unsigned i, j; - data = (nvs_data_t *)nvl->nv_data; - nvp = &data->nvl_pair; /* first pair in nvlist */ - while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { - nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof(*nvp)); - nvp_data = (nv_pair_data_t *) - NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + - nvp_name->nv_size); + nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof (*nvp)); + nvp_data = (nv_pair_data_t *) + NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); - for (int i = 0; i < indent; i++) - printf(" "); + for (i = 0; i < indent; i++) + printf(" "); - printf("%s [%d] %.*s", typenames[nvp_data->nv_type], - nvp_data->nv_nelem, nvp_name->nv_size, nvp_name->nv_data); + printf("%s [%d] %.*s", typenames[nvp_data->nv_type], + nvp_data->nv_nelem, nvp_name->nv_size, nvp_name->nv_data); - switch (nvp_data->nv_type) { - case DATA_TYPE_UINT64: { - uint64_t val; + switch (nvp_data->nv_type) { + case DATA_TYPE_UINT64: { + uint64_t val; - val = *(uint64_t *)nvp_data->nv_data; - printf(" = 0x%jx\n", (uintmax_t)val); - break; - } + val = *(uint64_t *)nvp_data->nv_data; + printf(" = 0x%jx\n", (uintmax_t)val); + break; + } - case DATA_TYPE_STRING: { - nvp_name = (nv_string_t *)&nvp_data->nv_data[0]; + case DATA_TYPE_STRING: + case DATA_TYPE_STRING_ARRAY: + nvp_name = (nv_string_t *)&nvp_data->nv_data[0]; + for (i = 0; i < nvp_data->nv_nelem; i++) { printf(" = \"%.*s\"\n", nvp_name->nv_size, - nvp_name->nv_data ); - break; + nvp_name->nv_data); } + break; - case DATA_TYPE_NVLIST: - printf("\n"); - nvlist.nv_data = &nvp_data->nv_data[0]; - nvlist_print(&nvlist, indent + 2); - break; + case DATA_TYPE_NVLIST: + printf("\n"); + nvlist.nv_data = &nvp_data->nv_data[0]; + nvlist_print(&nvlist, indent + 2); + break; - case DATA_TYPE_NVLIST_ARRAY: - nvlist.nv_data = &nvp_data->nv_data[0]; - for (j = 0; j < nvp_data->nv_nelem; j++) { - data = (nvs_data_t *)nvlist.nv_data; - printf("[%d]\n", j); - nvlist_print(&nvlist, indent + 2); - if (j != nvp_data->nv_nelem - 1) { - for (i = 0; i < indent; i++) - printf(" "); - printf("%s %.*s", - typenames[nvp_data->nv_type], - nvp_name->nv_size, - nvp_name->nv_data); - } - nvlist.nv_data = (uint8_t *)data + - nvlist_size(&native, nvlist.nv_data); + case DATA_TYPE_NVLIST_ARRAY: + nvlist.nv_data = &nvp_data->nv_data[0]; + for (j = 0; j < nvp_data->nv_nelem; j++) { + size_t size; + + printf("[%d]\n", j); + nvlist_print(&nvlist, indent + 2); + if (j != nvp_data->nv_nelem - 1) { + for (i = 0; i < indent; i++) + printf(" "); + printf("%s %.*s", + typenames[nvp_data->nv_type], + nvp_name->nv_size, + nvp_name->nv_data); } - break; + xdr.xdr_idx = nvlist.nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = nvp->encoded_size - + (xdr.xdr_idx - (uint8_t *)nvp); - default: - printf("\n"); + if (!nvlist_size_native(&xdr, &size)) + return; + + nvlist.nv_data += size; } + break; + + default: + printf("\n"); + } +} + +void +nvlist_print(const nvlist_t *nvl, unsigned int indent) +{ + nvs_data_t *data; + nvp_header_t *nvp; + + data = (nvs_data_t *)nvl->nv_data; + nvp = &data->nvl_pair; /* first pair in nvlist */ + while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { + nvpair_print(nvp, indent); nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); } printf("%*s\n", indent + 13, "End of nvlist"); Index: stand/libsa/zfs/zfs.c =================================================================== --- stand/libsa/zfs/zfs.c +++ stand/libsa/zfs/zfs.c @@ -486,8 +486,7 @@ } static int -vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, - size_t bytes) +vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes) { int fd, ret; size_t head, tail, total_size, full_sec_size; @@ -496,8 +495,8 @@ ssize_t res; char *outbuf, *bouncebuf; - fd = (uintptr_t)priv; - outbuf = (char *) buf; + fd = (uintptr_t)vdev->v_priv; + outbuf = (char *)buf; bouncebuf = NULL; ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); @@ -532,14 +531,14 @@ /* Partial data for first sector */ if (head > 0) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes)); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } @@ -555,20 +554,20 @@ if (full_sec_size > 0) { if (bytes < full_sec_size) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, bytes); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } } else { res = write(fd, outbuf, full_sec_size); - if (res != full_sec_size) { + if ((unsigned)res != full_sec_size) { ret = EIO; goto error; } @@ -579,14 +578,14 @@ /* Partial data write to last sector */ if (do_tail_write) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, secsz - tail); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } @@ -598,102 +597,6 @@ return (ret); } -static void -vdev_clear_pad2(vdev_t *vdev) -{ - vdev_t *kid; - vdev_boot_envblock_t *be; - off_t off = offsetof(vdev_label_t, vl_be); - zio_checksum_info_t *ci; - zio_cksum_t cksum; - - STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { - if (kid->v_state != VDEV_STATE_HEALTHY) - continue; - vdev_clear_pad2(kid); - } - - if (!STAILQ_EMPTY(&vdev->v_children)) - return; - - be = calloc(1, sizeof (*be)); - if (be == NULL) { - printf("failed to clear be area: out of memory\n"); - return; - } - - ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; - be->vbe_zbt.zec_magic = ZEC_MAGIC; - zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); - ci->ci_func[0](be, sizeof (*be), NULL, &cksum); - be->vbe_zbt.zec_cksum = cksum; - - if (vdev_write(vdev, vdev->v_read_priv, off, be, VDEV_PAD_SIZE)) { - printf("failed to clear be area of primary vdev: %d\n", - errno); - } - free(be); -} - -/* - * Read the next boot command from pad2. - * If any instance of pad2 is set to empty string, or the returned string - * values are not the same, we consider next boot not to be set. - */ -static char * -vdev_read_pad2(vdev_t *vdev) -{ - vdev_t *kid; - char *tmp, *result = NULL; - vdev_boot_envblock_t *be; - off_t off = offsetof(vdev_label_t, vl_be); - - STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { - if (kid->v_state != VDEV_STATE_HEALTHY) - continue; - tmp = vdev_read_pad2(kid); - if (tmp == NULL) - continue; - - /* The next boot is not set, we are done. */ - if (*tmp == '\0') { - free(result); - return (tmp); - } - if (result == NULL) { - result = tmp; - continue; - } - /* Are the next boot strings different? */ - if (strcmp(result, tmp) != 0) { - free(tmp); - *result = '\0'; - break; - } - free(tmp); - } - if (result != NULL) - return (result); - - be = malloc(sizeof (*be)); - if (be == NULL) - return (NULL); - - if (vdev_read(vdev, vdev->v_read_priv, off, be, sizeof (*be))) { - return (NULL); - } - - switch (be->vbe_version) { - case VB_RAW: - case VB_NVLIST: - result = strdup(be->vbe_bootenv); - default: - /* Backward compatibility with initial nextboot feaure. */ - result = strdup((char *)be); - } - return (result); -} - static int zfs_dev_init(void) { @@ -746,7 +649,7 @@ int ret; spa = NULL; - ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); + ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa); if (ret == 0 && pool_guid != NULL) *pool_guid = spa->spa_guid; return (ret); @@ -769,7 +672,7 @@ ppa = (struct zfs_probe_args *)arg; strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); devname[strlen(ppa->devname) - 1] = '\0'; - sprintf(devname, "%s%s:", devname, partname); + snprintf(devname, sizeof(devname), "%s%s:", devname, partname); pa.fd = open(devname, O_RDWR); if (pa.fd == -1) return (0); @@ -792,58 +695,381 @@ return (0); } +/* + * Return bootenv nvlist from pool label. + */ int -zfs_nextboot(void *vdev, char *buf, size_t size) +zfs_get_bootenv(void *vdev, nvlist_t **benvp) { struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + nvlist_t *benv = NULL; + vdev_t *vd; spa_t *spa; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) { + STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, + v_childlink) { + benv = vdev_read_bootenv(vd); + + if (benv != NULL) + break; + } + spa->spa_bootenv = benv; + } else { + benv = spa->spa_bootenv; + } + + if (benv == NULL) + return (ENOENT); + + *benvp = benv; + return (0); +} + +/* + * Store nvlist to pool label bootenv area. Also updates cached pointer in spa. + */ +int +zfs_set_bootenv(void *vdev, nvlist_t *benv) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; vdev_t *vd; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { + vdev_write_bootenv(vd, benv); + } + + spa->spa_bootenv = benv; + return (0); +} + +/* + * Get bootonce value by key. The bootonce pair is removed + * from the bootenv nvlist and the remaining nvlist is committed back to disk. + */ +int +zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size) +{ + nvlist_t *benv; char *result = NULL; + int result_size, rv; + if ((rv = zfs_get_bootenv(vdev, &benv)) != 0) + return (rv); + + if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL, + &result, &result_size)) == 0) { + if (result_size == 0) { + /* ignore empty string */ + rv = ENOENT; + } else { + size = MIN((size_t)result_size + 1, size); + strlcpy(buf, result, size); + } + (void) nvlist_remove(benv, key, DATA_TYPE_STRING); + (void) zfs_set_bootenv(vdev, benv); + } + + return (rv); +} + +/* + * nvstore backend. + */ + +static int zfs_nvstore_setter(void *, const char *, const void *, size_t); +static int zfs_nvstore_unset_impl(void *, const char *, bool); + +/* + * nvstore is only present for current rootfs pool. + */ +static int +zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value) +{ + struct zfs_devdesc *dev; + int rv; + + archsw.arch_getdev((void **)&dev, NULL, NULL); + if (dev == NULL) + return (ENXIO); + + rv = zfs_nvstore_setter(dev, ev->ev_name, value, strlen(value)); + + free(dev); + return (rv); +} + +/* + * nvstore is only present for current rootfs pool. + */ +static int +zfs_nvstore_unsethook(struct env_var *ev) +{ + struct zfs_devdesc *dev; + int rv; + + archsw.arch_getdev((void **)&dev, NULL, NULL); + if (dev == NULL) + return (ENXIO); + + rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false); + + free(dev); + return (rv); +} + +static int +zfs_nvstore_getter(void *vdev, const char *name, void **data) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + nvlist_t *nv; + char *str, **ptr; + int size; + int rv; + if (dev->dd.d_dev->dv_type != DEVT_ZFS) - return (1); + return (ENOTSUP); - if (dev->pool_guid == 0) - spa = STAILQ_FIRST(&zfs_pools); - else - spa = spa_find_by_guid(dev->pool_guid); + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); - if (spa == NULL) { - printf("ZFS: can't find pool by guid\n"); - return (1); + if (spa->spa_bootenv == NULL) + return (ENXIO); + + if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, + NULL, &nv, NULL) != 0) + return (ENOENT); + + rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size); + if (rv == 0) { + ptr = (char **)data; + asprintf(ptr, "%.*s", size, str); + if (*data == NULL) + rv = ENOMEM; } + nvlist_destroy(nv); + return (rv); +} - STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { - char *tmp = vdev_read_pad2(vd); +static int +zfs_nvstore_setter(void *vdev, const char *name, const void *data, + size_t size __unused) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + nvlist_t *nv; + int rv; - /* Continue on error. */ - if (tmp == NULL) - continue; - /* Nextboot is not set. */ - if (*tmp == '\0') { - free(result); - free(tmp); - return (1); + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) + return (ENXIO); + + if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, + NULL, &nv, NULL) != 0) { + nv = nvlist_create(NV_UNIQUE_NAME); + if (nv == NULL) + return (ENOMEM); + } + + rv = nvlist_add_string(nv, name, data); + if (rv == 0) { + rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv); + if (rv == 0) + rv = zfs_set_bootenv(vdev, spa->spa_bootenv); + } + nvlist_destroy(nv); + rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, data, + zfs_nvstore_sethook, zfs_nvstore_unsethook); + + return (rv); +} + +static int +zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + nvlist_t *nv; + int rv; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) + return (ENXIO); + + if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, + NULL, &nv, NULL) != 0) + return (ENOENT); + + rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN); + if (rv == 0) { + if (nvlist_next_nvpair(nv, NULL) == NULL) { + rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE, + DATA_TYPE_NVLIST); + } else { + rv = nvlist_add_nvlist(spa->spa_bootenv, + OS_NVSTORE, nv); } - if (result == NULL) { - result = tmp; - continue; - } - free(tmp); + if (rv == 0) + rv = zfs_set_bootenv(vdev, spa->spa_bootenv); } - if (result == NULL) - return (1); - STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { - vdev_clear_pad2(vd); + if (unset_env) + env_discard(env_getenv(name)); + return (rv); +} + +static int +zfs_nvstore_unset(void *vdev, const char *name) +{ + return (zfs_nvstore_unset_impl(vdev, name, true)); +} + +static int +zfs_nvstore_print(void *vdev __unused, void *ptr) +{ + + nvpair_print(ptr, 0); + return (0); +} + +/* + * Create environment variable from nvpair. + * At this time, we only support DATA_TYPE_STRING. + * set hook will update nvstore with new value, unset hook will remove + * variable from nvstore. + */ +static int +zfs_nvstore_setenv(void *vdev __unused, void *ptr) +{ + nvp_header_t *nvh = ptr; + nv_string_t *nvp_name, *nvp_value; + nv_pair_data_t *nvp_data; + char *name, *value; + int rv = 0; + + nvp_name = (nv_string_t *)(nvh + 1); + nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] + + NV_ALIGN4(nvp_name->nv_size)); + + switch (nvp_data->nv_type) { + case DATA_TYPE_STRING: + nvp_value = (nv_string_t *)&nvp_data->nv_data[0]; + if ((name = nvstring_get(nvp_name)) == NULL) + return (ENOMEM); + if ((value = nvstring_get(nvp_value)) == NULL) { + free(name); + return (ENOMEM); + } + rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value, + zfs_nvstore_sethook, zfs_nvstore_unsethook); + free(name); + free(value); + break; + default: + break; } + return (rv); +} - strlcpy(buf, result, size); - free(result); +static int +zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *)) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + nvlist_t *nv; + nvp_header_t *nvh; + int rv; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) + return (ENXIO); + + if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, + NULL, &nv, NULL) != 0) + return (ENOENT); + + rv = 0; + nvh = NULL; + while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) { + rv = cb(vdev, nvh); + if (rv != 0) + break; + } return (0); } +nvs_callbacks_t nvstore_zfs_cb = { + .nvs_getter = zfs_nvstore_getter, + .nvs_setter = zfs_nvstore_setter, + .nvs_unset = zfs_nvstore_unset, + .nvs_print = zfs_nvstore_print, + .nvs_iterate = zfs_nvstore_iterate +}; + int +zfs_attach_nvstore(void *vdev) +{ + struct zfs_devdesc *dev = vdev; + spa_t *spa; + uint64_t version; + int rv; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64, + NULL, &version, NULL); + + if (rv != 0 || version != VB_NVLIST) { + return (ENXIO); + } + + dev = malloc(sizeof (*dev)); + if (dev == NULL) + return (ENOMEM); + memcpy(dev, vdev, sizeof (*dev)); + + rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev); + if (rv != 0) + free(dev); + else + (void) zfs_nvstore_iterate(dev, zfs_nvstore_setenv); + return (rv); +} + +int zfs_probe_dev(const char *devname, uint64_t *pool_guid) { struct disk_devdesc *dev; @@ -939,12 +1165,9 @@ dev = va_arg(args, struct zfs_devdesc *); va_end(args); - if (dev->pool_guid == 0) - spa = STAILQ_FIRST(&zfs_pools); - else - spa = spa_find_by_guid(dev->pool_guid); - if (!spa) + if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); + mount = malloc(sizeof(*mount)); if (mount == NULL) rv = ENOMEM; @@ -1073,10 +1296,11 @@ } if (rootname[0] == '\0') - sprintf(buf, "%s:%s:", dev->dd.d_dev->dv_name, spa->spa_name); + snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name, + spa->spa_name); else - sprintf(buf, "%s:%s/%s:", dev->dd.d_dev->dv_name, spa->spa_name, - rootname); + snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name, + spa->spa_name, rootname); return (buf); } Index: stand/libsa/zfs/zfsimpl.c =================================================================== --- stand/libsa/zfs/zfsimpl.c +++ stand/libsa/zfs/zfsimpl.c @@ -31,10 +31,12 @@ * Stand-alone ZFS file reader. */ +#include #include #include #include #include +#include #include #include "zfsimpl.h" @@ -219,8 +221,8 @@ size_t psize; int rc; - if (!vdev->v_phys_read) - return (EIO); + if (vdev->v_phys_read == NULL) + return (ENOTSUP); if (bp) { psize = BP_GET_PSIZE(bp); @@ -228,7 +230,7 @@ psize = size; } - rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize); + rc = vdev->v_phys_read(vdev, vdev->v_priv, offset, buf, psize); if (rc == 0) { if (bp != NULL) rc = zio_checksum_verify(vdev->v_spa, bp, buf); @@ -237,6 +239,15 @@ return (rc); } +static int +vdev_write_phys(vdev_t *vdev, void *buf, off_t offset, size_t size) +{ + if (vdev->v_phys_write == NULL) + return (ENOTSUP); + + return (vdev->v_phys_write(vdev, offset, buf, size)); +} + typedef struct remap_segment { vdev_t *rs_vd; uint64_t rs_offset; @@ -1083,7 +1094,7 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist) { vdev_t *top_vdev, *vdev; - nvlist_t *kids = NULL; + nvlist_t **kids = NULL; int rc, nkids; /* Get top vdev. */ @@ -1104,27 +1115,18 @@ for (int i = 0; i < nkids; i++) { uint64_t guid; - rc = nvlist_find(kids, ZPOOL_CONFIG_GUID, + rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); - if (rc != 0) { - nvlist_destroy(kids); - return (rc); - } - rc = vdev_init(guid, kids, &vdev); - if (rc != 0) { - nvlist_destroy(kids); - return (rc); - } + if (rc != 0) + goto done; + rc = vdev_init(guid, kids[i], &vdev); + if (rc != 0) + goto done; + vdev->v_spa = spa; vdev->v_top = top_vdev; vdev_insert(top_vdev, vdev); - - rc = nvlist_next(kids); - if (rc != 0) { - nvlist_destroy(kids); - return (rc); - } } } else { /* @@ -1133,7 +1135,12 @@ */ rc = 0; } - nvlist_destroy(kids); +done: + if (kids != NULL) { + for (int i = 0; i < nkids; i++) + nvlist_destroy(kids[i]); + free(kids); + } return (rc); } @@ -1209,7 +1216,7 @@ vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist) { vdev_t *vdev; - nvlist_t *kids = NULL; + nvlist_t **kids = NULL; int rc, nkids; /* Update top vdev. */ @@ -1224,23 +1231,23 @@ for (int i = 0; i < nkids; i++) { uint64_t guid; - rc = nvlist_find(kids, ZPOOL_CONFIG_GUID, + rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); if (rc != 0) break; vdev = vdev_find(guid); if (vdev != NULL) - vdev_set_initial_state(vdev, kids); - - rc = nvlist_next(kids); - if (rc != 0) - break; + vdev_set_initial_state(vdev, kids[i]); } } else { rc = 0; } - nvlist_destroy(kids); + if (kids != NULL) { + for (int i = 0; i < nkids; i++) + nvlist_destroy(kids[i]); + free(kids); + } return (rc); } @@ -1249,7 +1256,7 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist) { uint64_t pool_guid, vdev_children; - nvlist_t *vdevs = NULL, *kids = NULL; + nvlist_t *vdevs = NULL, **kids = NULL; int rc, nkids; if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, @@ -1284,7 +1291,7 @@ uint64_t guid; vdev_t *vdev; - rc = nvlist_find(kids, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, + rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); if (rc != 0) break; @@ -1293,16 +1300,17 @@ * Top level vdev is missing, create it. */ if (vdev == NULL) - rc = vdev_from_nvlist(spa, guid, kids); + rc = vdev_from_nvlist(spa, guid, kids[i]); else - rc = vdev_update_from_nvlist(guid, kids); + rc = vdev_update_from_nvlist(guid, kids[i]); if (rc != 0) break; - rc = nvlist_next(kids); - if (rc != 0) - break; } - nvlist_destroy(kids); + if (kids != NULL) { + for (int i = 0; i < nkids; i++) + nvlist_destroy(kids[i]); + free(kids); + } /* * Re-evaluate top-level vdev state. @@ -1337,6 +1345,19 @@ } static spa_t * +spa_find_by_dev(struct zfs_devdesc *dev) +{ + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (NULL); + + if (dev->pool_guid == 0) + return (STAILQ_FIRST(&zfs_pools)); + + return (spa_find_by_guid(dev->pool_guid)); +} + +static spa_t * spa_create(uint64_t guid, const char *name) { spa_t *spa; @@ -1588,6 +1609,254 @@ return (vdev_read_phys(vd, &bp, buf, off, size)); } +/* + * We do need to be sure we write to correct location. + * Our vdev label does consist of 4 fields: + * pad1 (8k), reserved. + * bootenv (8k), checksummed, previously reserved, may contian garbage. + * vdev_phys (112k), checksummed + * uberblock ring (128k), checksummed. + * + * Since bootenv area may contain garbage, we can not reliably read it, as + * we can get checksum errors. + * Next best thing is vdev_phys - it is just after bootenv. It still may + * be corrupted, but in such case we will miss this one write. + */ +static int +vdev_label_write_validate(vdev_t *vd, int l, uint64_t offset) +{ + uint64_t off, o_phys; + void *buf; + size_t size = VDEV_PHYS_SIZE; + int rc; + + o_phys = offsetof(vdev_label_t, vl_vdev_phys); + off = vdev_label_offset(vd->v_psize, l, o_phys); + + /* off should be 8K from bootenv */ + if (vdev_label_offset(vd->v_psize, l, offset) + VDEV_PAD_SIZE != off) + return (EINVAL); + + buf = malloc(size); + if (buf == NULL) + return (ENOMEM); + + /* Read vdev_phys */ + rc = vdev_label_read(vd, l, buf, o_phys, size); + free(buf); + return (rc); +} + +static int +vdev_label_write(vdev_t *vd, int l, vdev_boot_envblock_t *be, uint64_t offset) +{ + zio_checksum_info_t *ci; + zio_cksum_t cksum; + off_t off; + size_t size = VDEV_PAD_SIZE; + int rc; + + if (vd->v_phys_write == NULL) + return (ENOTSUP); + + off = vdev_label_offset(vd->v_psize, l, offset); + + rc = vdev_label_write_validate(vd, l, offset); + if (rc != 0) { + return (rc); + } + + ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; + be->vbe_zbt.zec_magic = ZEC_MAGIC; + zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); + ci->ci_func[0](be, size, NULL, &cksum); + be->vbe_zbt.zec_cksum = cksum; + + return (vdev_write_phys(vd, be, off, size)); +} + +static int +vdev_write_bootenv_impl(vdev_t *vdev, vdev_boot_envblock_t *be) +{ + vdev_t *kid; + int rv = 0, rc; + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + rc = vdev_write_bootenv_impl(kid, be); + if (rv == 0) + rv = rc; + } + + /* + * Non-leaf vdevs do not have v_phys_write. + */ + if (vdev->v_phys_write == NULL) + return (rv); + + for (int l = 0; l < VDEV_LABELS; l++) { + rc = vdev_label_write(vdev, l, be, + offsetof(vdev_label_t, vl_be)); + if (rc != 0) { + printf("failed to write bootenv to %s label %d: %d\n", + vdev->v_name ? vdev->v_name : "unknown", l, rc); + rv = rc; + } + } + return (rv); +} + +int +vdev_write_bootenv(vdev_t *vdev, nvlist_t *nvl) +{ + vdev_boot_envblock_t *be; + nvlist_t nv, *nvp; + uint64_t version; + int rv; + + if (nvl->nv_size > sizeof(be->vbe_bootenv)) + return (E2BIG); + + version = VB_RAW; + nvp = vdev_read_bootenv(vdev); + if (nvp != NULL) { + nvlist_find(nvp, BOOTENV_VERSION, DATA_TYPE_UINT64, NULL, + &version, NULL); + nvlist_destroy(nvp); + } + + be = calloc(1, sizeof(*be)); + if (be == NULL) + return (ENOMEM); + + be->vbe_version = version; + switch (version) { + case VB_RAW: + /* + * If there is no envmap, we will just wipe bootenv. + */ + nvlist_find(nvl, GRUB_ENVMAP, DATA_TYPE_STRING, NULL, + be->vbe_bootenv, NULL); + rv = 0; + break; + + case VB_NVLIST: + nv.nv_header = nvl->nv_header; + nv.nv_asize = nvl->nv_asize; + nv.nv_size = nvl->nv_size; + + *(nvs_header_t *)be->vbe_bootenv = nv.nv_header; + nv.nv_data = be->vbe_bootenv + sizeof(nvs_header_t); + bcopy(nvl->nv_data, nv.nv_data, nv.nv_size); + rv = nvlist_export(&nv); + break; + + default: + rv = EINVAL; + break; + } + + if (rv == 0) { + be->vbe_version = htobe64(be->vbe_version); + rv = vdev_write_bootenv_impl(vdev, be); + } + free(be); + return (rv); +} + +/* + * Read the bootenv area from pool label, return the nvlist from it. + * We return from first successful read. + */ +nvlist_t * +vdev_read_bootenv(vdev_t *vdev) +{ + vdev_t *kid; + nvlist_t *benv; + vdev_boot_envblock_t *be; + char *command; + bool ok; + int rv; + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + + benv = vdev_read_bootenv(kid); + if (benv != NULL) + return (benv); + } + + be = malloc(sizeof (*be)); + if (be == NULL) + return (NULL); + + rv = 0; + for (int l = 0; l < VDEV_LABELS; l++) { + rv = vdev_label_read(vdev, l, be, + offsetof(vdev_label_t, vl_be), + sizeof (*be)); + if (rv == 0) + break; + } + if (rv != 0) { + free(be); + return (NULL); + } + + be->vbe_version = be64toh(be->vbe_version); + switch (be->vbe_version) { + case VB_RAW: + /* + * we have textual data in vbe_bootenv, create nvlist + * with key "envmap". + */ + benv = nvlist_create(NV_UNIQUE_NAME); + if (benv != NULL) { + if (*be->vbe_bootenv == '\0') { + nvlist_add_uint64(benv, BOOTENV_VERSION, + VB_NVLIST); + break; + } + nvlist_add_uint64(benv, BOOTENV_VERSION, VB_RAW); + be->vbe_bootenv[sizeof (be->vbe_bootenv) - 1] = '\0'; + nvlist_add_string(benv, GRUB_ENVMAP, be->vbe_bootenv); + } + break; + + case VB_NVLIST: + benv = nvlist_import(be->vbe_bootenv, sizeof(be->vbe_bootenv)); + break; + + default: + command = (char *)be; + ok = false; + + /* Check for legacy zfsbootcfg command string */ + for (int i = 0; command[i] != '\0'; i++) { + if (iscntrl(command[i])) { + ok = false; + break; + } else { + ok = true; + } + } + benv = nvlist_create(NV_UNIQUE_NAME); + if (benv != NULL) { + if (ok) + nvlist_add_string(benv, FREEBSD_BOOTONCE, + command); + else + nvlist_add_uint64(benv, BOOTENV_VERSION, + VB_NVLIST); + } + break; + } + free(be); + return (benv); +} + static uint64_t vdev_get_label_asize(nvlist_t *nvl) { @@ -1620,7 +1889,7 @@ goto done; if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0) { - nvlist_t *kids; + nvlist_t **kids; int nkids; if (nvlist_find(vdevs, ZPOOL_CONFIG_CHILDREN, @@ -1630,7 +1899,9 @@ } asize /= nkids; - nvlist_destroy(kids); + for (int i = 0; i < nkids; i++) + nvlist_destroy(kids[i]); + free(kids); } asize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; @@ -1654,15 +1925,13 @@ return (NULL); for (int l = 0; l < VDEV_LABELS; l++) { - const unsigned char *nvlist; - if (vdev_label_read(vd, l, label, offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t))) continue; - nvlist = (const unsigned char *) label->vp_nvlist; - tmp = nvlist_import(nvlist + 4, nvlist[0], nvlist[1]); + tmp = nvlist_import(label->vp_nvlist, + sizeof(label->vp_nvlist)); if (tmp == NULL) continue; @@ -1727,7 +1996,8 @@ } static int -vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap) +vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, + spa_t **spap) { vdev_t vtmp; spa_t *spa; @@ -1745,8 +2015,9 @@ */ memset(&vtmp, 0, sizeof(vtmp)); vtmp.v_phys_read = _read; - vtmp.v_read_priv = read_priv; - vtmp.v_psize = P2ALIGN(ldi_get_size(read_priv), + vtmp.v_phys_write = _write; + vtmp.v_priv = priv; + vtmp.v_psize = P2ALIGN(ldi_get_size(priv), (uint64_t)sizeof (vdev_label_t)); /* Test for minimum device size. */ @@ -1860,7 +2131,8 @@ vdev = vdev_find(guid); if (vdev != NULL) { vdev->v_phys_read = _read; - vdev->v_read_priv = read_priv; + vdev->v_phys_write = _write; + vdev->v_priv = priv; vdev->v_psize = vtmp.v_psize; /* * If no other state is set, mark vdev healthy. @@ -3131,7 +3403,7 @@ dnode_phys_t dir; size_t size; int rc; - unsigned char *nv; + char *nv; *value = NULL; if ((rc = objset_get_dnode(spa, spa->spa_mos, obj, &dir)) != 0) @@ -3155,7 +3427,7 @@ nv = NULL; return (rc); } - *value = nvlist_import(nv + 4, nv[0], nv[1]); + *value = nvlist_import(nv, size); free(nv); return (rc); } Index: stand/loader.mk =================================================================== --- stand/loader.mk +++ stand/loader.mk @@ -6,7 +6,7 @@ SRCS+= boot.c commands.c console.c devopen.c interp.c SRCS+= interp_backslash.c interp_parse.c ls.c misc.c -SRCS+= module.c +SRCS+= module.c nvstore.c .if ${MACHINE} == "i386" || ${MACHINE_CPUARCH} == "amd64" SRCS+= load_elf32.c load_elf32_obj.c reloc_elf32.c Index: stand/userboot/test/test.c =================================================================== --- stand/userboot/test/test.c +++ stand/userboot/test/test.c @@ -261,6 +261,21 @@ } int +test_diskwrite(void *arg, int unit, uint64_t offset, void *src, size_t size, + size_t *resid_return) +{ + ssize_t n; + + if (unit > disk_index || disk_fd[unit] == -1) + return (EIO); + n = pwrite(disk_fd[unit], src, size, offset); + if (n < 0) + return (errno); + *resid_return = size - n; + return (0); +} + +int test_diskioctl(void *arg, int unit, u_long cmd, void *data) { struct stat sb; @@ -399,6 +414,7 @@ .stat = test_stat, .diskread = test_diskread, + .diskwrite = test_diskwrite, .diskioctl = test_diskioctl, .copyin = test_copyin, @@ -431,8 +447,9 @@ void (*func)(struct loader_callbacks *, void *, int, int) __dead2; int opt; const char *userboot_obj = "/boot/userboot.so"; + int oflag = O_RDONLY; - while ((opt = getopt(argc, argv, "b:d:h:")) != -1) { + while ((opt = getopt(argc, argv, "wb:d:h:")) != -1) { switch (opt) { case 'b': userboot_obj = optarg; @@ -442,13 +459,17 @@ disk_index++; disk_fd = reallocarray(disk_fd, disk_index + 1, sizeof (int)); - disk_fd[disk_index] = open(optarg, O_RDONLY); + disk_fd[disk_index] = open(optarg, oflag); if (disk_fd[disk_index] < 0) err(1, "Can't open disk image '%s'", optarg); break; case 'h': host_base = optarg; + break; + + case 'w': + oflag = O_RDWR; break; case '?': Index: stand/userboot/userboot.h =================================================================== --- stand/userboot/userboot.h +++ stand/userboot/userboot.h @@ -131,6 +131,12 @@ int (*diskread)(void *arg, int unit, uint64_t offset, void *dst, size_t size, size_t *resid_return); + /* + * Write to a disk image at the given offset + */ + int (*diskwrite)(void *arg, int unit, uint64_t offset, + void *src, size_t size, size_t *resid_return); + /* * Guest virtual machine i/o */ Index: stand/userboot/userboot/Makefile =================================================================== --- stand/userboot/userboot/Makefile +++ stand/userboot/userboot/Makefile @@ -35,6 +35,7 @@ CFLAGS+= -I${BOOTSRC}/userboot CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common +CFLAGS.main.c += -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/fs/zfs CWARNFLAGS.main.c += -Wno-implicit-function-declaration LDFLAGS+= -nostdlib -Wl,-Bsymbolic Index: stand/userboot/userboot/main.c =================================================================== --- stand/userboot/userboot/main.c +++ stand/userboot/userboot/main.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "bootstrap.h" #include "disk.h" @@ -214,6 +215,16 @@ exit(0); } +static void +set_currdev(const char *devname) +{ + + env_setenv("currdev", EV_VOLATILE, devname, + userboot_setcurrdev, env_nounset); + env_setenv("loaddev", EV_VOLATILE, devname, + env_noset, env_nounset); +} + /* * Set the 'current device' by (if possible) recovering the boot device as * supplied by the initial bootstrap. @@ -225,6 +236,7 @@ struct devdesc *dd; #if defined(USERBOOT_ZFS_SUPPORT) struct zfs_devdesc zdev; + char *buf = NULL; if (userboot_zfs_found) { @@ -257,10 +269,23 @@ dd = &dev.dd; } - env_setenv("currdev", EV_VOLATILE, userboot_fmtdev(dd), - userboot_setcurrdev, env_nounset); - env_setenv("loaddev", EV_VOLATILE, userboot_fmtdev(dd), - env_noset, env_nounset); + set_currdev(userboot_fmtdev(dd)); + +#if defined(USERBOOT_ZFS_SUPPORT) + if (userboot_zfs_found) { + buf = malloc(VDEV_PAD_SIZE); + if (buf != NULL) { + if (zfs_get_bootonce(&zdev, OS_BOOTONCE, buf, + VDEV_PAD_SIZE) == 0) { + printf("zfs bootonce: %s\n", buf); + set_currdev(buf); + setenv("zfs-bootonce", buf, 1); + } + free(buf); + (void) zfs_attach_nvstore(&zdev); + } + } +#endif } #if defined(USERBOOT_ZFS_SUPPORT) Index: stand/userboot/userboot/userboot_disk.c =================================================================== --- stand/userboot/userboot/userboot_disk.c +++ stand/userboot/userboot/userboot_disk.c @@ -211,15 +211,21 @@ size_t resid; int rc; - rw &= F_MASK; - if (rw == F_WRITE) - return (EROFS); - if (rw != F_READ) - return (EINVAL); if (rsize) *rsize = 0; off = dblk * ud_info[dev->dd.d_unit].sectorsize; - rc = CALLBACK(diskread, dev->dd.d_unit, off, buf, size, &resid); + switch (rw & F_MASK) { + case F_READ: + rc = CALLBACK(diskread, dev->dd.d_unit, off, buf, size, &resid); + break; + case F_WRITE: + rc = CALLBACK(diskwrite, dev->dd.d_unit, off, buf, size, + &resid); + break; + default: + rc = EINVAL; + break; + } if (rc) return (rc); if (rsize) Index: sys/cddl/boot/zfs/zfsimpl.h =================================================================== --- sys/cddl/boot/zfs/zfsimpl.h +++ sys/cddl/boot/zfs/zfsimpl.h @@ -527,20 +527,20 @@ } vdev_phys_t; typedef enum vbe_vers { - /* The bootenv file is stored as ascii text in the envblock */ - VB_RAW = 0, + /* The bootenv file is stored as ascii text in the envblock */ + VB_RAW = 0, - /* - * The bootenv file is converted to an nvlist and then packed into the - * envblock. - */ - VB_NVLIST = 1 + /* + * The bootenv file is converted to an nvlist and then packed into the + * envblock. + */ + VB_NVLIST = 1 } vbe_vers_t; typedef struct vdev_boot_envblock { - uint64_t vbe_version; - char vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) - - sizeof (zio_eck_t)]; + uint64_t vbe_version; + char vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) - + sizeof (zio_eck_t)]; zio_eck_t vbe_zbt; } vdev_boot_envblock_t; @@ -1663,10 +1663,9 @@ */ struct vdev; struct spa; -typedef int vdev_phys_read_t(struct vdev *vdev, void *priv, - off_t offset, void *buf, size_t bytes); -typedef int vdev_read_t(struct vdev *vdev, const blkptr_t *bp, - void *buf, off_t offset, size_t bytes); +typedef int vdev_phys_read_t(struct vdev *, void *, off_t, void *, size_t); +typedef int vdev_phys_write_t(struct vdev *, off_t, void *, size_t); +typedef int vdev_read_t(struct vdev *, const blkptr_t *, void *, off_t, size_t); typedef STAILQ_HEAD(vdev_list, vdev) vdev_list_t; @@ -1794,8 +1793,9 @@ size_t v_nchildren; /* # children */ vdev_state_t v_state; /* current state */ vdev_phys_read_t *v_phys_read; /* read from raw leaf vdev */ + vdev_phys_write_t *v_phys_write; /* write to raw leaf vdev */ vdev_read_t *v_read; /* read from vdev */ - void *v_read_priv; /* private data for read function */ + void *v_priv; /* data for read/write function */ boolean_t v_islog; struct spa *v_spa; /* link to spa */ /* @@ -1821,6 +1821,7 @@ zio_cksum_salt_t spa_cksum_salt; /* secret salt for cksum */ void *spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS]; boolean_t spa_with_log; /* this pool has log */ + void *spa_bootenv; /* bootenv from pool label */ struct uberblock spa_uberblock_master; /* best uberblock so far */ objset_phys_t spa_mos_master; /* MOS for this pool */ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h @@ -174,7 +174,7 @@ extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, uint64_t size, zio_done_func_t *done, void *priv, int flags); extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *); -extern int vdev_label_write_bootenv(vdev_t *, char *); +extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *); typedef enum { VDEV_LABEL_CREATE, /* create/add a new device */ @@ -186,8 +186,6 @@ } vdev_labeltype_t; extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason); - -extern int vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size); #ifdef __cplusplus } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bootenv.h =================================================================== --- /dev/null +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bootenv.h @@ -0,0 +1,52 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Toomas Soome + */ + +#ifndef _ZFS_BOOTENV_H +#define _ZFS_BOOTENV_H + +/* + * Define macros for label bootenv nvlist pair keys. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define BOOTENV_VERSION "version" + +#define BE_ILLUMOS_VENDOR "illumos" +#define BE_FREEBSD_VENDOR "freebsd" +#define BE_GRUB_VENDOR "grub" + +#define BOOTENV_OS BE_FREEBSD_VENDOR + +#define GRUB_ENVMAP BE_GRUB_VENDOR ":" "envmap" + +#define FREEBSD_BOOTONCE BE_FREEBSD_VENDOR ":" "bootonce" +#define FREEBSD_BOOTONCE_USED BE_FREEBSD_VENDOR ":" "bootonce-used" +#define ILLUMOS_BOOTONCE BE_ILLUMOS_VENDOR ":" "bootonce" +#define ILLUMOS_BOOTONCE_USED BE_ILLUMOS_VENDOR ":" "bootonce-used" +#define FREEBSD_NVSTORE BE_FREEBSD_VENDOR ":" "nvstore" +#define ILLUMOS_NVSTORE BE_ILLUMOS_VENDOR ":" "nvstore" + +#define OS_BOOTONCE BOOTENV_OS ":" "bootonce" +#define OS_BOOTONCE_USED BOOTENV_OS ":" "bootonce-used" +#define OS_NVSTORE BOOTENV_OS ":" "nvstore" + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_BOOTENV_H */ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c @@ -151,6 +151,8 @@ #include #include #include +#include +#include static boolean_t vdev_trim_on_init = B_TRUE; SYSCTL_DECL(_vfs_zfs_vdev); @@ -1029,7 +1031,7 @@ ASSERT3U(zio->io_size, ==, VDEV_PAD_SIZE); - if (zio->io_error == 0) { + if (zio->io_error == 0) { mutex_enter(&rio->io_lock); if (*cbp == NULL) { /* Will free this buffer in vdev_label_read_bootenv. */ @@ -1054,13 +1056,9 @@ * bootloader should have rewritten them all to be the same on boot, * and any changes we made since boot have been the same across all * labels. - * - * While grub supports writing to all four labels, other bootloaders - * don't, so we only use the first two labels to store boot - * information. */ if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { - for (int l = 0; l < VDEV_LABELS / 2; l++) { + for (int l = 0; l < VDEV_LABELS; l++) { vdev_label_read(zio, vd, l, abd_alloc_linear(VDEV_PAD_SIZE, B_FALSE), offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE, @@ -1070,14 +1068,15 @@ } int -vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *command) +vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *bootenv) { + nvlist_t *config; spa_t *spa = rvd->vdev_spa; abd_t *abd = NULL; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; - ASSERT(command); + ASSERT(bootenv); ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); zio_t *zio = zio_root(spa, NULL, &abd, flags); @@ -1085,39 +1084,86 @@ int err = zio_wait(zio); if (abd != NULL) { + char *buf; vdev_boot_envblock_t *vbe = abd_to_buf(abd); - if (vbe->vbe_version != VB_RAW) { - abd_free(abd); - return (SET_ERROR(ENOTSUP)); + + vbe->vbe_version = ntohll(vbe->vbe_version); + switch (vbe->vbe_version) { + case VB_RAW: + /* + * if we have textual data in vbe_bootenv, create nvlist + * with key "envmap". + */ + if (*vbe->vbe_bootenv == '\0') { + fnvlist_add_uint64(bootenv, BOOTENV_VERSION, + VB_NVLIST); + break; + } + fnvlist_add_uint64(bootenv, BOOTENV_VERSION, VB_RAW); + vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0'; + fnvlist_add_string(bootenv, GRUB_ENVMAP, + vbe->vbe_bootenv); + break; + + case VB_NVLIST: + err = nvlist_unpack(vbe->vbe_bootenv, + sizeof (vbe->vbe_bootenv), &config, 0); + if (err == 0) { + fnvlist_merge(bootenv, config); + nvlist_free(config); + break; + } + /* FALLTHROUGH */ + default: + /* Check for FreeBSD zfs bootonce command string */ + buf = abd_to_buf(abd); + if (*buf == '\0') { + fnvlist_add_uint64(bootenv, BOOTENV_VERSION, + VB_NVLIST); + break; + } + fnvlist_add_string(bootenv, FREEBSD_BOOTONCE, buf); } - vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0'; - fnvlist_add_string(command, "envmap", vbe->vbe_bootenv); - /* abd was allocated in vdev_label_read_bootenv_impl() */ + + /* + * abd was allocated in vdev_label_read_bootenv_impl() + */ abd_free(abd); - /* If we managed to read any successfully, return success. */ + /* + * If we managed to read any successfully, + * return success. + */ return (0); } return (err); } int -vdev_label_write_bootenv(vdev_t *vd, char *envmap) +vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env) { zio_t *zio; spa_t *spa = vd->vdev_spa; vdev_boot_envblock_t *bootenv; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; - int error = ENXIO; + int error; + size_t nvsize; + char *nvbuf; - if (strlen(envmap) >= sizeof (bootenv->vbe_bootenv)) { + error = nvlist_size(env, &nvsize, NV_ENCODE_XDR); + if (error != 0) + return (SET_ERROR(error)); + + if (nvsize >= sizeof (bootenv->vbe_bootenv)) { return (SET_ERROR(E2BIG)); } ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + error = ENXIO; for (int c = 0; c < vd->vdev_children; c++) { - int child_err = vdev_label_write_bootenv(vd->vdev_child[c], - envmap); + int child_err; + + child_err = vdev_label_write_bootenv(vd->vdev_child[c], env); /* * As long as any of the disks managed to write all of their * labels successfully, return success. @@ -1133,16 +1179,41 @@ ASSERT3U(sizeof (*bootenv), ==, VDEV_PAD_SIZE); abd_t *abd = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); abd_zero(abd, VDEV_PAD_SIZE); + bootenv = abd_borrow_buf_copy(abd, VDEV_PAD_SIZE); + nvbuf = bootenv->vbe_bootenv; + nvsize = sizeof (bootenv->vbe_bootenv); - char *buf = bootenv->vbe_bootenv; - (void) strlcpy(buf, envmap, sizeof (bootenv->vbe_bootenv)); - bootenv->vbe_version = VB_RAW; - abd_return_buf_copy(abd, bootenv, VDEV_PAD_SIZE); + bootenv->vbe_version = fnvlist_lookup_uint64(env, BOOTENV_VERSION); + switch (bootenv->vbe_version) { + case VB_RAW: + if (nvlist_lookup_string(env, GRUB_ENVMAP, &nvbuf) == 0) { + (void) strlcpy(bootenv->vbe_bootenv, nvbuf, nvsize); + } + error = 0; + break; + case VB_NVLIST: + error = nvlist_pack(env, &nvbuf, &nvsize, NV_ENCODE_XDR, + KM_SLEEP); + break; + + default: + error = EINVAL; + break; + } + + if (error == 0) { + bootenv->vbe_version = htonll(bootenv->vbe_version); + abd_return_buf_copy(abd, bootenv, VDEV_PAD_SIZE); + } else { + abd_free(abd); + return (SET_ERROR(error)); + } + retry: zio = zio_root(spa, NULL, NULL, flags); - for (int l = 0; l < VDEV_LABELS / 2; l++) { + for (int l = 0; l < VDEV_LABELS; l++) { vdev_label_write(zio, vd, l, abd, offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE, NULL, NULL, flags); @@ -1155,44 +1226,6 @@ } abd_free(abd); - return (error); -} - -int -vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size) -{ - spa_t *spa = vd->vdev_spa; - zio_t *zio; - abd_t *pad2; - int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; - int error; - - if (size > VDEV_PAD_SIZE) - return (EINVAL); - - if (!vd->vdev_ops->vdev_op_leaf) - return (ENODEV); - if (vdev_is_dead(vd)) - return (ENXIO); - - ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - - pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); - abd_zero(pad2, VDEV_PAD_SIZE); - abd_copy_from_buf(pad2, buf, size); - -retry: - zio = zio_root(spa, NULL, NULL, flags); - vdev_label_write(zio, vd, 0, pad2, - offsetof(vdev_label_t, vl_be), - VDEV_PAD_SIZE, NULL, NULL, flags); - error = zio_wait(zio); - if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { - flags |= ZIO_FLAG_TRYHARD; - goto retry; - } - - abd_free(pad2); return (error); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -213,6 +213,7 @@ #include #include #include +#include #include "zfs_namecheck.h" #include "zfs_prop.h" @@ -3657,30 +3658,27 @@ /* * This ioctl is used to set the bootenv configuration on the current * pool. This configuration is stored in the second padding area of the label, - * and it is used by the GRUB bootloader used on Linux to store the contents - * of the grubenv file. The file is stored as raw ASCII, and is protected by - * an embedded checksum. By default, GRUB will check if the boot filesystem - * supports storing the environment data in a special location, and if so, - * will invoke filesystem specific logic to retrieve it. This can be overriden - * by a variable, should the user so desire. + * and it is used by the bootloader(s) to store bootloader and/or system + * specific data. + * The data is stored as nvlist data stream, and is protected by + * an embedded checksum. */ /* ARGSUSED */ static const zfs_ioc_key_t zfs_keys_set_bootenv[] = { - {"envmap", DATA_TYPE_STRING, 0}, + {BOOTENV_VERSION, DATA_TYPE_UINT64, 0}, + {"", DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST}, }; static int zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl) { - char *envmap; int error; spa_t *spa; - envmap = fnvlist_lookup_string(innvl, "envmap"); if ((error = spa_open(name, &spa, FTAG)) != 0) return (error); spa_vdev_state_enter(spa, SCL_ALL); - error = vdev_label_write_bootenv(spa->spa_root_vdev, envmap); + error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl); (void) spa_vdev_state_exit(spa, NULL, 0); spa_close(spa, FTAG); return (error); @@ -3708,30 +3706,24 @@ #ifdef __FreeBSD__ static const zfs_ioc_key_t zfs_keys_nextboot[] = { - {"command", DATA_TYPE_STRING, 0}, + {BOOTENV_VERSION, DATA_TYPE_UINT64, 0}, {ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, 0}, - {ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, 0} + {ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, 0}, + {FREEBSD_BOOTONCE, DATA_TYPE_STRING, 0}, }; +/* This interface should be deprecated */ static int -zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +zfs_ioc_nextboot(const char *unused __unused, nvlist_t *innvl, nvlist_t *outnvl) { char name[MAXNAMELEN]; spa_t *spa; - vdev_t *vd; - char *command; uint64_t pool_guid; uint64_t vdev_guid; int error; - if (nvlist_lookup_uint64(innvl, - ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) - return (EINVAL); - if (nvlist_lookup_uint64(innvl, - ZPOOL_CONFIG_GUID, &vdev_guid) != 0) - return (EINVAL); - command = fnvlist_lookup_string(innvl, "command"); - + pool_guid = fnvlist_lookup_uint64(innvl, ZPOOL_CONFIG_POOL_GUID); + vdev_guid = fnvlist_lookup_uint64(innvl, ZPOOL_CONFIG_GUID); mutex_enter(&spa_namespace_lock); spa = spa_by_guid(pool_guid, vdev_guid); if (spa != NULL) @@ -3742,16 +3734,14 @@ if ((error = spa_open(name, &spa, FTAG)) != 0) return (error); + + /* No need to store pool and vdev guids */ + fnvlist_remove(innvl, ZPOOL_CONFIG_POOL_GUID); + fnvlist_remove(innvl, ZPOOL_CONFIG_GUID); + spa_vdev_state_enter(spa, SCL_ALL); - vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE); - if (vd == NULL) { - (void) spa_vdev_state_exit(spa, NULL, ENXIO); - spa_close(spa, FTAG); - return (ENODEV); - } - error = vdev_label_write_pad2(vd, command, strlen(command)); + error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl); (void) spa_vdev_state_exit(spa, NULL, 0); - txg_wait_synced(spa->spa_dsl_pool, 0); spa_close(spa, FTAG); return (error); } Index: tools/tools/zfsboottest/zfsboottest.c =================================================================== --- tools/tools/zfsboottest/zfsboottest.c +++ tools/tools/zfsboottest/zfsboottest.c @@ -147,7 +147,7 @@ warn("open(%s) failed", argv[i]); continue; } - if (vdev_probe(vdev_read, &fd[i - 1], NULL) != 0) { + if (vdev_probe(vdev_read, NULL, &fd[i - 1], NULL) != 0) { warnx("vdev_probe(%s) failed", argv[i]); close(fd[i - 1]); }