Index: cddl/contrib/opensolaris/cmd/zinject/translate.c =================================================================== --- cddl/contrib/opensolaris/cmd/zinject/translate.c +++ cddl/contrib/opensolaris/cmd/zinject/translate.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, 2020 by Delphix. All rights reserved. */ #include @@ -484,7 +484,7 @@ record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; break; case TYPE_LABEL_PAD2: - record->zi_start = offsetof(vdev_label_t, vl_pad2); + record->zi_start = offsetof(vdev_label_t, vl_be); record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; break; } Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -836,6 +836,8 @@ extern int zpool_read_label(int, nvlist_t **); extern int zpool_read_all_labels(int, nvlist_t **); extern int zpool_clear_label(int); +extern int zpool_set_bootenv(zpool_handle_t *, const nvlist_t *); +extern int zpool_get_bootenv(zpool_handle_t *, nvlist_t **); /* is this zvol valid for use as a dump device? */ extern int zvol_check_dump_config(char *); Index: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c +++ cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright 2016 Nexenta Systems, Inc. 
* Copyright 2016 Igor Kozhukhov @@ -395,7 +395,7 @@ * Assuming bootfs is a valid dataset name. */ static boolean_t -bootfs_name_valid(const char *pool, char *bootfs) +bootfs_name_valid(const char *pool, const char *bootfs) { int len = strlen(pool); @@ -4229,6 +4229,38 @@ (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj); } free(mntpnt); +} + +int +zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap) +{ + int error = lzc_set_bootenv(zhp->zpool_name, envmap); + if (error != 0) { + (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, + dgettext(TEXT_DOMAIN, + "error setting bootenv in pool '%s'"), zhp->zpool_name); + } + + return (error); +} + +int +zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp) +{ + nvlist_t *nvl; + int error; + + nvl = NULL; + error = lzc_get_bootenv(zhp->zpool_name, &nvl); + if (error != 0) { + (void) zpool_standard_error_fmt(zhp->zpool_hdl, error, + dgettext(TEXT_DOMAIN, + "error getting bootenv in pool '%s'"), zhp->zpool_name); + } else { + *nvlp = nvl; + } + + return (error); } #ifdef illumos Index: cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h =================================================================== --- cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h +++ cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2013 by Martin Matuska . All rights reserved. * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Datto Inc. 
@@ -105,6 +105,8 @@ int lzc_pool_checkpoint(const char *); int lzc_pool_checkpoint_discard(const char *); +int lzc_set_bootenv(const char *, const nvlist_t *); +int lzc_get_bootenv(const char *, nvlist_t **); #ifdef __cplusplus } #endif Index: cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c =================================================================== --- cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c +++ cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 RackTop Systems. @@ -1209,4 +1209,22 @@ fnvlist_free(args); return (error); +} + +/* + * Set the bootenv contents for the given pool. + */ +int +lzc_set_bootenv(const char *pool, const nvlist_t *env) +{ + return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL)); +} + +/* + * Get the contents of the bootenv of the given pool. + */ +int +lzc_get_bootenv(const char *pool, nvlist_t **outnvl) +{ + return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl)); } Index: sbin/zfsbootcfg/zfsbootcfg.c =================================================================== --- sbin/zfsbootcfg/zfsbootcfg.c +++ sbin/zfsbootcfg/zfsbootcfg.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -40,57 +41,45 @@ /* Keep in sync with zfsboot.c. 
*/ #define MAX_COMMAND_LEN 512 -int -install_bootonce(libzfs_handle_t *hdl, uint64_t pool_guid, nvlist_t *nv, - const char * const data) +int main(int argc, char * const *argv) { - nvlist_t **child; - uint_t children = 0; - uint64_t guid; + char buf[MAX_COMMAND_LEN], *name; + const char *key, *value; + libzfs_handle_t *hdl; + zpool_handle_t *zphdl; + nvlist_t *nv; + nvpair_t *nvp; int rv; + int len; + bool print = false; - (void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, - &children); - - for (int c = 0; c < children; c++) { - rv = install_bootonce(hdl, pool_guid, child[c], data); + key = "command"; + value = NULL; + print = 0; + while ((rv = getopt(argc, argv, "k:v:p")) != -1) { + switch (rv) { + case 'k': + key = optarg; + break; + case 'v': + value = optarg; + break; + case 'p': + print = true; + } } - if (children > 0) - return (rv); + argc -= optind; + argv += optind; - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) { - perror("can't get vdev guid"); - return (1); - } - if (zpool_nextboot(hdl, pool_guid, guid, data) != 0) { - perror("ZFS_IOC_NEXTBOOT failed"); - return (1); - } - return (0); -} + if (argc == 1) + value = argv[0]; -int main(int argc, const char * const *argv) -{ - char buf[32], *name; - libzfs_handle_t *hdl; - zpool_handle_t *zphdl; - uint64_t pool_guid; - nvlist_t *nv, *config; - int rv; - int len; - - if (argc != 2) { + if (argc > 1) { fprintf(stderr, "usage: zfsbootcfg \n"); return (1); } - len = strlen(argv[1]); - if (len >= MAX_COMMAND_LEN) { - fprintf(stderr, "options string is too long\n"); - return (1); - } - if (kenv(KENV_GET, "vfs.root.mountfrom", buf, sizeof(buf)) <= 0) { perror("can't get vfs.root.mountfrom"); return (1); @@ -112,6 +101,8 @@ return (1); } + libzfs_print_on_error(hdl, B_TRUE); + zphdl = zpool_open(hdl, name); if (zphdl == NULL) { perror("can't open pool"); @@ -119,28 +110,51 @@ return (1); } - pool_guid = zpool_get_prop_int(zphdl, ZPOOL_PROP_GUID, NULL); + if (value != NULL) { + len 
= strlen(value); + if (len >= MAX_COMMAND_LEN) { + fprintf(stderr, "options string is too long\n"); + } else { + rv = zpool_get_bootenv(zphdl, &nv); + if (rv != 0) + nv = fnvlist_alloc(); - config = zpool_get_config(zphdl, NULL); - if (config == NULL) { - perror("can't get pool config"); - zpool_close(zphdl); - libzfs_fini(hdl); - return (1); - } + if (value[0] == '\0') + (void) nvlist_remove_all(nv, key); /* key may be absent; do not assert */ + else + fnvlist_add_string(nv, key, value); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) != 0) { - perror("failed to get vdev tree"); - zpool_close(zphdl); - libzfs_fini(hdl); - return (1); + rv = zpool_set_bootenv(zphdl, nv); + if (rv == 0) + printf("zfs next boot options are successfully written\n"); + else + printf("error: %d\n", rv); + fnvlist_free(nv); + } } - rv = install_bootonce(hdl, pool_guid, nv, argv[1]); + if (print) { + rv = zpool_get_bootenv(zphdl, &nv); + if (rv == 0) { + nvp = NULL; + while ((nvp = nvlist_next_nvpair(nv, nvp)) != NULL) { + char *ptr; + printf("%s: ", nvpair_name(nvp)); + switch (nvpair_type(nvp)) { + case DATA_TYPE_STRING: + nvpair_value_string(nvp, &ptr); + printf("%s\n", ptr); + break; + default: + break; + } + } + fnvlist_free(nv); /* nv is only set when the read succeeded */ + } + } + zpool_close(zphdl); libzfs_fini(hdl); - if (rv == 0) - printf("zfs next boot options are successfully written\n"); return (rv); } Index: stand/efi/boot1/zfs_module.c =================================================================== --- stand/efi/boot1/zfs_module.c +++ stand/efi/boot1/zfs_module.c @@ -124,7 +124,7 @@ } memcpy(tdev, dev, sizeof(*dev)); - if (vdev_probe(vdev_read, tdev, &spa) != 0) { + if (vdev_probe(vdev_read, NULL, tdev, &spa) != 0) { free(tdev); return (EFI_UNSUPPORTED); } Index: stand/libsa/zfs/libzfs.h =================================================================== --- stand/libsa/zfs/libzfs.h +++ stand/libsa/zfs/libzfs.h @@ -26,15 +26,15 @@ * $FreeBSD$ */ +#ifndef _BOOT_LIBZFS_H_ +#define _BOOT_LIBZFS_H_ + #include #ifdef LOADER_GELI_SUPPORT #include #endif 
-#ifndef _BOOT_LIBZFS_H_ -#define _BOOT_LIBZFS_H_ - #define ZFS_MAXNAMELEN 256 /* @@ -54,6 +54,7 @@ #define NV_UNIQUE_NAME_TYPE 0x2 #define NV_ALIGN4(x) (((x) + 3) & ~3) +#define NV_ALIGN(x) (((x) + 7) & ~7) /* * nvlist header. @@ -109,12 +110,14 @@ nvlist_t *nvlist_create(int); void nvlist_destroy(nvlist_t *); -nvlist_t *nvlist_import(const uint8_t *, char, char); +nvlist_t *nvlist_import(const char *, char, char); +int nvlist_export(nvlist_t *); int nvlist_remove(nvlist_t *, const char *, data_type_t); void nvlist_print(nvlist_t *, unsigned int); int nvlist_find(const nvlist_t *, const char *, data_type_t, int *, void *, int *); int nvlist_next(nvlist_t *); +int nvlist_add_string(nvlist_t *, const char *, const char *); int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path); @@ -127,6 +130,8 @@ int zfs_bootenv(const char *name); int zfs_belist_add(const char *name, uint64_t __unused); int zfs_set_env(void); + +nvlist_t *vdev_read_bootenv(vdev_t *); extern struct devsw zfs_dev; extern struct fs_ops zfs_fsops; Index: stand/libsa/zfs/nvlist.c =================================================================== --- stand/libsa/zfs/nvlist.c +++ stand/libsa/zfs/nvlist.c @@ -28,128 +28,234 @@ #include #include +#include #include #include "libzfs.h" +enum xdr_op { + XDR_OP_ENCODE = 1, + XDR_OP_DECODE = 2 +}; + typedef struct xdr { - int (*xdr_getint)(const struct xdr *, const void *, int *); + enum xdr_op xdr_op; + int (*xdr_getint)(const void *, int *); + int (*xdr_putint)(void *, int); + int (*xdr_getuint)(const void *, unsigned *); + int (*xdr_putuint)(void *, unsigned); } xdr_t; -static int xdr_int(const xdr_t *, const void *, int *); -static int mem_int(const xdr_t *, const void *, int *); -static void nvlist_decode_nvlist(const xdr_t *, nvlist_t *); +static int nvlist_xdr_nvlist(const xdr_t *, nvlist_t *); static int nvlist_size(const xdr_t *, const uint8_t *); +static int xdr_int(const xdr_t *, void *, int *); +static int xdr_u_int(const 
xdr_t *, void *, unsigned *); -/* - * transform data from network to host. - */ -xdr_t ntoh = { - .xdr_getint = xdr_int -}; +/* Basic primitives for XDR translation operations, getint and putint. */ +static int +_getint(const void *buf, int *ip) +{ + *ip = be32dec(buf); + return (sizeof(int)); +} +static int +_putint(void *buf, int i) +{ + int *ip = buf; + + *ip = htobe32(i); + return (sizeof(int)); +} + +static int +_getuint(const void *buf, unsigned *ip) +{ + *ip = be32dec(buf); + return (sizeof(unsigned)); +} + +static int +_putuint(void *buf, unsigned i) +{ + unsigned *up = buf; + + *up = htobe32(i); + return (sizeof(int)); +} + /* - * transform data from host to host. + * read native data without translation. */ -xdr_t native = { - .xdr_getint = mem_int -}; +static int +mem_int(const void *buf, int *i) +{ + *i = *(int *)buf; + return (sizeof(int)); +} +static int +mem_uint(const void *buf, unsigned *u) +{ + *u = *(int *)buf; + return (sizeof(int)); +} + /* - * transform data from host to network. + * XDR data translations. 
*/ -xdr_t hton = { - .xdr_getint = xdr_int -}; - static int -xdr_short(const xdr_t *xdr, const uint8_t *buf, short *ip) +xdr_short(const xdr_t *xdr, uint8_t *buf, short *ip) { - int i, rv; + int i, rv = 0; - rv = xdr->xdr_getint(xdr, buf, &i); - *ip = i; + i = *ip; + rv = xdr_int(xdr, buf, &i); + if (xdr->xdr_op == XDR_OP_DECODE) { + *ip = i; + } return (rv); } static int -xdr_u_short(const xdr_t *xdr, const uint8_t *buf, unsigned short *ip) +xdr_u_short(const xdr_t *xdr, uint8_t *buf, unsigned short *ip) { unsigned u; int rv; - rv = xdr->xdr_getint(xdr, buf, &u); - *ip = u; + u = *ip; + rv = xdr_u_int(xdr, buf, &u); + if (xdr->xdr_op == XDR_OP_DECODE) { + *ip = u; + } return (rv); } static int -xdr_int(const xdr_t *xdr __unused, const void *buf, int *ip) +xdr_int(const xdr_t *xdr, void *buf, int *ip) { - *ip = be32dec(buf); - return (sizeof(int)); + int rv = 0; + int *i = buf; + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + rv = xdr->xdr_putint(buf, *ip); + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + rv = xdr->xdr_getint(buf, i); + *ip = *i; + break; + } + return (rv); } static int -xdr_u_int(const xdr_t *xdr __unused, const void *buf, unsigned *ip) +xdr_u_int(const xdr_t *xdr, void *buf, unsigned *ip) { - *ip = be32dec(buf); - return (sizeof(unsigned)); + int rv = 0; + unsigned *u = buf; + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + rv = xdr->xdr_putuint(buf, *ip); + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + rv = xdr->xdr_getuint(buf, u); + *ip = *u; + break; + } + return (rv); } static int xdr_string(const xdr_t *xdr, const void *buf, nv_string_t *s) { - int size; + int size = 0; - size = xdr->xdr_getint(xdr, buf, &s->nv_size); - size = NV_ALIGN4(size + s->nv_size); + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + size = s->nv_size; + size += xdr->xdr_putuint(&s->nv_size, s->nv_size); + size = NV_ALIGN4(size); + break; 
+ + case XDR_OP_DECODE: + size = xdr->xdr_getuint(buf, &s->nv_size); + size = NV_ALIGN4(size + s->nv_size); + break; + } return (size); } static int -xdr_int64(const xdr_t *xdr, const uint8_t *buf, int64_t *lp) +xdr_int64(const xdr_t *xdr, uint8_t *buf, int64_t *lp) { - int hi, rv; + int hi, rv = 0; unsigned lo; - rv = xdr->xdr_getint(xdr, buf, &hi); - rv += xdr->xdr_getint(xdr, buf + rv, &lo); - *lp = (((int64_t)hi) << 32) | lo; + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *lp, store to buf */ + hi = *lp >> 32; + lo = *lp & UINT32_MAX; + rv = xdr->xdr_putint(buf, hi); + rv += xdr->xdr_putint(buf + rv, lo); + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + rv = xdr->xdr_getint(buf, &hi); + rv += xdr->xdr_getuint(buf + rv, &lo); + *lp = (((int64_t)hi) << 32) | lo; + } return (rv); } static int -xdr_uint64(const xdr_t *xdr, const uint8_t *buf, uint64_t *lp) +xdr_uint64(const xdr_t *xdr, uint8_t *buf, uint64_t *lp) { unsigned hi, lo; - int rv; + int rv = 0; - rv = xdr->xdr_getint(xdr, buf, &hi); - rv += xdr->xdr_getint(xdr, buf + rv, &lo); - *lp = (((int64_t)hi) << 32) | lo; + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + hi = *lp >> 32; + lo = *lp & UINT32_MAX; + rv = xdr->xdr_putint(buf, hi); + rv += xdr->xdr_putint(buf + rv, lo); + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + rv = xdr->xdr_getuint(buf, &hi); + rv += xdr->xdr_getuint(buf + rv, &lo); + *lp = (((uint64_t)hi) << 32) | lo; + } return (rv); } static int -xdr_char(const xdr_t *xdr, const uint8_t *buf, char *cp) +xdr_char(const xdr_t *xdr, uint8_t *buf, char *cp) { - int i, rv; + int i, rv = 0; - rv = xdr->xdr_getint(xdr, buf, &i); - *cp = i; + i = *cp; + rv = xdr_int(xdr, buf, &i); + if (xdr->xdr_op == XDR_OP_DECODE) { + *cp = i; + } return (rv); } /* - * read native data. + * nvlist management functions. 
*/ -static int -mem_int(const xdr_t *xdr, const void *buf, int *i) -{ - *i = *(int *)buf; - return (sizeof(int)); -} - void nvlist_destroy(nvlist_t *nvl) { @@ -205,30 +311,42 @@ return (nvl); } -static void -nvlist_nvp_decode(const xdr_t *xdr, nvlist_t *nvl, nvp_header_t *nvph) +static int +nvlist_xdr_nvp(const xdr_t *xdr, nvlist_t *nvl) { nv_string_t *nv_string; nv_pair_data_t *nvp_data; nvlist_t nvlist; + unsigned type, nelem; + xdr_t xdrmem = { + .xdr_op = XDR_OP_DECODE, + .xdr_getint = mem_int, + .xdr_getuint = mem_uint + }; nv_string = (nv_string_t *)nvl->nv_idx; nvl->nv_idx += xdr_string(xdr, &nv_string->nv_size, nv_string); nvp_data = (nv_pair_data_t *)nvl->nv_idx; - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_type, &nvp_data->nv_type); - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_nelem, &nvp_data->nv_nelem); + type = nvp_data->nv_type; + nelem = nvp_data->nv_nelem; + nvl->nv_idx += xdr_u_int(xdr, nvl->nv_idx, &type); + nvl->nv_idx += xdr_u_int(xdr, nvl->nv_idx, &nelem); - switch (nvp_data->nv_type) { + switch (type) { case DATA_TYPE_NVLIST: case DATA_TYPE_NVLIST_ARRAY: - bzero(&nvlist, sizeof (nvlist)); + bzero(&nvlist, sizeof(nvlist)); nvlist.nv_data = &nvp_data->nv_data[0]; nvlist.nv_idx = nvlist.nv_data; - for (int i = 0; i < nvp_data->nv_nelem; i++) { - nvlist.nv_asize = - nvlist_size(xdr, nvlist.nv_data); - nvlist_decode_nvlist(xdr, &nvlist); + for (unsigned i = 0; i < nelem; i++) { + if (xdr->xdr_op == XDR_OP_ENCODE) + nvlist.nv_asize = + nvlist_size(&xdrmem, nvlist.nv_data); + else + nvlist.nv_asize = + nvlist_size(xdr, nvlist.nv_data); + nvlist_xdr_nvlist(xdr, &nvlist); nvl->nv_idx = nvlist.nv_idx; nvlist.nv_data = nvlist.nv_idx; } @@ -282,35 +400,86 @@ break; } + return (0); } -static void -nvlist_decode_nvlist(const xdr_t *xdr, nvlist_t *nvl) +static int +nvlist_xdr_nvlist(const xdr_t *xdr, nvlist_t *nvl) { nvp_header_t *nvph; - nvs_data_t *nvs = (nvs_data_t *)nvl->nv_data; + nvs_data_t *nvs; + unsigned encoded_size, decoded_size; + int rv; 
nvl->nv_idx = nvl->nv_data; - nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_version, - &nvs->nvl_version); - nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_nvflag, - &nvs->nvl_nvflag); - + nvs = (nvs_data_t *)nvl->nv_data; nvph = &nvs->nvl_pair; - nvl->nv_idx += xdr->xdr_getint(xdr, - (const uint8_t *)&nvph->encoded_size, &nvph->encoded_size); - nvl->nv_idx += xdr->xdr_getint(xdr, - (const uint8_t *)&nvph->decoded_size, &nvph->decoded_size); - while (nvph->encoded_size && nvph->decoded_size) { - nvlist_nvp_decode(xdr, nvl, nvph); + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + nvs->nvl_version); + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + nvs->nvl_nvflag); - nvph = (nvp_header_t *)(nvl->nv_idx); - nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->encoded_size, + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + encoded_size); + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + decoded_size); + break; + + case XDR_OP_DECODE: + nvl->nv_idx += xdr->xdr_getuint(nvl->nv_idx, + &nvs->nvl_version); + nvl->nv_idx += xdr->xdr_getuint(nvl->nv_idx, + &nvs->nvl_nvflag); + + nvl->nv_idx += xdr->xdr_getuint(nvl->nv_idx, &nvph->encoded_size); - nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->decoded_size, + nvl->nv_idx += xdr->xdr_getuint(nvl->nv_idx, &nvph->decoded_size); + + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + break; + + default: + return (EINVAL); } + + rv = 0; + while (encoded_size && decoded_size) { + rv = nvlist_xdr_nvp(xdr, nvl); + if (rv != 0) + return (rv); + + nvph = (nvp_header_t *)(nvl->nv_idx); + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + encoded_size); + nvl->nv_idx += xdr->xdr_putuint(nvl->nv_idx, + decoded_size); + break; + + case XDR_OP_DECODE: + nvl->nv_idx 
+= xdr->xdr_getuint(&nvph->encoded_size, + &nvph->encoded_size); + nvl->nv_idx += xdr->xdr_getuint(&nvph->decoded_size, + &nvph->decoded_size); + + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + break; + } + } + return (rv); } static int @@ -323,26 +492,53 @@ p += 2 * sizeof(unsigned); pair = p; - p += xdr->xdr_getint(xdr, p, &encoded_size); - p += xdr->xdr_getint(xdr, p, &decoded_size); + p += xdr->xdr_getuint(p, &encoded_size); + p += xdr->xdr_getuint(p, &decoded_size); while (encoded_size && decoded_size) { p = pair + encoded_size; pair = p; - p += xdr->xdr_getint(xdr, p, &encoded_size); - p += xdr->xdr_getint(xdr, p, &decoded_size); + p += xdr->xdr_getuint(p, &encoded_size); + p += xdr->xdr_getuint(p, &decoded_size); } return (p - stream); } /* + * Export nvlist to byte stream format. + */ +int +nvlist_export(nvlist_t *nvl) +{ + int rv; + xdr_t xdr = { + .xdr_op = XDR_OP_ENCODE, + .xdr_putint = _putint, + .xdr_putuint = _putuint + }; + + if (nvl->nv_header.nvh_encoding != NV_ENCODE_XDR) + return (ENOTSUP); + + nvl->nv_idx = nvl->nv_data; + rv = nvlist_xdr_nvlist(&xdr, nvl); + + return (rv); +} + +/* * Import nvlist from byte stream. * Determine the stream size and allocate private copy. * Then translate the data. 
*/ nvlist_t * -nvlist_import(const uint8_t *stream, char encoding, char endian) +nvlist_import(const char *stream, char encoding, char endian) { nvlist_t *nvl; + xdr_t xdr = { + .xdr_op = XDR_OP_DECODE, + .xdr_getint = _getint, + .xdr_getuint = _getuint + }; if (encoding != NV_ENCODE_XDR) return (NULL); @@ -351,7 +547,11 @@ if (nvl == NULL) return (nvl); - nvl->nv_asize = nvl->nv_size = nvlist_size(&ntoh, stream); + nvl->nv_header.nvh_encoding = encoding; + nvl->nv_header.nvh_endian = endian; + nvl->nv_header.nvh_reserved1 = nvl->nv_header.nvh_reserved2 = 0; + nvl->nv_asize = nvl->nv_size = nvlist_size(&xdr, + (const uint8_t *)stream); nvl->nv_data = malloc(nvl->nv_asize); if (nvl->nv_data == NULL) { free(nvl); @@ -360,8 +560,14 @@ nvl->nv_idx = nvl->nv_data; bcopy(stream, nvl->nv_data, nvl->nv_asize); - nvlist_decode_nvlist(&ntoh, nvl); - nvl->nv_idx = nvl->nv_data; + if (nvlist_xdr_nvlist(&xdr, nvl) == 0) { + nvl->nv_idx = nvl->nv_data; + } else { + free(nvl->nv_data); + free(nvl); + nvl = NULL; + } + return (nvl); } @@ -432,7 +638,6 @@ while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { nvp_name = (nv_string_t *)((uint8_t *)nvp + sizeof(*nvp)); - nvp_data = (nv_pair_data_t *) NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); @@ -443,7 +648,7 @@ *elementsp = nvp_data->nv_nelem; switch (nvp_data->nv_type) { case DATA_TYPE_UINT64: - *(uint64_t *)valuep = + *(uint64_t *)valuep = *(uint64_t *)nvp_data->nv_data; return (0); case DATA_TYPE_STRING: @@ -476,7 +681,55 @@ return (ENOENT); } -/* +int +nvlist_add_string(nvlist_t *nvl, const char *name, const char *value) +{ + nvs_data_t *nvs; + nvp_header_t head, *hp; + uint8_t *ptr; + size_t namelen, valuelen; + + nvs = (nvs_data_t *)nvl->nv_data; + if (nvs->nvl_nvflag & NV_UNIQUE_NAME) + (void) nvlist_remove(nvl, name, DATA_TYPE_STRING); + + namelen = strlen(name); + valuelen = strlen(value); + head.encoded_size = 4 + 4 + 4 + NV_ALIGN4(namelen) + 4 + 4 + + 4 + NV_ALIGN(valuelen + 1); + 
head.decoded_size = NV_ALIGN(4 * 4 + namelen + 1) + + NV_ALIGN(valuelen + 1); + + if (nvl->nv_asize - nvl->nv_size < head.encoded_size + 8) { + ptr = realloc(nvl->nv_data, nvl->nv_asize + head.encoded_size); + if (ptr == NULL) + return (ENOMEM); + nvl->nv_data = ptr; + nvl->nv_asize += head.encoded_size; + } + nvl->nv_idx = nvl->nv_data + nvl->nv_size - sizeof(*hp); + bzero(nvl->nv_idx, head.encoded_size + 8); + hp = (nvp_header_t *)nvl->nv_idx; + *hp = head; + nvl->nv_idx += sizeof(*hp); + *(unsigned *)nvl->nv_idx = namelen; + nvl->nv_idx += sizeof(unsigned); + strlcpy((char *)nvl->nv_idx, name, namelen + 1); + nvl->nv_idx += NV_ALIGN4(namelen); + *(unsigned *)nvl->nv_idx = DATA_TYPE_STRING; + nvl->nv_idx += sizeof(unsigned); + *(unsigned *)nvl->nv_idx = 1; + nvl->nv_idx += sizeof(unsigned); + *(unsigned *)nvl->nv_idx = valuelen; + nvl->nv_idx += sizeof(unsigned); + strlcpy((char *)nvl->nv_idx, value, valuelen + 1); + nvl->nv_idx += NV_ALIGN4(valuelen); + nvl->nv_size += head.encoded_size; + + return (0); +} + +/* * Return the next nvlist in an nvlist array. 
*/ int @@ -535,7 +788,12 @@ nv_string_t *nvp_name; nv_pair_data_t *nvp_data; nvlist_t nvlist; - int i, j; + unsigned i, j; + xdr_t xdr = { + .xdr_op = XDR_OP_DECODE, + .xdr_getint = mem_int, + .xdr_getuint = mem_uint + }; data = (nvs_data_t *)nvl->nv_data; nvp = &data->nvl_pair; /* first pair in nvlist */ @@ -545,7 +803,7 @@ NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); - for (int i = 0; i < indent; i++) + for (i = 0; i < indent; i++) printf(" "); printf("%s [%d] %.*s", typenames[nvp_data->nv_type], @@ -563,7 +821,7 @@ case DATA_TYPE_STRING: { nvp_name = (nv_string_t *)&nvp_data->nv_data[0]; printf(" = \"%.*s\"\n", nvp_name->nv_size, - nvp_name->nv_data ); + nvp_name->nv_data); break; } @@ -588,7 +846,7 @@ nvp_name->nv_data); } nvlist.nv_data = (uint8_t *)data + - nvlist_size(&native, nvlist.nv_data); + nvlist_size(&xdr, nvlist.nv_data); } break; Index: stand/libsa/zfs/zfs.c =================================================================== --- stand/libsa/zfs/zfs.c +++ stand/libsa/zfs/zfs.c @@ -483,8 +483,7 @@ } static int -vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, - size_t bytes) +vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes) { int fd, ret; size_t head, tail, total_size, full_sec_size; @@ -493,8 +492,8 @@ ssize_t res; char *outbuf, *bouncebuf; - fd = (uintptr_t)priv; - outbuf = (char *) buf; + fd = (uintptr_t)vdev->v_priv; + outbuf = (char *)buf; bouncebuf = NULL; ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); @@ -529,14 +528,14 @@ /* Partial data for first sector */ if (head > 0) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes)); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } @@ -552,20 +551,20 @@ if (full_sec_size > 0) { if (bytes < full_sec_size) { res = read(fd, bouncebuf, 
secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, bytes); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } } else { res = write(fd, outbuf, full_sec_size); - if (res != full_sec_size) { + if ((unsigned)res != full_sec_size) { ret = EIO; goto error; } @@ -576,14 +575,14 @@ /* Partial data write to last sector */ if (do_tail_write) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, secsz - tail); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } @@ -595,102 +594,6 @@ return (ret); } -static void -vdev_clear_pad2(vdev_t *vdev) -{ - vdev_t *kid; - vdev_boot_envblock_t *be; - off_t off = offsetof(vdev_label_t, vl_be); - zio_checksum_info_t *ci; - zio_cksum_t cksum; - - STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { - if (kid->v_state != VDEV_STATE_HEALTHY) - continue; - vdev_clear_pad2(kid); - } - - if (!STAILQ_EMPTY(&vdev->v_children)) - return; - - be = calloc(1, sizeof (*be)); - if (be == NULL) { - printf("failed to clear be area: out of memory\n"); - return; - } - - ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; - be->vbe_zbt.zec_magic = ZEC_MAGIC; - zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); - ci->ci_func[0](be, sizeof (*be), NULL, &cksum); - be->vbe_zbt.zec_cksum = cksum; - - if (vdev_write(vdev, vdev->v_read_priv, off, be, VDEV_PAD_SIZE)) { - printf("failed to clear be area of primary vdev: %d\n", - errno); - } - free(be); -} - -/* - * Read the next boot command from pad2. - * If any instance of pad2 is set to empty string, or the returned string - * values are not the same, we consider next boot not to be set. 
- */ -static char * -vdev_read_pad2(vdev_t *vdev) -{ - vdev_t *kid; - char *tmp, *result = NULL; - vdev_boot_envblock_t *be; - off_t off = offsetof(vdev_label_t, vl_be); - - STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { - if (kid->v_state != VDEV_STATE_HEALTHY) - continue; - tmp = vdev_read_pad2(kid); - if (tmp == NULL) - continue; - - /* The next boot is not set, we are done. */ - if (*tmp == '\0') { - free(result); - return (tmp); - } - if (result == NULL) { - result = tmp; - continue; - } - /* Are the next boot strings different? */ - if (strcmp(result, tmp) != 0) { - free(tmp); - *result = '\0'; - break; - } - free(tmp); - } - if (result != NULL) - return (result); - - be = malloc(sizeof (*be)); - if (be == NULL) - return (NULL); - - if (vdev_read(vdev, vdev->v_read_priv, off, be, sizeof (*be))) { - return (NULL); - } - - switch (be->vbe_version) { - case VB_RAW: - case VB_NVLIST: - result = strdup(be->vbe_bootenv); - default: - /* Backward compatibility with initial nextboot feaure. */ - result = strdup((char *)be); - } - return (result); -} - static int zfs_dev_init(void) { @@ -743,7 +646,7 @@ int ret; spa = NULL; - ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); + ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa); if (ret == 0 && pool_guid != NULL) *pool_guid = spa->spa_guid; return (ret); @@ -796,6 +699,8 @@ spa_t *spa; vdev_t *vd; char *result = NULL; + nvlist_t *benv = NULL; + int result_size, rv; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (1); @@ -807,37 +712,35 @@ if (spa == NULL) { printf("ZFS: can't find pool by guid\n"); - return (1); + return (1); } STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { - char *tmp = vdev_read_pad2(vd); + benv = vdev_read_bootenv(vd); - /* Continue on error. */ - if (tmp == NULL) - continue; - /* Nextboot is not set. 
*/ - if (*tmp == '\0') { - free(result); - free(tmp); - return (1); - } - if (result == NULL) { - result = tmp; - continue; - } - free(tmp); + if (benv != NULL) + break; } - if (result == NULL) + spa->spa_bootenv = benv; + if (benv == NULL) return (1); + if ((rv = nvlist_find(benv, "command", DATA_TYPE_STRING, NULL, + &result, &result_size)) == 0) { + if (result_size == 0) { + /* ignore empty string */ + rv = ENOENT; + } else { + size = MIN((size_t)result_size + 1, size); + strlcpy(buf, result, size); + } + (void) nvlist_remove(benv, "command", DATA_TYPE_STRING); + } STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { - vdev_clear_pad2(vd); + vdev_write_bootenv(vd, benv); } - strlcpy(buf, result, size); - free(result); - return (0); + return (rv); } int Index: stand/libsa/zfs/zfsimpl.c =================================================================== --- stand/libsa/zfs/zfsimpl.c +++ stand/libsa/zfs/zfsimpl.c @@ -31,6 +31,7 @@ * Stand-alone ZFS file reader. */ +#include #include #include #include @@ -228,7 +229,7 @@ psize = size; } - rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize); + rc = vdev->v_phys_read(vdev, vdev->v_priv, offset, buf, psize); if (rc == 0) { if (bp != NULL) rc = zio_checksum_verify(vdev->v_spa, bp, buf); @@ -237,6 +238,15 @@ return (rc); } +static int +vdev_write_phys(vdev_t *vdev, void *buf, off_t offset, size_t size) +{ + if (vdev->v_phys_write == NULL) + return (ENXIO); + + return (vdev->v_phys_write(vdev, offset, buf, size)); +} + typedef struct remap_segment { vdev_t *rs_vd; uint64_t rs_offset; @@ -1586,6 +1596,135 @@ return (vdev_read_phys(vd, &bp, buf, off, size)); } +static int +vdev_label_write(vdev_t *vd, int l, vdev_boot_envblock_t *be, uint64_t offset) +{ + zio_checksum_info_t *ci; + zio_cksum_t cksum; + off_t off; + + off = vdev_label_offset(vd->v_psize, l, offset); + + ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; + be->vbe_zbt.zec_magic = ZEC_MAGIC; + 
zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); + ci->ci_func[0](be, sizeof (*be), NULL, &cksum); + be->vbe_zbt.zec_cksum = cksum; + + return (vdev_write_phys(vd, be, off, VDEV_PAD_SIZE)); +} + +static int +vdev_write_bootenv_impl(vdev_t *vdev, vdev_boot_envblock_t *be) +{ + vdev_t *kid; + int rv; + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + rv = vdev_write_bootenv_impl(kid, be); + } + + for (int l = 0; l < VDEV_LABELS; l++) { + rv = vdev_label_write(vdev, l, be, + offsetof(vdev_label_t, vl_be)); + if (rv != 0) { + printf("failed to write bootenv to %s label %d\n", + vdev->v_name, l); + } + } + return (0); +} + +int +vdev_write_bootenv(vdev_t *vdev, nvlist_t *nvl) +{ + vdev_boot_envblock_t *be; + nvlist_t nv; + int rv; + + if (nvl->nv_size > sizeof(be->vbe_nvlist)) + return (E2BIG); + + be = malloc(sizeof(*be)); + if (be == NULL) + return (ENOMEM); + + nv.nv_header = nvl->nv_header; + nv.nv_asize = nvl->nv_asize; + nv.nv_size = nvl->nv_size; + + *(nvs_header_t *)be->vbe_nvlist = nv.nv_header; + nv.nv_data = be->vbe_nvlist + sizeof(nvs_header_t); + bcopy(nvl->nv_data, nv.nv_data, nv.nv_size); + rv = nvlist_export(&nv); + if (rv == 0) { + rv = vdev_write_bootenv_impl(vdev, be); + } + free(be); + return (rv); +} + +/* + * Read the bootenv area from pool label, return the nvlist from it. + * We return from first successful read. 
+ */ +nvlist_t * +vdev_read_bootenv(vdev_t *vdev) +{ + vdev_t *kid; + nvlist_t *benv; + vdev_boot_envblock_t *be; + int rv; + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + + benv = vdev_read_bootenv(kid); + if (benv != NULL) + return (benv); + } + + be = malloc(sizeof (*be)); + if (be == NULL) + return (NULL); + + rv = 0; + for (int l = 0; l < VDEV_LABELS; l++) { + rv = vdev_label_read(vdev, l, be, + offsetof(vdev_label_t, vl_be), + sizeof (*be)); + if (rv == 0) + break; + } + if (rv != 0) { + free(be); + return (NULL); + } + benv = nvlist_import(be->vbe_nvlist + 4, be->vbe_nvlist[0], + be->vbe_nvlist[1]); + if (benv == NULL) { + char *command = (char *)be; + bool ok = false; + + /* Check for legacy zfsbootcfg command string */ + for (int i = 0; command[i] != '\0'; i++) { + if (iscntrl(command[i])) { + ok = false; + break; + } else { + ok = true; + } + } + benv = nvlist_create(NV_UNIQUE_NAME); + if (ok) + nvlist_add_string(benv, "command", command); + } + return (benv); +} + static nvlist_t * vdev_label_read_config(vdev_t *vd, uint64_t txg) { @@ -1601,15 +1740,13 @@ return (NULL); for (int l = 0; l < VDEV_LABELS; l++) { - const unsigned char *nvlist; - if (vdev_label_read(vd, l, label, offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t))) continue; - nvlist = (const unsigned char *) label->vp_nvlist; - tmp = nvlist_import(nvlist + 4, nvlist[0], nvlist[1]); + tmp = nvlist_import(label->vp_nvlist + 4, + label->vp_nvlist[0], label->vp_nvlist[1]); if (tmp == NULL) continue; @@ -1675,7 +1812,8 @@ } static int -vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap) +vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, + spa_t **spap) { vdev_t vtmp; spa_t *spa; @@ -1693,8 +1831,9 @@ */ memset(&vtmp, 0, sizeof(vtmp)); vtmp.v_phys_read = _read; - vtmp.v_read_priv = read_priv; - vtmp.v_psize = P2ALIGN(ldi_get_size(read_priv), + vtmp.v_phys_write = _write; + vtmp.v_priv 
= priv; + vtmp.v_psize = P2ALIGN(ldi_get_size(priv), (uint64_t)sizeof (vdev_label_t)); /* Test for minimum device size. */ @@ -1808,7 +1947,8 @@ vdev = vdev_find(guid); if (vdev != NULL) { vdev->v_phys_read = _read; - vdev->v_read_priv = read_priv; + vdev->v_phys_write = _write; + vdev->v_priv = priv; vdev->v_psize = vtmp.v_psize; /* * If no other state is set, mark vdev healthy. @@ -3078,7 +3218,7 @@ dnode_phys_t dir; size_t size; int rc; - unsigned char *nv; + char *nv; *value = NULL; if ((rc = objset_get_dnode(spa, &spa->spa_mos, obj, &dir)) != 0) Index: stand/userboot/test/test.c =================================================================== --- stand/userboot/test/test.c +++ stand/userboot/test/test.c @@ -261,6 +261,21 @@ } int +test_diskwrite(void *arg, int unit, uint64_t offset, void *src, size_t size, + size_t *resid_return) +{ + ssize_t n; + + if (unit > disk_index || disk_fd[unit] == -1) + return (EIO); + n = pwrite(disk_fd[unit], src, size, offset); + if (n < 0) + return (errno); + *resid_return = size - n; + return (0); +} + +int test_diskioctl(void *arg, int unit, u_long cmd, void *data) { struct stat sb; @@ -399,6 +414,7 @@ .stat = test_stat, .diskread = test_diskread, + .diskwrite = test_diskwrite, .diskioctl = test_diskioctl, .copyin = test_copyin, @@ -431,8 +447,9 @@ void (*func)(struct loader_callbacks *, void *, int, int) __dead2; int opt; const char *userboot_obj = "/boot/userboot.so"; + int oflag = O_RDONLY; - while ((opt = getopt(argc, argv, "b:d:h:")) != -1) { + while ((opt = getopt(argc, argv, "wb:d:h:")) != -1) { switch (opt) { case 'b': userboot_obj = optarg; @@ -442,13 +459,17 @@ disk_index++; disk_fd = reallocarray(disk_fd, disk_index + 1, sizeof (int)); - disk_fd[disk_index] = open(optarg, O_RDONLY); + disk_fd[disk_index] = open(optarg, oflag); if (disk_fd[disk_index] < 0) err(1, "Can't open disk image '%s'", optarg); break; case 'h': host_base = optarg; + break; + + case 'w': + oflag = O_RDWR; break; case '?': Index: 
stand/userboot/userboot.h =================================================================== --- stand/userboot/userboot.h +++ stand/userboot/userboot.h @@ -131,6 +131,12 @@ int (*diskread)(void *arg, int unit, uint64_t offset, void *dst, size_t size, size_t *resid_return); + /* + * Write to a disk image at the given offset + */ + int (*diskwrite)(void *arg, int unit, uint64_t offset, + void *src, size_t size, size_t *resid_return); + /* * Guest virtual machine i/o */ Index: stand/userboot/userboot/main.c =================================================================== --- stand/userboot/userboot/main.c +++ stand/userboot/userboot/main.c @@ -214,6 +214,16 @@ exit(0); } +static void +set_currdev(const char *devname) +{ + + env_setenv("currdev", EV_VOLATILE, devname, + userboot_setcurrdev, env_nounset); + env_setenv("loaddev", EV_VOLATILE, devname, + env_noset, env_nounset); +} + /* * Set the 'current device' by (if possible) recovering the boot device as * supplied by the initial bootstrap. 
@@ -225,6 +235,7 @@ struct devdesc *dd; #if defined(USERBOOT_ZFS_SUPPORT) struct zfs_devdesc zdev; + char *buf = NULL; if (userboot_zfs_found) { @@ -257,10 +268,18 @@ dd = &dev.dd; } - env_setenv("currdev", EV_VOLATILE, userboot_fmtdev(dd), - userboot_setcurrdev, env_nounset); - env_setenv("loaddev", EV_VOLATILE, userboot_fmtdev(dd), - env_noset, env_nounset); + set_currdev(userboot_fmtdev(dd)); + +#if defined(USERBOOT_ZFS_SUPPORT) + buf = malloc(VDEV_PAD_SIZE); + if (buf != NULL) { + if (zfs_nextboot(&zdev, buf, VDEV_PAD_SIZE) == 0) { + printf("zfs nextboot: %s\n", buf); + set_currdev(buf); + } + free(buf); + } +#endif } #if defined(USERBOOT_ZFS_SUPPORT) Index: stand/userboot/userboot/userboot_disk.c =================================================================== --- stand/userboot/userboot/userboot_disk.c +++ stand/userboot/userboot/userboot_disk.c @@ -211,15 +211,21 @@ size_t resid; int rc; - rw &= F_MASK; - if (rw == F_WRITE) - return (EROFS); - if (rw != F_READ) - return (EINVAL); if (rsize) *rsize = 0; off = dblk * ud_info[dev->dd.d_unit].sectorsize; - rc = CALLBACK(diskread, dev->dd.d_unit, off, buf, size, &resid); + switch (rw & F_MASK) { + case F_READ: + rc = CALLBACK(diskread, dev->dd.d_unit, off, buf, size, &resid); + break; + case F_WRITE: + rc = CALLBACK(diskwrite, dev->dd.d_unit, off, buf, size, + &resid); + break; + default: + rc = EINVAL; + break; + } if (rc) return (rc); if (rsize) Index: sys/cddl/boot/zfs/zfsimpl.h =================================================================== --- sys/cddl/boot/zfs/zfsimpl.h +++ sys/cddl/boot/zfs/zfsimpl.h @@ -526,21 +526,8 @@ zio_eck_t vp_zbt; } vdev_phys_t; -typedef enum vbe_vers { - /* The bootenv file is stored as ascii text in the envblock */ - VB_RAW = 0, - - /* - * The bootenv file is converted to an nvlist and then packed into the - * envblock. 
- */ - VB_NVLIST = 1 -} vbe_vers_t; - typedef struct vdev_boot_envblock { - uint64_t vbe_version; - char vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) - - sizeof (zio_eck_t)]; + char vbe_nvlist[VDEV_PAD_SIZE - sizeof (zio_eck_t)]; zio_eck_t vbe_zbt; } vdev_boot_envblock_t; @@ -1662,10 +1649,9 @@ */ struct vdev; struct spa; -typedef int vdev_phys_read_t(struct vdev *vdev, void *priv, - off_t offset, void *buf, size_t bytes); -typedef int vdev_read_t(struct vdev *vdev, const blkptr_t *bp, - void *buf, off_t offset, size_t bytes); +typedef int vdev_phys_read_t(struct vdev *, void *, off_t, void *, size_t); +typedef int vdev_phys_write_t(struct vdev *, off_t, void *, size_t); +typedef int vdev_read_t(struct vdev *, const blkptr_t *, void *, off_t, size_t); typedef STAILQ_HEAD(vdev_list, vdev) vdev_list_t; @@ -1793,8 +1779,9 @@ size_t v_nchildren; /* # children */ vdev_state_t v_state; /* current state */ vdev_phys_read_t *v_phys_read; /* read from raw leaf vdev */ + vdev_phys_write_t *v_phys_write; /* write to raw leaf vdev */ vdev_read_t *v_read; /* read from vdev */ - void *v_read_priv; /* private data for read function */ + void *v_priv; /* data for read/write function */ boolean_t v_islog; struct spa *v_spa; /* link to spa */ /* @@ -1820,6 +1807,7 @@ zio_cksum_salt_t spa_cksum_salt; /* secret salt for cksum */ void *spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS]; boolean_t spa_with_log; /* this pool has log */ + void *spa_bootenv; /* bootenv from pool label */ } spa_t; /* IO related arguments. */ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. 
* Copyright (c) 2017, Intel Corporation. */ @@ -173,6 +173,8 @@ extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **); extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, uint64_t size, zio_done_func_t *done, void *priv, int flags); +extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *); +extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *); typedef enum { VDEV_LABEL_CREATE, /* create/add a new device */ @@ -184,8 +186,6 @@ } vdev_labeltype_t; extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason); - -extern int vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size); #ifdef __cplusplus } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. 
*/ @@ -392,7 +392,7 @@ #define VDEV_RAIDZ_MAXPARITY 3 #define VDEV_PAD_SIZE (8 << 10) -/* 2 padding areas (vl_pad1 and vl_pad2) to skip */ +/* 2 padding areas (vl_pad1 and vl_be) to skip */ #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) @@ -419,9 +419,16 @@ zio_eck_t vp_zbt; } vdev_phys_t; +typedef struct vdev_boot_envblock { + char vbe_nvlist[VDEV_PAD_SIZE - sizeof (zio_eck_t)]; + zio_eck_t vbe_zbt; +} vdev_boot_envblock_t; + +CTASSERT(sizeof (vdev_boot_envblock_t) == VDEV_PAD_SIZE); + typedef struct vdev_label { char vl_pad1[VDEV_PAD_SIZE]; /* 8K */ - char vl_pad2[VDEV_PAD_SIZE]; /* 8K */ + vdev_boot_envblock_t vl_be; /* 8K */ vdev_phys_t vl_vdev_phys; /* 112K */ char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ } vdev_label_t; /* 256K total */ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c @@ -1566,7 +1566,7 @@ for (int l = 1; l < VDEV_LABELS; l++) { zio_nowait(zio_read_phys(pio, vd, vdev_label_offset(vd->vdev_psize, l, - offsetof(vdev_label_t, vl_pad2)), VDEV_PAD_SIZE, + offsetof(vdev_label_t, vl_be)), VDEV_PAD_SIZE, abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE), ZIO_CHECKSUM_OFF, vdev_probe_done, vps, ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE)); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. * Copyright 2019 Joyent, Inc. 
*/ @@ -781,7 +781,7 @@ nvlist_t *label; vdev_phys_t *vp; abd_t *vp_abd; - abd_t *pad2; + abd_t *bootenv; uberblock_t *ub; abd_t *ub_abd; zio_t *zio; @@ -956,8 +956,8 @@ ub->ub_txg = 0; /* Initialize the 2nd padding area. */ - pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); - abd_zero(pad2, VDEV_PAD_SIZE); + bootenv = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); + abd_zero(bootenv, VDEV_PAD_SIZE); /* * Write everything in parallel. @@ -976,8 +976,8 @@ * Zero out the 2nd padding area where it might have * left over data from previous filesystem format. */ - vdev_label_write(zio, vd, l, pad2, - offsetof(vdev_label_t, vl_pad2), + vdev_label_write(zio, vd, l, bootenv, + offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE, NULL, NULL, flags); vdev_label_write(zio, vd, l, ub_abd, @@ -993,7 +993,7 @@ } nvlist_free(label); - abd_free(pad2); + abd_free(bootenv); abd_free(ub_abd); abd_free(vp_abd); @@ -1016,41 +1016,181 @@ return (error); } +/* + * Done callback for vdev_label_read_bootenv_impl. If this is the first + * callback to finish, store our abd in the callback pointer. Otherwise, we + * just free our abd and return. + */ +static void +vdev_label_read_bootenv_done(zio_t *zio) +{ + zio_t *rio = zio->io_private; + abd_t **cbp = rio->io_private; + + ASSERT3U(zio->io_size, ==, VDEV_PAD_SIZE); + + if (zio->io_error == 0) { + mutex_enter(&rio->io_lock); + if (*cbp == NULL) { + /* Will free this buffer in vdev_label_read_bootenv. */ + *cbp = zio->io_abd; + } else { + abd_free(zio->io_abd); + } + mutex_exit(&rio->io_lock); + } else { + abd_free(zio->io_abd); + } +} + +static void +vdev_label_read_bootenv_impl(zio_t *zio, vdev_t *vd, int flags) +{ + for (int c = 0; c < vd->vdev_children; c++) + vdev_label_read_bootenv_impl(zio, vd->vdev_child[c], flags); + + /* + * We just use the first label that has a correct checksum; the + * bootloader should have rewritten them all to be the same on boot, + * and any changes we made since boot have been the same across all + * labels. 
+ */ + if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { + for (int l = 0; l < VDEV_LABELS; l++) { + vdev_label_read(zio, vd, l, + abd_alloc_linear(VDEV_PAD_SIZE, B_FALSE), + offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE, + vdev_label_read_bootenv_done, zio, flags); + } + } +} + int -vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size) +vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *bootenv) { - spa_t *spa = vd->vdev_spa; + nvlist_t *config; + spa_t *spa = rvd->vdev_spa; + abd_t *abd = NULL; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; + + ASSERT(bootenv); + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + zio_t *zio = zio_root(spa, NULL, &abd, flags); + vdev_label_read_bootenv_impl(zio, rvd, flags); + int err = zio_wait(zio); + + if (abd != NULL) { + vdev_boot_envblock_t *vbe = abd_to_buf(abd); + + err = nvlist_unpack(vbe->vbe_nvlist, + sizeof (vbe->vbe_nvlist), &config, 0); + + if (err != 0) { + char *buf; + + vbe->vbe_nvlist[sizeof (vbe->vbe_nvlist) - 1] = '\0'; + /* We have unstructured data, treat it as string. */ + buf = abd_to_buf(abd); + + /* + * We can have zeroed block (no data); + * or FreeBSD zfs bootnext command string; + * or uint64_t 0 and envmap string. + */ + if (*(uint64_t *)buf == 0) { + fnvlist_add_string(bootenv, "envmap", buf + 8); + } else { + fnvlist_add_string(bootenv, "command", buf); + } + } else { + fnvlist_merge(bootenv, config); + nvlist_free(config); + } + + /* + * abd was allocated in vdev_label_read_bootenv_impl() + */ + abd_free(abd); + /* + * If we managed to read any successfully, + * return success. 
+ */ + return (0); + } + return (err); +} + +int +vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env) +{ zio_t *zio; - abd_t *pad2; + spa_t *spa = vd->vdev_spa; + vdev_boot_envblock_t *bootenv; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; int error; + size_t nvsize; + char *nvbuf; - if (size > VDEV_PAD_SIZE) - return (EINVAL); + error = nvlist_size(env, &nvsize, NV_ENCODE_XDR); + if (error != 0) + return (SET_ERROR(error)); - if (!vd->vdev_ops->vdev_op_leaf) - return (ENODEV); - if (vdev_is_dead(vd)) - return (ENXIO); + if (nvsize >= sizeof (bootenv->vbe_nvlist)) { + return (SET_ERROR(E2BIG)); + } ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); - abd_zero(pad2, VDEV_PAD_SIZE); - abd_copy_from_buf(pad2, buf, size); + error = ENXIO; + for (int c = 0; c < vd->vdev_children; c++) { + int child_err; + child_err = vdev_label_write_bootenv(vd->vdev_child[c], env); + /* + * As long as any of the disks managed to write all of their + * labels successfully, return success. 
+ */ + if (child_err == 0) + error = child_err; + } + + if (!vd->vdev_ops->vdev_op_leaf || vdev_is_dead(vd) || + !vdev_writeable(vd)) { + return (error); + } + ASSERT3U(sizeof (*bootenv), ==, VDEV_PAD_SIZE); + abd_t *abd = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); + abd_zero(abd, VDEV_PAD_SIZE); + + bootenv = abd_borrow_buf_copy(abd, VDEV_PAD_SIZE); + nvbuf = bootenv->vbe_nvlist; + nvsize = sizeof (bootenv->vbe_nvlist); + + error = nvlist_pack(env, &nvbuf, &nvsize, NV_ENCODE_XDR, KM_SLEEP); + if (error == 0) { + abd_return_buf_copy(abd, bootenv, VDEV_PAD_SIZE); + } else { + abd_free(abd); + return (SET_ERROR(error)); + } + retry: zio = zio_root(spa, NULL, NULL, flags); - vdev_label_write(zio, vd, 0, pad2, - offsetof(vdev_label_t, vl_pad2), - VDEV_PAD_SIZE, NULL, NULL, flags); + for (int l = 0; l < VDEV_LABELS; l++) { + vdev_label_write(zio, vd, l, abd, + offsetof(vdev_label_t, vl_be), + VDEV_PAD_SIZE, NULL, NULL, flags); + } + error = zio_wait(zio); if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { flags |= ZIO_FLAG_TRYHARD; goto retry; } - abd_free(pad2); + abd_free(abd); return (error); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -3654,50 +3654,73 @@ return (error); } +/* + * This ioctl is used to set the bootenv configuration on the current + * pool. This configuration is stored in the second padding area of the label, + * and it is used by the GRUB bootloader used on Linux to store the contents + * of the grubenv file. The file is stored as raw ASCII, and is protected by + * an embedded checksum. By default, GRUB will check if the boot filesystem + * supports storing the environment data in a special location, and if so, + * will invoke filesystem specific logic to retrieve it. This can be overridden + * by a variable, should the user so desire.
+ */ +/* ARGSUSED */ +static const zfs_ioc_key_t zfs_keys_set_bootenv[] = { + {"", DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST}, +}; + +static int +zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl) +{ + int error; + spa_t *spa; + + if ((error = spa_open(name, &spa, FTAG)) != 0) + return (error); + spa_vdev_state_enter(spa, SCL_ALL); + error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl); + (void) spa_vdev_state_exit(spa, NULL, 0); + spa_close(spa, FTAG); + return (error); +} + +static const zfs_ioc_key_t zfs_keys_get_bootenv[] = { + /* no nvl keys */ +}; + + /* ARGSUSED */ +static int +zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl) +{ + spa_t *spa; + int error; + + if ((error = spa_open(name, &spa, FTAG)) != 0) + return (error); + spa_vdev_state_enter(spa, SCL_ALL); + error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl); + (void) spa_vdev_state_exit(spa, NULL, 0); + spa_close(spa, FTAG); + return (error); +} + #ifdef __FreeBSD__ static const zfs_ioc_key_t zfs_keys_nextboot[] = { {"command", DATA_TYPE_STRING, 0}, }; static int -zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +zfs_ioc_nextboot(const char *name, nvlist_t *innvl, nvlist_t *outnvl) { - char name[MAXNAMELEN]; spa_t *spa; - vdev_t *vd; - char *command; - uint64_t pool_guid; - uint64_t vdev_guid; int error; - if (nvlist_lookup_uint64(innvl, - ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0) - return (EINVAL); - if (nvlist_lookup_uint64(innvl, - ZPOOL_CONFIG_GUID, &vdev_guid) != 0) - return (EINVAL); - command = fnvlist_lookup_string(innvl, "command"); - - mutex_enter(&spa_namespace_lock); - spa = spa_by_guid(pool_guid, vdev_guid); - if (spa != NULL) - strcpy(name, spa_name(spa)); - mutex_exit(&spa_namespace_lock); - if (spa == NULL) - return (ENOENT); - if ((error = spa_open(name, &spa, FTAG)) != 0) return (error); + spa_vdev_state_enter(spa, SCL_ALL); - vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE); - if (vd == NULL) { - 
(void) spa_vdev_state_exit(spa, NULL, ENXIO); - spa_close(spa, FTAG); - return (ENODEV); - } - error = vdev_label_write_pad2(vd, command, strlen(command)); + error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl); (void) spa_vdev_state_exit(spa, NULL, 0); - txg_wait_synced(spa->spa_dsl_pool, 0); spa_close(spa, FTAG); return (error); } @@ -6564,6 +6587,16 @@ zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen)); + + zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV, + zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE, + zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv)); + + zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV, + zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE, + zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv)); /* IOCTLS that use the legacy function signature */ Index: sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h +++ sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, Martin Matuska . All rights reserved. 
* Copyright (c) 2014 Integros [integros.com] @@ -1057,6 +1057,8 @@ ZFS_IOC_POOL_DISCARD_CHECKPOINT, ZFS_IOC_POOL_INITIALIZE, ZFS_IOC_POOL_SYNC, + ZFS_IOC_SET_BOOTENV, + ZFS_IOC_GET_BOOTENV, ZFS_IOC_LAST } zfs_ioc_t; Index: tools/tools/zfsboottest/zfsboottest.c =================================================================== --- tools/tools/zfsboottest/zfsboottest.c +++ tools/tools/zfsboottest/zfsboottest.c @@ -147,7 +147,7 @@ warn("open(%s) failed", argv[i]); continue; } - if (vdev_probe(vdev_read, &fd[i - 1], NULL) != 0) { + if (vdev_probe(vdev_read, NULL, &fd[i - 1], NULL) != 0) { warnx("vdev_probe(%s) failed", argv[i]); close(fd[i - 1]); }