diff --git a/stand/libsa/zfs/libzfs.h b/stand/libsa/zfs/libzfs.h index 27fde493670c..e8676c0d53b8 100644 --- a/stand/libsa/zfs/libzfs.h +++ b/stand/libsa/zfs/libzfs.h @@ -1,172 +1,73 @@ /*- * Copyright (c) 2012 Andriy Gapon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _BOOT_LIBZFS_H_ #define _BOOT_LIBZFS_H_ #include #ifdef LOADER_GELI_SUPPORT #include #endif +#include "nvlist.h" + #define ZFS_MAXNAMELEN 256 /* * ZFS fully-qualified device descriptor. */ struct zfs_devdesc { struct devdesc dd; /* Must be first. 
*/ uint64_t pool_guid; uint64_t root_guid; }; -/* nvp implementation version */ -#define NV_VERSION 0 - -/* nvlist persistent unique name flags, stored in nvl_nvflags */ -#define NV_UNIQUE_NAME 0x1 -#define NV_UNIQUE_NAME_TYPE 0x2 - -#define NV_ALIGN4(x) (((x) + 3) & ~3) -#define NV_ALIGN(x) (((x) + 7) & ~7) - -/* - * nvlist header. - * nvlist has 4 bytes header followed by version and flags, then nvpairs - * and the list is terminated by double zero. - */ -typedef struct { - char nvh_encoding; - char nvh_endian; - char nvh_reserved1; - char nvh_reserved2; -} nvs_header_t; - -typedef struct { - nvs_header_t nv_header; - size_t nv_asize; - size_t nv_size; - uint8_t *nv_data; - uint8_t *nv_idx; -} nvlist_t; - -/* - * nvpair header. - * nvpair has encoded and decoded size - * name string (size and data) - * data type and number of elements - * data - */ -typedef struct { - unsigned encoded_size; - unsigned decoded_size; -} nvp_header_t; - -/* - * nvlist stream head. - */ -typedef struct { - unsigned nvl_version; - unsigned nvl_nvflag; - nvp_header_t nvl_pair; -} nvs_data_t; - -typedef struct { - unsigned nv_size; - uint8_t nv_data[]; /* NV_ALIGN4(string) */ -} nv_string_t; - -typedef struct { - unsigned nv_type; /* data_type_t */ - unsigned nv_nelem; /* number of elements */ - uint8_t nv_data[]; /* data stream */ -} nv_pair_data_t; - -nvlist_t *nvlist_create(int); -void nvlist_destroy(nvlist_t *); -nvlist_t *nvlist_import(const char *, size_t); -int nvlist_export(nvlist_t *); -int nvlist_remove(nvlist_t *, const char *, data_type_t); -int nvpair_type_from_name(const char *); -nvp_header_t *nvpair_find(nvlist_t *, const char *); -void nvpair_print(nvp_header_t *, unsigned int); -void nvlist_print(const nvlist_t *, unsigned int); -char *nvstring_get(nv_string_t *); -int nvlist_find(const nvlist_t *, const char *, data_type_t, - int *, void *, int *); -nvp_header_t *nvlist_next_nvpair(nvlist_t *, nvp_header_t *); - -int nvlist_add_boolean_value(nvlist_t *, const char *, 
boolean_t); -int nvlist_add_byte(nvlist_t *, const char *, uint8_t); -int nvlist_add_int8(nvlist_t *, const char *, int8_t); -int nvlist_add_uint8(nvlist_t *, const char *, uint8_t); -int nvlist_add_int16(nvlist_t *, const char *, int16_t); -int nvlist_add_uint16(nvlist_t *, const char *, uint16_t); -int nvlist_add_int32(nvlist_t *, const char *, int32_t); -int nvlist_add_uint32(nvlist_t *, const char *, uint32_t); -int nvlist_add_int64(nvlist_t *, const char *, int64_t); -int nvlist_add_uint64(nvlist_t *, const char *, uint64_t); -int nvlist_add_string(nvlist_t *, const char *, const char *); -int nvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint32_t); -int nvlist_add_byte_array(nvlist_t *, const char *, uint8_t *, uint32_t); -int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint32_t); -int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint32_t); -int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint32_t); -int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint32_t); -int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint32_t); -int nvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint32_t); -int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint32_t); -int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint32_t); -int nvlist_add_string_array(nvlist_t *, const char *, char * const *, uint32_t); -int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *); -int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint32_t); - int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path); char *zfs_fmtdev(void *vdev); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); int zfs_get_bootonce(void *, const char *, char *, size_t); int zfs_get_bootenv(void *, nvlist_t **); int zfs_set_bootenv(void *, nvlist_t *); int zfs_attach_nvstore(void *); uint64_t 
ldi_get_size(void *); void init_zfs_boot_options(const char *currdev); int zfs_bootenv(const char *name); int zfs_attach_nvstore(void *); int zfs_belist_add(const char *name, uint64_t __unused); int zfs_set_env(void); nvlist_t *vdev_read_bootenv(vdev_t *); extern struct devsw zfs_dev; extern struct fs_ops zfs_fsops; #endif /*_BOOT_LIBZFS_H_*/ diff --git a/stand/libsa/zfs/nvlist.c b/stand/libsa/zfs/nvlist.c index 84a0edafe182..e5e0abecb274 100644 --- a/stand/libsa/zfs/nvlist.c +++ b/stand/libsa/zfs/nvlist.c @@ -1,1695 +1,1701 @@ /*- * Copyright 2020 Toomas Soome * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); -#include -#include +#include #include #include -#include -#include -#include "libzfs.h" +#ifdef _STANDALONE +#include +#else +#include +#include +#include +#include +#include +#endif + +#include "nvlist.h" /* Direction of a translation pass over the buffer. */ enum xdr_op { XDR_OP_ENCODE = 1, XDR_OP_DECODE = 2 }; /* * Translation state: the operation, the accessors used to get and put * 32-bit values, the buffer (xdr_buf, xdr_buf_size) and the current * position in it (xdr_idx). */ typedef struct xdr { enum xdr_op xdr_op; int (*xdr_getint)(struct xdr *, int *); int (*xdr_putint)(struct xdr *, int); int (*xdr_getuint)(struct xdr *, unsigned *); int (*xdr_putuint)(struct xdr *, unsigned); const uint8_t *xdr_buf; uint8_t *xdr_idx; size_t xdr_buf_size; } xdr_t; /* Forward declarations for routines defined later in this file. */ static int nvlist_xdr_nvlist(xdr_t *, nvlist_t *); static bool nvlist_size_xdr(xdr_t *, size_t *); static bool nvlist_size_native(xdr_t *, size_t *); static bool xdr_int(xdr_t *, int *); static bool xdr_u_int(xdr_t *, unsigned *); /* Signature of a per-element translation routine. */ typedef bool (*xdrproc_t)(xdr_t *, void *); /* * Basic primitives for XDR translation operations, getint and putint. * They read or write at xdr->xdr_idx without moving it and return the * number of bytes the caller is expected to advance by. */ /* Read the big-endian 32-bit value at xdr_idx into *ip. */ static int _getint(struct xdr *xdr, int *ip) { *ip = be32dec(xdr->xdr_idx); return (sizeof(int)); } /* Store i at xdr_idx after conversion with htobe32(). */ static int _putint(struct xdr *xdr, int i) { int *ip = (int *)xdr->xdr_idx; *ip = htobe32(i); return (sizeof(int)); } /* Unsigned variant of _getint(). */ static int _getuint(struct xdr *xdr, unsigned *ip) { *ip = be32dec(xdr->xdr_idx); return (sizeof(unsigned)); } /* Unsigned variant of _putint(); note that it returns sizeof(int). */ static int _putuint(struct xdr *xdr, unsigned i) { unsigned *up = (unsigned *)xdr->xdr_idx; *up = htobe32(i); return (sizeof(int)); } /* The _mem variants below copy the value without any byte order conversion. */ static int _getint_mem(struct xdr *xdr, int *ip) { *ip = *(int *)xdr->xdr_idx; return (sizeof(int)); } static int _putint_mem(struct xdr *xdr, int i) { int *ip = (int *)xdr->xdr_idx; *ip = i; return (sizeof(int)); } static int _getuint_mem(struct xdr *xdr, unsigned *ip) { *ip = *(unsigned *)xdr->xdr_idx; return (sizeof(unsigned)); } static int _putuint_mem(struct xdr *xdr, unsigned i) { unsigned *up = (unsigned *)xdr->xdr_idx; *up = i; return (sizeof(int)); } /* * XDR data translations.
*/ /* Translate a short through xdr_int(); on decode the result is stored back to *ip. */ static bool xdr_short(xdr_t *xdr, short *ip) { int i; bool rv; i = *ip; if ((rv = xdr_int(xdr, &i))) { if (xdr->xdr_op == XDR_OP_DECODE) *ip = i; } return (rv); } /* Translate an unsigned short through xdr_u_int(); on decode the result is stored back to *ip. */ static bool xdr_u_short(xdr_t *xdr, unsigned short *ip) { unsigned u; bool rv; u = *ip; if ((rv = xdr_u_int(xdr, &u))) { if (xdr->xdr_op == XDR_OP_DECODE) *ip = u; } return (rv); } /* * Translate the int at xdr->xdr_idx and increment xdr_idx by size of int. * Returns false, without moving xdr_idx, when less than sizeof(int) bytes * are left in the buffer. */ static bool xdr_int(xdr_t *xdr, int *ip) { bool rv = false; int *i = (int *)xdr->xdr_idx; if (xdr->xdr_idx + sizeof(int) > xdr->xdr_buf + xdr->xdr_buf_size) return (rv); switch (xdr->xdr_op) { case XDR_OP_ENCODE: /* Encode value *ip, store to buf */ xdr->xdr_idx += xdr->xdr_putint(xdr, *ip); rv = true; break; case XDR_OP_DECODE: /* Decode buf in place (i points to xdr_idx), return value to *ip */ xdr->xdr_idx += xdr->xdr_getint(xdr, i); *ip = *i; rv = true; break; } return (rv); } /* * Translate the unsigned int at xdr->xdr_idx and increment xdr_idx by size * of unsigned int. * Returns false, without moving xdr_idx, when less than sizeof(unsigned) * bytes are left in the buffer. */ static bool xdr_u_int(xdr_t *xdr, unsigned *ip) { bool rv = false; unsigned *u = (unsigned *)xdr->xdr_idx; if (xdr->xdr_idx + sizeof(unsigned) > xdr->xdr_buf + xdr->xdr_buf_size) return (rv); switch (xdr->xdr_op) { case XDR_OP_ENCODE: /* Encode value *ip, store to buf */ xdr->xdr_idx += xdr->xdr_putuint(xdr, *ip); rv = true; break; case XDR_OP_DECODE: /* Decode buf in place (u points to xdr_idx), return value to *ip */ xdr->xdr_idx += xdr->xdr_getuint(xdr, u); *ip = *u; rv = true; break; } return (rv); } /* * Translate the 64-bit value at xdr->xdr_idx and advance xdr_idx by 8. * The value is byte swapped only when the _putint/_getint accessors are * installed in xdr, otherwise it is copied as is. */ static bool xdr_int64(xdr_t *xdr, int64_t *lp) { bool rv = false; if (xdr->xdr_idx + sizeof(int64_t) > xdr->xdr_buf + xdr->xdr_buf_size) return (rv); switch (xdr->xdr_op) { case XDR_OP_ENCODE: /* Encode value *lp, store to buf */ if (xdr->xdr_putint == _putint) *(int64_t *)xdr->xdr_idx = htobe64(*lp); else *(int64_t *)xdr->xdr_idx = *lp; xdr->xdr_idx += sizeof(int64_t); rv = true; break; case XDR_OP_DECODE: /* Decode buf, return value to *lp */ if (xdr->xdr_getint == _getint) *lp = be64toh(*(int64_t *)xdr->xdr_idx); else *lp = *(int64_t *)xdr->xdr_idx; xdr->xdr_idx +=
sizeof(int64_t); rv = true; } return (rv); } /* * Unsigned variant of xdr_int64(); the byte swap decision is made from the * _putint/_getuint accessors. */ static bool xdr_uint64(xdr_t *xdr, uint64_t *lp) { bool rv = false; if (xdr->xdr_idx + sizeof(uint64_t) > xdr->xdr_buf + xdr->xdr_buf_size) return (rv); switch (xdr->xdr_op) { case XDR_OP_ENCODE: /* Encode value *lp, store to buf */ if (xdr->xdr_putint == _putint) *(uint64_t *)xdr->xdr_idx = htobe64(*lp); else *(uint64_t *)xdr->xdr_idx = *lp; xdr->xdr_idx += sizeof(uint64_t); rv = true; break; case XDR_OP_DECODE: /* Decode buf, return value to *lp */ if (xdr->xdr_getuint == _getuint) *lp = be64toh(*(uint64_t *)xdr->xdr_idx); else *lp = *(uint64_t *)xdr->xdr_idx; xdr->xdr_idx += sizeof(uint64_t); rv = true; } return (rv); } /* Translate a char through xdr_int(); on decode the result is stored back to *cp. */ static bool xdr_char(xdr_t *xdr, char *cp) { int i; bool rv = false; i = *cp; if ((rv = xdr_int(xdr, &i))) { if (xdr->xdr_op == XDR_OP_DECODE) *cp = i; } return (rv); } /* * Step over the string at xdr->xdr_idx: a 32-bit length followed by the * data padded to 4 bytes. Only the length word goes through the put/get * accessor, the string bytes are not touched. * Returns false when the string does not fit in the buffer. */ static bool xdr_string(xdr_t *xdr, nv_string_t *s) { int size = 0; bool rv = false; switch (xdr->xdr_op) { case XDR_OP_ENCODE: size = s->nv_size; if (xdr->xdr_idx + sizeof(unsigned) + NV_ALIGN4(size) > xdr->xdr_buf + xdr->xdr_buf_size) break; xdr->xdr_idx += xdr->xdr_putuint(xdr, s->nv_size); xdr->xdr_idx += NV_ALIGN4(size); rv = true; break; case XDR_OP_DECODE: if (xdr->xdr_idx + sizeof(unsigned) > xdr->xdr_buf + xdr->xdr_buf_size) break; /* size is first the width of the length word, then the whole padded string */ size = xdr->xdr_getuint(xdr, &s->nv_size); size = NV_ALIGN4(size + s->nv_size); if (xdr->xdr_idx + size > xdr->xdr_buf + xdr->xdr_buf_size) break; xdr->xdr_idx += size; rv = true; break; } return (rv); } /* * Translate an array: the element count is passed through xdr_u_int() from * a local copy, then elproc is applied nelem times to the data at * xdr->xdr_idx. */ static bool xdr_array(xdr_t *xdr, const unsigned nelem, const xdrproc_t elproc) { bool rv = true; unsigned c = nelem; if (!xdr_u_int(xdr, &c)) return (false); for (unsigned i = 0; i < nelem; i++) { if (!elproc(xdr, xdr->xdr_idx)) return (false); } return (rv); } /* * nvlist management functions. */ /* Free nvl and, when nv_asize is not zero, its data; NULL is accepted. */ void nvlist_destroy(nvlist_t *nvl) { if (nvl != NULL) { /* Free data if it was allocated by us.
*/ if (nvl->nv_asize > 0) free(nvl->nv_data); } free(nvl); } /* * Return a malloc()ed copy of the nvs->nv_size bytes of nvs with a * terminating '\0' appended, or NULL when the allocation fails. */ char * nvstring_get(nv_string_t *nvs) { char *s; s = malloc(nvs->nv_size + 1); if (s != NULL) { bcopy(nvs->nv_data, s, nvs->nv_size); s[nvs->nv_size] = '\0'; } return (s); } /* * Create empty nvlist. * The nvlist is terminated by 2x zeros (8 bytes). * flag is stored as nvl_nvflag. Returns NULL when memory allocation fails. */ nvlist_t * nvlist_create(int flag) { nvlist_t *nvl; nvs_data_t *nvs; nvl = calloc(1, sizeof(*nvl)); if (nvl == NULL) return (nvl); nvl->nv_header.nvh_encoding = NV_ENCODE_XDR; nvl->nv_header.nvh_endian = _BYTE_ORDER == _LITTLE_ENDIAN; nvl->nv_asize = nvl->nv_size = sizeof(*nvs); nvs = calloc(1, nvl->nv_asize); if (nvs == NULL) { free(nvl); return (NULL); } /* data in nvlist is byte stream */ nvl->nv_data = (uint8_t *)nvs; nvs->nvl_version = NV_VERSION; nvs->nvl_nvflag = flag; return (nvl); } /* * Translate the body of one pair at xdr->xdr_idx: the name string, the * data type, the number of elements and the value. nvl is the list the * pair belongs to; its nv_data and nv_size bound embedded nvlists. * Returns false when a translation step fails. */ static bool nvlist_xdr_nvp(xdr_t *xdr, nvlist_t *nvl) { nv_string_t *nv_string; nv_pair_data_t *nvp_data; nvlist_t nvlist; unsigned type, nelem; xdr_t nv_xdr; nv_string = (nv_string_t *)xdr->xdr_idx; if (!xdr_string(xdr, nv_string)) { return (false); } nvp_data = (nv_pair_data_t *)xdr->xdr_idx; type = nvp_data->nv_type; nelem = nvp_data->nv_nelem; if (!xdr_u_int(xdr, &type) || !xdr_u_int(xdr, &nelem)) return (false); switch (type) { case DATA_TYPE_NVLIST: case DATA_TYPE_NVLIST_ARRAY: bzero(&nvlist, sizeof(nvlist)); nvlist.nv_data = xdr->xdr_idx; nvlist.nv_idx = nvlist.nv_data; /* Set up xdr for this nvlist.
*/ nv_xdr = *xdr; nv_xdr.xdr_buf = nvlist.nv_data; nv_xdr.xdr_idx = nvlist.nv_data; nv_xdr.xdr_buf_size = nvl->nv_data + nvl->nv_size - nvlist.nv_data; for (unsigned i = 0; i < nelem; i++) { /* nv_xdr measures the embedded list, xdr translates it. */ if (xdr->xdr_op == XDR_OP_ENCODE) { if (!nvlist_size_native(&nv_xdr, &nvlist.nv_size)) return (false); } else { if (!nvlist_size_xdr(&nv_xdr, &nvlist.nv_size)) return (false); } if (nvlist_xdr_nvlist(xdr, &nvlist) != 0) return (false); /* The next list of the array starts where the size walk ended. */ nvlist.nv_data = nv_xdr.xdr_idx; nvlist.nv_idx = nv_xdr.xdr_idx; nv_xdr.xdr_buf = nv_xdr.xdr_idx; nv_xdr.xdr_buf_size = nvl->nv_data + nvl->nv_size - nvlist.nv_data; } break; case DATA_TYPE_BOOLEAN: /* BOOLEAN does not take value space */ break; case DATA_TYPE_BYTE: case DATA_TYPE_INT8: case DATA_TYPE_UINT8: if (!xdr_char(xdr, (char *)&nvp_data->nv_data[0])) return (false); break; case DATA_TYPE_INT16: if (!xdr_short(xdr, (short *)&nvp_data->nv_data[0])) return (false); break; case DATA_TYPE_UINT16: if (!xdr_u_short(xdr, (unsigned short *)&nvp_data->nv_data[0])) return (false); break; case DATA_TYPE_BOOLEAN_VALUE: case DATA_TYPE_INT32: if (!xdr_int(xdr, (int *)&nvp_data->nv_data[0])) return (false); break; case DATA_TYPE_UINT32: if (!xdr_u_int(xdr, (unsigned *)&nvp_data->nv_data[0])) return (false); break; case DATA_TYPE_HRTIME: case DATA_TYPE_INT64: if (!xdr_int64(xdr, (int64_t *)&nvp_data->nv_data[0])) return (false); break; case DATA_TYPE_UINT64: if (!xdr_uint64(xdr, (uint64_t *)&nvp_data->nv_data[0])) return (false); break; case DATA_TYPE_BYTE_ARRAY: case DATA_TYPE_STRING: /* Both are handled as a length word followed by padded data. */ nv_string = (nv_string_t *)&nvp_data->nv_data[0]; if (!xdr_string(xdr, nv_string)) return (false); break; case DATA_TYPE_STRING_ARRAY: nv_string = (nv_string_t *)&nvp_data->nv_data[0]; for (unsigned i = 0; i < nelem; i++) { if (!xdr_string(xdr, nv_string)) return (false); nv_string = (nv_string_t *)xdr->xdr_idx; } break; /* All array element types up to 32 bits are translated as unsigned int. */ case DATA_TYPE_INT8_ARRAY: case DATA_TYPE_UINT8_ARRAY: case DATA_TYPE_INT16_ARRAY: case DATA_TYPE_UINT16_ARRAY: case DATA_TYPE_BOOLEAN_ARRAY: case
DATA_TYPE_INT32_ARRAY: case DATA_TYPE_UINT32_ARRAY: if (!xdr_array(xdr, nelem, (xdrproc_t)xdr_u_int)) return (false); break; case DATA_TYPE_INT64_ARRAY: case DATA_TYPE_UINT64_ARRAY: if (!xdr_array(xdr, nelem, (xdrproc_t)xdr_uint64)) return (false); break; } return (true); } /* * Translate the nvlist at xdr->xdr_idx: version and flags first, then one * pair after another until a pair header with a zero size is found. * Returns 0 on success and EINVAL when a translation step fails. */ static int nvlist_xdr_nvlist(xdr_t *xdr, nvlist_t *nvl) { nvp_header_t *nvph; nvs_data_t *nvs; unsigned encoded_size, decoded_size; int rv; nvs = (nvs_data_t *)xdr->xdr_idx; nvph = &nvs->nvl_pair; if (!xdr_u_int(xdr, &nvs->nvl_version)) return (EINVAL); if (!xdr_u_int(xdr, &nvs->nvl_nvflag)) return (EINVAL); /* The sizes are sampled before the header may get encoded below. */ encoded_size = nvph->encoded_size; decoded_size = nvph->decoded_size; if (xdr->xdr_op == XDR_OP_ENCODE) { if (!xdr_u_int(xdr, &nvph->encoded_size)) return (EINVAL); if (!xdr_u_int(xdr, &nvph->decoded_size)) return (EINVAL); } else { /* NOTE(review): on decode the pair header is only stepped over; presumably nvlist_size_xdr() has translated it already - confirm. */ xdr->xdr_idx += 2 * sizeof(unsigned); } rv = 0; while (encoded_size && decoded_size) { if (!nvlist_xdr_nvp(xdr, nvl)) return (EINVAL); nvph = (nvp_header_t *)(xdr->xdr_idx); encoded_size = nvph->encoded_size; decoded_size = nvph->decoded_size; if (xdr->xdr_op == XDR_OP_ENCODE) { if (!xdr_u_int(xdr, &nvph->encoded_size)) return (EINVAL); if (!xdr_u_int(xdr, &nvph->decoded_size)) return (EINVAL); } else { xdr->xdr_idx += 2 * sizeof(unsigned); } } return (rv); } /* * Calculate nvlist size, translating encoded_size and decoded_size.
*/ static bool nvlist_size_xdr(xdr_t *xdr, size_t *size) { uint8_t *pair; unsigned encoded_size, decoded_size; xdr->xdr_idx += 2 * sizeof(unsigned); pair = xdr->xdr_idx; if (!xdr_u_int(xdr, &encoded_size) || !xdr_u_int(xdr, &decoded_size)) return (false); while (encoded_size && decoded_size) { xdr->xdr_idx = pair + encoded_size; pair = xdr->xdr_idx; if (!xdr_u_int(xdr, &encoded_size) || !xdr_u_int(xdr, &decoded_size)) return (false); } *size = xdr->xdr_idx - xdr->xdr_buf; return (true); } nvp_header_t * nvlist_next_nvpair(nvlist_t *nvl, nvp_header_t *nvh) { uint8_t *pair; unsigned encoded_size, decoded_size; xdr_t xdr; if (nvl == NULL) return (NULL); xdr.xdr_buf = nvl->nv_data; xdr.xdr_idx = nvl->nv_data; xdr.xdr_buf_size = nvl->nv_size; xdr.xdr_idx += 2 * sizeof(unsigned); /* Skip tp current pair */ if (nvh != NULL) { xdr.xdr_idx = (uint8_t *)nvh; } pair = xdr.xdr_idx; if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) return (NULL); encoded_size = *(unsigned *)xdr.xdr_idx; xdr.xdr_idx += sizeof(unsigned); if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) return (NULL); decoded_size = *(unsigned *)xdr.xdr_idx; xdr.xdr_idx += sizeof(unsigned); if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) return (NULL); while (encoded_size && decoded_size) { if (nvh == NULL) return ((nvp_header_t *)pair); xdr.xdr_idx = pair + encoded_size; nvh = (nvp_header_t *)xdr.xdr_idx; if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) return (NULL); encoded_size = *(unsigned *)xdr.xdr_idx; xdr.xdr_idx += sizeof(unsigned); if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) return (NULL); decoded_size = *(unsigned *)xdr.xdr_idx; xdr.xdr_idx += sizeof(unsigned); if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) return (NULL); if (encoded_size != 0 && decoded_size != 0) { return (nvh); } } return (NULL); } /* * Calculate nvlist size by walking in memory data. 
*/ static bool nvlist_size_native(xdr_t *xdr, size_t *size) { uint8_t *pair; unsigned encoded_size, decoded_size; xdr->xdr_idx += 2 * sizeof(unsigned); pair = xdr->xdr_idx; if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) return (false); encoded_size = *(unsigned *)xdr->xdr_idx; xdr->xdr_idx += sizeof(unsigned); if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) return (false); decoded_size = *(unsigned *)xdr->xdr_idx; xdr->xdr_idx += sizeof(unsigned); while (encoded_size && decoded_size) { xdr->xdr_idx = pair + encoded_size; pair = xdr->xdr_idx; if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) return (false); encoded_size = *(unsigned *)xdr->xdr_idx; xdr->xdr_idx += sizeof(unsigned); if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) return (false); decoded_size = *(unsigned *)xdr->xdr_idx; xdr->xdr_idx += sizeof(unsigned); } *size = xdr->xdr_idx - xdr->xdr_buf; return (true); } /* * Export nvlist to byte stream format. */ int nvlist_export(nvlist_t *nvl) { int rv; xdr_t xdr = { .xdr_op = XDR_OP_ENCODE, .xdr_putint = _putint, .xdr_putuint = _putuint, .xdr_buf = nvl->nv_data, .xdr_idx = nvl->nv_data, .xdr_buf_size = nvl->nv_size }; if (nvl->nv_header.nvh_encoding != NV_ENCODE_XDR) return (ENOTSUP); nvl->nv_idx = nvl->nv_data; rv = nvlist_xdr_nvlist(&xdr, nvl); return (rv); } /* * Import nvlist from byte stream. * Determine the stream size and allocate private copy. * Then translate the data. */ nvlist_t * nvlist_import(const char *stream, size_t size) { nvlist_t *nvl; xdr_t xdr = { .xdr_op = XDR_OP_DECODE, .xdr_getint = _getint, .xdr_getuint = _getuint }; /* Check the nvlist head. 
*/ if (stream[0] != NV_ENCODE_XDR || (stream[1] != '\0' && stream[1] != '\1') || stream[2] != '\0' || stream[3] != '\0' || be32toh(*(uint32_t *)(stream + 4)) != NV_VERSION || be32toh(*(uint32_t *)(stream + 8)) != NV_UNIQUE_NAME) return (NULL); nvl = malloc(sizeof(*nvl)); if (nvl == NULL) return (nvl); nvl->nv_header.nvh_encoding = stream[0]; nvl->nv_header.nvh_endian = stream[1]; nvl->nv_header.nvh_reserved1 = stream[2]; nvl->nv_header.nvh_reserved2 = stream[3]; xdr.xdr_buf = xdr.xdr_idx = (uint8_t *)stream + 4; xdr.xdr_buf_size = size - 4; if (!nvlist_size_xdr(&xdr, &nvl->nv_asize)) { free(nvl); return (NULL); } nvl->nv_size = nvl->nv_asize; nvl->nv_data = malloc(nvl->nv_asize); if (nvl->nv_data == NULL) { free(nvl); return (NULL); } nvl->nv_idx = nvl->nv_data; bcopy(stream + 4, nvl->nv_data, nvl->nv_asize); xdr.xdr_buf = xdr.xdr_idx = nvl->nv_data; xdr.xdr_buf_size = nvl->nv_asize; if (nvlist_xdr_nvlist(&xdr, nvl) != 0) { free(nvl->nv_data); free(nvl); nvl = NULL; } return (nvl); } /* * remove pair from this nvlist. 
*/ int nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type) { uint8_t *head, *tail; nvs_data_t *data; nvp_header_t *nvp; nv_string_t *nvp_name; nv_pair_data_t *nvp_data; size_t size; xdr_t xdr; if (nvl == NULL || nvl->nv_data == NULL || name == NULL) return (EINVAL); /* Make sure the nvlist size is set correct */ xdr.xdr_idx = nvl->nv_data; xdr.xdr_buf = xdr.xdr_idx; xdr.xdr_buf_size = nvl->nv_size; if (!nvlist_size_native(&xdr, &nvl->nv_size)) return (EINVAL); data = (nvs_data_t *)nvl->nv_data; nvp = &data->nvl_pair; /* first pair in nvlist */ head = (uint8_t *)nvp; while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { nvp_name = (nv_string_t *)(nvp + 1); nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] + NV_ALIGN4(nvp_name->nv_size)); if (strlen(name) == nvp_name->nv_size && memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && (nvp_data->nv_type == type || type == DATA_TYPE_UNKNOWN)) { /* * set tail to point to next nvpair and size * is the length of the tail. */ tail = head + nvp->encoded_size; size = nvl->nv_size - (tail - nvl->nv_data); /* adjust the size of the nvlist. */ nvl->nv_size -= nvp->encoded_size; bcopy(tail, head, size); return (0); } /* Not our pair, skip to next. */ head = head + nvp->encoded_size; nvp = (nvp_header_t *)head; } return (ENOENT); } static int clone_nvlist(const nvlist_t *nvl, const uint8_t *ptr, unsigned size, nvlist_t **nvlist) { nvlist_t *nv; nv = calloc(1, sizeof(*nv)); if (nv == NULL) return (ENOMEM); nv->nv_header = nvl->nv_header; nv->nv_asize = size; nv->nv_size = size; nv->nv_data = malloc(nv->nv_asize); if (nv->nv_data == NULL) { free(nv); return (ENOMEM); } bcopy(ptr, nv->nv_data, nv->nv_asize); *nvlist = nv; return (0); } /* * Return the next nvlist in an nvlist array. 
*/ static uint8_t * nvlist_next(const uint8_t *ptr) { nvs_data_t *data; nvp_header_t *nvp; data = (nvs_data_t *)ptr; nvp = &data->nvl_pair; /* first pair in nvlist */ while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); } return ((uint8_t *)nvp + sizeof(*nvp)); } /* * Note: nvlist and nvlist array must be freed by caller. */ int nvlist_find(const nvlist_t *nvl, const char *name, data_type_t type, int *elementsp, void *valuep, int *sizep) { nvs_data_t *data; nvp_header_t *nvp; nv_string_t *nvp_name; nv_pair_data_t *nvp_data; nvlist_t **nvlist, *nv; uint8_t *ptr; int rv; if (nvl == NULL || nvl->nv_data == NULL || name == NULL) return (EINVAL); data = (nvs_data_t *)nvl->nv_data; nvp = &data->nvl_pair; /* first pair in nvlist */ while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { nvp_name = (nv_string_t *)((uint8_t *)nvp + sizeof(*nvp)); if (nvl->nv_data + nvl->nv_size < nvp_name->nv_data + nvp_name->nv_size) return (EIO); nvp_data = (nv_pair_data_t *) NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); if (strlen(name) == nvp_name->nv_size && memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && (nvp_data->nv_type == type || type == DATA_TYPE_UNKNOWN)) { if (elementsp != NULL) *elementsp = nvp_data->nv_nelem; switch (nvp_data->nv_type) { case DATA_TYPE_UINT64: bcopy(nvp_data->nv_data, valuep, sizeof(uint64_t)); return (0); case DATA_TYPE_STRING: nvp_name = (nv_string_t *)nvp_data->nv_data; if (sizep != NULL) { *sizep = nvp_name->nv_size; } *(const uint8_t **)valuep = &nvp_name->nv_data[0]; return (0); case DATA_TYPE_NVLIST: ptr = &nvp_data->nv_data[0]; rv = clone_nvlist(nvl, ptr, nvlist_next(ptr) - ptr, &nv); if (rv == 0) { *(nvlist_t **)valuep = nv; } return (rv); case DATA_TYPE_NVLIST_ARRAY: nvlist = calloc(nvp_data->nv_nelem, sizeof(nvlist_t *)); if (nvlist == NULL) return (ENOMEM); ptr = &nvp_data->nv_data[0]; rv = 0; for (unsigned i = 0; i < nvp_data->nv_nelem; i++) { rv = 
clone_nvlist(nvl, ptr, nvlist_next(ptr) - ptr, &nvlist[i]); if (rv != 0) goto error; ptr = nvlist_next(ptr); } *(nvlist_t ***)valuep = nvlist; return (rv); } return (EIO); } /* Not our pair, skip to next. */ nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); if (nvl->nv_data + nvl->nv_size < (uint8_t *)nvp) return (EIO); } return (ENOENT); error: for (unsigned i = 0; i < nvp_data->nv_nelem; i++) { free(nvlist[i]->nv_data); free(nvlist[i]); } free(nvlist); return (rv); } static int get_value_size(data_type_t type, const void *data, uint32_t nelem) { uint64_t value_sz = 0; switch (type) { case DATA_TYPE_BOOLEAN: value_sz = 0; break; case DATA_TYPE_BOOLEAN_VALUE: case DATA_TYPE_BYTE: case DATA_TYPE_INT8: case DATA_TYPE_UINT8: case DATA_TYPE_INT16: case DATA_TYPE_UINT16: case DATA_TYPE_INT32: case DATA_TYPE_UINT32: /* Our smallest data unit is 32-bit */ value_sz = sizeof(uint32_t); break; case DATA_TYPE_HRTIME: case DATA_TYPE_INT64: value_sz = sizeof(int64_t); break; case DATA_TYPE_UINT64: value_sz = sizeof(uint64_t); break; case DATA_TYPE_STRING: if (data == NULL) value_sz = 0; else value_sz = strlen(data) + 1; break; case DATA_TYPE_BYTE_ARRAY: value_sz = nelem * sizeof(uint8_t); break; case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_INT8_ARRAY: case DATA_TYPE_UINT8_ARRAY: case DATA_TYPE_INT16_ARRAY: case DATA_TYPE_UINT16_ARRAY: case DATA_TYPE_INT32_ARRAY: case DATA_TYPE_UINT32_ARRAY: value_sz = (uint64_t)nelem * sizeof(uint32_t); break; case DATA_TYPE_INT64_ARRAY: value_sz = (uint64_t)nelem * sizeof(int64_t); break; case DATA_TYPE_UINT64_ARRAY: value_sz = (uint64_t)nelem * sizeof(uint64_t); break; case DATA_TYPE_STRING_ARRAY: value_sz = (uint64_t)nelem * sizeof(uint64_t); if (data != NULL) { char *const *strs = data; uint32_t i; for (i = 0; i < nelem; i++) { if (strs[i] == NULL) return (-1); value_sz += strlen(strs[i]) + 1; } } break; case DATA_TYPE_NVLIST: /* * The decoded size of nvlist is constant. 
*/ value_sz = NV_ALIGN(6 * 4); /* sizeof nvlist_t */ break; case DATA_TYPE_NVLIST_ARRAY: value_sz = (uint64_t)nelem * sizeof(uint64_t) + (uint64_t)nelem * NV_ALIGN(6 * 4); /* sizeof nvlist_t */ break; default: return (-1); } return (value_sz > INT32_MAX ? -1 : (int)value_sz); } static int get_nvp_data_size(data_type_t type, const void *data, uint32_t nelem) { uint64_t value_sz = 0; xdr_t xdr; size_t size; switch (type) { case DATA_TYPE_BOOLEAN: value_sz = 0; break; case DATA_TYPE_BOOLEAN_VALUE: case DATA_TYPE_BYTE: case DATA_TYPE_INT8: case DATA_TYPE_UINT8: case DATA_TYPE_INT16: case DATA_TYPE_UINT16: case DATA_TYPE_INT32: case DATA_TYPE_UINT32: /* Our smallest data unit is 32-bit */ value_sz = sizeof(uint32_t); break; case DATA_TYPE_HRTIME: case DATA_TYPE_INT64: case DATA_TYPE_UINT64: value_sz = sizeof(uint64_t); break; case DATA_TYPE_STRING: value_sz = 4 + NV_ALIGN4(strlen(data)); break; case DATA_TYPE_BYTE_ARRAY: value_sz = NV_ALIGN4(nelem); break; case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_INT8_ARRAY: case DATA_TYPE_UINT8_ARRAY: case DATA_TYPE_INT16_ARRAY: case DATA_TYPE_UINT16_ARRAY: case DATA_TYPE_INT32_ARRAY: case DATA_TYPE_UINT32_ARRAY: value_sz = 4 + (uint64_t)nelem * sizeof(uint32_t); break; case DATA_TYPE_INT64_ARRAY: case DATA_TYPE_UINT64_ARRAY: value_sz = 4 + (uint64_t)nelem * sizeof(uint64_t); break; case DATA_TYPE_STRING_ARRAY: if (data != NULL) { char *const *strs = data; uint32_t i; for (i = 0; i < nelem; i++) { value_sz += 4 + NV_ALIGN4(strlen(strs[i])); } } break; case DATA_TYPE_NVLIST: xdr.xdr_idx = ((nvlist_t *)data)->nv_data; xdr.xdr_buf = xdr.xdr_idx; xdr.xdr_buf_size = ((nvlist_t *)data)->nv_size; if (!nvlist_size_native(&xdr, &size)) return (-1); value_sz = size; break; case DATA_TYPE_NVLIST_ARRAY: value_sz = 0; for (uint32_t i = 0; i < nelem; i++) { xdr.xdr_idx = ((nvlist_t **)data)[i]->nv_data; xdr.xdr_buf = xdr.xdr_idx; xdr.xdr_buf_size = ((nvlist_t **)data)[i]->nv_size; if (!nvlist_size_native(&xdr, &size)) return (-1); value_sz += 
size; } break; default: return (-1); } return (value_sz > INT32_MAX ? -1 : (int)value_sz); } #define NVPE_SIZE(name_len, data_len) \ (4 + 4 + 4 + NV_ALIGN4(name_len) + 4 + 4 + data_len) #define NVP_SIZE(name_len, data_len) \ (NV_ALIGN((4 * 4) + (name_len)) + NV_ALIGN(data_len)) static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type, uint32_t nelem, const void *data) { nvs_data_t *nvs; nvp_header_t head, *hp; uint8_t *ptr; size_t namelen; int decoded_size, encoded_size; xdr_t xdr = { .xdr_op = XDR_OP_ENCODE, .xdr_putint = _putint_mem, .xdr_putuint = _putuint_mem, .xdr_buf = nvl->nv_data, .xdr_idx = nvl->nv_data, .xdr_buf_size = nvl->nv_size }; nvs = (nvs_data_t *)nvl->nv_data; if (nvs->nvl_nvflag & NV_UNIQUE_NAME) (void) nvlist_remove(nvl, name, type); xdr.xdr_buf = nvl->nv_data; xdr.xdr_idx = nvl->nv_data; xdr.xdr_buf_size = nvl->nv_size; if (!nvlist_size_native(&xdr, &nvl->nv_size)) return (EINVAL); namelen = strlen(name); if ((decoded_size = get_value_size(type, data, nelem)) < 0) return (EINVAL); if ((encoded_size = get_nvp_data_size(type, data, nelem)) < 0) return (EINVAL); /* * The encoded size is calculated as: * encode_size (4) + decode_size (4) + * name string size (4 + NV_ALIGN4(namelen) + * data type (4) + nelem size (4) + datalen * * The decoded size is calculated as: * Note: namelen is with terminating 0. 
* NV_ALIGN(sizeof(nvpair_t) (4 * 4) + namelen + 1) + * NV_ALIGN(data_len) */ head.encoded_size = NVPE_SIZE(namelen, encoded_size); head.decoded_size = NVP_SIZE(namelen + 1, decoded_size); if (nvl->nv_asize - nvl->nv_size < head.encoded_size + 8) { ptr = realloc(nvl->nv_data, nvl->nv_asize + head.encoded_size); if (ptr == NULL) return (ENOMEM); nvl->nv_data = ptr; nvl->nv_asize += head.encoded_size; } nvl->nv_idx = nvl->nv_data + nvl->nv_size - sizeof(*hp); bzero(nvl->nv_idx, head.encoded_size + 8); hp = (nvp_header_t *)nvl->nv_idx; *hp = head; nvl->nv_idx += sizeof(*hp); xdr.xdr_buf = nvl->nv_data; xdr.xdr_buf_size = nvl->nv_asize; xdr.xdr_idx = nvl->nv_idx; xdr.xdr_idx += xdr.xdr_putuint(&xdr, namelen); strlcpy((char *)xdr.xdr_idx, name, namelen + 1); xdr.xdr_idx += NV_ALIGN4(namelen); xdr.xdr_idx += xdr.xdr_putuint(&xdr, type); xdr.xdr_idx += xdr.xdr_putuint(&xdr, nelem); switch (type) { case DATA_TYPE_BOOLEAN: break; case DATA_TYPE_BYTE_ARRAY: xdr.xdr_idx += xdr.xdr_putuint(&xdr, encoded_size); bcopy(data, xdr.xdr_idx, nelem); xdr.xdr_idx += NV_ALIGN4(encoded_size); break; case DATA_TYPE_STRING: encoded_size = strlen(data); xdr.xdr_idx += xdr.xdr_putuint(&xdr, encoded_size); strlcpy((char *)xdr.xdr_idx, data, encoded_size + 1); xdr.xdr_idx += NV_ALIGN4(encoded_size); break; case DATA_TYPE_STRING_ARRAY: for (uint32_t i = 0; i < nelem; i++) { encoded_size = strlen(((char **)data)[i]); xdr.xdr_idx += xdr.xdr_putuint(&xdr, encoded_size); strlcpy((char *)xdr.xdr_idx, ((char **)data)[i], encoded_size + 1); xdr.xdr_idx += NV_ALIGN4(encoded_size); } break; case DATA_TYPE_BYTE: case DATA_TYPE_INT8: case DATA_TYPE_UINT8: xdr_char(&xdr, (char *)data); break; case DATA_TYPE_INT8_ARRAY: case DATA_TYPE_UINT8_ARRAY: xdr_array(&xdr, nelem, (xdrproc_t)xdr_char); break; case DATA_TYPE_INT16: xdr_short(&xdr, (short *)data); break; case DATA_TYPE_UINT16: xdr_u_short(&xdr, (unsigned short *)data); break; case DATA_TYPE_INT16_ARRAY: xdr_array(&xdr, nelem, (xdrproc_t)xdr_short); 
break; case DATA_TYPE_UINT16_ARRAY: xdr_array(&xdr, nelem, (xdrproc_t)xdr_u_short); break; case DATA_TYPE_BOOLEAN_VALUE: case DATA_TYPE_INT32: xdr_int(&xdr, (int *)data); break; case DATA_TYPE_UINT32: xdr_u_int(&xdr, (unsigned int *)data); break; case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_INT32_ARRAY: xdr_array(&xdr, nelem, (xdrproc_t)xdr_int); break; case DATA_TYPE_UINT32_ARRAY: xdr_array(&xdr, nelem, (xdrproc_t)xdr_u_int); break; case DATA_TYPE_INT64: xdr_int64(&xdr, (int64_t *)data); break; case DATA_TYPE_UINT64: xdr_uint64(&xdr, (uint64_t *)data); break; case DATA_TYPE_INT64_ARRAY: xdr_array(&xdr, nelem, (xdrproc_t)xdr_int64); break; case DATA_TYPE_UINT64_ARRAY: xdr_array(&xdr, nelem, (xdrproc_t)xdr_uint64); break; case DATA_TYPE_NVLIST: bcopy(((nvlist_t *)data)->nv_data, xdr.xdr_idx, encoded_size); break; case DATA_TYPE_NVLIST_ARRAY: { size_t size; xdr_t xdr_nv; for (uint32_t i = 0; i < nelem; i++) { xdr_nv.xdr_idx = ((nvlist_t **)data)[i]->nv_data; xdr_nv.xdr_buf = xdr_nv.xdr_idx; xdr_nv.xdr_buf_size = ((nvlist_t **)data)[i]->nv_size; if (!nvlist_size_native(&xdr_nv, &size)) return (EINVAL); bcopy(((nvlist_t **)data)[i]->nv_data, xdr.xdr_idx, size); xdr.xdr_idx += size; } break; } default: bcopy(data, xdr.xdr_idx, encoded_size); } nvl->nv_size += head.encoded_size; return (0); } int -nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t value) +nvlist_add_boolean_value(nvlist_t *nvl, const char *name, int value) { return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, &value)); } int nvlist_add_byte(nvlist_t *nvl, const char *name, uint8_t value) { return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &value)); } int nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t value) { return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &value)); } int nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t value) { return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &value)); } int nvlist_add_int16(nvlist_t *nvl, const char 
*name, int16_t value) { return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &value)); } int nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t value) { return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &value)); } int nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t value) { return (nvlist_add_common(nvl, name, DATA_TYPE_INT32, 1, &value)); } int nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t value) { return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &value)); } int nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t value) { return (nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &value)); } int nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t value) { return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &value)); } int nvlist_add_string(nvlist_t *nvl, const char *name, const char *value) { return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, value)); } int -nvlist_add_boolean_array(nvlist_t *nvl, const char *name, - boolean_t *a, uint32_t n) +nvlist_add_boolean_array(nvlist_t *nvl, const char *name, int *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a)); } int nvlist_add_byte_array(nvlist_t *nvl, const char *name, uint8_t *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); } int nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); } int nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); } int nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); } int nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); } int 
nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); } int nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); } int nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); } int nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); } int nvlist_add_string_array(nvlist_t *nvl, const char *name, char * const *a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); } int nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val) { return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val)); } int nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, uint32_t n) { return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); } static const char *typenames[] = { "DATA_TYPE_UNKNOWN", "DATA_TYPE_BOOLEAN", "DATA_TYPE_BYTE", "DATA_TYPE_INT16", "DATA_TYPE_UINT16", "DATA_TYPE_INT32", "DATA_TYPE_UINT32", "DATA_TYPE_INT64", "DATA_TYPE_UINT64", "DATA_TYPE_STRING", "DATA_TYPE_BYTE_ARRAY", "DATA_TYPE_INT16_ARRAY", "DATA_TYPE_UINT16_ARRAY", "DATA_TYPE_INT32_ARRAY", "DATA_TYPE_UINT32_ARRAY", "DATA_TYPE_INT64_ARRAY", "DATA_TYPE_UINT64_ARRAY", "DATA_TYPE_STRING_ARRAY", "DATA_TYPE_HRTIME", "DATA_TYPE_NVLIST", "DATA_TYPE_NVLIST_ARRAY", "DATA_TYPE_BOOLEAN_VALUE", "DATA_TYPE_INT8", "DATA_TYPE_UINT8", "DATA_TYPE_BOOLEAN_ARRAY", "DATA_TYPE_INT8_ARRAY", "DATA_TYPE_UINT8_ARRAY" }; int nvpair_type_from_name(const char *name) { unsigned i; for (i = 0; i < nitems(typenames); i++) { if (strcmp(name, typenames[i]) == 0) return (i); } return (0); } nvp_header_t * nvpair_find(nvlist_t *nv, const char *name) { nvp_header_t *nvh; nvh = NULL; while ((nvh 
= nvlist_next_nvpair(nv, nvh)) != NULL) { nv_string_t *nvp_name; nvp_name = (nv_string_t *)(nvh + 1); if (nvp_name->nv_size == strlen(name) && memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0) break; } return (nvh); } /* * Print one nvpair: indentation, type name, element count and pair name, * followed by the value for the data types handled in the switch below. */ void nvpair_print(nvp_header_t *nvp, unsigned int indent) { nv_string_t *nvp_name; nv_pair_data_t *nvp_data; nvlist_t nvlist; unsigned i, j; xdr_t xdr = { .xdr_op = XDR_OP_DECODE, .xdr_getint = _getint_mem, .xdr_getuint = _getuint_mem, .xdr_buf = (const uint8_t *)nvp, .xdr_idx = NULL, .xdr_buf_size = nvp->encoded_size }; nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof(*nvp)); nvp_data = (nv_pair_data_t *) NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); for (i = 0; i < indent; i++) printf(" "); /* * typenames[] is indexed by the stored type; a type outside the table is * reported as typenames[0] ("DATA_TYPE_UNKNOWN") instead of reading past * the end of the array. */ printf("%s [%d] %.*s", nvp_data->nv_type < nitems(typenames) ? typenames[nvp_data->nv_type] : typenames[0], nvp_data->nv_nelem, nvp_name->nv_size, nvp_name->nv_data); xdr.xdr_idx = nvp_data->nv_data; switch (nvp_data->nv_type) { case DATA_TYPE_BYTE: case DATA_TYPE_INT8: case DATA_TYPE_UINT8: { char c; if (xdr_char(&xdr, &c)) printf(" = 0x%x\n", c); break; } case DATA_TYPE_INT16: case DATA_TYPE_UINT16: { unsigned short u; if (xdr_u_short(&xdr, &u)) printf(" = 0x%hx\n", u); break; } case DATA_TYPE_BOOLEAN_VALUE: case DATA_TYPE_INT32: case DATA_TYPE_UINT32: { unsigned u; if (xdr_u_int(&xdr, &u)) printf(" = 0x%x\n", u); break; } case DATA_TYPE_INT64: case DATA_TYPE_UINT64: { uint64_t u; if (xdr_uint64(&xdr, &u)) printf(" = 0x%jx\n", (uintmax_t)u); break; } case DATA_TYPE_INT64_ARRAY: case DATA_TYPE_UINT64_ARRAY: { uint64_t *u; if (xdr_array(&xdr, nvp_data->nv_nelem, (xdrproc_t)xdr_uint64)) { u = (uint64_t *)(nvp_data->nv_data + sizeof(unsigned)); for (i = 0; i < nvp_data->nv_nelem; i++) printf(" [%u] = 0x%jx", i, (uintmax_t)u[i]); printf("\n"); } break; } case DATA_TYPE_STRING: case DATA_TYPE_STRING_ARRAY: nvp_name = (nv_string_t *)&nvp_data->nv_data[0]; for (i = 0; i < nvp_data->nv_nelem; i++) { printf(" = \"%.*s\"\n", nvp_name->nv_size, nvp_name->nv_data); /* * Advance to the next element: a length word followed by the bytes * padded to a multiple of 4, so each array entry is printed rather * than the first one repeatedly. */ nvp_name = (nv_string_t *)(&nvp_name->nv_data[0] + NV_ALIGN4(nvp_name->nv_size)); } break; case DATA_TYPE_NVLIST: 
printf("\n"); nvlist.nv_data = &nvp_data->nv_data[0]; nvlist_print(&nvlist, indent + 2); break; case DATA_TYPE_NVLIST_ARRAY: nvlist.nv_data = &nvp_data->nv_data[0]; for (j = 0; j < nvp_data->nv_nelem; j++) { size_t size; printf("[%d]\n", j); nvlist_print(&nvlist, indent + 2); if (j != nvp_data->nv_nelem - 1) { for (i = 0; i < indent; i++) printf(" "); printf("%s %.*s", typenames[nvp_data->nv_type], nvp_name->nv_size, nvp_name->nv_data); } xdr.xdr_idx = nvlist.nv_data; xdr.xdr_buf = xdr.xdr_idx; xdr.xdr_buf_size = nvp->encoded_size - (xdr.xdr_idx - (uint8_t *)nvp); if (!nvlist_size_native(&xdr, &size)) return; nvlist.nv_data += size; } break; default: printf("\n"); } } void nvlist_print(const nvlist_t *nvl, unsigned int indent) { nvs_data_t *data; nvp_header_t *nvp; data = (nvs_data_t *)nvl->nv_data; nvp = &data->nvl_pair; /* first pair in nvlist */ while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { nvpair_print(nvp, indent); nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); } printf("%*s\n", indent + 13, "End of nvlist"); } diff --git a/stand/libsa/zfs/libzfs.h b/stand/libsa/zfs/nvlist.h similarity index 77% copy from stand/libsa/zfs/libzfs.h copy to stand/libsa/zfs/nvlist.h index 27fde493670c..9dab53d006c2 100644 --- a/stand/libsa/zfs/libzfs.h +++ b/stand/libsa/zfs/nvlist.h @@ -1,172 +1,164 @@ /*- - * Copyright (c) 2012 Andriy Gapon - * All rights reserved. + * Copyright 2020 Toomas Soome * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#ifndef _BOOT_LIBZFS_H_ -#define _BOOT_LIBZFS_H_ - -#include - -#ifdef LOADER_GELI_SUPPORT -#include -#endif - -#define ZFS_MAXNAMELEN 256 - -/* - * ZFS fully-qualified device descriptor. - */ -struct zfs_devdesc { - struct devdesc dd; /* Must be first. 
*/ - uint64_t pool_guid; - uint64_t root_guid; -}; +#ifndef _BOOT_NVLIST_H_ +#define _BOOT_NVLIST_H_ + +typedef enum { + DATA_TYPE_UNKNOWN = 0, + DATA_TYPE_BOOLEAN, + DATA_TYPE_BYTE, + DATA_TYPE_INT16, + DATA_TYPE_UINT16, + DATA_TYPE_INT32, + DATA_TYPE_UINT32, + DATA_TYPE_INT64, + DATA_TYPE_UINT64, + DATA_TYPE_STRING, + DATA_TYPE_BYTE_ARRAY, + DATA_TYPE_INT16_ARRAY, + DATA_TYPE_UINT16_ARRAY, + DATA_TYPE_INT32_ARRAY, + DATA_TYPE_UINT32_ARRAY, + DATA_TYPE_INT64_ARRAY, + DATA_TYPE_UINT64_ARRAY, + DATA_TYPE_STRING_ARRAY, + DATA_TYPE_HRTIME, + DATA_TYPE_NVLIST, + DATA_TYPE_NVLIST_ARRAY, + DATA_TYPE_BOOLEAN_VALUE, + DATA_TYPE_INT8, + DATA_TYPE_UINT8, + DATA_TYPE_BOOLEAN_ARRAY, + DATA_TYPE_INT8_ARRAY, + DATA_TYPE_UINT8_ARRAY +} data_type_t; /* nvp implementation version */ #define NV_VERSION 0 +/* nvlist pack encoding */ +#define NV_ENCODE_NATIVE 0 +#define NV_ENCODE_XDR 1 + /* nvlist persistent unique name flags, stored in nvl_nvflags */ #define NV_UNIQUE_NAME 0x1 #define NV_UNIQUE_NAME_TYPE 0x2 #define NV_ALIGN4(x) (((x) + 3) & ~3) #define NV_ALIGN(x) (((x) + 7) & ~7) /* * nvlist header. * nvlist has 4 bytes header followed by version and flags, then nvpairs * and the list is terminated by double zero. */ typedef struct { char nvh_encoding; char nvh_endian; char nvh_reserved1; char nvh_reserved2; } nvs_header_t; typedef struct { nvs_header_t nv_header; size_t nv_asize; size_t nv_size; uint8_t *nv_data; uint8_t *nv_idx; } nvlist_t; /* * nvpair header. * nvpair has encoded and decoded size * name string (size and data) * data type and number of elements * data */ typedef struct { unsigned encoded_size; unsigned decoded_size; } nvp_header_t; /* * nvlist stream head. 
*/ typedef struct { unsigned nvl_version; unsigned nvl_nvflag; nvp_header_t nvl_pair; } nvs_data_t; typedef struct { unsigned nv_size; uint8_t nv_data[]; /* NV_ALIGN4(string) */ } nv_string_t; typedef struct { unsigned nv_type; /* data_type_t */ unsigned nv_nelem; /* number of elements */ uint8_t nv_data[]; /* data stream */ } nv_pair_data_t; nvlist_t *nvlist_create(int); void nvlist_destroy(nvlist_t *); nvlist_t *nvlist_import(const char *, size_t); int nvlist_export(nvlist_t *); int nvlist_remove(nvlist_t *, const char *, data_type_t); int nvpair_type_from_name(const char *); nvp_header_t *nvpair_find(nvlist_t *, const char *); void nvpair_print(nvp_header_t *, unsigned int); void nvlist_print(const nvlist_t *, unsigned int); char *nvstring_get(nv_string_t *); int nvlist_find(const nvlist_t *, const char *, data_type_t, int *, void *, int *); nvp_header_t *nvlist_next_nvpair(nvlist_t *, nvp_header_t *); -int nvlist_add_boolean_value(nvlist_t *, const char *, boolean_t); +int nvlist_add_boolean_value(nvlist_t *, const char *, int); int nvlist_add_byte(nvlist_t *, const char *, uint8_t); int nvlist_add_int8(nvlist_t *, const char *, int8_t); int nvlist_add_uint8(nvlist_t *, const char *, uint8_t); int nvlist_add_int16(nvlist_t *, const char *, int16_t); int nvlist_add_uint16(nvlist_t *, const char *, uint16_t); int nvlist_add_int32(nvlist_t *, const char *, int32_t); int nvlist_add_uint32(nvlist_t *, const char *, uint32_t); int nvlist_add_int64(nvlist_t *, const char *, int64_t); int nvlist_add_uint64(nvlist_t *, const char *, uint64_t); int nvlist_add_string(nvlist_t *, const char *, const char *); -int nvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint32_t); +int nvlist_add_boolean_array(nvlist_t *, const char *, int *, uint32_t); int nvlist_add_byte_array(nvlist_t *, const char *, uint8_t *, uint32_t); int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint32_t); int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, 
uint32_t); int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint32_t); int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint32_t); int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint32_t); int nvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint32_t); int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint32_t); int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint32_t); int nvlist_add_string_array(nvlist_t *, const char *, char * const *, uint32_t); int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *); int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint32_t); -int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, - const char **path); -char *zfs_fmtdev(void *vdev); -int zfs_probe_dev(const char *devname, uint64_t *pool_guid); -int zfs_list(const char *name); -int zfs_get_bootonce(void *, const char *, char *, size_t); -int zfs_get_bootenv(void *, nvlist_t **); -int zfs_set_bootenv(void *, nvlist_t *); -int zfs_attach_nvstore(void *); -uint64_t ldi_get_size(void *); -void init_zfs_boot_options(const char *currdev); - -int zfs_bootenv(const char *name); -int zfs_attach_nvstore(void *); -int zfs_belist_add(const char *name, uint64_t __unused); -int zfs_set_env(void); - -nvlist_t *vdev_read_bootenv(vdev_t *); - -extern struct devsw zfs_dev; -extern struct fs_ops zfs_fsops; - -#endif /*_BOOT_LIBZFS_H_*/ +#endif /* !_BOOT_NVLIST_H_ */ diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h index 0adcd8e1bbab..46f42bc0386d 100644 --- a/sys/cddl/boot/zfs/zfsimpl.h +++ b/sys/cddl/boot/zfs/zfsimpl.h @@ -1,1903 +1,1869 @@ /*- * Copyright (c) 2002 McAfee, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and McAfee Research,, the Security Research Division of * McAfee, Inc. 
under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as * part of the DARPA CHATS research program * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2013 by Saso Kiselkov. All rights reserved. */ /* * Copyright (c) 2020 by Delphix. All rights reserved. */ #include #include #include #ifndef _ZFSIMPL_H_ #define _ZFSIMPL_H_ #define MAXNAMELEN 256 #define _NOTE(s) /* * AVL comparator helpers */ #define AVL_ISIGN(a) (((a) > 0) - ((a) < 0)) #define AVL_CMP(a, b) (((a) > (b)) - ((a) < (b))) #define AVL_PCMP(a, b) \ (((uintptr_t)(a) > (uintptr_t)(b)) - ((uintptr_t)(a) < (uintptr_t)(b))) typedef enum { B_FALSE, B_TRUE } boolean_t; /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ /* * Macros for various sorts of alignment and rounding when the alignment * is known to be a power of 2. */ #define P2ALIGN(x, align) ((x) & -(align)) #define P2PHASE(x, align) ((x) & ((align) - 1)) #define P2NPHASE(x, align) (-(x) & ((align) - 1)) #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #define P2END(x, align) (-(~(x) & -(align))) #define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align))) #define P2BOUNDARY(off, len, align) (((off) ^ ((off) + (len) - 1)) > (align) - 1) /* * General-purpose 32-bit and 64-bit bitfield encodings. 
*/ #define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len)) #define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len)) #define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low)) #define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low)) #define BF32_GET(x, low, len) BF32_DECODE(x, low, len) #define BF64_GET(x, low, len) BF64_DECODE(x, low, len) /* x and low are parenthesized so any expression may be passed for them. */ #define BF32_SET(x, low, len, val) \ ((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len)) #define BF64_SET(x, low, len, val) \ ((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len)) #define BF32_GET_SB(x, low, len, shift, bias) \ ((BF32_GET(x, low, len) + (bias)) << (shift)) #define BF64_GET_SB(x, low, len, shift, bias) \ ((BF64_GET(x, low, len) + (bias)) << (shift)) #define BF32_SET_SB(x, low, len, shift, bias, val) \ BF32_SET(x, low, len, ((val) >> (shift)) - (bias)) #define BF64_SET_SB(x, low, len, shift, bias, val) \ BF64_SET(x, low, len, ((val) >> (shift)) - (bias)) /* * Macros to reverse byte order */ #define BSWAP_8(x) ((x) & 0xff) #define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8)) #define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16)) #define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) #define SPA_MINBLOCKSHIFT 9 #define SPA_OLDMAXBLOCKSHIFT 17 #define SPA_MAXBLOCKSHIFT 24 #define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) #define SPA_OLDMAXBLOCKSIZE (1ULL << SPA_OLDMAXBLOCKSHIFT) #define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) /* * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. * The ASIZE encoding should be at least 64 times larger (6 more bits) * to support up to 4-way RAID-Z mirror mode with worst-case gang block * overhead, three DVAs per bp, plus one more bit in case we do anything * else that expands the ASIZE. 
*/ #define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */ #define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */ #define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */ /* * All SPA data is represented by 128-bit data virtual addresses (DVAs). * The members of the dva_t should be considered opaque outside the SPA. */ typedef struct dva { uint64_t dva_word[2]; } dva_t; /* * Each block has a 256-bit checksum -- strong enough for cryptographic hashes. */ typedef struct zio_cksum { uint64_t zc_word[4]; } zio_cksum_t; /* * Some checksums/hashes need a 256-bit initialization salt. This salt is kept * secret and is suitable for use in MAC algorithms as the key. */ typedef struct zio_cksum_salt { uint8_t zcs_bytes[32]; } zio_cksum_salt_t; /* * Each block is described by its DVAs, time of birth, checksum, etc. * The word-by-word, bit-by-bit layout of the blkptr is as follows: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * 0 | vdev1 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 1 |G| offset1 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 2 | vdev2 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 3 |G| offset2 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 4 | vdev3 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 5 |G| offset3 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 9 | physical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * a | logical birth txg | * 
+-------+-------+-------+-------+-------+-------+-------+-------+ * b | fill count | * +-------+-------+-------+-------+-------+-------+-------+-------+ * c | checksum[0] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * d | checksum[1] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * e | checksum[2] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * f | checksum[3] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * * Legend: * * vdev virtual device ID * offset offset into virtual device * LSIZE logical size * PSIZE physical size (after compression) * ASIZE allocated size (including RAID-Z parity and gang block headers) * GRID RAID-Z layout information (reserved for future use) * cksum checksum function * comp compression function * G gang block indicator * B byteorder (endianness) * D dedup * X encryption (on version 30, which is not supported) * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type * phys birth txg of block allocation; zero if same as logical birth txg * log. birth transaction group in which the block was logically born * fill count number of non-zero blocks under this bp * checksum[4] 256-bit checksum of the data this bp describes */ /* * "Embedded" blkptr_t's don't actually point to a block, instead they * have a data payload embedded in the blkptr_t itself. See the comment * in blkptr.c for more details. 
* * The blkptr_t is laid out as follows: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * 0 | payload | * 1 | payload | * 2 | payload | * 3 | payload | * 4 | payload | * 5 | payload | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | etype |E| comp| PSIZE| LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | payload | * 8 | payload | * 9 | payload | * +-------+-------+-------+-------+-------+-------+-------+-------+ * a | logical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * b | payload | * c | payload | * d | payload | * e | payload | * f | payload | * +-------+-------+-------+-------+-------+-------+-------+-------+ * * Legend: * * payload contains the embedded data * B (byteorder) byteorder (endianness) * D (dedup) padding (set to zero) * X encryption (set to zero; see above) * E (embedded) set to one * lvl indirection level * type DMU object type * etype how to interpret embedded data (BP_EMBEDDED_TYPE_*) * comp compression function of payload * PSIZE size of payload after compression, in bytes * LSIZE logical size of payload, in bytes * note that 25 bits is enough to store the largest * "normal" BP's LSIZE (2^16 * 2^9) in bytes * log. birth transaction group in which the block was logically born * * Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded * bp's they are stored in units of SPA_MINBLOCKSHIFT. * Generally, the generic BP_GET_*() macros can be used on embedded BP's. * The B, D, X, lvl, type, and comp fields are stored the same as with normal * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before * other macros, as they assert that they are only used on BP's of the correct * "embedded-ness". 
*/ #define BPE_GET_ETYPE(bp) \ (ASSERT(BP_IS_EMBEDDED(bp)), \ BF64_GET((bp)->blk_prop, 40, 8)) #define BPE_SET_ETYPE(bp, t) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ BF64_SET((bp)->blk_prop, 40, 8, t); \ _NOTE(CONSTCOND) } while (0) #define BPE_GET_LSIZE(bp) \ (ASSERT(BP_IS_EMBEDDED(bp)), \ BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1)) #define BPE_SET_LSIZE(bp, x) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ BF64_SET_SB((bp)->blk_prop, 0, 25, 0, 1, x); \ _NOTE(CONSTCOND) } while (0) #define BPE_GET_PSIZE(bp) \ (ASSERT(BP_IS_EMBEDDED(bp)), \ BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1)) #define BPE_SET_PSIZE(bp, x) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \ _NOTE(CONSTCOND) } while (0) typedef enum bp_embedded_type { BP_EMBEDDED_TYPE_DATA, BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */ NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED } bp_embedded_type_t; #define BPE_NUM_WORDS 14 #define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t)) #define BPE_IS_PAYLOADWORD(bp, wp) \ ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) #define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ #define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ typedef struct blkptr { dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ uint64_t blk_prop; /* size, compression, type, etc */ uint64_t blk_pad[2]; /* Extra space for the future */ uint64_t blk_phys_birth; /* txg when block was allocated */ uint64_t blk_birth; /* transaction group at birth */ uint64_t blk_fill; /* fill count */ zio_cksum_t blk_cksum; /* 256-bit checksum */ } blkptr_t; /* * Macros to get and set fields in a bp or DVA. 
*/ #define DVA_GET_ASIZE(dva) \ BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0) #define DVA_SET_ASIZE(dva, x) \ BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \ SPA_MINBLOCKSHIFT, 0, x) #define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8) #define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x) #define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32) #define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x) #define DVA_GET_OFFSET(dva) \ BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0) #define DVA_SET_OFFSET(dva, x) \ BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x) #define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1) #define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) #define BP_GET_LSIZE(bp) \ (BP_IS_EMBEDDED(bp) ? \ (BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA ? BPE_GET_LSIZE(bp) : 0): \ BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)) #define BP_SET_LSIZE(bp, x) do { \ ASSERT(!BP_IS_EMBEDDED(bp)); \ BF64_SET_SB((bp)->blk_prop, \ 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \ _NOTE(CONSTCOND) } while (0) #define BP_GET_PSIZE(bp) \ BF64_GET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1) #define BP_SET_PSIZE(bp, x) \ BF64_SET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) #define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7) #define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x) #define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) #define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) #define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) #define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) #define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1) #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_SET_DEDUP(bp, x) 
BF64_SET((bp)->blk_prop, 62, 1, x) #define BP_GET_BYTEORDER(bp) BF64_GET((bp)->blk_prop, 63, 1) #define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) #define BP_PHYSICAL_BIRTH(bp) \ ((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth) #define BP_GET_ASIZE(bp) \ (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[2])) /* Expression macro: no trailing semicolon, so it can appear inside a larger expression. */ #define BP_GET_UCSIZE(bp) \ ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \ BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) #define BP_GET_NDVAS(bp) \ (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) #define DVA_EQUAL(dva1, dva2) \ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ (dva1)->dva_word[0] == (dva2)->dva_word[0]) #define ZIO_CHECKSUM_EQUAL(zc1, zc2) \ (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ ((zc1).zc_word[1] - (zc2).zc_word[1]) | \ ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ((zc1).zc_word[3] - (zc2).zc_word[3]))) #define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0) #define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \ { \ (zcp)->zc_word[0] = w0; \ (zcp)->zc_word[1] = w1; \ (zcp)->zc_word[2] = w2; \ (zcp)->zc_word[3] = w3; \ } #define BP_IDENTITY(bp) (&(bp)->blk_dva[0]) #define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp)) #define DVA_IS_EMPTY(dva) ((dva)->dva_word[0] == 0ULL && \ (dva)->dva_word[1] == 0ULL) #define BP_IS_HOLE(bp) DVA_IS_EMPTY(BP_IDENTITY(bp)) #define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg)) #define BP_ZERO(bp) \ { \ (bp)->blk_dva[0].dva_word[0] = 0; \ (bp)->blk_dva[0].dva_word[1] = 0; \ (bp)->blk_dva[1].dva_word[0] = 0; \ (bp)->blk_dva[1].dva_word[1] = 0; \ (bp)->blk_dva[2].dva_word[0] = 0; \ (bp)->blk_dva[2].dva_word[1] = 0; \ (bp)->blk_prop = 0; \ (bp)->blk_pad[0] = 0; \ (bp)->blk_pad[1] = 0; \ (bp)->blk_phys_birth = 0; \ (bp)->blk_birth = 0; \ (bp)->blk_fill = 0; \ ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ } #if BYTE_ORDER == _BIG_ENDIAN 
#define ZFS_HOST_BYTEORDER (0ULL) #else #define ZFS_HOST_BYTEORDER (1ULL) #endif #define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER) #define BPE_NUM_WORDS 14 #define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t)) #define BPE_IS_PAYLOADWORD(bp, wp) \ ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) /* * Embedded checksum */ #define ZEC_MAGIC 0x210da7ab10c7a11ULL typedef struct zio_eck { uint64_t zec_magic; /* for validation, endianness */ zio_cksum_t zec_cksum; /* 256-bit checksum */ } zio_eck_t; /* * Gang block headers are self-checksumming and contain an array * of block pointers. */ #define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE #define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ sizeof (zio_eck_t)) / sizeof (blkptr_t)) #define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ sizeof (zio_eck_t) - \ (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ sizeof (uint64_t)) typedef struct zio_gbh { blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; uint64_t zg_filler[SPA_GBH_FILLER]; zio_eck_t zg_tail; } zio_gbh_phys_t; #define VDEV_RAIDZ_MAXPARITY 3 #define VDEV_PAD_SIZE (8 << 10) /* 2 padding areas (vl_pad1 and vl_be) to skip */ #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) /* * MMP blocks occupy the last MMP_BLOCKS_PER_LABEL slots in the uberblock * ring when MMP is enabled. */ #define MMP_BLOCKS_PER_LABEL 1 /* The largest uberblock we support is 8k. 
*/
#define	MAX_UBERBLOCK_SHIFT	(13)
/*
 * Uberblock ring geometry for a vdev.  The shift is
 * (vd)->v_top->v_ashift bounded below by UBERBLOCK_SHIFT and above by
 * MAX_UBERBLOCK_SHIFT; the slot count, slot offset within the label and
 * slot size are all derived from that shift.
 */
#define	VDEV_UBERBLOCK_SHIFT(vd)	\
	MIN(MAX((vd)->v_top->v_ashift, UBERBLOCK_SHIFT), MAX_UBERBLOCK_SHIFT)
#define	VDEV_UBERBLOCK_COUNT(vd)	\
	(VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
#define	VDEV_UBERBLOCK_OFFSET(vd, n)	\
	offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)])
#define	VDEV_UBERBLOCK_SIZE(vd)		(1ULL << VDEV_UBERBLOCK_SHIFT(vd))

/* The vdev_phys region of a label: an nvlist buffer followed by its checksum. */
typedef struct vdev_phys {
	char		vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)];
	zio_eck_t	vp_zbt;
} vdev_phys_t;

typedef enum vbe_vers {
	/* The bootenv file is stored as ascii text in the envblock */
	VB_RAW = 0,

	/*
	 * The bootenv file is converted to an nvlist and then packed into the
	 * envblock.
	 */
	VB_NVLIST = 1
} vbe_vers_t;

/*
 * Boot environment block.  Sized to fill exactly one VDEV_PAD_SIZE pad
 * area, which the CTASSERT below enforces at compile time.
 */
typedef struct vdev_boot_envblock {
	uint64_t	vbe_version;
	char		vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) -
			sizeof (zio_eck_t)];
	zio_eck_t	vbe_zbt;
} vdev_boot_envblock_t;

CTASSERT(sizeof (vdev_boot_envblock_t) == VDEV_PAD_SIZE);

typedef struct vdev_label {
	char		vl_pad1[VDEV_PAD_SIZE];			/*  8K */
	vdev_boot_envblock_t	vl_be;				/*  8K */
	vdev_phys_t	vl_vdev_phys;				/* 112K	*/
	char		vl_uberblock[VDEV_UBERBLOCK_RING];	/* 128K	*/
} vdev_label_t;							/* 256K total */

/*
 * vdev_dirty() flags
 */
#define	VDD_METASLAB	0x01
#define	VDD_DTL		0x02

/*
 * Size and offset of embedded boot loader region on each label.
 * The total size of the first two labels plus the boot area is 4MB.
 */
#define	VDEV_BOOT_OFFSET	(2 * sizeof (vdev_label_t))
#define	VDEV_BOOT_SIZE		(7ULL << 19)			/* 3.5M	*/

/*
 * Size of label regions at the start and end of each leaf device.
*/
#define	VDEV_LABEL_START_SIZE	(2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
#define	VDEV_LABEL_END_SIZE	(2 * sizeof (vdev_label_t))
#define	VDEV_LABELS		4

/*
 * Checksum algorithm identifiers.  ZIO_CHECKSUM_FUNCTIONS is the count
 * of entries, not an algorithm.
 */
enum zio_checksum {
	ZIO_CHECKSUM_INHERIT = 0,
	ZIO_CHECKSUM_ON,
	ZIO_CHECKSUM_OFF,
	ZIO_CHECKSUM_LABEL,
	ZIO_CHECKSUM_GANG_HEADER,
	ZIO_CHECKSUM_ZILOG,
	ZIO_CHECKSUM_FLETCHER_2,
	ZIO_CHECKSUM_FLETCHER_4,
	ZIO_CHECKSUM_SHA256,
	ZIO_CHECKSUM_ZILOG2,
	ZIO_CHECKSUM_NOPARITY,
	ZIO_CHECKSUM_SHA512,
	ZIO_CHECKSUM_SKEIN,
	ZIO_CHECKSUM_EDONR,
	ZIO_CHECKSUM_FUNCTIONS
};

#define	ZIO_CHECKSUM_ON_VALUE	ZIO_CHECKSUM_FLETCHER_4
#define	ZIO_CHECKSUM_DEFAULT	ZIO_CHECKSUM_ON

/*
 * Compression algorithm identifiers.  ZIO_COMPRESS_FUNCTIONS is the
 * count of entries, not an algorithm.
 */
enum zio_compress {
	ZIO_COMPRESS_INHERIT = 0,
	ZIO_COMPRESS_ON,
	ZIO_COMPRESS_OFF,
	ZIO_COMPRESS_LZJB,
	ZIO_COMPRESS_EMPTY,
	ZIO_COMPRESS_GZIP_1,
	ZIO_COMPRESS_GZIP_2,
	ZIO_COMPRESS_GZIP_3,
	ZIO_COMPRESS_GZIP_4,
	ZIO_COMPRESS_GZIP_5,
	ZIO_COMPRESS_GZIP_6,
	ZIO_COMPRESS_GZIP_7,
	ZIO_COMPRESS_GZIP_8,
	ZIO_COMPRESS_GZIP_9,
	ZIO_COMPRESS_ZLE,
	ZIO_COMPRESS_LZ4,
	ZIO_COMPRESS_ZSTD,
	ZIO_COMPRESS_FUNCTIONS
};

/*
 * zstd level identifiers.  Positive levels are numbered from 1; the
 * "fast" levels start after ZIO_ZSTD_LEVEL_RESERVE (101).  The
 * *_MIN/_MAX/_DEFAULT names are aliases defined next to the enumerator
 * they refer to.
 */
enum zio_zstd_levels {
	ZIO_ZSTD_LEVEL_INHERIT = 0,
	ZIO_ZSTD_LEVEL_1,
#define	ZIO_ZSTD_LEVEL_MIN	ZIO_ZSTD_LEVEL_1
	ZIO_ZSTD_LEVEL_2,
	ZIO_ZSTD_LEVEL_3,
#define	ZIO_ZSTD_LEVEL_DEFAULT	ZIO_ZSTD_LEVEL_3
	ZIO_ZSTD_LEVEL_4,
	ZIO_ZSTD_LEVEL_5,
	ZIO_ZSTD_LEVEL_6,
	ZIO_ZSTD_LEVEL_7,
	ZIO_ZSTD_LEVEL_8,
	ZIO_ZSTD_LEVEL_9,
	ZIO_ZSTD_LEVEL_10,
	ZIO_ZSTD_LEVEL_11,
	ZIO_ZSTD_LEVEL_12,
	ZIO_ZSTD_LEVEL_13,
	ZIO_ZSTD_LEVEL_14,
	ZIO_ZSTD_LEVEL_15,
	ZIO_ZSTD_LEVEL_16,
	ZIO_ZSTD_LEVEL_17,
	ZIO_ZSTD_LEVEL_18,
	ZIO_ZSTD_LEVEL_19,
#define	ZIO_ZSTD_LEVEL_MAX	ZIO_ZSTD_LEVEL_19
	ZIO_ZSTD_LEVEL_RESERVE = 101, /* Leave room for new positive levels */
	ZIO_ZSTD_LEVEL_FAST, /* Fast levels are negative */
	ZIO_ZSTD_LEVEL_FAST_1,
#define	ZIO_ZSTD_LEVEL_FAST_DEFAULT	ZIO_ZSTD_LEVEL_FAST_1
	ZIO_ZSTD_LEVEL_FAST_2,
	ZIO_ZSTD_LEVEL_FAST_3,
	ZIO_ZSTD_LEVEL_FAST_4,
	ZIO_ZSTD_LEVEL_FAST_5,
	ZIO_ZSTD_LEVEL_FAST_6,
	ZIO_ZSTD_LEVEL_FAST_7,
	ZIO_ZSTD_LEVEL_FAST_8,
	ZIO_ZSTD_LEVEL_FAST_9,
	ZIO_ZSTD_LEVEL_FAST_10,
	ZIO_ZSTD_LEVEL_FAST_20,
	ZIO_ZSTD_LEVEL_FAST_30,
	ZIO_ZSTD_LEVEL_FAST_40,
	ZIO_ZSTD_LEVEL_FAST_50,
	ZIO_ZSTD_LEVEL_FAST_60,
	ZIO_ZSTD_LEVEL_FAST_70,
	ZIO_ZSTD_LEVEL_FAST_80,
	ZIO_ZSTD_LEVEL_FAST_90,
	ZIO_ZSTD_LEVEL_FAST_100,
	ZIO_ZSTD_LEVEL_FAST_500,
	ZIO_ZSTD_LEVEL_FAST_1000,
#define	ZIO_ZSTD_LEVEL_FAST_MAX	ZIO_ZSTD_LEVEL_FAST_1000
	ZIO_ZSTD_LEVEL_AUTO = 251, /* Reserved for future use */
	ZIO_ZSTD_LEVEL_LEVELS
};

#define	ZIO_COMPRESS_ON_VALUE	ZIO_COMPRESS_LZJB
#define	ZIO_COMPRESS_DEFAULT	ZIO_COMPRESS_OFF

-/* nvlist pack encoding */
-#define	NV_ENCODE_NATIVE	0
-#define	NV_ENCODE_XDR		1
-
-typedef enum {
-	DATA_TYPE_UNKNOWN = 0,
-	DATA_TYPE_BOOLEAN,
-	DATA_TYPE_BYTE,
-	DATA_TYPE_INT16,
-	DATA_TYPE_UINT16,
-	DATA_TYPE_INT32,
-	DATA_TYPE_UINT32,
-	DATA_TYPE_INT64,
-	DATA_TYPE_UINT64,
-	DATA_TYPE_STRING,
-	DATA_TYPE_BYTE_ARRAY,
-	DATA_TYPE_INT16_ARRAY,
-	DATA_TYPE_UINT16_ARRAY,
-	DATA_TYPE_INT32_ARRAY,
-	DATA_TYPE_UINT32_ARRAY,
-	DATA_TYPE_INT64_ARRAY,
-	DATA_TYPE_UINT64_ARRAY,
-	DATA_TYPE_STRING_ARRAY,
-	DATA_TYPE_HRTIME,
-	DATA_TYPE_NVLIST,
-	DATA_TYPE_NVLIST_ARRAY,
-	DATA_TYPE_BOOLEAN_VALUE,
-	DATA_TYPE_INT8,
-	DATA_TYPE_UINT8,
-	DATA_TYPE_BOOLEAN_ARRAY,
-	DATA_TYPE_INT8_ARRAY,
-	DATA_TYPE_UINT8_ARRAY
-} data_type_t;
-
/*
 * On-disk version number.
*/
/* Sequentially numbered pool versions 1 through 28, then 5000. */
#define	SPA_VERSION_1			1ULL
#define	SPA_VERSION_2			2ULL
#define	SPA_VERSION_3			3ULL
#define	SPA_VERSION_4			4ULL
#define	SPA_VERSION_5			5ULL
#define	SPA_VERSION_6			6ULL
#define	SPA_VERSION_7			7ULL
#define	SPA_VERSION_8			8ULL
#define	SPA_VERSION_9			9ULL
#define	SPA_VERSION_10			10ULL
#define	SPA_VERSION_11			11ULL
#define	SPA_VERSION_12			12ULL
#define	SPA_VERSION_13			13ULL
#define	SPA_VERSION_14			14ULL
#define	SPA_VERSION_15			15ULL
#define	SPA_VERSION_16			16ULL
#define	SPA_VERSION_17			17ULL
#define	SPA_VERSION_18			18ULL
#define	SPA_VERSION_19			19ULL
#define	SPA_VERSION_20			20ULL
#define	SPA_VERSION_21			21ULL
#define	SPA_VERSION_22			22ULL
#define	SPA_VERSION_23			23ULL
#define	SPA_VERSION_24			24ULL
#define	SPA_VERSION_25			25ULL
#define	SPA_VERSION_26			26ULL
#define	SPA_VERSION_27			27ULL
#define	SPA_VERSION_28			28ULL
#define	SPA_VERSION_5000		5000ULL

/*
 * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
 * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
 * and do the appropriate changes.  Also bump the version number in
 * usr/src/grub/capability.
 */
/* Keep SPA_VERSION_STRING in step with SPA_VERSION. */
#define	SPA_VERSION			SPA_VERSION_5000
#define	SPA_VERSION_STRING		"5000"

/*
 * Symbolic names for the changes that caused a SPA_VERSION switch.
 * Used in the code when checking for presence or absence of a feature.
 * Feel free to define multiple symbolic names for each version if there
 * were multiple changes to on-disk structures during that version.
 *
 * NOTE: When checking the current SPA_VERSION in your code, be sure
 *       to use spa_version() since it reports the version of the
 *       last synced uberblock.  Checking the in-flight version can
 *       be dangerous in some cases.
*/
#define	SPA_VERSION_INITIAL		SPA_VERSION_1
#define	SPA_VERSION_DITTO_BLOCKS	SPA_VERSION_2
#define	SPA_VERSION_SPARES		SPA_VERSION_3
#define	SPA_VERSION_RAID6		SPA_VERSION_3
#define	SPA_VERSION_BPLIST_ACCOUNT	SPA_VERSION_3
#define	SPA_VERSION_RAIDZ_DEFLATE	SPA_VERSION_3
#define	SPA_VERSION_DNODE_BYTES		SPA_VERSION_3
#define	SPA_VERSION_ZPOOL_HISTORY	SPA_VERSION_4
#define	SPA_VERSION_GZIP_COMPRESSION	SPA_VERSION_5
#define	SPA_VERSION_BOOTFS		SPA_VERSION_6
#define	SPA_VERSION_SLOGS		SPA_VERSION_7
#define	SPA_VERSION_DELEGATED_PERMS	SPA_VERSION_8
#define	SPA_VERSION_FUID		SPA_VERSION_9
#define	SPA_VERSION_REFRESERVATION	SPA_VERSION_9
#define	SPA_VERSION_REFQUOTA		SPA_VERSION_9
#define	SPA_VERSION_UNIQUE_ACCURATE	SPA_VERSION_9
#define	SPA_VERSION_L2CACHE		SPA_VERSION_10
#define	SPA_VERSION_NEXT_CLONES		SPA_VERSION_11
#define	SPA_VERSION_ORIGIN		SPA_VERSION_11
#define	SPA_VERSION_DSL_SCRUB		SPA_VERSION_11
#define	SPA_VERSION_SNAP_PROPS		SPA_VERSION_12
#define	SPA_VERSION_USED_BREAKDOWN	SPA_VERSION_13
#define	SPA_VERSION_PASSTHROUGH_X	SPA_VERSION_14
#define	SPA_VERSION_USERSPACE		SPA_VERSION_15
#define	SPA_VERSION_STMF_PROP		SPA_VERSION_16
#define	SPA_VERSION_RAIDZ3		SPA_VERSION_17
#define	SPA_VERSION_USERREFS		SPA_VERSION_18
#define	SPA_VERSION_HOLES		SPA_VERSION_19
#define	SPA_VERSION_ZLE_COMPRESSION	SPA_VERSION_20
#define	SPA_VERSION_DEDUP		SPA_VERSION_21
#define	SPA_VERSION_RECVD_PROPS		SPA_VERSION_22
#define	SPA_VERSION_SLIM_ZIL		SPA_VERSION_23
#define	SPA_VERSION_SA			SPA_VERSION_24
#define	SPA_VERSION_SCAN		SPA_VERSION_25
#define	SPA_VERSION_DIR_CLONES		SPA_VERSION_26
#define	SPA_VERSION_DEADLISTS		SPA_VERSION_26
#define	SPA_VERSION_FAST_SNAP		SPA_VERSION_27
#define	SPA_VERSION_MULTI_REPLACE	SPA_VERSION_28
#define	SPA_VERSION_BEFORE_FEATURES	SPA_VERSION_28
#define	SPA_VERSION_FEATURES		SPA_VERSION_5000

/*
 * True when v lies in either supported range:
 * [SPA_VERSION_INITIAL, SPA_VERSION_BEFORE_FEATURES] or
 * [SPA_VERSION_FEATURES, SPA_VERSION].  Versions between the two
 * ranges are rejected.
 */
#define	SPA_VERSION_IS_SUPPORTED(v) \
	(((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
	((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))

/*
 * The following are
configuration names used in the nvlist describing a pool's
 * configuration.
 */
#define	ZPOOL_CONFIG_VERSION		"version"
#define	ZPOOL_CONFIG_POOL_NAME		"name"
#define	ZPOOL_CONFIG_POOL_STATE		"state"
#define	ZPOOL_CONFIG_POOL_TXG		"txg"
#define	ZPOOL_CONFIG_POOL_GUID		"pool_guid"
#define	ZPOOL_CONFIG_CREATE_TXG		"create_txg"
#define	ZPOOL_CONFIG_TOP_GUID		"top_guid"
#define	ZPOOL_CONFIG_VDEV_TREE		"vdev_tree"
#define	ZPOOL_CONFIG_TYPE		"type"
#define	ZPOOL_CONFIG_CHILDREN		"children"
#define	ZPOOL_CONFIG_ID			"id"
#define	ZPOOL_CONFIG_GUID		"guid"
/* Keys in the "com.delphix:" namespace. */
#define	ZPOOL_CONFIG_INDIRECT_OBJECT	"com.delphix:indirect_object"
#define	ZPOOL_CONFIG_INDIRECT_BIRTHS	"com.delphix:indirect_births"
#define	ZPOOL_CONFIG_PREV_INDIRECT_VDEV	"com.delphix:prev_indirect_vdev"
#define	ZPOOL_CONFIG_PATH		"path"
#define	ZPOOL_CONFIG_DEVID		"devid"
#define	ZPOOL_CONFIG_METASLAB_ARRAY	"metaslab_array"
#define	ZPOOL_CONFIG_METASLAB_SHIFT	"metaslab_shift"
#define	ZPOOL_CONFIG_ASHIFT		"ashift"
#define	ZPOOL_CONFIG_ASIZE		"asize"
#define	ZPOOL_CONFIG_DTL		"DTL"
#define	ZPOOL_CONFIG_STATS		"stats"
#define	ZPOOL_CONFIG_WHOLE_DISK		"whole_disk"
#define	ZPOOL_CONFIG_ERRCOUNT		"error_count"
#define	ZPOOL_CONFIG_NOT_PRESENT	"not_present"
#define	ZPOOL_CONFIG_SPARES		"spares"
#define	ZPOOL_CONFIG_IS_SPARE		"is_spare"
#define	ZPOOL_CONFIG_NPARITY		"nparity"
#define	ZPOOL_CONFIG_HOSTID		"hostid"
#define	ZPOOL_CONFIG_HOSTNAME		"hostname"
#define	ZPOOL_CONFIG_IS_LOG		"is_log"
#define	ZPOOL_CONFIG_TIMESTAMP		"timestamp" /* not stored on disk */
#define	ZPOOL_CONFIG_FEATURES_FOR_READ	"features_for_read"
#define	ZPOOL_CONFIG_VDEV_CHILDREN	"vdev_children"

/*
 * The persistent vdev state is stored as separate values rather than a single
 * 'vdev_state' entry.  This is because a device can be in multiple states, such
 * as offline and degraded.
*/
#define	ZPOOL_CONFIG_OFFLINE		"offline"
#define	ZPOOL_CONFIG_FAULTED		"faulted"
#define	ZPOOL_CONFIG_DEGRADED		"degraded"
#define	ZPOOL_CONFIG_REMOVED		"removed"
#define	ZPOOL_CONFIG_FRU		"fru"
#define	ZPOOL_CONFIG_AUX_STATE		"aux_state"

/* String names for vdev types. */
#define	VDEV_TYPE_ROOT			"root"
#define	VDEV_TYPE_MIRROR		"mirror"
#define	VDEV_TYPE_REPLACING		"replacing"
#define	VDEV_TYPE_RAIDZ			"raidz"
#define	VDEV_TYPE_DISK			"disk"
#define	VDEV_TYPE_FILE			"file"
#define	VDEV_TYPE_MISSING		"missing"
#define	VDEV_TYPE_HOLE			"hole"
#define	VDEV_TYPE_SPARE			"spare"
#define	VDEV_TYPE_LOG			"log"
#define	VDEV_TYPE_L2CACHE		"l2cache"
#define	VDEV_TYPE_INDIRECT		"indirect"

/*
 * This is needed in userland to report the minimum necessary device size.
 */
#define	SPA_MINDEVSIZE		(64ULL << 20)	/* 64 MiB */

/*
 * The location of the pool configuration repository, shared between kernel and
 * userland.
 */
#define	ZPOOL_CACHE		"/boot/zfs/zpool.cache"

/*
 * vdev states are ordered from least to most healthy.
 * A vdev that's CANT_OPEN or below is considered unusable.
 */
typedef enum vdev_state {
	VDEV_STATE_UNKNOWN = 0,	/* Uninitialized vdev			*/
	VDEV_STATE_CLOSED,	/* Not currently open			*/
	VDEV_STATE_OFFLINE,	/* Not allowed to open			*/
	VDEV_STATE_REMOVED,	/* Explicitly removed from system	*/
	VDEV_STATE_CANT_OPEN,	/* Tried to open, but failed		*/
	VDEV_STATE_FAULTED,	/* External request to fault device	*/
	VDEV_STATE_DEGRADED,	/* Replicated vdev with unhealthy kids	*/
	VDEV_STATE_HEALTHY	/* Presumed good			*/
} vdev_state_t;

/*
 * vdev aux states.  When a vdev is in the CANT_OPEN state, the aux field
 * of the vdev stats structure uses these constants to distinguish why.
*/
typedef enum vdev_aux {
	VDEV_AUX_NONE,		/* no error				*/
	VDEV_AUX_OPEN_FAILED,	/* ldi_open_*() or vn_open() failed	*/
	VDEV_AUX_CORRUPT_DATA,	/* bad label or disk contents		*/
	VDEV_AUX_NO_REPLICAS,	/* insufficient number of replicas	*/
	VDEV_AUX_BAD_GUID_SUM,	/* vdev guid sum doesn't match		*/
	VDEV_AUX_TOO_SMALL,	/* vdev size is too small		*/
	VDEV_AUX_BAD_LABEL,	/* the label is OK but invalid		*/
	VDEV_AUX_VERSION_NEWER,	/* on-disk version is too new		*/
	VDEV_AUX_VERSION_OLDER,	/* on-disk version is too old		*/
	VDEV_AUX_SPARED		/* hot spare used in another pool	*/
} vdev_aux_t;

/*
 * pool state.  The following states are written to disk as part of the normal
 * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE.  The remaining states are
 * software abstractions used at various levels to communicate pool state.
 */
typedef enum pool_state {
	POOL_STATE_ACTIVE = 0,		/* In active use		*/
	POOL_STATE_EXPORTED,		/* Explicitly exported		*/
	POOL_STATE_DESTROYED,		/* Explicitly destroyed		*/
	POOL_STATE_SPARE,		/* Reserved for hot spare use	*/
	POOL_STATE_UNINITIALIZED,	/* Internal spa_t state		*/
	POOL_STATE_UNAVAIL,		/* Internal libzfs state	*/
	POOL_STATE_POTENTIALLY_ACTIVE	/* Internal libzfs state	*/
} pool_state_t;

/*
 * The uberblock version is incremented whenever an incompatible on-disk
 * format change is made to the SPA, DMU, or ZAP.
 *
 * Note: the first two fields should never be moved.  When a storage pool
 * is opened, the uberblock must be read off the disk before the version
 * can be checked.  If the ub_version field is moved, we may not detect
 * version mismatch.  If the ub_magic field is moved, applications that
 * expect the magic number in the first word won't work.
 */
#define	UBERBLOCK_MAGIC		0x00bab10c		/* oo-ba-bloc!
*/ #define UBERBLOCK_SHIFT 10 /* up to 1K */ #define MMP_MAGIC 0xa11cea11 /* all-see-all */ #define MMP_INTERVAL_VALID_BIT 0x01 #define MMP_SEQ_VALID_BIT 0x02 #define MMP_FAIL_INT_VALID_BIT 0x04 #define MMP_VALID(ubp) (ubp->ub_magic == UBERBLOCK_MAGIC && \ ubp->ub_mmp_magic == MMP_MAGIC) #define MMP_INTERVAL_VALID(ubp) (MMP_VALID(ubp) && (ubp->ub_mmp_config & \ MMP_INTERVAL_VALID_BIT)) #define MMP_SEQ_VALID(ubp) (MMP_VALID(ubp) && (ubp->ub_mmp_config & \ MMP_SEQ_VALID_BIT)) #define MMP_FAIL_INT_VALID(ubp) (MMP_VALID(ubp) && (ubp->ub_mmp_config & \ MMP_FAIL_INT_VALID_BIT)) #define MMP_INTERVAL(ubp) ((ubp->ub_mmp_config & 0x00000000FFFFFF00) \ >> 8) #define MMP_SEQ(ubp) ((ubp->ub_mmp_config & 0x0000FFFF00000000) \ >> 32) #define MMP_FAIL_INT(ubp) ((ubp->ub_mmp_config & 0xFFFF000000000000) \ >> 48) typedef struct uberblock { uint64_t ub_magic; /* UBERBLOCK_MAGIC */ uint64_t ub_version; /* SPA_VERSION */ uint64_t ub_txg; /* txg of last sync */ uint64_t ub_guid_sum; /* sum of all vdev guids */ uint64_t ub_timestamp; /* UTC time of last sync */ blkptr_t ub_rootbp; /* MOS objset_phys_t */ /* highest SPA_VERSION supported by software that wrote this txg */ uint64_t ub_software_version; /* Maybe missing in uberblocks we read, but always written */ uint64_t ub_mmp_magic; /* * If ub_mmp_delay == 0 and ub_mmp_magic is valid, MMP is off. * Otherwise, nanosec since last MMP write. */ uint64_t ub_mmp_delay; /* * The ub_mmp_config contains the multihost write interval, multihost * fail intervals, sequence number for sub-second granularity, and * valid bit mask. 
This layout is as follows: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * 0 | Fail Intervals| Seq | Write Interval (ms) | VALID | * +-------+-------+-------+-------+-------+-------+-------+-------+ * * This allows a write_interval of (2^24/1000)s, over 4.5 hours * * VALID Bits: * - 0x01 - Write Interval (ms) * - 0x02 - Sequence number exists * - 0x04 - Fail Intervals * - 0xf8 - Reserved */ uint64_t ub_mmp_config; /* * ub_checkpoint_txg indicates two things about the current uberblock: * * 1] If it is not zero then this uberblock is a checkpoint. If it is * zero, then this uberblock is not a checkpoint. * * 2] On checkpointed uberblocks, the value of ub_checkpoint_txg is * the ub_txg that the uberblock had at the time we moved it to * the MOS config. * * The field is set when we checkpoint the uberblock and continues to * hold that value even after we've rewound (unlike the ub_txg that * is reset to a higher value). * * Besides checks used to determine whether we are reopening the * pool from a checkpointed uberblock [see spa_ld_select_uberblock()], * the value of the field is used to determine which ZIL blocks have * been allocated according to the ms_sm when we are rewinding to a * checkpoint. Specifically, if blk_birth > ub_checkpoint_txg, then * the ZIL block is not allocated [see uses of spa_min_claim_txg()]. */ uint64_t ub_checkpoint_txg; } uberblock_t; /* * Flags. */ #define DNODE_MUST_BE_ALLOCATED 1 #define DNODE_MUST_BE_FREE 2 /* * Fixed constants. */ #define DNODE_SHIFT 9 /* 512 bytes */ #define DN_MIN_INDBLKSHIFT 12 /* 4k */ #define DN_MAX_INDBLKSHIFT 17 /* 128k */ #define DNODE_BLOCK_SHIFT 14 /* 16k */ #define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ #define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */ #define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */ /* * Derived constants. 
*/ #define DNODE_MIN_SIZE (1 << DNODE_SHIFT) #define DNODE_MAX_SIZE (1 << DNODE_BLOCK_SHIFT) #define DNODE_BLOCK_SIZE (1 << DNODE_BLOCK_SHIFT) #define DNODE_MIN_SLOTS (DNODE_MIN_SIZE >> DNODE_SHIFT) #define DNODE_MAX_SLOTS (DNODE_MAX_SIZE >> DNODE_SHIFT) #define DN_BONUS_SIZE(dnsize) ((dnsize) - DNODE_CORE_SIZE - \ (1 << SPA_BLKPTRSHIFT)) #define DN_SLOTS_TO_BONUSLEN(slots) DN_BONUS_SIZE((slots) << DNODE_SHIFT) #define DN_OLD_MAX_BONUSLEN (DN_BONUS_SIZE(DNODE_MIN_SIZE)) #define DN_MAX_NBLKPTR ((DNODE_MIN_SIZE - DNODE_CORE_SIZE) >> \ SPA_BLKPTRSHIFT) #define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT) #define DN_ZERO_BONUSLEN (DN_BONUS_SIZE(DNODE_MAX_SIZE) + 1) #define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT) #define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT) #define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT) /* The +2 here is a cheesy way to round up */ #define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \ (DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT))) #define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) #define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \ (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) #define EPB(blkshift, typeshift) (1 << (blkshift - typeshift)) /* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */ #define DNODE_FLAG_USED_BYTES (1<<0) #define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1) /* Does dnode have a SA spill blkptr in bonus? 
*/
#define	DNODE_FLAG_SPILL_BLKPTR	(1<<2)

/*
 * On-disk dnode.  Field order and sizes define the on-disk layout and
 * must not be changed.
 */
typedef struct dnode_phys {
	uint8_t dn_type;		/* dmu_object_type_t */
	uint8_t dn_indblkshift;		/* ln2(indirect block size) */
	uint8_t dn_nlevels;		/* 1=dn_blkptr->data blocks */
	uint8_t dn_nblkptr;		/* length of dn_blkptr */
	uint8_t dn_bonustype;		/* type of data in bonus buffer */
	uint8_t	dn_checksum;		/* ZIO_CHECKSUM type */
	uint8_t	dn_compress;		/* ZIO_COMPRESS type */
	uint8_t dn_flags;		/* DNODE_FLAG_* */
	uint16_t dn_datablkszsec;	/* data block size in 512b sectors */
	uint16_t dn_bonuslen;		/* length of dn_bonus */
	uint8_t dn_extra_slots;		/* # of subsequent slots consumed */
	uint8_t dn_pad2[3];

	/* accounting is protected by dn_dirty_mtx */
	uint64_t dn_maxblkid;		/* largest allocated block ID */
	uint64_t dn_used;		/* bytes (or sectors) of disk space */

	uint64_t dn_pad3[4];

	/*
	 * The tail region is 448 bytes for a 512 byte dnode, and
	 * correspondingly larger for larger dnode sizes. The spill
	 * block pointer, when present, is always at the end of the tail
	 * region. There are three ways this space may be used, using
	 * a 512 byte dnode for this diagram:
	 *
	 * 0       64      128     192     256     320     384     448 (offset)
	 * +---------------+---------------+---------------+-------+
	 * | dn_blkptr[0]  | dn_blkptr[1]  | dn_blkptr[2]  | /     |
	 * +---------------+---------------+---------------+-------+
	 * | dn_blkptr[0]  | dn_bonus[0..319]                      |
	 * +---------------+-----------------------+---------------+
	 * | dn_blkptr[0]  | dn_bonus[0..191]      | dn_spill      |
	 * +---------------+-----------------------+---------------+
	 */
	union {
		blkptr_t dn_blkptr[1+DN_OLD_MAX_BONUSLEN/sizeof (blkptr_t)];
		struct {
			blkptr_t __dn_ignore1;
			uint8_t dn_bonus[DN_OLD_MAX_BONUSLEN];
		};
		struct {
			blkptr_t __dn_ignore2;
			uint8_t __dn_ignore3[DN_OLD_MAX_BONUSLEN -
			    sizeof (blkptr_t)];
			blkptr_t dn_spill;
		};
	};
} dnode_phys_t;

/*
 * Address of the spill block pointer: the last (1 << SPA_BLKPTRSHIFT)
 * bytes of the dnode, whose total size is (dn_extra_slots + 1) slots of
 * (1 << DNODE_SHIFT) bytes each.
 */
#define	DN_SPILL_BLKPTR(dnp)	(blkptr_t *)((char *)(dnp) + \
	(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))

typedef enum dmu_object_byteswap {
	DMU_BSWAP_UINT8,
	DMU_BSWAP_UINT16,
	DMU_BSWAP_UINT32,
	DMU_BSWAP_UINT64,
	DMU_BSWAP_ZAP,
	DMU_BSWAP_DNODE,
	DMU_BSWAP_OBJSET,
	DMU_BSWAP_ZNODE,
	DMU_BSWAP_OLDACL,
	DMU_BSWAP_ACL,
	/*
	 * Allocating a new byteswap type number makes the on-disk format
	 * incompatible with any other format that uses the same number.
	 *
	 * Data can usually be structured to work with one of the
	 * DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types.
	 */
	DMU_BSWAP_NUMFUNCS
} dmu_object_byteswap_t;

/* Bit layout of a DMU_OT()-encoded object type. */
#define	DMU_OT_NEWTYPE 0x80
#define	DMU_OT_METADATA 0x40
#define	DMU_OT_BYTESWAP_MASK 0x3f

/*
 * Defines a uint8_t object type. Object types specify if the data
 * in the object is metadata (boolean) and how to byteswap the data
 * (dmu_object_byteswap_t).
 */
#define	DMU_OT(byteswap, metadata) \
	(DMU_OT_NEWTYPE | \
	((metadata) ? DMU_OT_METADATA : 0) | \
	((byteswap) & DMU_OT_BYTESWAP_MASK))

typedef enum dmu_object_type {
	DMU_OT_NONE,
	/* general: */
	DMU_OT_OBJECT_DIRECTORY,	/* ZAP */
	DMU_OT_OBJECT_ARRAY,		/* UINT64 */
	DMU_OT_PACKED_NVLIST,		/* UINT8 (XDR by nvlist_pack/unpack) */
	DMU_OT_PACKED_NVLIST_SIZE,	/* UINT64 */
	DMU_OT_BPLIST,			/* UINT64 */
	DMU_OT_BPLIST_HDR,		/* UINT64 */
	/* spa: */
	DMU_OT_SPACE_MAP_HEADER,	/* UINT64 */
	DMU_OT_SPACE_MAP,		/* UINT64 */
	/* zil: */
	DMU_OT_INTENT_LOG,		/* UINT64 */
	/* dmu: */
	DMU_OT_DNODE,			/* DNODE */
	DMU_OT_OBJSET,			/* OBJSET */
	/* dsl: */
	DMU_OT_DSL_DIR,			/* UINT64 */
	DMU_OT_DSL_DIR_CHILD_MAP,	/* ZAP */
	DMU_OT_DSL_DS_SNAP_MAP,		/* ZAP */
	DMU_OT_DSL_PROPS,		/* ZAP */
	DMU_OT_DSL_DATASET,		/* UINT64 */
	/* zpl: */
	DMU_OT_ZNODE,			/* ZNODE */
	DMU_OT_OLDACL,			/* Old ACL */
	DMU_OT_PLAIN_FILE_CONTENTS,	/* UINT8 */
	DMU_OT_DIRECTORY_CONTENTS,	/* ZAP */
	DMU_OT_MASTER_NODE,		/* ZAP */
	DMU_OT_UNLINKED_SET,		/* ZAP */
	/* zvol: */
	DMU_OT_ZVOL,			/* UINT8 */
	DMU_OT_ZVOL_PROP,		/* ZAP */
	/* other; for testing only! */
	DMU_OT_PLAIN_OTHER,		/* UINT8 */
	DMU_OT_UINT64_OTHER,		/* UINT64 */
	DMU_OT_ZAP_OTHER,		/* ZAP */
	/* new object types: */
	DMU_OT_ERROR_LOG,		/* ZAP */
	DMU_OT_SPA_HISTORY,		/* UINT8 */
	DMU_OT_SPA_HISTORY_OFFSETS,	/* spa_his_phys_t */
	DMU_OT_POOL_PROPS,		/* ZAP */
	DMU_OT_DSL_PERMS,		/* ZAP */
	DMU_OT_ACL,			/* ACL */
	DMU_OT_SYSACL,			/* SYSACL */
	DMU_OT_FUID,			/* FUID table (Packed NVLIST UINT8) */
	DMU_OT_FUID_SIZE,		/* FUID table size UINT64 */
	DMU_OT_NEXT_CLONES,		/* ZAP */
	DMU_OT_SCAN_QUEUE,		/* ZAP */
	DMU_OT_USERGROUP_USED,		/* ZAP */
	DMU_OT_USERGROUP_QUOTA,		/* ZAP */
	DMU_OT_USERREFS,		/* ZAP */
	DMU_OT_DDT_ZAP,			/* ZAP */
	DMU_OT_DDT_STATS,		/* ZAP */
	DMU_OT_SA,			/* System attr */
	DMU_OT_SA_MASTER_NODE,		/* ZAP */
	DMU_OT_SA_ATTR_REGISTRATION,	/* ZAP */
	DMU_OT_SA_ATTR_LAYOUTS,		/* ZAP */
	DMU_OT_SCAN_XLATE,		/* ZAP */
	DMU_OT_DEDUP,			/* fake dedup BP from ddt_bp_create() */
	DMU_OT_NUMTYPES,

	/*
	 * Names for valid types declared with DMU_OT().
	 */
	DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
	DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
	DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
	DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
	DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
	DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
	DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
	DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
	DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
	DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE)
} dmu_object_type_t;

typedef enum dmu_objset_type {
	DMU_OST_NONE,
	DMU_OST_META,
	DMU_OST_ZFS,
	DMU_OST_ZVOL,
	DMU_OST_OTHER,			/* For testing only! */
	DMU_OST_ANY,			/* Be careful! */
	DMU_OST_NUMTYPES
} dmu_objset_type_t;

#define	ZAP_MAXVALUELEN	(1024 * 8)

/*
 * header for all bonus and spill buffers.
 * The header has a fixed portion with a variable number
 * of "lengths" depending on the number of variable sized
 * attributes which are determined by the "layout number"
 */

#define	SA_MAGIC	0x2F505A  /* ZFS SA */
typedef struct sa_hdr_phys {
	uint32_t sa_magic;
	uint16_t sa_layout_info;  /* Encoded with hdrsize and layout number */
	uint16_t sa_lengths[1];	/* optional sizes for variable length attrs */
	/* ... Data follows the lengths.  */
} sa_hdr_phys_t;

/*
 * sa_hdr_phys -> sa_layout_info
 *
 * 16      10       0
 * +--------+-------+
 * | hdrsz  |layout |
 * +--------+-------+
 *
 * Bits 0-10 are the layout number
 * Bits 11-16 are the size of the header.
 * The hdrsize is the number * 8
 *
 * For example.
* hdrsz of 1 ==> 8 byte header * 2 ==> 16 byte header * */ #define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10) #define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0) #define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \ { \ BF32_SET_SB(x, 10, 6, 3, 0, size); \ BF32_SET(x, 0, 10, num); \ } #define SA_MODE_OFFSET 0 #define SA_SIZE_OFFSET 8 #define SA_GEN_OFFSET 16 #define SA_UID_OFFSET 24 #define SA_GID_OFFSET 32 #define SA_PARENT_OFFSET 40 #define SA_SYMLINK_OFFSET 160 #define ZIO_OBJSET_MAC_LEN 32 /* * Intent log header - this on disk structure holds fields to manage * the log. All fields are 64 bit to easily handle cross architectures. */ typedef struct zil_header { uint64_t zh_claim_txg; /* txg in which log blocks were claimed */ uint64_t zh_replay_seq; /* highest replayed sequence number */ blkptr_t zh_log; /* log chain */ uint64_t zh_claim_seq; /* highest claimed sequence number */ uint64_t zh_pad[5]; } zil_header_t; #define OBJSET_PHYS_SIZE_V2 2048 #define OBJSET_PHYS_SIZE_V3 4096 typedef struct objset_phys { dnode_phys_t os_meta_dnode; zil_header_t os_zil_header; uint64_t os_type; uint64_t os_flags; uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN]; uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN]; char os_pad0[OBJSET_PHYS_SIZE_V2 - sizeof (dnode_phys_t)*3 - sizeof (zil_header_t) - sizeof (uint64_t)*2 - 2*ZIO_OBJSET_MAC_LEN]; dnode_phys_t os_userused_dnode; dnode_phys_t os_groupused_dnode; dnode_phys_t os_projectused_dnode; char os_pad1[OBJSET_PHYS_SIZE_V3 - OBJSET_PHYS_SIZE_V2 - sizeof (dnode_phys_t)]; } objset_phys_t; typedef struct dsl_dir_phys { uint64_t dd_creation_time; /* not actually used */ uint64_t dd_head_dataset_obj; uint64_t dd_parent_obj; uint64_t dd_clone_parent_obj; uint64_t dd_child_dir_zapobj; /* * how much space our children are accounting for; for leaf * datasets, == physical space used by fs + snaps */ uint64_t dd_used_bytes; uint64_t dd_compressed_bytes; uint64_t dd_uncompressed_bytes; /* Administrative quota setting */ 
uint64_t dd_quota; /* Administrative reservation setting */ uint64_t dd_reserved; uint64_t dd_props_zapobj; uint64_t dd_pad[21]; /* pad out to 256 bytes for good measure */ } dsl_dir_phys_t; typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; uint64_t ds_prev_snap_obj; uint64_t ds_prev_snap_txg; uint64_t ds_next_snap_obj; uint64_t ds_snapnames_zapobj; /* zap obj of snaps; ==0 for snaps */ uint64_t ds_num_children; /* clone/snap children; ==0 for head */ uint64_t ds_creation_time; /* seconds since 1970 */ uint64_t ds_creation_txg; uint64_t ds_deadlist_obj; uint64_t ds_used_bytes; uint64_t ds_compressed_bytes; uint64_t ds_uncompressed_bytes; uint64_t ds_unique_bytes; /* only relevant to snapshots */ /* * The ds_fsid_guid is a 56-bit ID that can change to avoid * collisions. The ds_guid is a 64-bit ID that will never * change, so there is a small probability that it will collide. */ uint64_t ds_fsid_guid; uint64_t ds_guid; uint64_t ds_flags; blkptr_t ds_bp; uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */ } dsl_dataset_phys_t; /* * The names of zap entries in the DIRECTORY_OBJECT of the MOS. 
*/
#define	DMU_POOL_DIRECTORY_OBJECT	1
#define	DMU_POOL_CONFIG			"config"
#define	DMU_POOL_FEATURES_FOR_READ	"features_for_read"
#define	DMU_POOL_ROOT_DATASET		"root_dataset"
#define	DMU_POOL_SYNC_BPLIST		"sync_bplist"
#define	DMU_POOL_ERRLOG_SCRUB		"errlog_scrub"
#define	DMU_POOL_ERRLOG_LAST		"errlog_last"
#define	DMU_POOL_SPARES			"spares"
#define	DMU_POOL_DEFLATE		"deflate"
#define	DMU_POOL_HISTORY		"history"
#define	DMU_POOL_PROPS			"pool_props"
#define	DMU_POOL_CHECKSUM_SALT		"org.illumos:checksum_salt"
#define	DMU_POOL_REMOVING		"com.delphix:removing"
#define	DMU_POOL_OBSOLETE_BPOBJ		"com.delphix:obsolete_bpobj"
#define	DMU_POOL_CONDENSING_INDIRECT	"com.delphix:condensing_indirect"
#define	DMU_POOL_ZPOOL_CHECKPOINT	"com.delphix:zpool_checkpoint"

#define	ZAP_MAGIC 0x2F52AB2ABULL

#define	FZAP_BLOCK_SHIFT(zap)	((zap)->zap_block_shift)

#define	ZAP_MAXCD		(uint32_t)(-1)
#define	ZAP_HASHBITS		28
/* A micro-zap entry is 64 bytes: 8 (value) + 4 (cd) + 2 (pad) + name. */
#define	MZAP_ENT_LEN		64
#define	MZAP_NAME_LEN		(MZAP_ENT_LEN - 8 - 4 - 2)
#define	MZAP_MAX_BLKSZ		SPA_OLD_MAXBLOCKSIZE

typedef struct mzap_ent_phys {
	uint64_t mze_value;
	uint32_t mze_cd;
	uint16_t mze_pad;	/* in case we want to chain them someday */
	char mze_name[MZAP_NAME_LEN];
} mzap_ent_phys_t;

typedef struct mzap_phys {
	uint64_t mz_block_type;	/* ZBT_MICRO */
	uint64_t mz_salt;
	uint64_t mz_normflags;
	uint64_t mz_pad[5];
	mzap_ent_phys_t mz_chunk[1];
	/* actually variable size depending on block size */
} mzap_phys_t;

/*
 * The (fat) zap is stored in one object. It is an array of
 * 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
 *
 * ptrtbl fits in first block:
 * 	[zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
 *
 * ptrtbl too big for first block:
 * 	[zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
 *
 * NOTE(review): the middle of this comment was lost in this copy of the
 * file (only "1<" and "= 6] [zap_leaf_t] [ptrtbl] ..." survived); the
 * text above is a reconstruction -- confirm against the pristine header.
 */

#define	ZBT_LEAF		((1ULL << 63) + 0)
#define	ZBT_HEADER		((1ULL << 63) + 1)
#define	ZBT_MICRO		((1ULL << 63) + 3)
/* any other values are ptrtbl blocks */

/*
 * the embedded pointer table takes up half a block:
 * block size / entry size (2^3) / 2
 */
#define	ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)

/*
 * The embedded pointer table starts half-way through the block.
Since
 * the pointer table itself is half the block, it starts at (64-bit)
 * word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
 *
 * NOTE(review): this copy of the file is damaged at this point.  The
 * surviving text reads
 *	"word number (1<zap_phys) \ [(idx) + (1<l_bs) - hash entry size"
 * so everything between the sentence above and the "l_bs)" of the
 * sentence below is missing.  That presumably includes the definition
 * of ZAP_LEAF_CHUNKSIZE, which the macros below use but which does not
 * appear in this part of the file.  Restore the lost definitions from
 * the pristine header; they are deliberately not guessed at here.
 *
 * The amount of space available for chunks is:
 * block size (1<<l->l_bs) - hash entry size (2) * number of hash
 * entries - header space (2*chunksize)
 */
#define	ZAP_LEAF_NUMCHUNKS(l) \
	(((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
	ZAP_LEAF_CHUNKSIZE - 2)

/*
 * The amount of space within the chunk available for the array is:
 * chunk size - space for type (1) - space for next pointer (2)
 */
#define	ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3)

/* Number of array chunks needed to hold the given byte count (rounds up). */
#define	ZAP_LEAF_ARRAY_NCHUNKS(bytes) \
	(((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES)

/*
 * Low water mark:  when there are only this many chunks free, start
 * growing the ptrtbl.  Ideally, this should be larger than a
 * "reasonably-sized" entry.  20 chunks is more than enough for the
 * largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
 * while still being only around 3% for 16k blocks.
 */
#define	ZAP_LEAF_LOW_WATER (20)

/*
 * The leaf hash table has block size / 2^5 (32) number of entries,
 * which should be more than enough for the maximum number of entries,
 * which is less than block size / CHUNKSIZE (24) / minimum number of
 * chunks per entry (3).
 */
#define	ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
#define	ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))

/*
 * The chunks start immediately after the hash table.  The end of the
 * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
 * chunk_t.
 */
#define	ZAP_LEAF_CHUNK(l, idx) \
	((zap_leaf_chunk_t *) \
	((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx]
#define	ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry)

/* Values stored in the first (type) byte of each leaf chunk. */
typedef enum zap_chunk_type {
	ZAP_CHUNK_FREE = 253,
	ZAP_CHUNK_ENTRY = 252,
	ZAP_CHUNK_ARRAY = 251,
	ZAP_CHUNK_TYPE_MAX = 250
} zap_chunk_type_t;

/*
 * TAKE NOTE:
 * If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
*/ typedef struct zap_leaf_phys { struct zap_leaf_header { uint64_t lh_block_type; /* ZBT_LEAF */ uint64_t lh_pad1; uint64_t lh_prefix; /* hash prefix of this leaf */ uint32_t lh_magic; /* ZAP_LEAF_MAGIC */ uint16_t lh_nfree; /* number free chunks */ uint16_t lh_nentries; /* number of entries */ uint16_t lh_prefix_len; /* num bits used to id this */ /* above is accessable to zap, below is zap_leaf private */ uint16_t lh_freelist; /* chunk head of free list */ uint8_t lh_pad2[12]; } l_hdr; /* 2 24-byte chunks */ /* * The header is followed by a hash table with * ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is * followed by an array of ZAP_LEAF_NUMCHUNKS(zap) * zap_leaf_chunk structures. These structures are accessed * with the ZAP_LEAF_CHUNK() macro. */ uint16_t l_hash[1]; } zap_leaf_phys_t; typedef union zap_leaf_chunk { struct zap_leaf_entry { uint8_t le_type; /* always ZAP_CHUNK_ENTRY */ uint8_t le_value_intlen; /* size of ints */ uint16_t le_next; /* next entry in hash chain */ uint16_t le_name_chunk; /* first chunk of the name */ uint16_t le_name_numints; /* bytes in name, incl null */ uint16_t le_value_chunk; /* first chunk of the value */ uint16_t le_value_numints; /* value length in ints */ uint32_t le_cd; /* collision differentiator */ uint64_t le_hash; /* hash value of the name */ } l_entry; struct zap_leaf_array { uint8_t la_type; /* always ZAP_CHUNK_ARRAY */ uint8_t la_array[ZAP_LEAF_ARRAY_BYTES]; uint16_t la_next; /* next blk or CHAIN_END */ } l_array; struct zap_leaf_free { uint8_t lf_type; /* always ZAP_CHUNK_FREE */ uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES]; uint16_t lf_next; /* next in free list, or CHAIN_END */ } l_free; } zap_leaf_chunk_t; typedef struct zap_leaf { int l_bs; /* block size shift */ zap_leaf_phys_t *l_phys; } zap_leaf_t; /* * Define special zfs pflags */ #define ZFS_XATTR 0x1 /* is an extended attribute */ #define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */ #define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */ #define 
MASTER_NODE_OBJ 1 /* * special attributes for master node. */ #define ZFS_FSID "FSID" #define ZFS_UNLINKED_SET "DELETE_QUEUE" #define ZFS_ROOT_OBJ "ROOT" #define ZPL_VERSION_OBJ "VERSION" #define ZFS_PROP_BLOCKPERPAGE "BLOCKPERPAGE" #define ZFS_PROP_NOGROWBLOCKS "NOGROWBLOCKS" #define ZFS_FLAG_BLOCKPERPAGE 0x1 #define ZFS_FLAG_NOGROWBLOCKS 0x2 /* * ZPL version - rev'd whenever an incompatible on-disk format change * occurs. Independent of SPA/DMU/ZAP versioning. */ #define ZPL_VERSION 1ULL /* * The directory entry has the type (currently unused on Solaris) in the * top 4 bits, and the object number in the low 48 bits. The "middle" * 12 bits are unused. */ #define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4) #define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) #define ZFS_DIRENT_MAKE(type, obj) (((uint64_t)type << 60) | obj) typedef struct ace { uid_t a_who; /* uid or gid */ uint32_t a_access_mask; /* read,write,... */ uint16_t a_flags; /* see below */ uint16_t a_type; /* allow or deny */ } ace_t; #define ACE_SLOT_CNT 6 typedef struct zfs_znode_acl { uint64_t z_acl_extern_obj; /* ext acl pieces */ uint32_t z_acl_count; /* Number of ACEs */ uint16_t z_acl_version; /* acl version */ uint16_t z_acl_pad; /* pad */ ace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */ } zfs_znode_acl_t; /* * This is the persistent portion of the znode. It is stored * in the "bonus buffer" of the file. Short symbolic links * are also stored in the bonus buffer. 
*/
/*
 * The byte offset of each member is given at the start of its comment;
 * the layout is the on-disk format, so members must not be reordered or
 * resized.
 */
typedef struct znode_phys {
	uint64_t zp_atime[2];		/*  0 - last file access time */
	uint64_t zp_mtime[2];		/* 16 - last file modification time */
	uint64_t zp_ctime[2];		/* 32 - last file change time */
	uint64_t zp_crtime[2];		/* 48 - creation time */
	uint64_t zp_gen;		/* 64 - generation (txg of creation) */
	uint64_t zp_mode;		/* 72 - file mode bits */
	uint64_t zp_size;		/* 80 - size of file */
	uint64_t zp_parent;		/* 88 - directory parent (`..') */
	uint64_t zp_links;		/* 96 - number of links to file */
	uint64_t zp_xattr;		/* 104 - DMU object for xattrs */
	uint64_t zp_rdev;		/* 112 - dev_t for VBLK & VCHR files */
	uint64_t zp_flags;		/* 120 - persistent flags */
	uint64_t zp_uid;		/* 128 - file owner */
	uint64_t zp_gid;		/* 136 - owning group */
	uint64_t zp_pad[4];		/* 144 - future */
	zfs_znode_acl_t zp_acl;		/* 176 - 263 ACL */
	/*
	 * Data may pad out any remaining bytes in the znode buffer, eg:
	 *
	 * |<---------------------- dnode_phys (512) ------------------------>|
	 * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
	 *			|<---- znode (264) ---->|<---- data (56) ---->|
	 *
	 * At present, we only use this space to store symbolic links.
	 */
} znode_phys_t;

/*
 * In-core vdev representation.
 */
struct vdev;
struct spa;

/*
 * Function types stored in struct vdev (v_phys_read, v_phys_write and
 * v_read).  The parameters are unnamed here.
 * NOTE(review): presumably (vdev, [private/bp], buffer, offset, size)
 * with an errno-style int result -- confirm against the implementations.
 */
typedef int vdev_phys_read_t(struct vdev *, void *, off_t, void *, size_t);
typedef int vdev_phys_write_t(struct vdev *, off_t, void *, size_t);
typedef int vdev_read_t(struct vdev *, const blkptr_t *, void *, off_t,
    size_t);

typedef STAILQ_HEAD(vdev_list, vdev) vdev_list_t;

/* One on-disk mapping record: a source offset and the DVA it maps to. */
typedef struct vdev_indirect_mapping_entry_phys {
	/*
	 * Decode with DVA_MAPPING_* macros.
	 * Contains:
	 *   the source offset (low 63 bits)
	 *   the one-bit "mark", used for garbage collection (by zdb)
	 */
	uint64_t vimep_src;

	/*
	 * Note: the DVA's asize is 24 bits, and can thus store ranges
	 * up to 8GB.
	 */
	dva_t	vimep_dst;
} vdev_indirect_mapping_entry_phys_t;

/*
 * Accessors for the source offset held in the low 63 bits of vimep_src;
 * both pass SPA_MINBLOCKSHIFT as the shift argument and 0 as the bias
 * argument of the BF64_*_SB helpers.
 */
#define	DVA_MAPPING_GET_SRC_OFFSET(vimep)	\
	BF64_GET_SB((vimep)->vimep_src, 0, 63, SPA_MINBLOCKSHIFT, 0)
#define	DVA_MAPPING_SET_SRC_OFFSET(vimep, x)	\
	BF64_SET_SB((vimep)->vimep_src, 0, 63, SPA_MINBLOCKSHIFT, 0, x)

/* In-core wrapper: the on-disk record plus a count and a list linkage. */
typedef struct vdev_indirect_mapping_entry {
	vdev_indirect_mapping_entry_phys_t	vime_mapping;
	uint32_t				vime_obsolete_count;
	list_node_t				vime_node;
} vdev_indirect_mapping_entry_t;

/*
 * This is stored in the bonus buffer of the mapping object, see comment of
 * vdev_indirect_config for more details.
 */
typedef struct vdev_indirect_mapping_phys {
	uint64_t	vimp_max_offset;
	uint64_t	vimp_bytes_mapped;
	uint64_t	vimp_num_entries; /* number of v_i_m_entry_phys_t's */

	/*
	 * For each entry in the mapping object, this object contains an
	 * entry representing the number of bytes of that mapping entry
	 * that were no longer in use by the pool at the time this indirect
	 * vdev was last condensed.
	 */
	uint64_t	vimp_counts_object;
} vdev_indirect_mapping_phys_t;

/*
 * Three 64-bit words, i.e. the structure above without its fourth member.
 * NOTE(review): presumably the size of the original (V0) bonus layout,
 * before vimp_counts_object existed -- confirm.
 */
#define	VDEV_INDIRECT_MAPPING_SIZE_V0	(3 * sizeof (uint64_t))

/* In-core state for reading an indirect vdev's mapping object. */
typedef struct vdev_indirect_mapping {
	uint64_t	vim_object;
	boolean_t	vim_havecounts;

	/* vim_entries segment offset currently in memory. */
	uint64_t	vim_entry_offset;
	/* vim_entries segment size. */
	size_t		vim_num_entries;

	/* Needed by dnode_read() */
	const void	*vim_spa;
	dnode_phys_t	*vim_dn;

	/*
	 * An ordered array of mapping entries, sorted by source offset.
	 * Note that vim_entries is needed during a removal (and contains
	 * mappings that have been synced to disk so far) to handle frees
	 * from the removing device.
	 */
	vdev_indirect_mapping_entry_phys_t *vim_entries;
	objset_phys_t	*vim_objset;
	vdev_indirect_mapping_phys_t	*vim_phys;
} vdev_indirect_mapping_t;

/*
 * On-disk indirect vdev state.
 *
 * An indirect vdev is described exclusively in the MOS config of a pool.
 * The config for an indirect vdev includes several fields, which are
 * accessed in memory by a vdev_indirect_config_t.
*/
typedef struct vdev_indirect_config {
	/*
	 * Object (in MOS) which contains the indirect mapping. This object
	 * contains an array of vdev_indirect_mapping_entry_phys_t ordered by
	 * vimep_src. The bonus buffer for this object is a
	 * vdev_indirect_mapping_phys_t. This object is allocated when a vdev
	 * removal is initiated.
	 *
	 * Note that this object can be empty if none of the data on the vdev
	 * has been copied yet.
	 */
	uint64_t	vic_mapping_object;

	/*
	 * Object (in MOS) which contains the birth times for the mapping
	 * entries. This object contains an array of
	 * vdev_indirect_birth_entry_phys_t sorted by vibe_offset. The bonus
	 * buffer for this object is a vdev_indirect_birth_phys_t. This object
	 * is allocated when a vdev removal is initiated.
	 *
	 * Note that this object can be empty if none of the vdev has yet been
	 * copied.
	 */
	uint64_t	vic_births_object;

	/*
	 * This is the vdev ID which was removed previous to this vdev, or
	 * UINT64_MAX if there are no previously removed vdevs.
	 */
	uint64_t	vic_prev_indirect_vdev;
} vdev_indirect_config_t;

/*
 * One node of the in-core vdev tree.  Each vdev sits on two STAILQs (its
 * parent's child list and a global list), owns the list of its own
 * children, and carries the function pointers used to do I/O on it.
 */
typedef struct vdev {
	STAILQ_ENTRY(vdev) v_childlink;	/* link in parent's child list */
	STAILQ_ENTRY(vdev) v_alllink;	/* link in global vdev list */
	vdev_list_t	v_children;	/* children of this vdev */
	const char	*v_name;	/* vdev name */
	uint64_t	v_guid;		/* vdev guid */
	uint64_t	v_id;		/* index in parent */
	uint64_t	v_psize;	/* physical device capacity */
	int		v_ashift;	/* offset to block shift */
	int		v_nparity;	/* # parity for raidz */
	/*
	 * NOTE(review): the member comment says "parent vdev" while the
	 * name reads like "top-level vdev" -- confirm which is meant.
	 */
	struct vdev	*v_top;		/* parent vdev */
	size_t		v_nchildren;	/* # children */
	vdev_state_t	v_state;	/* current state */
	vdev_phys_read_t *v_phys_read;	/* read from raw leaf vdev */
	vdev_phys_write_t *v_phys_write; /* write to raw leaf vdev */
	vdev_read_t	*v_read;	/* read from vdev */
	void		*v_priv;	/* data for read/write function */
	boolean_t	v_islog;
	struct spa	*v_spa;		/* link to spa */
	/*
	 * Values stored in the config for an indirect or removing vdev.
	 */
	vdev_indirect_config_t vdev_indirect_config;
	vdev_indirect_mapping_t *v_mapping;
} vdev_t;

/*
 * In-core pool representation.
 */
typedef STAILQ_HEAD(spa_list, spa) spa_list_t;

/*
 * The structure embeds two uberblock/MOS pairs (master and checkpoint)
 * and additionally holds spa_uberblock/spa_mos pointers.
 * NOTE(review): presumably those pointers select one of the two embedded
 * pairs -- confirm where they are assigned.
 */
typedef struct spa {
	STAILQ_ENTRY(spa) spa_link;	/* link in global pool list */
	char		*spa_name;	/* pool name */
	uint64_t	spa_guid;	/* pool guid */
	uint64_t	spa_txg;	/* most recent transaction */
	struct uberblock *spa_uberblock;	/* best uberblock so far */
	vdev_t		*spa_root_vdev;	/* toplevel vdev container */
	objset_phys_t	*spa_mos;	/* MOS for this pool */
	zio_cksum_salt_t spa_cksum_salt;	/* secret salt for cksum */
	void		*spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS];
	boolean_t	spa_with_log;	/* this pool has log */

	struct uberblock spa_uberblock_master;	/* best uberblock so far */
	objset_phys_t	spa_mos_master;		/* MOS for this pool */
	struct uberblock spa_uberblock_checkpoint; /* checkpoint uberblock */
	objset_phys_t	spa_mos_checkpoint;	/* Checkpoint MOS */
	void		*spa_bootenv;		/* bootenv from pool label */
} spa_t;

/* IO related arguments. */
typedef struct zio {
	spa_t		*io_spa;
	blkptr_t	*io_bp;
	void		*io_data;
	uint64_t	io_size;
	uint64_t	io_offset;

	/* Stuff for the vdev stack */
	vdev_t		*io_vd;
	void		*io_vsd;

	int		io_error;
} zio_t;

/*
 * Forward declaration of a file-local (static) function.
 * NOTE(review): because it is static and lives in a header, every
 * translation unit that includes this file needs its own definition or
 * will get a declared-but-not-defined diagnostic -- confirm this header
 * is only included where the definition is also present.
 */
static void decode_embedded_bp_compressed(const blkptr_t *, void *);

#endif /* _ZFSIMPL_H_ */