diff --git a/usr.sbin/makefs/zfs/dsl.c b/usr.sbin/makefs/zfs/dsl.c index 93083f286e81..f7264b9d2ca7 100644 --- a/usr.sbin/makefs/zfs/dsl.c +++ b/usr.sbin/makefs/zfs/dsl.c @@ -1,628 +1,626 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * * This software was developed by Mark Johnston under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include "makefs.h" #include "zfs.h" typedef struct zfs_dsl_dataset { zfs_objset_t *os; /* referenced objset, may be null */ dsl_dataset_phys_t *phys; /* on-disk representation */ uint64_t dsid; /* DSL dataset dnode */ struct zfs_dsl_dir *dir; /* containing parent */ } zfs_dsl_dataset_t; typedef STAILQ_HEAD(zfs_dsl_dir_list, zfs_dsl_dir) zfs_dsl_dir_list_t; typedef struct zfs_dsl_dir { char *fullname; /* full dataset name */ char *name; /* basename(fullname) */ dsl_dir_phys_t *phys; /* on-disk representation */ nvlist_t *propsnv; /* properties saved in propszap */ zfs_dsl_dataset_t *headds; /* principal dataset, may be null */ uint64_t dirid; /* DSL directory dnode */ zfs_zap_t *propszap; /* dataset properties */ zfs_zap_t *childzap; /* child directories */ /* DSL directory tree linkage. */ struct zfs_dsl_dir *parent; zfs_dsl_dir_list_t children; STAILQ_ENTRY(zfs_dsl_dir) next; } zfs_dsl_dir_t; static zfs_dsl_dir_t *dsl_dir_alloc(zfs_opt_t *zfs, const char *name); static zfs_dsl_dataset_t *dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir); static int nvlist_find_string(nvlist_t *nvl, const char *key, char **retp) { char *str; int error, len; error = nvlist_find(nvl, key, DATA_TYPE_STRING, NULL, &str, &len); if (error == 0) { *retp = ecalloc(1, len + 1); memcpy(*retp, str, len); } return (error); } static int nvlist_find_uint64(nvlist_t *nvl, const char *key, uint64_t *retp) { return (nvlist_find(nvl, key, DATA_TYPE_UINT64, NULL, retp, NULL)); } /* * Return an allocated string containing the head dataset's mountpoint, * including the root path prefix. * * If the dataset has a mountpoint property, it is returned. Otherwise we have * to follow ZFS' inheritance rules. 
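 *
 * For example (hypothetical names), with a root path of "/r", if "pool" sets
 * mountpoint=/r and its child "pool/usr" sets no mountpoint, the walk below
 * prepends each ancestor's name until it reaches an ancestor with an explicit
 * mountpoint and yields "/r/usr".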
*/ char * dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) { zfs_dsl_dir_t *pdir; char *mountpoint; if (nvlist_find_string(dir->propsnv, "mountpoint", &mountpoint) == 0) { if (strcmp(mountpoint, "none") == 0) return (NULL); } else { /* * If we don't have a mountpoint, it's inherited from one of our * ancestors. Walk up the hierarchy until we find it, building * up our mountpoint along the way. The mountpoint property is * always set for the root dataset. */ for (pdir = dir->parent, mountpoint = estrdup(dir->name);; pdir = pdir->parent) { char *origmountpoint, *tmp; origmountpoint = mountpoint; if (nvlist_find_string(pdir->propsnv, "mountpoint", &tmp) == 0) { easprintf(&mountpoint, "%s%s%s", tmp, tmp[strlen(tmp) - 1] == '/' ? "" : "/", origmountpoint); free(tmp); free(origmountpoint); break; } easprintf(&mountpoint, "%s/%s", pdir->name, origmountpoint); free(origmountpoint); } } assert(mountpoint[0] == '/'); assert(strstr(mountpoint, zfs->rootpath) == mountpoint); return (mountpoint); } int dsl_dir_get_canmount(zfs_dsl_dir_t *dir, uint64_t *canmountp) { return (nvlist_find_uint64(dir->propsnv, "canmount", canmountp)); } /* * Handle dataset properties that we know about; stash them into an nvlist to be * written later to the properties ZAP object. * * If the set of properties we handle grows too much, we should probably explore * using libzfs to manage them. */ static void dsl_dir_set_prop(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, const char *key, const char *val) { nvlist_t *nvl; nvl = dir->propsnv; if (val == NULL || val[0] == '\0') errx(1, "missing value for property `%s'", key); if (nvpair_find(nvl, key) != NULL) errx(1, "property `%s' already set", key); if (strcmp(key, "mountpoint") == 0) { if (strcmp(val, "none") != 0) { if (val[0] != '/') errx(1, "mountpoint `%s' is not absolute", val); if (strcmp(val, zfs->rootpath) != 0 && strcmp(zfs->rootpath, "/") != 0 && (strstr(val, zfs->rootpath) != val || val[strlen(zfs->rootpath)] != '/')) { errx(1, "mountpoint `%s' is not prefixed by " "the root path `%s'", val, zfs->rootpath); } } nvlist_add_string(nvl, key, val); } else if (strcmp(key, "atime") == 0 || strcmp(key, "exec") == 0 || strcmp(key, "setuid") == 0) { if (strcmp(val, "on") == 0) nvlist_add_uint64(nvl, key, 1); else if (strcmp(val, "off") == 0) nvlist_add_uint64(nvl, key, 0); else errx(1, "invalid value `%s' for %s", val, key); } else if (strcmp(key, "canmount") == 0) { if (strcmp(val, "noauto") == 0) nvlist_add_uint64(nvl, key, 2); else if (strcmp(val, "on") == 0) nvlist_add_uint64(nvl, key, 1); else if (strcmp(val, "off") == 0) nvlist_add_uint64(nvl, key, 0); else errx(1, "invalid value `%s' for %s", val, key); } else { errx(1, "unknown property `%s'", key); } } static zfs_dsl_dir_t * dsl_metadir_alloc(zfs_opt_t *zfs, const char *name) { zfs_dsl_dir_t *dir; char *path; easprintf(&path, "%s/%s", zfs->poolname, name); dir = dsl_dir_alloc(zfs, path); free(path); return (dir); } static void dsl_origindir_init(zfs_opt_t *zfs) { dnode_phys_t *clones; uint64_t clonesid; zfs->origindsldir = dsl_metadir_alloc(zfs, "$ORIGIN"); zfs->originds = dsl_dataset_alloc(zfs, zfs->origindsldir); zfs->snapds = dsl_dataset_alloc(zfs, zfs->origindsldir); clones = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_CLONES, &clonesid); zfs->cloneszap = zap_alloc(zfs->mos, clones); zfs->origindsldir->phys->dd_clones = clonesid; } void dsl_init(zfs_opt_t *zfs) { zfs_dsl_dir_t *dir; struct dataset_desc *d; const char *dspropdelim; dspropdelim = ";"; zfs->rootdsldir = dsl_dir_alloc(zfs, NULL); 
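	/*
	 * makefs writes file system blocks uncompressed, so record
	 * compression=off as a pool-wide default in the root directory's
	 * properties; child datasets inherit it unless they override it.
	 */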
nvlist_add_uint64(zfs->rootdsldir->propsnv, "compression", ZIO_COMPRESS_OFF); zfs->rootds = dsl_dataset_alloc(zfs, zfs->rootdsldir); zfs->rootdsldir->headds = zfs->rootds; zfs->mosdsldir = dsl_metadir_alloc(zfs, "$MOS"); zfs->freedsldir = dsl_metadir_alloc(zfs, "$FREE"); dsl_origindir_init(zfs); /* * Go through the list of user-specified datasets and create DSL objects * for them. */ STAILQ_FOREACH(d, &zfs->datasetdescs, next) { char *dsname, *next, *params, *param, *nextparam; params = d->params; dsname = strsep(¶ms, dspropdelim); if (strcmp(dsname, zfs->poolname) == 0) { /* * This is the root dataset; it's already created, so * we're just setting options. */ dir = zfs->rootdsldir; } else { /* * This dataset must be a child of the root dataset. */ if (strstr(dsname, zfs->poolname) != dsname || (next = strchr(dsname, '/')) == NULL || (size_t)(next - dsname) != strlen(zfs->poolname)) { errx(1, "dataset `%s' must be a child of `%s'", dsname, zfs->poolname); } dir = dsl_dir_alloc(zfs, dsname); dir->headds = dsl_dataset_alloc(zfs, dir); } for (nextparam = param = params; nextparam != NULL;) { char *key, *val; param = strsep(&nextparam, dspropdelim); key = val = param; key = strsep(&val, "="); dsl_dir_set_prop(zfs, dir, key, val); } } /* * Set the root dataset's mount point if the user didn't override the * default. */ if (nvpair_find(zfs->rootdsldir->propsnv, "mountpoint") == NULL) { nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint", zfs->rootpath); } } uint64_t dsl_dir_id(zfs_dsl_dir_t *dir) { return (dir->dirid); } uint64_t dsl_dir_dataset_id(zfs_dsl_dir_t *dir) { return (dir->headds->dsid); } static void dsl_dir_foreach_post(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) { zfs_dsl_dir_t *cdsldir; STAILQ_FOREACH(cdsldir, &dsldir->children, next) { dsl_dir_foreach_post(zfs, cdsldir, cb, arg); } cb(zfs, dsldir, arg); } /* * Used when the caller doesn't care about the order one way or another. */ void dsl_dir_foreach(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) { dsl_dir_foreach_post(zfs, dsldir, cb, arg); } const char * dsl_dir_fullname(const zfs_dsl_dir_t *dir) { return (dir->fullname); } /* * Create a DSL directory, which is effectively an entry in the ZFS namespace. * We always create a root DSL directory, whose name is the pool's name, and * several metadata directories. * * Each directory has two ZAP objects, one pointing to child directories, and * one for properties (which are inherited by children unless overridden). * Directories typically reference a DSL dataset, the "head dataset", which * points to an object set. */ static zfs_dsl_dir_t * dsl_dir_alloc(zfs_opt_t *zfs, const char *name) { zfs_dsl_dir_list_t l, *lp; zfs_dsl_dir_t *dir, *parent; dnode_phys_t *dnode; char *dirname, *nextdir, *origname; uint64_t childid, propsid; dir = ecalloc(1, sizeof(*dir)); dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DIR, DMU_OT_DSL_DIR, sizeof(dsl_dir_phys_t), &dir->dirid); dir->phys = (dsl_dir_phys_t *)DN_BONUS(dnode); dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_PROPS, &propsid); dir->propszap = zap_alloc(zfs->mos, dnode); dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DIR_CHILD_MAP, &childid); dir->childzap = zap_alloc(zfs->mos, dnode); dir->propsnv = nvlist_create(NV_UNIQUE_NAME); STAILQ_INIT(&dir->children); dir->phys->dd_child_dir_zapobj = childid; dir->phys->dd_props_zapobj = propsid; if (name == NULL) { /* * This is the root DSL directory. 
*/ dir->name = estrdup(zfs->poolname); dir->fullname = estrdup(zfs->poolname); dir->parent = NULL; dir->phys->dd_parent_obj = 0; assert(zfs->rootdsldir == NULL); zfs->rootdsldir = dir; return (dir); } /* * Insert the new directory into the hierarchy. Currently this must be * done in order, e.g., when creating pool/a/b, pool/a must already * exist. */ STAILQ_INIT(&l); STAILQ_INSERT_HEAD(&l, zfs->rootdsldir, next); origname = dirname = nextdir = estrdup(name); for (lp = &l;; lp = &parent->children) { dirname = strsep(&nextdir, "/"); if (nextdir == NULL) break; STAILQ_FOREACH(parent, lp, next) { if (strcmp(parent->name, dirname) == 0) break; } if (parent == NULL) { errx(1, "no parent at `%s' for filesystem `%s'", dirname, name); } } dir->fullname = estrdup(name); dir->name = estrdup(dirname); free(origname); STAILQ_INSERT_TAIL(lp, dir, next); zap_add_uint64(parent->childzap, dir->name, dir->dirid); dir->parent = parent; dir->phys->dd_parent_obj = parent->dirid; return (dir); } static void dsl_dir_size_add(zfs_dsl_dir_t *dir, uint64_t bytes) { dir->phys->dd_used_bytes += bytes; dir->phys->dd_compressed_bytes += bytes; dir->phys->dd_uncompressed_bytes += bytes; } /* * See dsl_dir_root_finalize(). */ void dsl_dir_root_finalize(zfs_opt_t *zfs, uint64_t bytes) { dsl_dir_size_add(zfs->mosdsldir, bytes); zfs->mosdsldir->phys->dd_used_breakdown[DD_USED_HEAD] += bytes; dsl_dir_size_add(zfs->rootdsldir, bytes); zfs->rootdsldir->phys->dd_used_breakdown[DD_USED_CHILD] += bytes; } /* * Convert dataset properties into entries in the DSL directory's properties * ZAP. */ static void dsl_dir_finalize_props(zfs_dsl_dir_t *dir) { for (nvp_header_t *nvh = NULL; (nvh = nvlist_next_nvpair(dir->propsnv, nvh)) != NULL;) { nv_string_t *nvname; nv_pair_data_t *nvdata; char *name; nvname = (nv_string_t *)(nvh + 1); nvdata = (nv_pair_data_t *)(&nvname->nv_data[0] + NV_ALIGN4(nvname->nv_size)); name = nvstring_get(nvname); switch (nvdata->nv_type) { case DATA_TYPE_UINT64: { uint64_t val; memcpy(&val, &nvdata->nv_data[0], sizeof(uint64_t)); zap_add_uint64(dir->propszap, name, val); break; } case DATA_TYPE_STRING: { nv_string_t *nvstr; char *val; nvstr = (nv_string_t *)&nvdata->nv_data[0]; val = nvstring_get(nvstr); zap_add_string(dir->propszap, name, val); free(val); break; } default: assert(0); } free(name); } } static void dsl_dir_finalize(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, void *arg __unused) { - char key[32]; zfs_dsl_dir_t *cdir; dnode_phys_t *snapnames; zfs_dsl_dataset_t *headds; zfs_objset_t *os; uint64_t bytes, childbytes, snapnamesid; dsl_dir_finalize_props(dir); zap_write(zfs, dir->propszap); zap_write(zfs, dir->childzap); headds = dir->headds; if (headds == NULL) return; os = headds->os; if (os == NULL) return; snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, &snapnamesid); zap_write(zfs, zap_alloc(zfs->mos, snapnames)); dir->phys->dd_head_dataset_obj = headds->dsid; dir->phys->dd_clone_parent_obj = zfs->snapds->dsid; headds->phys->ds_prev_snap_obj = zfs->snapds->dsid; headds->phys->ds_snapnames_zapobj = snapnamesid; objset_root_blkptr_copy(os, &headds->phys->ds_bp); zfs->snapds->phys->ds_num_children++; - snprintf(key, sizeof(key), "%jx", (uintmax_t)headds->dsid); - zap_add_uint64(zfs->cloneszap, key, headds->dsid); + zap_add_uint64_self(zfs->cloneszap, headds->dsid); bytes = objset_space(os); headds->phys->ds_used_bytes = bytes; headds->phys->ds_uncompressed_bytes = bytes; headds->phys->ds_compressed_bytes = bytes; childbytes = 0; STAILQ_FOREACH(cdir, &dir->children, next) { /* * The root 
directory needs a special case: the amount of * space used for the MOS isn't known until everything else is * finalized, so it can't be accounted in the MOS directory's * parent until then, at which point dsl_dir_root_finalize() is * called. */ if (dir == zfs->rootdsldir && cdir == zfs->mosdsldir) continue; childbytes += cdir->phys->dd_used_bytes; } dsl_dir_size_add(dir, bytes + childbytes); dir->phys->dd_flags |= DD_FLAG_USED_BREAKDOWN; dir->phys->dd_used_breakdown[DD_USED_HEAD] = bytes; dir->phys->dd_used_breakdown[DD_USED_CHILD] = childbytes; } void dsl_write(zfs_opt_t *zfs) { zfs_zap_t *snapnameszap; dnode_phys_t *snapnames; uint64_t snapmapid; /* * Perform accounting, starting from the leaves of the DSL directory * tree. Accounting for $MOS is done later, once we've finished * allocating space. */ dsl_dir_foreach_post(zfs, zfs->rootdsldir, dsl_dir_finalize, NULL); snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, &snapmapid); snapnameszap = zap_alloc(zfs->mos, snapnames); zap_add_uint64(snapnameszap, "$ORIGIN", zfs->snapds->dsid); zap_write(zfs, snapnameszap); zfs->origindsldir->phys->dd_head_dataset_obj = zfs->originds->dsid; zfs->originds->phys->ds_prev_snap_obj = zfs->snapds->dsid; zfs->originds->phys->ds_snapnames_zapobj = snapmapid; zfs->snapds->phys->ds_next_snap_obj = zfs->originds->dsid; assert(zfs->snapds->phys->ds_num_children > 0); zfs->snapds->phys->ds_num_children++; zap_write(zfs, zfs->cloneszap); /* XXX-MJ dirs and datasets are leaked */ } void dsl_dir_dataset_write(zfs_opt_t *zfs, zfs_objset_t *os, zfs_dsl_dir_t *dir) { dir->headds->os = os; objset_write(zfs, os); } bool dsl_dir_has_dataset(zfs_dsl_dir_t *dir) { return (dir->headds != NULL); } bool dsl_dir_dataset_has_objset(zfs_dsl_dir_t *dir) { return (dsl_dir_has_dataset(dir) && dir->headds->os != NULL); } static zfs_dsl_dataset_t * dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) { zfs_dsl_dataset_t *ds; dnode_phys_t *dnode; uint64_t deadlistid; ds = ecalloc(1, sizeof(*ds)); dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DATASET, DMU_OT_DSL_DATASET, sizeof(dsl_dataset_phys_t), &ds->dsid); ds->phys = (dsl_dataset_phys_t *)DN_BONUS(dnode); dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR, sizeof(dsl_deadlist_phys_t), &deadlistid); zap_write(zfs, zap_alloc(zfs->mos, dnode)); ds->phys->ds_dir_obj = dir->dirid; ds->phys->ds_deadlist_obj = deadlistid; ds->phys->ds_creation_txg = TXG - 1; if (ds != zfs->snapds) ds->phys->ds_prev_snap_txg = TXG - 1; ds->phys->ds_guid = randomguid(); ds->dir = dir; return (ds); } diff --git a/usr.sbin/makefs/zfs/zap.c b/usr.sbin/makefs/zfs/zap.c index 2437ee3bfd1e..d01f7527adf9 100644 --- a/usr.sbin/makefs/zfs/zap.c +++ b/usr.sbin/makefs/zfs/zap.c @@ -1,549 +1,558 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * * This software was developed by Mark Johnston under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "makefs.h" #include "zfs.h" typedef struct zfs_zap_entry { char *name; /* entry key, private copy */ uint64_t hash; /* key hash */ union { uint8_t *valp; uint16_t *val16p; uint32_t *val32p; uint64_t *val64p; }; /* entry value, an integer array */ uint64_t val64; /* embedded value for a common case */ size_t intsz; /* array element size; 1, 2, 4 or 8 */ size_t intcnt; /* array size */ STAILQ_ENTRY(zfs_zap_entry) next; } zfs_zap_entry_t; struct zfs_zap { STAILQ_HEAD(, zfs_zap_entry) kvps; uint64_t hashsalt; /* key hash input */ unsigned long kvpcnt; /* number of key-value pairs */ unsigned long chunks; /* count of chunks needed for fat ZAP */ bool micro; /* can this be a micro ZAP? */ dnode_phys_t *dnode; /* backpointer */ zfs_objset_t *os; /* backpointer */ }; static uint16_t zap_entry_chunks(zfs_zap_entry_t *ent) { return (1 + howmany(strlen(ent->name) + 1, ZAP_LEAF_ARRAY_BYTES) + howmany(ent->intsz * ent->intcnt, ZAP_LEAF_ARRAY_BYTES)); } static uint64_t zap_hash(uint64_t salt, const char *name) { static uint64_t crc64_table[256]; const uint64_t crc64_poly = 0xC96C5795D7870F42UL; const uint8_t *cp; uint64_t crc; uint8_t c; assert(salt != 0); if (crc64_table[128] == 0) { for (int i = 0; i < 256; i++) { uint64_t *t; t = crc64_table + i; *t = i; for (int j = 8; j > 0; j--) *t = (*t >> 1) ^ (-(*t & 1) & crc64_poly); } } assert(crc64_table[128] == crc64_poly); for (cp = (const uint8_t *)name, crc = salt; (c = *cp) != '\0'; cp++) crc = (crc >> 8) ^ crc64_table[(crc ^ c) & 0xFF]; /* * Only use 28 bits, since we need 4 bits in the cookie for the * collision differentiator. We MUST use the high bits, since * those are the ones that we first pay attention to when * choosing the bucket. */ crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); return (crc); } zfs_zap_t * zap_alloc(zfs_objset_t *os, dnode_phys_t *dnode) { zfs_zap_t *zap; zap = ecalloc(1, sizeof(*zap)); STAILQ_INIT(&zap->kvps); zap->hashsalt = ((uint64_t)random() << 32) | random(); zap->micro = true; zap->kvpcnt = 0; zap->chunks = 0; zap->dnode = dnode; zap->os = os; return (zap); } void zap_add(zfs_zap_t *zap, const char *name, size_t intsz, size_t intcnt, const uint8_t *val) { zfs_zap_entry_t *ent; assert(intsz == 1 || intsz == 2 || intsz == 4 || intsz == 8); assert(strlen(name) + 1 <= ZAP_MAXNAMELEN); assert(intcnt <= ZAP_MAXVALUELEN && intcnt * intsz <= ZAP_MAXVALUELEN); ent = ecalloc(1, sizeof(*ent)); ent->name = estrdup(name); ent->hash = zap_hash(zap->hashsalt, ent->name); ent->intsz = intsz; ent->intcnt = intcnt; if (intsz == sizeof(uint64_t) && intcnt == 1) { /* * Micro-optimization to elide a memory allocation in that most * common case where this is a directory entry. 
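	 * (A directory entry maps a name to a single 64-bit object number,
	 * i.e., intsz == sizeof(uint64_t) and intcnt == 1, so the value fits
	 * in the embedded ent->val64 field.)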
*/ ent->val64p = &ent->val64; } else { ent->valp = ecalloc(intcnt, intsz); } memcpy(ent->valp, val, intcnt * intsz); zap->kvpcnt++; zap->chunks += zap_entry_chunks(ent); STAILQ_INSERT_TAIL(&zap->kvps, ent, next); if (zap->micro && (intcnt != 1 || intsz != sizeof(uint64_t) || strlen(name) + 1 > MZAP_NAME_LEN || zap->kvpcnt > MZAP_ENT_MAX)) zap->micro = false; } void zap_add_uint64(zfs_zap_t *zap, const char *name, uint64_t val) { zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val); } +void +zap_add_uint64_self(zfs_zap_t *zap, uint64_t val) +{ + char name[32]; + + snprintf(name, sizeof(name), "%jx", (uintmax_t)val); + zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val); +} + void zap_add_string(zfs_zap_t *zap, const char *name, const char *val) { zap_add(zap, name, 1, strlen(val) + 1, val); } bool zap_entry_exists(zfs_zap_t *zap, const char *name) { zfs_zap_entry_t *ent; STAILQ_FOREACH(ent, &zap->kvps, next) { if (strcmp(ent->name, name) == 0) return (true); } return (false); } static void zap_micro_write(zfs_opt_t *zfs, zfs_zap_t *zap) { dnode_phys_t *dnode; zfs_zap_entry_t *ent; mzap_phys_t *mzap; mzap_ent_phys_t *ment; off_t bytes, loc; memset(zfs->filebuf, 0, sizeof(zfs->filebuf)); mzap = (mzap_phys_t *)&zfs->filebuf[0]; mzap->mz_block_type = ZBT_MICRO; mzap->mz_salt = zap->hashsalt; mzap->mz_normflags = 0; bytes = sizeof(*mzap) + (zap->kvpcnt - 1) * sizeof(*ment); assert(bytes <= (off_t)MZAP_MAX_BLKSZ); ment = &mzap->mz_chunk[0]; STAILQ_FOREACH(ent, &zap->kvps, next) { memcpy(&ment->mze_value, ent->valp, ent->intsz * ent->intcnt); ment->mze_cd = 0; /* XXX-MJ */ strlcpy(ment->mze_name, ent->name, sizeof(ment->mze_name)); ment++; } loc = objset_space_alloc(zfs, zap->os, &bytes); dnode = zap->dnode; dnode->dn_maxblkid = 0; dnode->dn_datablkszsec = bytes >> MINBLOCKSHIFT; vdev_pwrite_dnode_data(zfs, dnode, zfs->filebuf, bytes, loc); } /* * Write some data to the fat ZAP leaf chunk starting at index "li". * * Note that individual integers in the value may be split among consecutive * leaves. */ static void zap_fat_write_array_chunk(zap_leaf_t *l, uint16_t li, size_t sz, const uint8_t *val) { struct zap_leaf_array *la; assert(sz <= ZAP_MAXVALUELEN); for (uint16_t n, resid = sz; resid > 0; resid -= n, val += n, li++) { n = MIN(resid, ZAP_LEAF_ARRAY_BYTES); la = &ZAP_LEAF_CHUNK(l, li).l_array; assert(la->la_type == ZAP_CHUNK_FREE); la->la_type = ZAP_CHUNK_ARRAY; memcpy(la->la_array, val, n); la->la_next = li + 1; } la->la_next = 0xffff; } /* * Find the shortest hash prefix length which lets us distribute keys without * overflowing a leaf block. This is not (space) optimal, but is simple, and * directories large enough to overflow a single 128KB leaf block are uncommon. */ static unsigned int zap_fat_write_prefixlen(zfs_zap_t *zap, zap_leaf_t *l) { zfs_zap_entry_t *ent; unsigned int prefixlen; if (zap->chunks <= ZAP_LEAF_NUMCHUNKS(l)) { /* * All chunks will fit in a single leaf block. */ return (0); } for (prefixlen = 1; prefixlen < (unsigned int)l->l_bs; prefixlen++) { uint32_t *leafchunks; leafchunks = ecalloc(1u << prefixlen, sizeof(*leafchunks)); STAILQ_FOREACH(ent, &zap->kvps, next) { uint64_t li; uint16_t chunks; li = ZAP_HASH_IDX(ent->hash, prefixlen); chunks = zap_entry_chunks(ent); if (ZAP_LEAF_NUMCHUNKS(l) - leafchunks[li] < chunks) { /* * Not enough space, grow the prefix and retry. */ break; } leafchunks[li] += chunks; } free(leafchunks); if (ent == NULL) { /* * Everything fits, we're done. */ break; } } /* * If this fails, then we need to expand the pointer table. 
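	 * (That is, move the pointer table out of the embedded half of the ZAP
	 * header block into dedicated blocks, making zt_numblks nonzero.)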
For now * this situation is unhandled since it is hard to trigger. */ assert(prefixlen < (unsigned int)l->l_bs); return (prefixlen); } /* * Initialize a fat ZAP leaf block. */ static void zap_fat_write_leaf_init(zap_leaf_t *l, uint64_t prefix, int prefixlen) { zap_leaf_phys_t *leaf; leaf = l->l_phys; leaf->l_hdr.lh_block_type = ZBT_LEAF; leaf->l_hdr.lh_magic = ZAP_LEAF_MAGIC; leaf->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); leaf->l_hdr.lh_prefix = prefix; leaf->l_hdr.lh_prefix_len = prefixlen; /* Initialize the leaf hash table. */ assert(leaf->l_hdr.lh_nfree < 0xffff); memset(leaf->l_hash, 0xff, ZAP_LEAF_HASH_NUMENTRIES(l) * sizeof(*leaf->l_hash)); /* Initialize the leaf chunks. */ for (uint16_t i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { struct zap_leaf_free *lf; lf = &ZAP_LEAF_CHUNK(l, i).l_free; lf->lf_type = ZAP_CHUNK_FREE; if (i + 1 == ZAP_LEAF_NUMCHUNKS(l)) lf->lf_next = 0xffff; else lf->lf_next = i + 1; } } static void zap_fat_write(zfs_opt_t *zfs, zfs_zap_t *zap) { struct dnode_cursor *c; zap_leaf_t l; zap_phys_t *zaphdr; struct zap_table_phys *zt; zfs_zap_entry_t *ent; dnode_phys_t *dnode; uint8_t *leafblks; uint64_t lblkcnt, *ptrhasht; off_t loc, blksz; size_t blkshift; unsigned int prefixlen; int ptrcnt; /* * For simplicity, always use the largest block size. This should be ok * since most directories will be micro ZAPs, but it's space inefficient * for small ZAPs and might need to be revisited. */ blkshift = MAXBLOCKSHIFT; blksz = (off_t)1 << blkshift; /* * Embedded pointer tables give up to 8192 entries. This ought to be * enough for anything except massive directories. */ ptrcnt = (blksz / 2) / sizeof(uint64_t); memset(zfs->filebuf, 0, sizeof(zfs->filebuf)); zaphdr = (zap_phys_t *)&zfs->filebuf[0]; zaphdr->zap_block_type = ZBT_HEADER; zaphdr->zap_magic = ZAP_MAGIC; zaphdr->zap_num_entries = zap->kvpcnt; zaphdr->zap_salt = zap->hashsalt; l.l_bs = blkshift; l.l_phys = NULL; zt = &zaphdr->zap_ptrtbl; zt->zt_blk = 0; zt->zt_numblks = 0; zt->zt_shift = flsll(ptrcnt) - 1; zt->zt_nextblk = 0; zt->zt_blks_copied = 0; /* * How many leaf blocks do we need? Initialize them and update the * header. */ prefixlen = zap_fat_write_prefixlen(zap, &l); lblkcnt = (uint64_t)1 << prefixlen; leafblks = ecalloc(lblkcnt, blksz); for (unsigned int li = 0; li < lblkcnt; li++) { l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz); zap_fat_write_leaf_init(&l, li, prefixlen); } zaphdr->zap_num_leafs = lblkcnt; zaphdr->zap_freeblk = lblkcnt + 1; /* * For each entry, figure out which leaf block it belongs to based on * the upper bits of its hash, allocate chunks from that leaf, and fill * them out. */ ptrhasht = (uint64_t *)(&zfs->filebuf[0] + blksz / 2); STAILQ_FOREACH(ent, &zap->kvps, next) { struct zap_leaf_entry *le; uint16_t *lptr; uint64_t hi, li; uint16_t namelen, nchunks, nnamechunks, nvalchunks; hi = ZAP_HASH_IDX(ent->hash, zt->zt_shift); li = ZAP_HASH_IDX(ent->hash, prefixlen); assert(ptrhasht[hi] == 0 || ptrhasht[hi] == li + 1); ptrhasht[hi] = li + 1; l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz); namelen = strlen(ent->name) + 1; /* * How many leaf chunks do we need for this entry? */ nnamechunks = howmany(namelen, ZAP_LEAF_ARRAY_BYTES); nvalchunks = howmany(ent->intcnt, ZAP_LEAF_ARRAY_BYTES / ent->intsz); nchunks = 1 + nnamechunks + nvalchunks; /* * Allocate a run of free leaf chunks for this entry, * potentially extending a hash chain. 
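		 * Entries whose hashes fall in the same leaf hash bucket are
		 * chained through le_next; the walk below also bumps le_cd
		 * (the collision differentiator) on each existing entry in
		 * the chain.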
*/ assert(l.l_phys->l_hdr.lh_nfree >= nchunks); l.l_phys->l_hdr.lh_nfree -= nchunks; l.l_phys->l_hdr.lh_nentries++; lptr = ZAP_LEAF_HASH_ENTPTR(&l, ent->hash); while (*lptr != 0xffff) { assert(*lptr < ZAP_LEAF_NUMCHUNKS(&l)); le = ZAP_LEAF_ENTRY(&l, *lptr); assert(le->le_type == ZAP_CHUNK_ENTRY); le->le_cd++; lptr = &le->le_next; } *lptr = l.l_phys->l_hdr.lh_freelist; l.l_phys->l_hdr.lh_freelist += nchunks; assert(l.l_phys->l_hdr.lh_freelist <= ZAP_LEAF_NUMCHUNKS(&l)); if (l.l_phys->l_hdr.lh_freelist == ZAP_LEAF_NUMCHUNKS(&l)) l.l_phys->l_hdr.lh_freelist = 0xffff; /* * Integer values must be stored in big-endian format. */ switch (ent->intsz) { case 1: break; case 2: for (uint16_t *v = ent->val16p; v - ent->val16p < (ptrdiff_t)ent->intcnt; v++) *v = htobe16(*v); break; case 4: for (uint32_t *v = ent->val32p; v - ent->val32p < (ptrdiff_t)ent->intcnt; v++) *v = htobe32(*v); break; case 8: for (uint64_t *v = ent->val64p; v - ent->val64p < (ptrdiff_t)ent->intcnt; v++) *v = htobe64(*v); break; default: assert(0); } /* * Finally, write out the leaf chunks for this entry. */ le = ZAP_LEAF_ENTRY(&l, *lptr); assert(le->le_type == ZAP_CHUNK_FREE); le->le_type = ZAP_CHUNK_ENTRY; le->le_next = 0xffff; le->le_name_chunk = *lptr + 1; le->le_name_numints = namelen; le->le_value_chunk = *lptr + 1 + nnamechunks; le->le_value_intlen = ent->intsz; le->le_value_numints = ent->intcnt; le->le_hash = ent->hash; zap_fat_write_array_chunk(&l, *lptr + 1, namelen, ent->name); zap_fat_write_array_chunk(&l, *lptr + 1 + nnamechunks, ent->intcnt * ent->intsz, ent->valp); } /* * Initialize unused slots of the pointer table. */ for (int i = 0; i < ptrcnt; i++) if (ptrhasht[i] == 0) ptrhasht[i] = (i >> (zt->zt_shift - prefixlen)) + 1; /* * Write the whole thing to disk. */ dnode = zap->dnode; dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT; dnode->dn_maxblkid = lblkcnt + 1; c = dnode_cursor_init(zfs, zap->os, zap->dnode, (lblkcnt + 1) * blksz, blksz); loc = objset_space_alloc(zfs, zap->os, &blksz); vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, zfs->filebuf, blksz, loc, dnode_cursor_next(zfs, c, 0)); for (uint64_t i = 0; i < lblkcnt; i++) { loc = objset_space_alloc(zfs, zap->os, &blksz); vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, leafblks + i * blksz, blksz, loc, dnode_cursor_next(zfs, c, (i + 1) * blksz)); } dnode_cursor_finish(zfs, c); free(leafblks); } void zap_write(zfs_opt_t *zfs, zfs_zap_t *zap) { zfs_zap_entry_t *ent; if (zap->micro) { zap_micro_write(zfs, zap); } else { assert(!STAILQ_EMPTY(&zap->kvps)); assert(zap->kvpcnt > 0); zap_fat_write(zfs, zap); } while ((ent = STAILQ_FIRST(&zap->kvps)) != NULL) { STAILQ_REMOVE_HEAD(&zap->kvps, next); if (ent->val64p != &ent->val64) free(ent->valp); free(ent->name); free(ent); } free(zap); } diff --git a/usr.sbin/makefs/zfs/zfs.h b/usr.sbin/makefs/zfs/zfs.h index 9af090b14912..ff94c270bbf6 100644 --- a/usr.sbin/makefs/zfs/zfs.h +++ b/usr.sbin/makefs/zfs/zfs.h @@ -1,173 +1,174 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * * This software was developed by Mark Johnston under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _MAKEFS_ZFS_H_ #define _MAKEFS_ZFS_H_ #include #include #include #include #include #include #include "makefs.h" #include "zfs/nvlist.h" #define ASSERT assert #include "zfs/zfsimpl.h" #define MAXBLOCKSHIFT 17 /* 128KB */ #define MAXBLOCKSIZE ((off_t)(1 << MAXBLOCKSHIFT)) _Static_assert(MAXBLOCKSIZE == SPA_OLDMAXBLOCKSIZE, ""); #define MINBLOCKSHIFT 9 /* 512B */ #define MINBLOCKSIZE ((off_t)(1 << MINBLOCKSHIFT)) _Static_assert(MINBLOCKSIZE == SPA_MINBLOCKSIZE, ""); #define MINDEVSIZE ((off_t)SPA_MINDEVSIZE) /* All data was written in this transaction group. */ #define TXG 4 typedef struct zfs_dsl_dataset zfs_dsl_dataset_t; typedef struct zfs_dsl_dir zfs_dsl_dir_t; typedef struct zfs_objset zfs_objset_t; typedef struct zfs_zap zfs_zap_t; struct dataset_desc { char *params; STAILQ_ENTRY(dataset_desc) next; }; typedef struct { /* * Block buffer, needs to be aligned for various on-disk structures, * ZAPs, etc.. */ char filebuf[MAXBLOCKSIZE] __aligned(alignof(uint64_t)); bool nowarn; /* Pool parameters. */ const char *poolname; char *rootpath; /* implicit mount point prefix */ char *bootfs; /* bootable dataset, pool property */ int ashift; /* vdev block size */ uint64_t mssize; /* metaslab size */ STAILQ_HEAD(, dataset_desc) datasetdescs; /* non-root dataset descrs */ /* Pool state. */ uint64_t poolguid; /* pool and root vdev GUID */ zfs_zap_t *poolprops; /* MOS state. */ zfs_objset_t *mos; /* meta object set */ uint64_t objarrid; /* space map object array */ /* DSL state. */ zfs_dsl_dir_t *rootdsldir; /* root DSL directory */ zfs_dsl_dataset_t *rootds; zfs_dsl_dir_t *origindsldir; /* $ORIGIN */ zfs_dsl_dataset_t *originds; zfs_dsl_dataset_t *snapds; zfs_zap_t *cloneszap; zfs_dsl_dir_t *freedsldir; /* $FREE */ zfs_dsl_dir_t *mosdsldir; /* $MOS */ /* vdev state. 
*/ int fd; /* vdev disk fd */ uint64_t vdevguid; /* disk vdev GUID */ off_t vdevsize; /* vdev size, including labels */ off_t asize; /* vdev size, excluding labels */ bitstr_t *spacemap; /* space allocation tracking */ int spacemapbits; /* one bit per ashift-sized block */ uint64_t msshift; /* log2(metaslab size) */ uint64_t mscount; /* number of metaslabs for this vdev */ } zfs_opt_t; /* dsl.c */ void dsl_init(zfs_opt_t *); const char *dsl_dir_fullname(const zfs_dsl_dir_t *); uint64_t dsl_dir_id(zfs_dsl_dir_t *); uint64_t dsl_dir_dataset_id(zfs_dsl_dir_t *); void dsl_dir_foreach(zfs_opt_t *, zfs_dsl_dir_t *, void (*)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *); int dsl_dir_get_canmount(zfs_dsl_dir_t *, uint64_t *); char *dsl_dir_get_mountpoint(zfs_opt_t *, zfs_dsl_dir_t *); bool dsl_dir_has_dataset(zfs_dsl_dir_t *); bool dsl_dir_dataset_has_objset(zfs_dsl_dir_t *); void dsl_dir_dataset_write(zfs_opt_t *, zfs_objset_t *, zfs_dsl_dir_t *); void dsl_dir_root_finalize(zfs_opt_t *, uint64_t); void dsl_write(zfs_opt_t *); /* fs.c */ void fs_build(zfs_opt_t *, int, fsnode *); /* objset.c */ zfs_objset_t *objset_alloc(zfs_opt_t *zfs, uint64_t type); off_t objset_space_alloc(zfs_opt_t *, zfs_objset_t *, off_t *); dnode_phys_t *objset_dnode_alloc(zfs_objset_t *, uint8_t, uint64_t *); dnode_phys_t *objset_dnode_bonus_alloc(zfs_objset_t *, uint8_t, uint8_t, uint16_t, uint64_t *); dnode_phys_t *objset_dnode_lookup(zfs_objset_t *, uint64_t); void objset_root_blkptr_copy(const zfs_objset_t *, blkptr_t *); uint64_t objset_space(const zfs_objset_t *); void objset_write(zfs_opt_t *zfs, zfs_objset_t *os); /* vdev.c */ void vdev_init(zfs_opt_t *, const char *); off_t vdev_space_alloc(zfs_opt_t *zfs, off_t *lenp); void vdev_pwrite_data(zfs_opt_t *zfs, uint8_t datatype, uint8_t cksumtype, uint8_t level, uint64_t fill, const void *data, off_t sz, off_t loc, blkptr_t *bp); void vdev_pwrite_dnode_indir(zfs_opt_t *zfs, dnode_phys_t *dnode, uint8_t level, uint64_t fill, const void *data, off_t sz, off_t loc, blkptr_t *bp); void vdev_pwrite_dnode_data(zfs_opt_t *zfs, dnode_phys_t *dnode, const void *data, off_t sz, off_t loc); void vdev_label_write(zfs_opt_t *zfs, int ind, const vdev_label_t *labelp); void vdev_spacemap_write(zfs_opt_t *); void vdev_fini(zfs_opt_t *zfs); /* zap.c */ zfs_zap_t *zap_alloc(zfs_objset_t *, dnode_phys_t *); void zap_add(zfs_zap_t *, const char *, size_t, size_t, const uint8_t *); void zap_add_uint64(zfs_zap_t *, const char *, uint64_t); +void zap_add_uint64_self(zfs_zap_t *, uint64_t); void zap_add_string(zfs_zap_t *, const char *, const char *); bool zap_entry_exists(zfs_zap_t *, const char *); void zap_write(zfs_opt_t *, zfs_zap_t *); /* zfs.c */ struct dnode_cursor *dnode_cursor_init(zfs_opt_t *, zfs_objset_t *, dnode_phys_t *, off_t, off_t); blkptr_t *dnode_cursor_next(zfs_opt_t *, struct dnode_cursor *, off_t); void dnode_cursor_finish(zfs_opt_t *, struct dnode_cursor *); uint64_t randomguid(void); #endif /* !_MAKEFS_ZFS_H_ */
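
As a quick illustration, not part of the patch itself: the new zap_add_uint64_self() helper names a ZAP entry after its own value, formatted with "%jx" (lowercase hex, no "0x" prefix), which is how the DSL clones ZAP keys entries by dataset object number; dsl_dir_finalize() previously open-coded the snprintf. A minimal standalone sketch of the resulting key format, using a made-up object number:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Hypothetical dataset object number, for illustration only. */
	uint64_t dsid = 54;
	char name[32];

	/* Same formatting as zap_add_uint64_self(): "%jx" on the value. */
	snprintf(name, sizeof(name), "%jx", (uintmax_t)dsid);
	printf("clones ZAP entry: \"%s\" -> %ju\n", name, (uintmax_t)dsid);
	return (0);
}

This prints: clones ZAP entry: "36" -> 54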