diff --git a/usr.sbin/makefs/zfs/dsl.c b/usr.sbin/makefs/zfs/dsl.c index f7abebe2e245..93083f286e81 100644 --- a/usr.sbin/makefs/zfs/dsl.c +++ b/usr.sbin/makefs/zfs/dsl.c @@ -1,614 +1,628 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * * This software was developed by Mark Johnston under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include "makefs.h" #include "zfs.h" typedef struct zfs_dsl_dataset { zfs_objset_t *os; /* referenced objset, may be null */ dsl_dataset_phys_t *phys; /* on-disk representation */ uint64_t dsid; /* DSL dataset dnode */ struct zfs_dsl_dir *dir; /* containing parent */ } zfs_dsl_dataset_t; typedef STAILQ_HEAD(zfs_dsl_dir_list, zfs_dsl_dir) zfs_dsl_dir_list_t; typedef struct zfs_dsl_dir { char *fullname; /* full dataset name */ char *name; /* basename(fullname) */ dsl_dir_phys_t *phys; /* on-disk representation */ nvlist_t *propsnv; /* properties saved in propszap */ zfs_dsl_dataset_t *headds; /* principal dataset, may be null */ uint64_t dirid; /* DSL directory dnode */ zfs_zap_t *propszap; /* dataset properties */ zfs_zap_t *childzap; /* child directories */ /* DSL directory tree linkage. */ struct zfs_dsl_dir *parent; zfs_dsl_dir_list_t children; STAILQ_ENTRY(zfs_dsl_dir) next; } zfs_dsl_dir_t; static zfs_dsl_dir_t *dsl_dir_alloc(zfs_opt_t *zfs, const char *name); static zfs_dsl_dataset_t *dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir); static int nvlist_find_string(nvlist_t *nvl, const char *key, char **retp) { char *str; int error, len; error = nvlist_find(nvl, key, DATA_TYPE_STRING, NULL, &str, &len); if (error == 0) { *retp = ecalloc(1, len + 1); memcpy(*retp, str, len); } return (error); } static int nvlist_find_uint64(nvlist_t *nvl, const char *key, uint64_t *retp) { return (nvlist_find(nvl, key, DATA_TYPE_UINT64, NULL, retp, NULL)); } /* * Return an allocated string containing the head dataset's mountpoint, * including the root path prefix. * * If the dataset has a mountpoint property, it is returned. Otherwise we have * to follow ZFS' inheritance rules. 
*/
char *
dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir)
{
	zfs_dsl_dir_t *pdir;
	char *mountpoint;

	if (nvlist_find_string(dir->propsnv, "mountpoint", &mountpoint) == 0) {
		if (strcmp(mountpoint, "none") == 0) {
			/*
			 * "none" means there is no mountpoint to report, so
			 * NULL is returned.  nvlist_find_string() handed us a
			 * freshly allocated copy of the value; release it
			 * here instead of leaking it.
			 */
			free(mountpoint);
			return (NULL);
		}
	} else {
		/*
		 * If we don't have a mountpoint, it's inherited from one of our
		 * ancestors.  Walk up the hierarchy until we find it, building
		 * up our mountpoint along the way.  The mountpoint property is
		 * always set for the root dataset.
		 */
		for (pdir = dir->parent, mountpoint = estrdup(dir->name);;
		    pdir = pdir->parent) {
			char *origmountpoint, *tmp;

			origmountpoint = mountpoint;

			if (nvlist_find_string(pdir->propsnv, "mountpoint",
			    &tmp) == 0) {
				/*
				 * Found the ancestor that defines the
				 * property: prepend it, adding a separator
				 * only if it doesn't already end in one.
				 */
				easprintf(&mountpoint, "%s%s%s", tmp,
				    tmp[strlen(tmp) - 1] == '/' ? "" : "/",
				    origmountpoint);
				free(tmp);
				free(origmountpoint);
				break;
			}

			/* No property here; prepend this ancestor's name. */
			easprintf(&mountpoint, "%s/%s", pdir->name,
			    origmountpoint);
			free(origmountpoint);
		}
	}

	/* The result must be absolute and begin with the root path. */
	assert(mountpoint[0] == '/');
	assert(strstr(mountpoint, zfs->rootpath) == mountpoint);

	return (mountpoint);
}

/*
 * Thin wrapper: asks nvlist_find_uint64() for the directory's "canmount"
 * property, passing canmountp as the result pointer, and returns that call's
 * return value unchanged.
 */
int
dsl_dir_get_canmount(zfs_dsl_dir_t *dir, uint64_t *canmountp)
{
	return (nvlist_find_uint64(dir->propsnv, "canmount", canmountp));
}

/*
 * Handle dataset properties that we know about; stash them into an nvlist to be
 * written later to the properties ZAP object.
 *
 * If the set of properties we handle grows too much, we should probably explore
 * using libzfs to manage them.
*/ static void dsl_dir_set_prop(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, const char *key, const char *val) { nvlist_t *nvl; nvl = dir->propsnv; if (val == NULL || val[0] == '\0') errx(1, "missing value for property `%s'", key); if (nvpair_find(nvl, key) != NULL) errx(1, "property `%s' already set", key); if (strcmp(key, "mountpoint") == 0) { if (strcmp(val, "none") != 0) { if (val[0] != '/') errx(1, "mountpoint `%s' is not absolute", val); if (strcmp(val, zfs->rootpath) != 0 && strcmp(zfs->rootpath, "/") != 0 && (strstr(val, zfs->rootpath) != val || val[strlen(zfs->rootpath)] != '/')) { errx(1, "mountpoint `%s' is not prefixed by " "the root path `%s'", val, zfs->rootpath); } } nvlist_add_string(nvl, key, val); } else if (strcmp(key, "atime") == 0 || strcmp(key, "exec") == 0 || strcmp(key, "setuid") == 0) { if (strcmp(val, "on") == 0) nvlist_add_uint64(nvl, key, 1); else if (strcmp(val, "off") == 0) nvlist_add_uint64(nvl, key, 0); else errx(1, "invalid value `%s' for %s", val, key); } else if (strcmp(key, "canmount") == 0) { if (strcmp(val, "noauto") == 0) nvlist_add_uint64(nvl, key, 2); else if (strcmp(val, "on") == 0) nvlist_add_uint64(nvl, key, 1); else if (strcmp(val, "off") == 0) nvlist_add_uint64(nvl, key, 0); else errx(1, "invalid value `%s' for %s", val, key); } else { errx(1, "unknown property `%s'", key); } } static zfs_dsl_dir_t * dsl_metadir_alloc(zfs_opt_t *zfs, const char *name) { zfs_dsl_dir_t *dir; char *path; easprintf(&path, "%s/%s", zfs->poolname, name); dir = dsl_dir_alloc(zfs, path); free(path); return (dir); } static void dsl_origindir_init(zfs_opt_t *zfs) { dnode_phys_t *clones; uint64_t clonesid; zfs->origindsldir = dsl_metadir_alloc(zfs, "$ORIGIN"); zfs->originds = dsl_dataset_alloc(zfs, zfs->origindsldir); zfs->snapds = dsl_dataset_alloc(zfs, zfs->origindsldir); clones = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_CLONES, &clonesid); zfs->cloneszap = zap_alloc(zfs->mos, clones); zfs->origindsldir->phys->dd_clones = clonesid; } void 
dsl_init(zfs_opt_t *zfs) { zfs_dsl_dir_t *dir; struct dataset_desc *d; const char *dspropdelim; dspropdelim = ";"; zfs->rootdsldir = dsl_dir_alloc(zfs, NULL); nvlist_add_uint64(zfs->rootdsldir->propsnv, "compression", ZIO_COMPRESS_OFF); zfs->rootds = dsl_dataset_alloc(zfs, zfs->rootdsldir); zfs->rootdsldir->headds = zfs->rootds; zfs->mosdsldir = dsl_metadir_alloc(zfs, "$MOS"); zfs->freedsldir = dsl_metadir_alloc(zfs, "$FREE"); dsl_origindir_init(zfs); /* * Go through the list of user-specified datasets and create DSL objects * for them. */ STAILQ_FOREACH(d, &zfs->datasetdescs, next) { char *dsname, *next, *params, *param, *nextparam; params = d->params; dsname = strsep(&params, dspropdelim); if (strcmp(dsname, zfs->poolname) == 0) { /* * This is the root dataset; it's already created, so * we're just setting options. */ dir = zfs->rootdsldir; } else { /* * This dataset must be a child of the root dataset. */ if (strstr(dsname, zfs->poolname) != dsname || (next = strchr(dsname, '/')) == NULL || (size_t)(next - dsname) != strlen(zfs->poolname)) { errx(1, "dataset `%s' must be a child of `%s'", dsname, zfs->poolname); } dir = dsl_dir_alloc(zfs, dsname); dir->headds = dsl_dataset_alloc(zfs, dir); } for (nextparam = param = params; nextparam != NULL;) { char *key, *val; param = strsep(&nextparam, dspropdelim); key = val = param; key = strsep(&val, "="); dsl_dir_set_prop(zfs, dir, key, val); } } /* * Set the root dataset's mount point if the user didn't override the * default. 
*/ if (nvpair_find(zfs->rootdsldir->propsnv, "mountpoint") == NULL) { nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint", zfs->rootpath); } } uint64_t dsl_dir_id(zfs_dsl_dir_t *dir) { return (dir->dirid); } uint64_t dsl_dir_dataset_id(zfs_dsl_dir_t *dir) { return (dir->headds->dsid); } static void dsl_dir_foreach_post(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) { zfs_dsl_dir_t *cdsldir; STAILQ_FOREACH(cdsldir, &dsldir->children, next) { dsl_dir_foreach_post(zfs, cdsldir, cb, arg); } cb(zfs, dsldir, arg); } /* * Used when the caller doesn't care about the order one way or another. */ void dsl_dir_foreach(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) { dsl_dir_foreach_post(zfs, dsldir, cb, arg); } const char * dsl_dir_fullname(const zfs_dsl_dir_t *dir) { return (dir->fullname); } /* * Create a DSL directory, which is effectively an entry in the ZFS namespace. * We always create a root DSL directory, whose name is the pool's name, and * several metadata directories. * * Each directory has two ZAP objects, one pointing to child directories, and * one for properties (which are inherited by children unless overridden). * Directories typically reference a DSL dataset, the "head dataset", which * points to an object set. 
*/ static zfs_dsl_dir_t * dsl_dir_alloc(zfs_opt_t *zfs, const char *name) { zfs_dsl_dir_list_t l, *lp; zfs_dsl_dir_t *dir, *parent; dnode_phys_t *dnode; char *dirname, *nextdir, *origname; uint64_t childid, propsid; dir = ecalloc(1, sizeof(*dir)); dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DIR, DMU_OT_DSL_DIR, sizeof(dsl_dir_phys_t), &dir->dirid); dir->phys = (dsl_dir_phys_t *)DN_BONUS(dnode); dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_PROPS, &propsid); dir->propszap = zap_alloc(zfs->mos, dnode); dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DIR_CHILD_MAP, &childid); dir->childzap = zap_alloc(zfs->mos, dnode); dir->propsnv = nvlist_create(NV_UNIQUE_NAME); STAILQ_INIT(&dir->children); dir->phys->dd_child_dir_zapobj = childid; dir->phys->dd_props_zapobj = propsid; if (name == NULL) { /* * This is the root DSL directory. */ dir->name = estrdup(zfs->poolname); dir->fullname = estrdup(zfs->poolname); dir->parent = NULL; dir->phys->dd_parent_obj = 0; assert(zfs->rootdsldir == NULL); zfs->rootdsldir = dir; return (dir); } /* * Insert the new directory into the hierarchy. Currently this must be * done in order, e.g., when creating pool/a/b, pool/a must already * exist. 
*/ STAILQ_INIT(&l); STAILQ_INSERT_HEAD(&l, zfs->rootdsldir, next); origname = dirname = nextdir = estrdup(name); for (lp = &l;; lp = &parent->children) { dirname = strsep(&nextdir, "/"); if (nextdir == NULL) break; STAILQ_FOREACH(parent, lp, next) { if (strcmp(parent->name, dirname) == 0) break; } if (parent == NULL) { errx(1, "no parent at `%s' for filesystem `%s'", dirname, name); } } dir->fullname = estrdup(name); dir->name = estrdup(dirname); free(origname); STAILQ_INSERT_TAIL(lp, dir, next); zap_add_uint64(parent->childzap, dir->name, dir->dirid); dir->parent = parent; dir->phys->dd_parent_obj = parent->dirid; return (dir); } -void +static void dsl_dir_size_add(zfs_dsl_dir_t *dir, uint64_t bytes) { dir->phys->dd_used_bytes += bytes; dir->phys->dd_compressed_bytes += bytes; dir->phys->dd_uncompressed_bytes += bytes; } +/* + * Account for the space consumed by the MOS, which is only known once + * everything else has been allocated: charge it to $MOS as head space and + * to the root directory as child space. See the special case in + * dsl_dir_finalize(). + */ +void +dsl_dir_root_finalize(zfs_opt_t *zfs, uint64_t bytes) +{ + dsl_dir_size_add(zfs->mosdsldir, bytes); + zfs->mosdsldir->phys->dd_used_breakdown[DD_USED_HEAD] += bytes; + + dsl_dir_size_add(zfs->rootdsldir, bytes); + zfs->rootdsldir->phys->dd_used_breakdown[DD_USED_CHILD] += bytes; +} + /* * Convert dataset properties into entries in the DSL directory's properties * ZAP. 
*/ static void dsl_dir_finalize_props(zfs_dsl_dir_t *dir) { for (nvp_header_t *nvh = NULL; (nvh = nvlist_next_nvpair(dir->propsnv, nvh)) != NULL;) { nv_string_t *nvname; nv_pair_data_t *nvdata; char *name; nvname = (nv_string_t *)(nvh + 1); nvdata = (nv_pair_data_t *)(&nvname->nv_data[0] + NV_ALIGN4(nvname->nv_size)); name = nvstring_get(nvname); switch (nvdata->nv_type) { case DATA_TYPE_UINT64: { uint64_t val; memcpy(&val, &nvdata->nv_data[0], sizeof(uint64_t)); zap_add_uint64(dir->propszap, name, val); break; } case DATA_TYPE_STRING: { nv_string_t *nvstr; char *val; nvstr = (nv_string_t *)&nvdata->nv_data[0]; val = nvstring_get(nvstr); zap_add_string(dir->propszap, name, val); free(val); break; } default: assert(0); } free(name); } } static void dsl_dir_finalize(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, void *arg __unused) { char key[32]; zfs_dsl_dir_t *cdir; dnode_phys_t *snapnames; zfs_dsl_dataset_t *headds; zfs_objset_t *os; uint64_t bytes, childbytes, snapnamesid; dsl_dir_finalize_props(dir); zap_write(zfs, dir->propszap); zap_write(zfs, dir->childzap); headds = dir->headds; if (headds == NULL) return; os = headds->os; if (os == NULL) return; snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, &snapnamesid); zap_write(zfs, zap_alloc(zfs->mos, snapnames)); dir->phys->dd_head_dataset_obj = headds->dsid; dir->phys->dd_clone_parent_obj = zfs->snapds->dsid; headds->phys->ds_prev_snap_obj = zfs->snapds->dsid; headds->phys->ds_snapnames_zapobj = snapnamesid; objset_root_blkptr_copy(os, &headds->phys->ds_bp); zfs->snapds->phys->ds_num_children++; snprintf(key, sizeof(key), "%jx", (uintmax_t)headds->dsid); zap_add_uint64(zfs->cloneszap, key, headds->dsid); bytes = objset_space(os); headds->phys->ds_used_bytes = bytes; headds->phys->ds_uncompressed_bytes = bytes; headds->phys->ds_compressed_bytes = bytes; childbytes = 0; STAILQ_FOREACH(cdir, &dir->children, next) { /* * The root directory needs a special case: the amount of * space used for the MOS isn't known 
until everything else is * finalized, so it can't be accounted in the MOS directory's - * parent until then. + * parent until then, at which point dsl_dir_root_finalize() is + * called. */ if (dir == zfs->rootdsldir && cdir == zfs->mosdsldir) continue; childbytes += cdir->phys->dd_used_bytes; } dsl_dir_size_add(dir, bytes + childbytes); dir->phys->dd_flags |= DD_FLAG_USED_BREAKDOWN; dir->phys->dd_used_breakdown[DD_USED_HEAD] = bytes; dir->phys->dd_used_breakdown[DD_USED_CHILD] = childbytes; } void dsl_write(zfs_opt_t *zfs) { zfs_zap_t *snapnameszap; dnode_phys_t *snapnames; uint64_t snapmapid; /* * Perform accounting, starting from the leaves of the DSL directory * tree. Accounting for $MOS is done later, once we've finished * allocating space. */ dsl_dir_foreach_post(zfs, zfs->rootdsldir, dsl_dir_finalize, NULL); snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, &snapmapid); snapnameszap = zap_alloc(zfs->mos, snapnames); zap_add_uint64(snapnameszap, "$ORIGIN", zfs->snapds->dsid); zap_write(zfs, snapnameszap); zfs->origindsldir->phys->dd_head_dataset_obj = zfs->originds->dsid; zfs->originds->phys->ds_prev_snap_obj = zfs->snapds->dsid; zfs->originds->phys->ds_snapnames_zapobj = snapmapid; zfs->snapds->phys->ds_next_snap_obj = zfs->originds->dsid; assert(zfs->snapds->phys->ds_num_children > 0); zfs->snapds->phys->ds_num_children++; zap_write(zfs, zfs->cloneszap); /* XXX-MJ dirs and datasets are leaked */ } void dsl_dir_dataset_write(zfs_opt_t *zfs, zfs_objset_t *os, zfs_dsl_dir_t *dir) { dir->headds->os = os; objset_write(zfs, os); } bool dsl_dir_has_dataset(zfs_dsl_dir_t *dir) { return (dir->headds != NULL); } bool dsl_dir_dataset_has_objset(zfs_dsl_dir_t *dir) { return (dsl_dir_has_dataset(dir) && dir->headds->os != NULL); } static zfs_dsl_dataset_t * dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) { zfs_dsl_dataset_t *ds; dnode_phys_t *dnode; uint64_t deadlistid; ds = ecalloc(1, sizeof(*ds)); dnode = objset_dnode_bonus_alloc(zfs->mos, 
DMU_OT_DSL_DATASET, DMU_OT_DSL_DATASET, sizeof(dsl_dataset_phys_t), &ds->dsid); ds->phys = (dsl_dataset_phys_t *)DN_BONUS(dnode); dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR, sizeof(dsl_deadlist_phys_t), &deadlistid); zap_write(zfs, zap_alloc(zfs->mos, dnode)); ds->phys->ds_dir_obj = dir->dirid; ds->phys->ds_deadlist_obj = deadlistid; ds->phys->ds_creation_txg = TXG - 1; if (ds != zfs->snapds) ds->phys->ds_prev_snap_txg = TXG - 1; ds->phys->ds_guid = randomguid(); ds->dir = dir; return (ds); } diff --git a/usr.sbin/makefs/zfs/objset.c b/usr.sbin/makefs/zfs/objset.c index 3e3dcdeced4b..6be732db477a 100644 --- a/usr.sbin/makefs/zfs/objset.c +++ b/usr.sbin/makefs/zfs/objset.c @@ -1,263 +1,262 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * * This software was developed by Mark Johnston under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include "zfs.h" #define DNODES_PER_CHUNK (MAXBLOCKSIZE / sizeof(dnode_phys_t)) struct objset_dnode_chunk { dnode_phys_t buf[DNODES_PER_CHUNK]; unsigned int nextfree; STAILQ_ENTRY(objset_dnode_chunk) next; }; typedef struct zfs_objset { /* Physical object set. */ objset_phys_t *phys; off_t osloc; off_t osblksz; blkptr_t osbp; /* set in objset_write() */ /* Accounting. */ off_t space; /* bytes allocated to this objset */ /* dnode allocator. */ uint64_t dnodecount; STAILQ_HEAD(, objset_dnode_chunk) dnodechunks; } zfs_objset_t; static void dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype, uint16_t bonuslen) { dnode->dn_indblkshift = MAXBLOCKSHIFT; dnode->dn_type = type; dnode->dn_bonustype = bonustype; dnode->dn_bonuslen = bonuslen; dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4; dnode->dn_nlevels = 1; dnode->dn_nblkptr = 1; dnode->dn_flags = DNODE_FLAG_USED_BYTES; } zfs_objset_t * objset_alloc(zfs_opt_t *zfs, uint64_t type) { struct objset_dnode_chunk *chunk; zfs_objset_t *os; os = ecalloc(1, sizeof(*os)); os->osblksz = sizeof(objset_phys_t); os->osloc = objset_space_alloc(zfs, os, &os->osblksz); /* * Object ID zero is always reserved for the meta dnode, which is * embedded in the objset itself. 
*/ STAILQ_INIT(&os->dnodechunks); chunk = ecalloc(1, sizeof(*chunk)); chunk->nextfree = 1; STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next); os->dnodecount = 1; os->phys = ecalloc(1, os->osblksz); os->phys->os_type = type; dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0); os->phys->os_meta_dnode.dn_datablkszsec = DNODE_BLOCK_SIZE >> MINBLOCKSHIFT; return (os); } /* * Write the dnode array and physical object set to disk. */ static void _objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c, off_t loc) { struct objset_dnode_chunk *chunk, *tmp; unsigned int total; /* * Write out the dnode array, i.e., the meta-dnode. For some reason its * data blocks must be 16KB in size no matter how large the array is. */ total = 0; STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) { unsigned int i; assert(chunk->nextfree > 0); assert(chunk->nextfree <= os->dnodecount); assert(chunk->nextfree <= DNODES_PER_CHUNK); for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) { blkptr_t *bp; uint64_t fill; if (chunk->nextfree - i < DNODES_PER_BLOCK) fill = DNODES_PER_BLOCK - (chunk->nextfree - i); else fill = 0; bp = dnode_cursor_next(zfs, c, (total + i) * sizeof(dnode_phys_t)); vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode, 0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp); loc += DNODE_BLOCK_SIZE; } total += i; free(chunk); } dnode_cursor_finish(zfs, c); STAILQ_INIT(&os->dnodechunks); /* * Write the object set itself. The saved block pointer will be copied * into the referencing DSL dataset or the uberblocks. 
*/ vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0, os->dnodecount - 1, os->phys, os->osblksz, os->osloc, &os->osbp); } void objset_write(zfs_opt_t *zfs, zfs_objset_t *os) { struct dnode_cursor *c; off_t dnodeloc, dnodesz; uint64_t dnodecount; /* * There is a chicken-and-egg problem here when writing the MOS: we * cannot write space maps before we're finished allocating space from * the vdev, and we can't write the MOS without having allocated space * for indirect dnode blocks. Thus, rather than lazily allocating * indirect blocks for the meta-dnode (which would be simpler), they are * allocated up-front and before writing space maps. */ dnodecount = os->dnodecount; if (os == zfs->mos) dnodecount += zfs->mscount; dnodesz = dnodecount * sizeof(dnode_phys_t); c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz, DNODE_BLOCK_SIZE); dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE); dnodeloc = objset_space_alloc(zfs, os, &dnodesz); if (os == zfs->mos) { vdev_spacemap_write(zfs); /* * We've finished allocating space, account for it in $MOS and * in the parent directory. 
*/ - dsl_dir_size_add(zfs->mosdsldir, os->space); - dsl_dir_size_add(zfs->rootdsldir, os->space); + dsl_dir_root_finalize(zfs, os->space); } _objset_write(zfs, os, c, dnodeloc); } dnode_phys_t * objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype, uint16_t bonuslen, uint64_t *idp) { struct objset_dnode_chunk *chunk; dnode_phys_t *dnode; assert(bonuslen <= DN_OLD_MAX_BONUSLEN); assert(!STAILQ_EMPTY(&os->dnodechunks)); chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next); if (chunk->nextfree == DNODES_PER_CHUNK) { chunk = ecalloc(1, sizeof(*chunk)); STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next); } *idp = os->dnodecount++; dnode = &chunk->buf[chunk->nextfree++]; dnode_init(dnode, type, bonustype, bonuslen); dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT; return (dnode); } dnode_phys_t * objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp) { return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp)); } /* * Look up a physical dnode by ID. This is not used often so a linear search is * fine. 
*/ dnode_phys_t * objset_dnode_lookup(zfs_objset_t *os, uint64_t id) { struct objset_dnode_chunk *chunk; assert(id > 0); assert(id < os->dnodecount); STAILQ_FOREACH(chunk, &os->dnodechunks, next) { if (id < DNODES_PER_CHUNK) return (&chunk->buf[id]); id -= DNODES_PER_CHUNK; } assert(0); return (NULL); } off_t objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp) { off_t loc; loc = vdev_space_alloc(zfs, lenp); os->space += *lenp; return (loc); } uint64_t objset_space(const zfs_objset_t *os) { return (os->space); } void objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp) { memcpy(bp, &os->osbp, sizeof(blkptr_t)); } diff --git a/usr.sbin/makefs/zfs/zfs.h b/usr.sbin/makefs/zfs/zfs.h index 193ca1248d89..9af090b14912 100644 --- a/usr.sbin/makefs/zfs/zfs.h +++ b/usr.sbin/makefs/zfs/zfs.h @@ -1,173 +1,173 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * * This software was developed by Mark Johnston under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _MAKEFS_ZFS_H_ #define _MAKEFS_ZFS_H_ #include #include #include #include #include #include #include "makefs.h" #include "zfs/nvlist.h" #define ASSERT assert #include "zfs/zfsimpl.h" #define MAXBLOCKSHIFT 17 /* 128KB */ #define MAXBLOCKSIZE ((off_t)(1 << MAXBLOCKSHIFT)) _Static_assert(MAXBLOCKSIZE == SPA_OLDMAXBLOCKSIZE, ""); #define MINBLOCKSHIFT 9 /* 512B */ #define MINBLOCKSIZE ((off_t)(1 << MINBLOCKSHIFT)) _Static_assert(MINBLOCKSIZE == SPA_MINBLOCKSIZE, ""); #define MINDEVSIZE ((off_t)SPA_MINDEVSIZE) /* All data was written in this transaction group. */ #define TXG 4 typedef struct zfs_dsl_dataset zfs_dsl_dataset_t; typedef struct zfs_dsl_dir zfs_dsl_dir_t; typedef struct zfs_objset zfs_objset_t; typedef struct zfs_zap zfs_zap_t; struct dataset_desc { char *params; STAILQ_ENTRY(dataset_desc) next; }; typedef struct { /* * Block buffer, needs to be aligned for various on-disk structures, * ZAPs, etc.. */ char filebuf[MAXBLOCKSIZE] __aligned(alignof(uint64_t)); bool nowarn; /* Pool parameters. */ const char *poolname; char *rootpath; /* implicit mount point prefix */ char *bootfs; /* bootable dataset, pool property */ int ashift; /* vdev block size */ uint64_t mssize; /* metaslab size */ STAILQ_HEAD(, dataset_desc) datasetdescs; /* non-root dataset descrs */ /* Pool state. */ uint64_t poolguid; /* pool and root vdev GUID */ zfs_zap_t *poolprops; /* MOS state. 
*/ zfs_objset_t *mos; /* meta object set */ uint64_t objarrid; /* space map object array */ /* DSL state. */ zfs_dsl_dir_t *rootdsldir; /* root DSL directory */ zfs_dsl_dataset_t *rootds; zfs_dsl_dir_t *origindsldir; /* $ORIGIN */ zfs_dsl_dataset_t *originds; zfs_dsl_dataset_t *snapds; zfs_zap_t *cloneszap; zfs_dsl_dir_t *freedsldir; /* $FREE */ zfs_dsl_dir_t *mosdsldir; /* $MOS */ /* vdev state. */ int fd; /* vdev disk fd */ uint64_t vdevguid; /* disk vdev GUID */ off_t vdevsize; /* vdev size, including labels */ off_t asize; /* vdev size, excluding labels */ bitstr_t *spacemap; /* space allocation tracking */ int spacemapbits; /* one bit per ashift-sized block */ uint64_t msshift; /* log2(metaslab size) */ uint64_t mscount; /* number of metaslabs for this vdev */ } zfs_opt_t; /* dsl.c */ void dsl_init(zfs_opt_t *); const char *dsl_dir_fullname(const zfs_dsl_dir_t *); uint64_t dsl_dir_id(zfs_dsl_dir_t *); uint64_t dsl_dir_dataset_id(zfs_dsl_dir_t *); void dsl_dir_foreach(zfs_opt_t *, zfs_dsl_dir_t *, void (*)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *); int dsl_dir_get_canmount(zfs_dsl_dir_t *, uint64_t *); char *dsl_dir_get_mountpoint(zfs_opt_t *, zfs_dsl_dir_t *); bool dsl_dir_has_dataset(zfs_dsl_dir_t *); bool dsl_dir_dataset_has_objset(zfs_dsl_dir_t *); void dsl_dir_dataset_write(zfs_opt_t *, zfs_objset_t *, zfs_dsl_dir_t *); -void dsl_dir_size_add(zfs_dsl_dir_t *, uint64_t); +void dsl_dir_root_finalize(zfs_opt_t *, uint64_t); void dsl_write(zfs_opt_t *); /* fs.c */ void fs_build(zfs_opt_t *, int, fsnode *); /* objset.c */ zfs_objset_t *objset_alloc(zfs_opt_t *zfs, uint64_t type); off_t objset_space_alloc(zfs_opt_t *, zfs_objset_t *, off_t *); dnode_phys_t *objset_dnode_alloc(zfs_objset_t *, uint8_t, uint64_t *); dnode_phys_t *objset_dnode_bonus_alloc(zfs_objset_t *, uint8_t, uint8_t, uint16_t, uint64_t *); dnode_phys_t *objset_dnode_lookup(zfs_objset_t *, uint64_t); void objset_root_blkptr_copy(const zfs_objset_t *, blkptr_t *); uint64_t objset_space(const 
zfs_objset_t *); void objset_write(zfs_opt_t *zfs, zfs_objset_t *os); /* vdev.c */ void vdev_init(zfs_opt_t *, const char *); off_t vdev_space_alloc(zfs_opt_t *zfs, off_t *lenp); void vdev_pwrite_data(zfs_opt_t *zfs, uint8_t datatype, uint8_t cksumtype, uint8_t level, uint64_t fill, const void *data, off_t sz, off_t loc, blkptr_t *bp); void vdev_pwrite_dnode_indir(zfs_opt_t *zfs, dnode_phys_t *dnode, uint8_t level, uint64_t fill, const void *data, off_t sz, off_t loc, blkptr_t *bp); void vdev_pwrite_dnode_data(zfs_opt_t *zfs, dnode_phys_t *dnode, const void *data, off_t sz, off_t loc); void vdev_label_write(zfs_opt_t *zfs, int ind, const vdev_label_t *labelp); void vdev_spacemap_write(zfs_opt_t *); void vdev_fini(zfs_opt_t *zfs); /* zap.c */ zfs_zap_t *zap_alloc(zfs_objset_t *, dnode_phys_t *); void zap_add(zfs_zap_t *, const char *, size_t, size_t, const uint8_t *); void zap_add_uint64(zfs_zap_t *, const char *, uint64_t); void zap_add_string(zfs_zap_t *, const char *, const char *); bool zap_entry_exists(zfs_zap_t *, const char *); void zap_write(zfs_opt_t *, zfs_zap_t *); /* zfs.c */ struct dnode_cursor *dnode_cursor_init(zfs_opt_t *, zfs_objset_t *, dnode_phys_t *, off_t, off_t); blkptr_t *dnode_cursor_next(zfs_opt_t *, struct dnode_cursor *, off_t); void dnode_cursor_finish(zfs_opt_t *, struct dnode_cursor *); uint64_t randomguid(void); #endif /* !_MAKEFS_ZFS_H_ */